Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (multiple, post-index, 16B)

Test 1: uops

Code:

  ld2 { v0.16b, v1.16b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.004

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.004

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6400529507237601003000006004692288510017143500410002004200010002000200050001000023828002285529128293703105000200020003000400029274292751161001100010000200004200000002000404000133659537697631410282040532383816736362851810001639913171145892000200010002938529326293072927229333
64004292412350010030000073004715288970017175500410002004200010002000200050001000023810092285729106292643105000200020003000400029181293241161001100010000200004200000032002404000130769398690831570392049532173816538362865210001625512972144992000200010002940529297293422931529313
64004293202360030010000040045782894400170245004100020042000100020002000500010000237900422858293292940231050002000200030004000292352919811610011000100002000062000000020000060001325892036910319213620424329138171239332866610001599513289143662000200010002928329298294102938729306
64004292422350010020000040047432887100171925004100020042000100020002000500010000238600022848292142932531050002000200030004000292082935411610011000100002000042000000020000040001305793466935313414020388330838161036352855210001605413045143252000200010002935829413294142927929300
64004293082360030010000070045832889600172105004100020042000100020002000500010000238620322857292602934131050002000200030004000293412917311610011000100002000042000000020004000001331794896994320704320377330738101035282863610001590113350144282000200010002941429431293772939629408
64004292782360030010000000047012882900171915004100020042000100020002000500010000238540022867290852929031050002000200030004000292552923511610011000100002000042000000020004040001315694706967312304220337336338171037322844510001623113263141532000200010002924129329292272931829381
6400429386235002000000000004594289000017280500410002004200010002000200050001009823850002281429142292933105000200020003000400029273291281161001100010000200004200000232002600000132249460697331361312024633383816637332864310001614513219143952000200010002930329490295252930029329
64004294352350010010100000047072890200172015000100020042000100120002000500010000238560122889292022936831050002000200030004000291212930211610011000100012000042000000320004040001316892696968314613320288332938121129282862210001625112793142962000200010002931329372293442936229403
64004293682350020000000000047102893600171715004100020002000100020002000500010000238780422856290472942331050002000200030004000293042924411610011000100002000042002000020004040001332695526955312902920309335138121531332862310001615913066144372000200010002935229435294242925229285
6400429412235012102100006004667288790017092500610002004200010002000200050001008223851012285629043293833105000200020003000400029340291451161001100010000200000200000002000400000131299478691631650372036633293816633292871610001608613176144892000200010002928329336294442936829265

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.16b, v1.16b }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802051400531085000000001400011400321273631296862510010050100300002000040100300002000016078534669192412182337014002301400511400511295053129936901003020020000300006020030000500001400351400351150201100991004010010000100000100200000220000000620000200032106163613978750000060200002000050100140048140055140054140114140049
80204140047108500000000620000140081128671129692251001035010030007200004010030000200001607664066922121218124201400270140047140047129489312993690100302002000030000602003009350000140051140047115020110099100401001000010000010020000022000000032000022003210624661397915000010610200002000050100140052140052140063140091140052
8020414014310850000000028800114003312869812968625100103501003000320000401003009020000160785346692068121823370140011014005114005112950631299349010030200200003000060200300005000014014414003511502011009910040100100001000001002000400200000003200002000321061666139775500000610200002000050100140138140052140048140115140048
802041400511086000000001400011400321287271296862510010350100300032000040100300002000016078534669378812182337114002701400491400541294893129992901003020020000300006020030000500001400511400512150201100991004010010000100000100200040020000000020002220032107166313979150000101210200002000050100140054140142140105140074140052
80204140051108500001000200011400361273261296882510010350100300032000040100300002000016078534669350012182337014002301400511400511295053129935901003020020000300006020030000500001400511400351150201100991004010010000100000100200000220000000020000220032103166613979150011101010200002000050100140049140036140200140059140036
8020414003510850000100014000114012312730112973877100121501323000320000401003000020000160766406691876121825090140011014005114005112950531299309010030200200003000060200300005000014005114004711502011009910040100100001000001002000002200000103200002200321061663139865500001066200002000050100140054140036140106140049140150
80204140051108500000100140001140039128671129769251001035011230003200004010030000200001607853466920681218233701400230140035140051129540312993490100302002000030093602003000050000140054140051115020110099100401001000010000010020000022000002045200002200321061667139787500000010200002000050100140048140052140048140117140050
80204140051108500000000140000140020128671129689251001215010030003200004010030000200001607664066912901218233701400270140047140131129507312993090100302002012830000602003000050000140051140047115020110099100401001000010000010020000022000000002000022003210616361397915000061010200002000050100140048140399140048140077140052
802041400511085000011102000114002012870812968651100103501003000020000401003000020000160786506691290121823370140027014013914005212949131299409010030200200003000060200300005000014005114004711502011009910040100100001000001002000002200040000200002200321031636139775500001060200002000050100140052140052140048140134140036
8020414003510860001001013400011403191255481298151261001195013430011200064046430180201501609178467141721228012901405840140342140224129574241300909088230295201863018660572301865015514032314013641502011009910040100100001000001002005022200500104751052000422203232103241013999950010101010200002000050100140036140036140125140074140048

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0057

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800251400531086100100004002140042127311129692251000165001030006200004001030000200001607860066923561218340701400330140057140057129597313002790010300202000030000600203000050000140057140053115002110910400101000010000110200022220003002200002222003140023216262913980950000666200002000050010140095140054140058140058140054
800241400531085110000002200214004412730612969325100016500103001020000400103000020000160786006692356121830510140033014005514005712960231300309001030020200003000060206300005000014005714005311500211091040010100001000001020002222000200220000222200314002102413291397975000010610200002000050010140058140058140058140059140058
800241400581085110000001600114004212730612969225100016500103000620000400103000020000160786006695116121830510140038014005714005812960131300309001030020200003000060020300005000014005614005311500211091040010100001000001020002522000200520000222210316002261615311397995000061010200002000050010140058140058140058140058140054
80024140057108511000010400214004212730612969250100016500103000620000400103000020000160786006692356121830510140033014005714014712960131300309001030020200623000060020300005000014005714005711500211091040010100001000001020003222000201220000222200314000311631121398005000061010200002000050010140059140058140058140058140054
800241400531085110000001600214004312730612969225100016500103000620000401303000020000160787126692164121830510140033014005714005712960231300309001030020200003000060020300935000014005714005311500221091040010100001000001020003322000200220000222200314000302427121397975000010611200002000050010140058140056140054140054140060
8002414014610861000000048801140042127306129692251000165001030006200004001030090200001607813666921641218305101400330140057140057129601121300309001030020200003000060020300005000014005714015311500211091040010100001000001020002322000201220000222210314000281624121397975015010810200002000050010140058140058140060140058140061
800241400571086100000001600214004412730612969225100016500103000620000400103000020000160787166692356121830510140033014014314005312960131300309001030020200003000060020300005000014006114005711500211091040010100001000001020003222000702220000222200314000281631271397985000010610200002000050010140058140060140060140058140058
80024140057108610000000400214004212730612969225100016500203000620000400103000020000160786006692356121830510140033014005714005812959731300309001030020200623000060020300005000014005314005711500211091040010100001000011020002422000210220000222210314001331713321398675000010610200002000050010140061140058140058140058140058
8002414005710851102000016000140042127306129692251000165002030006200004001030000200001607860066923561218305101400330140057140060129597313003090010300202000030093600203000050000140059140057115002110910400101000010000010200022220002002200002222103140012616232613979750000101010200002000050010140058140059140058140058140059
8002414005710851100000040021400421273061296932510001650010300062000040130300902000016084396669461412191409014003401400571401441296532213014290270302062006230186603923027950310140240140239415002110910400101000010000010200082220004035112200062222003181003825620281398775003010610200002000050010140234140314140319140345140151

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.16b, v1.16b }, [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802051400471086000001003350011400361273001296862510010350100300032000040100300002000016078534669431212182426142764140098140229129508312993490360302002006230186609443009350000140047140047115020110099100401001000010000110020000022000010020000202000321011611139775500106910200002000050100140052140036140142140052140054
8020414003510850000100020011401231266941296702510010350100300032000040100300002000016078534669221212186344140095140103140058129503312993090100302002000030000602003000050000140049140047115020110099100401001000010000010020000022000010020000202000321011611139775500106610200002000050100140052140052140149140052140052
802041400511086000001002000140032127300129686251001035010030003200004010030000200001607875866920681218233714002814009214009212950131299379010030200200003000060200300005000014005214003511502011009910040100100001000011002000002200004002000020200032101161113979150000660200002000050100140036140100140048140036140052
802041400511085000010008001140032127297129686251001035010030003200004010030000200001607807066920201218233714002314012814005012948931299189010030200200003000060200300005464614427414307011502011009910040100100001000001002000402200000002000020200032101161113979150000666200002000050100140036140048140036140049140048
8020414014610860000000014620800140032127300129670251001035010030003200004022030000200001607973266920681218242614002714012114004912950231299349010030200200003000060200300935000014004714003511502011009910040100100001000001002000002200000002000020200032101161113979150000660200002000050100140048140037140048140048140052
80204140051108500000000200014003212730012967025100103501103000320000401003000020000160788706692068121823371400271400851400601295083129930901003020020062300006020030000501551400481400471150201100991004010010000100000100200000020000003200002020003210016111397915000010100200002000050100140048140048140048140048140090
8020414005110860000000120011400321272971296715110010350100300032000040100300002000016078766669206812182337140027140231140051129501312993590100302002000030000602003000050000140051140047115020110099100401001000010000010020000022000000020000200000323011611139775500211066200002000050100140136140319140232140234142879
802041403161087000000223962255201140315125169129771131100155501103001520006403443036020150160877906697304122011451401731404311403401295644412998890880303902018630093609463027950310140423140335315020110099100401001000010000010020006002000600620000200000321011611139793500000010200002000050100140052140048140052140036140052
80204140054108500000000180011400361273001296862510010350100300032000040100300002000016078650669206812182337140027140088140054129505312993890100302002000030000602003000050000140051140047115020110099100401001000010000010020000022000000320000202000321011611139791500001060200002000050100140055140048140052140036140052
8020414005110860000110020011400361273021296822510010350100300032000040100300002000016078534669206812182693140027140094140073129505312993490100302002000030000602003000050000140035140047115020110099100401001000010000010020000022000000020000202000321011611139791500001066200002000050100140247140052140052140054140053

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cd | cf | d2 | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002514004710851000000140000114003612730012968625100010500103000020000400103000020000160760106693692121825170014001114009914007212959531300259001030020200003000060020300005016414005114003511500211091040010100001000011020000022000000020002220003161031601116139934500300610200002000050010140225140238140225140239140324
80024140232108800101322643521000140302124962129916768100413502603012820056431323018020050160838126702588121913050014033114040814032012969523130135910503020620124303726057830186506201402321404053150021109104001010000100000102000002200000202000002200314003161441397875000010010200002000050010140039140048140048140052140048
8002414005110861001100001001140036127300129686251000135001030003200004001030000200001607790466920681218251700140028140047140076129583313002090010300202000030000600203000050000140035140047115002110910400101000010000110200000220000000200002200031400416043139791500001086200002000050010140052140052140036140080140052
800241400351086000000020001114002012730212968625100013500103000320000400103000020000160760106691876121825170014003014004714009812959631300249001030020200003000060020300005000014005514005111500211091040010100001000001020000022000000020000220103140041603313979950000101010200002000050010140052140052140052140052140052
8002414005110860000000000001140020127297129670251000135001030003200004001030000200001607790466920681218251710140011140047140128129595313000890010300202000030000600203000050000140051140035115002110910400101000010000010200000020000010200002000031400316033139791500000611200002000050010140052140052140048140048140052
80024140035108600000002000001400361272971296862510001350010300032000040010300002000016076010669216412181422001400231400471401151295953130024900103002020000300006002030000500001400531400511150021109104001010000100000102000000200000002000002000314004160441397915000010010200002000050010140036140052140048140052140036
80024140051108610011002600000140020128671129670251000135001030003200004001030000200001607755266920681218251700140011140047140129129595313003190010300202000030000600203000050000140051140052115002110910400101000010000010200000220000000200002000031400416043139775500001060200002000050010140052140036140055140052140036
80024140051108510000001400001140020127300129670251000105001030000200004001030000200001607790466920681218142200140011140087140077129595313002690010300202000030000600203000050000140051140051115002110910400101000010000010200000220000003200002000031400416033139791500000610200002000050010140036140036140037140052140049
80024140051108600000002000011400201272971296702510001050010300002000040010300002000016077904669187612182517001400271400871400681295953130008900103002020000300006002030000500001400511400511150021109104001010000100000102000002200000032000022000314007320741399735002010010200002000050010140337140241140319140234140342
800241402431087000112629330015840100140294125391129800102100029500303001120004403703025620100161004226696286121898660014022714020514034012962634130067905303020620062302796039430186503101401461404173150021109104001010000100001102000000200000032000002000314003160331397915000010610200002000050010140052140055140036140052140052

Test 4: throughput

Count: 8

Code:

  ld2 { v0.16b, v1.16b }, [x6], x8
  ld2 { v0.16b, v1.16b }, [x6], x8
  ld2 { v0.16b, v1.16b }, [x6], x8
  ld2 { v0.16b, v1.16b }, [x6], x8
  ld2 { v0.16b, v1.16b }, [x6], x8
  ld2 { v0.16b, v1.16b }, [x6], x8
  ld2 { v0.16b, v1.16b }, [x6], x8
  ld2 { v0.16b, v1.16b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 43 | 46 | 49 | 4c | 4e | 4f | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320205800706430010000038001800272120000025400136801001600361600008010016000016000048049996237120807680080023800428004200032440010020016000016000020024000032000080042800421180201100991001008000080000010016000002516000000211600216103305123134118003908000066160000160000801008004380043801298004380043
3202048004264300001100340008002721212000025400136801001600341600008010016000016000048049996651420807640080023800428004200032440010020016000016000020024000032000080042800421180201100991001008000080000010016000002516003200211600216100051091171280039180000126160000160000801008004380043800438004380043
3202048004264300101000000180027212120070254001388010016003616000080100160000160000480498960490208068600800238004280042000324400100200160000160000200240000320000800428004211802011009910010080000800000100160000025160029003016002261213305109217218003908000066160000160000801008004380043800438004380043
32020480042643000000003101180027201200002540010080100160036160000801001600001600004804969602832081034008002380042800420003244001002001600001600002002400003200008004280042118020110099100100800008000001001600000251601100022160029602933051091171180039080000100160000160000801008004380043800438004380043
3202048004264300000000300018002721212000025400128801001600001600008010016000016000048049996432720807800080023800428004200032440010020016000016000020024000032021680042800421180201100991001008000080000010016010202516000000301600226103305109117218003918000076160000160000801008004380043800438004380236
320204800426430000000039001800272121200010254001288010016003816000080100160000160000480499960490208094000800238004280042000324400370200160000160000200240000320000800428004211802011009910010080000800000100160000025160030003216000060033051092252180039180000106160000160000801008015280043800438004380043
3202048004264300000000300180027201200002540039680100160128160000801001600001600004804999636272081044008002380042800422300324400100200160000160000200240000320000800428004211802011009910010080000800000100160000025160030002116013161223705109117128003908000066160000160000801008004380043800438004380043
3202048004264300100000000080027212120000254001368010016004216000080100160000160000480499967735208031600801158004280042000324400100200160000160000200240000320000800428004211802011009910010080000800000100160000025160131003116002100210051091171180039180000106160000160000801008004380043800438004380043
3202048004264300000000380018002721212000025400138801001600281600008010016000016000048049996871120808660080023800428004200032440010020016000016010820024000032000080042800421180201100991001008000080000010016000002516002100331600306100051092171280039080000610160000160000801008004380043800438004380043
32020480042643000000014200180388012120070010140089880208160358160324802651602161602164913459710162086464008002380042800420003244001002001600001600002002400003200008004280042118020110099100100800008000001001600000251600330029160000612225051092171280039180000106160000160000801008004380043800438004380043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dc | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3200258005660010000000360000080027312120254000628001016003616000080010160000160000480049960330208057400800238004280042032440001020160000160000202400003200008004280042118002110910108000080000010160000040160029002216002261302501501961704780039180000126160000160000800108004380043800438004380043
320024800426210100000036000018002731212025400010800101600461600008001016000016000048004996033020805680080023800428004203244000102016000016000020240000320000800428004211800211091010800008000001016000004016002100321600306102500501951706480039080000010160000160000800108004380043800438004380043
3200248004262001000100540000180027015002540001080010160048160000800101600001600004800499603282080044008002380042800420324400010201601141600002024000032000080042800421180021109101080000800000101600000351600471029160021612433005019317034800391800001010160000160000800108004380043800438004380043
32002480042620010000003600000800272012025400010800101600001600008001016000016000048004996017920805740080023800428004203244000102016000016010820240000320000800428004211800211091010800008000001016000000160047002516002261223300503341705680039180055106160000160000800108004380043800438004380043
3200248004262100001000198000018002721212025400046800641600361600008001016000016000048004996022020805840080023801518004203244000102016000016000020240000320000800428004211800211091010800008000001016000002516004800301600006103300501952504480039080000106160000160000800108004380043800438004380043
3200248004262000001100480000080027212120254000488001016003216000080010160000160000480049959996208053800800238004280042032440001020160000160000202400003200008004280042118002110910108000080000010160000027160000006701600296121330050199170448003908000006160000160000800108004380153800438004380043
32002480042620000000003500001801372121202540003880010160036160000800101600001600004803719603302080584008002380042800420324400010201600001600002024000032000080042800421180021109101080000800000101600000251600220030160000612933005019317034800390800541010160000160000800108004380043800438004380043
32002480042620000001003500000800272120025400048800101600361600008001016000016000048004996033820805740080023800428004203244000102016000016000020240000320000800428004211800211091010800008000001016000002516003010291600306022330050196170438003908000009160000160000800108015480043800438004380043
32002480042621000000005300002800272121202540004680010160036160000800641600001600004800499609032081772008002380042801520324400010201600001600002024000032000080042800421180021109101080000800000101600000251600210001600300022330050195170338003918000009160000160000800108004380043800438004380043
3200248004262100000010660000180027212120254000488001016000016000080010160000160000480049960237208177200800238004280042032440001020160000160000202400003200008004280042118002110910108000080000010160000025160022004716003061472500501971704880039080000109160000160000800108004380043800438004380043