Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (multiple, post-index, 2D)

Test 1: uops

Code:

  ld2 { v0.2d, v1.2d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.004

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.004

retire uop (01)cycle (02)03l1i tlb fill (04)l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f18191e1f22243a3f43464951schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)5f696b6d6emap rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map simd uop (7e)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst simd load (98)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c9cfd0d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
64005295162361111111110040004706289200001733250061000200220001000200020005000100262380682284502917129441310500020002000300040002930129276116100110001000120000020000005720004242101291293646955319614220511323238122351512862210001612313318143192000200010002961529401294232942029390
64004293672360111111100010000464828986022172925006100020062000100020002000500010000238668228460292122938631050002000200030004000293192934111610011000100002000062002020220026240001315895636944315104820426327638161650452861010001631513081144212000200010002939729452294502930129362
64004293382360020010000080004685289360001726450061000200020001000200020005000100002386862284402940629378310500020002000300340002926229392116100110001000020000620000300320024000001316092476986317205120347331238151547502862010001606113169145922000200010002938629437293472955129498
64004295282360020020000020000469928911002172765008100020042000100020002000500010000238027228670291722934231050002000200030004000293252919011610011000100002000062004090020000060001301993677016308204720439327538171455542867410001630313025141602000200010002942229441293312948129422
6400429423236101000000006010463429032000173235004100020002000100020002000500010000238100228790292482936131050002000200030004000293532927111610011000100002000062004070220004240001330493257012313814820442324538151448562857510001617213242141132000200010002928629424293762934929425
6400429304235002000000006000471228975202172805004100020062000100020002000500010000238082228340292462935133050002000200030004000292522922711610011000100002000042002090020000040001304993906996319215220419328138181052532867510001597413279144892000200010002951429386294322936929476
64004294052361000010000050004704289200001736650061000200420001000200020005000100002384812282702921729388310500020002000300040002939729238116100110001000020000620000340020004000001320494026887317914620421334638121548502863610001613413048143542000200010002947729335294102930229396
64004293452360030010000018000470728903020172925004100020062000100020002000500010010239043227860291572949533150002000200030004000292702932311610011000100002002002004080620024000001297893127013321204820412322038211451472863210001635613174143652000200010002940729311293812947729381
64004293822360010010000060104750289540201719750041000200420001000200020005000100002380562285702914329382310500020002000300040002923429193116100110001000020000020020310320004040001348194606987315805320426315238131354512857310001610713225143632000200010002947329491294482933729504
64004293802370010120000060004728289830021715250081000200420001000200020005000100002386232287702922729507310500020002000300040002926229243116100110001000020000620040300820024260001319196037010318604820343334638181348462865710001643613338144962000200010002939329343296602935329379

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.2d, v1.2d }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f23243a3f4d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5l1d cache miss ld nonspec (bf)c2branch cond mispred nonspec (c5)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
802051400351125000110077901011400981286711296732510011950100300032000040100300002000016078534669462412182337140027140051140035129505312993690100302932000030000602003000050000140052140047115020110099100401001000010000010020000222000012220002220032301241113985950000666200002000050100140052140228140052140052140148
80204140051112510000012000114009012729712971151100100501103000320000407023009020050160780706694374121812421400111401471400351295053129988901003020020000300006020030093500001401421400472150201100991004010010000100000100200040020000102565200002000323112411140189500620100200002000050100140689140304140782140800140704
802041409821130111001010801352001140083127300129670155100137501303000020000402203000020151160780706692740121861721400251400471401461295063129986901003029320000300936039430000501551400511401461150201100991004010010000100000100200000220004500200002200321011612139775500000610200002000050100140052140052140036140052140036
802041400511086000010000000140064127300129686251001035010030003200004010030000200001607664066912901218233714003014004714003512948931299349010030296200003000060200300005000014003514004711502011009910040100100001000001002000000200000002000002003229116111398615001001210200002000050100140748140495140397140143140131
8020414003510861001110688001140247125741129719251001195010030007200004022030000200001607853466920681218619514009814012914005112952031299869010030296200623000060388300005000014014414005121502011009910040100100001000001002000222200040022020002220032311241113979150000706200002000050100140146140052140052140122140052
80204140141108600100012880001402091253641297255210010050100300072000040222300002005016078534669485412182337140027140144140053129507312993490100302002000030000602003000050000140052140047115020110099100401001000010000010020000002000000020000020032101161113977550000600200002000050100140053140052140048140052140048
80204140035108500000002000114007812730012968625100103501003000320000402203000020000160785346692068121823371400271400521400531295053129934901003020020066300006020030000500001400351400511150201100991004010010000100001100200042220002001520000220032101161113978950000666200002000050100140058140052140049140039140052
8020414003511310000000140001140032126666129670251001165010030007200004010030000201001607853466945481218250914008414005114005512961014129991901003020020062300006038630000501551400511400471150201100991004010010000100000100200000020000103200002200321011611139791500101060200002000050100140036140052140053140052140048
802041400511135000002027500021400321259521296865110010350112300032000040100300002000016078534669427812182337140028140051140051129506231299369010030295200003000062607300005015514003714004411502011009910040100100001000001002000002200001032000222003210116211397785002210610200002000050100140054140150140054140052140053
8020414005111340000130146880011400361273031296861011001035010030003200024010030180200001607664066920681218242314002714005414005112950531299339166230295200003000060200313955690114402714258241502011009910040100100001000001002000002200061050782000022003210124111398625002161010200002000050100140049140145140048140147140145

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0057

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f23243a3f4d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch mispred nonspec (cb)cfd5map dispatch bubble (d6)dbddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
800251400531125101040105000014003812730012972825100013500103000320000400103000020000160779046692068121827841140030140035140054129596141300309001030020200003000060020300005000014004714003511500211091040010100001000011020002222000200220000222200314031603313979850000101010200002000050010140053140052140048140149140053
80024140047112400000000186000014003812730612969225100016500103001020000400103000020000160786006692164121831370140033140058140058129602313003090010300202000030000600203000050000140059140053115002110910400101000010000010200000220000103200002020003140216022139791500101160200002000050010140052140048140052140052140048
8002414005111250000110050001140240127306129692251000165001030022200024001030000200001608428666915881218305111400331400571400571296013130030900103002020000300936002030000500001400571400531150021109104001010000100000102000002200000002000020200031402160331397945000010010200002000050010140052140052140053140052140037
800241400521125000000005000114002612730412969225100016500103000620002400103000020100160790486694998121971360140077140057140041129585313003090010300202000030000600203000050000140041140041115002110910400101000010000010200000220000000200000000003140316022139794500001067200002000050010140049140052140048140037140048
800241400471125000000001400001400451273061296882510001650010300062000040010300002000016078600669235612183051114003314005714005712960131300319001030020200003000060020300005000014005914005711500211091040010100001000001020000022000010320000200000314031602213979250000101010200002000050010140054140058140042140042140058
80025140057112511000000700021400391273001296862510001350010300032000040010300002000016078016669206812182517114002714005214005212957931300259001030020200003000060020300005000014005114004711500211091040010100001000001020002353220002102200002222103140216022139799500001060200002000050010140052140052140053140048140052
800241400511124000000005000114002612730612969225100016500103000620000400103000020000160789367799170121835671140027140052140064129601313007490010300202000030000602063000050000140142140047215002110910400101000010000010200020220000103200042020003160224033139866500101066200002000050010140250140058140135140056140058
8002414005811251000000113688002140036125166129805124100074500303006420006404903018020150160944706701964121870400140172142583140237129660331301779079030113201863000060578300935015514033914005141500211091040010100001000001020015222001320765220006222230336342530331400155002010910200002000050010140315140341140243140401140334
800241403081125102010422682640021400421273421296922510001650010300062000040010300002000016078600669235612183140014003314005714005312960131300309001030020200003000060020300005000014005714005711500211091040010100001000001020003222000200220000222200314021603313979750000101010200002000050010140058140058140058140042140058
80024140057112511010000400011400331273001296702510001350010300032000040010300002000016076010669206812182603014003014005114003512959131300249001030020200003000060020300005000014003514003621500211091040010100001000011020003322000010320000220000314021604613980350000101014200002000050010140155140062140063140062140056

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.2d, v1.2d }, [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f233a3f4d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)int prf full (71)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5l1d cache miss ld nonspec (bf)c2cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
8020514004710860000000201114003912730012968625100103501003000320000401003000020000160785346692068121823370140027140035140035129489031299309010030200200003000060200300005000014006614004711502011009910040100100001000001002000002200000002000020003210116111397755000010610200002000050100140052140052140052140052140048
8020414004710860000000600114003612730012967025100103501003000320000401003000020000160766406692068121823370140023140051140051129489031299309010030200200003000060200300005000014003914009711502011009910040100100001000001002000002200000032000002003210116111397915000010610200002000050100140052140036140052140052140052
8020414005110860000000200114003612730012968225100103501003000320000402223000020000160785346691290121823370140027140051140051129505031299349010030200200003000060200300005000014005114004711502011009910040100100001000001002000002200000002000022003210116111397915000010106200002000050100140052140052140052140052140052
80204140047108600000002001140038127300129686251001035010030003200004010030000200001607807066920681218233701400271400511400521295050312993490100302002000030000602003000050000140039140054115020110099100401001000010000110020000022000000320000020032102161113978750000101010200002000050100140052140048140052140052140052
80204140051108500000003500114003612730012968625100103501003000020000401003000020000160780706691876121823370140027140051140051129505031299189010030200200003000060200300005000014005214005111502011009910040100100001000001002000002200000002000020003210116111397875000010610200002000050100140036140052140036140052140052
80204140051108600000002001140036127300129670251001005010030003200004010030000200001607664066920681218233701400271400471400511295050312993490100302002000030000602003000050000140051140051115020110099100401001000010000010020004002000000020000220032101161113979150000101010200002000050100140052140036140052140048140048
802041400351086000000020011400361273001296862510010050100300032000040100300002000016076640669206812182337014002714003514005312950503129934901003020020000300006020030000500001400351400511150201100991004010010000100000100200000220000000200002200321011611139791500001066200002000050100140052140052140052140052140036
8020414005110850000000200014003612730012968225100103501003000320000401003000020000160785346692068121823370140023140051140051129505031299349010030200200003000060200300005000014005114004711502011009910040100100001000001002000002200001002000020003210456231401855005210610200002000050100140478140585140524140497140529
80204140538108901000257588980114003212595713117625100103501003000720000402223027320000160787586708482123601681140027140094140052129506439301299469010030200200003000060200300005000014003514004711502011009910040100100001000011002000002200000032000022003210116121397875000010010200002000050100140036140048140054140052140036
802041400511124000010020001400361273001296702510010350100300032000040100300002000016078534669206812181500114002314005114005112950603129934901003020020000300006020030000500001400521400381150201100991004010010000100000100200180220000203200000200321011601139792500006610200002000050100140052140052140052140140140052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f22233a3f4d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5l1d cache miss ld nonspec (bf)c2branch cond mispred nonspec (c5)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
800251400411086000114238000114003612730012968625100013500103000320000400103009020000160779046692068121815081400271400801400521295973130031900103002020000300986002030000500001401371400471150021109104001010000100000102000200200001032000022003140316221397915000010610200002000050010140052140052140052140052140052
80024140051108500010002010014003612730012972740100013500103000320000400103000020000160779046692068121826061400301400781400541295953130024900103002020000300006002030000500001400521400471150021109104001010000100000102000002200000002000022003140216331397915000010100200002000050010140052140036140052140052140036
8002414005110860000000200011400321273001296862510001350010300032000040010300002000016077904669206812182775140027140053140051129596313002490010300202000030000600203000050000140047140053115002110910400101000010000010200000220000000200002200314031633139775500000910200002000050010140096140052140052140036140048
800241400511086000000020001140036127301129686251000135001030003200004001030000200001607744066920681218251714002714005314005112959531300249001030020200003000060020300005000014005114003611500211091040010100001000001020000022000000020000200031403163313979250000101010200002000050010140052140052140052140052140052
8002414005110860000000140001140036125829129782102100026500403001520002403703027020100160949626696728121919071402481404621401261296892413011690530302992018630093605783018650310140426140317315002110910400101000010000110200020220006026319520038220032014241413986450031101010200002000050010140331140238140232140052140036
800241400511086000004014000114002012730012968625100013500103000320000400103000020000160779046692068121825171400271400841400511295913130024900103002020000300006002030000500001400511400511150021109104001010000100000102000002200000032000022003140216331397915000010611200002000050010140052140053140052140052140048
80024140051108600000001400011400361273031296862510001350010300032000040010300002000016077904669211612181422140027140051140047129595313002490010300202000030000600203000050000140051140047115002110910400101000010000010200000220000000200002200314031632139791500001060200002000050010140052140052140052140052140052
800241400471086000010029000114003612730612968625100013500103000320000400103000020000160780206692068121825171400271400511400531295953130024900103002020000300006002030000500001400511400511150021109104001010000100000102000002200001002000002003140316221397915000010010200002000050010140052140052140052140052140048
80024140047108600000000000114003612730012968625100013500103000320000400103000020000160779046692068121825171400291400511400511295923130024900103002020000300006002030000500001400511400471150021109104001010000100000102000002200000002000002003140216331397915000010610200002000050010140052140052140051140052140052
800241400511085000000020010140036127300129686251000105001030003200004001030000200001607790466920681218251714002814005114005112959531300249001030020200003000060020300005000014005114005111500211091040010100001000001020000022000000020000220031403162313979250000101012200002000050010140052140052140048140052140053

Test 4: throughput

Count: 8

Code:

  ld2 { v0.2d, v1.2d }, [x6], x8
  ld2 { v0.2d, v1.2d }, [x6], x8
  ld2 { v0.2d, v1.2d }, [x6], x8
  ld2 { v0.2d, v1.2d }, [x6], x8
  ld2 { v0.2d, v1.2d }, [x6], x8
  ld2 { v0.2d, v1.2d }, [x6], x8
  ld2 { v0.2d, v1.2d }, [x6], x8
  ld2 { v0.2d, v1.2d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss instruction (0a)0e0f18191e1f22243a3f4346494f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)606167696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)branch cond mispred nonspec (c5)cfd0d2d5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
32020580070621101100004301038002721212025400142801001600421600008010016000016000048049996478820838420008002308004280042032440010020016000016000020024000032000080042800421180201100991001008000080000010016000003316003700037160029613033051110051754800390800001410160000160000801008004380043800438004380043
3202048004262110010000430003800272121202540014280100160036160000801001600001600004804999607612082724000800230800428004203244001002001600001600002002400003200008004280042118020110099100100800008000011001600000331600360003016003701370051110051746800390800001415160000160000801008004380043800438004380043
3202048004262110010000430001800272121202540014280100160042160000801001600001600004804999640712086754000800230800428004203244001002001600001600002002400003200008004280042118020110099100100800008000001001600000411600370204016003061041051270041753800390800001410160000160000801008004380043800438004380043
32020480042620100100004300038002701212025400142801001600421600008010016000016000048049996428920854780008002308004280042032440010020016000016000020024000032000080042800421180201100991001008000080000010016000003316003600040160037013041051110041754800390800001414160000160000801008004380043801288004380043
320204800426201001000043000380027212002540014280100160042160000801001600001600004804999646392080268000800230800428004203244001002001600001600002002400003200008031180428118020110099100100800008000001001600000331600370003716003761294105111005173580039080000014160000160000801008004380043800438004380043
3202048004262010010100430003800272012025400142801001600421600008010016000016000048049996110620829840008002308004280042032440010020016000016000020024000032000080042800421180201100991001008000080000010016000003316003700046160000613040051110061754800390800001410160000160000801008004380043800438004380043
3202048004262010010000420003800272121202540013680100160042160000801001600001600004804999649682084852100800230800428004203244001002001600001600002002400003200008004280042118020110099100100800008000001001600000331600370003716003760300051110051775800390800001410160000160000801008004380043800438004380043
3202048004262010010000550001800272121202540014280100160042160000801001600001600004804999639032082602000800230800428004203244001002001600001600002002400003200008004280042118020110099100100800008000001001600000411600370103716000061304105111005175680039080000140160000160000801008004380043800438004380043
32020480042620100100004300138002721212025400100801001600421600008010016000016000048049996454620836360008002308004280042032440010020016000016000020024000032000080042800421180201100991001008000080000010016000003316003701037160029613740051110351745800390800001414160000160000801008004380043800438004380043
32020480042621100100004300038002721212025400142801001600421600008010016000016000048049996410320981940008002308004280042032440010020016000016000020024016232000080042800422180201100991001008000080000010016000003316000000037160037603033051110061755800390800001414160000160000801008004380043800438004380043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f2223243a3f4346494f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)5d6067696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch indir (93)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)inst barrier (9c)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2cfd5map dispatch bubble (d6)dadbddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
32002580057643000000005400001800270121202540001080010160000160000800101600001600004800499603302080584000800238004280042032440001020160000160000202400003200008004280042118002110901010800008000001101600101125160030020501600006104011050193170033800391800001010160000160000800108004380043800438004380043
32002480042643000000005300001800270121202540005880010160036160000800101600001600004800499603312080588000800238004280042032440001020160000160000202400003200008004280042118002110901010800008000000101600101225160022000221600360022401105019217003380052180000106160000160000800108004380043800438004380043
320024800426430000000068000018002721213025400060800101600361600008001016000016000048004996036220805620008002380042800420324400010201600001600002024000032000080042800421180021109010108000080000001016001211016000000001600366121401015019217003280066080000010160000160000800108004380043800438004380043
32002480042642000011002300001800272121202540004680010160036160000800101600001600004800499603712080574000800238004280042032440001020160000160000202400003200008004280042118002110901010800008000000101600111240160021000301600006133401115019317003380039080000106160000160000800108004380043800438004380043
320024800426430000000066000008002721212025400062800101600461600008001016000016000048004996036120805840008002380042800420324400010201600001600002024000032000080042800421180021109010108000080000001016001113401600000103616000061223310050192170022800390800001010160000160000800108004380043800438004380043
32002480042643100000005300000800272151202540005480010160046160000800101600001600004800499603382080568000800238004280042032440001020160000160000202400003200008004280042118002110901010800008000000101600001240160047010211600006124331105019217002280039080000610160000160000800108004380043800438004380043
3200248004264310000000520000180027015120254000568001016001216000080010160000160000480049960340207999600080023800428004203244000102016000016000020240000320000800428004211800211090101080000800000010160000040160046000331600210122350150192170023800500800001010160000160000800108004380043800438004380043
320024800426431000000010000018002721212025400046800101600361600008001016000016000048004996033820805740008002380042800420324400010201600001600002024000032000080042800421180021109010108000080000001016000004016003000030160037612233005019252700338003918000060160000160000800108004380043800438004380043
3200248004264300000000000001800270158025400046800101600481600008001016000016000048004996017920805540008002380042800420324400280201600001600002024000032000080042800421180021109010108000080000001016000002516003002036160030612925005019217003280047180000106160000160000800108004380043800438004380043
320024800426430000000036000018002700120254000468001016000016000080010160000160000480049960338208062800080023800428004203244000102016000016000020240000320000800428004211800211090101080000800000010160000025160030000331600306130330050193170024804171800001010160000160000800108015480043800438004380043