Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (multiple, post-index, 2S)

Test 1: uops

Code:

  ld2 { v0.2s, v1.2s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.004

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.004

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
63005286752220160116000003000049572828600164774004100020041000100020001000500050002384707227542844328652310400010002000200020002841828595116100110001000100002100200054100121201320898097005328583920053318638161635332810710001524612562136091000200010002869428671287582858228649
630042848122201200900000200004643285590016495400410002006100010002000100050005000238200822658286212868831040001000200020002000284622839611610011000100010001210000100100000001328095797007325510432003331783823939372806710001545212467136301000200010002869028621288202863428680
63004284472210230011000001500004792283020016631400410002004100010002000100050005000238600422731284012862431040001000200020002000286422855611610011000100010000210000304100000001338096807079316110401993631803822936372807010001534112418135671000200010002866628645287122866128581
6300428625221012001700000200004811282840016505400410002006100010002000100050005000238750222718284962869331040001001200020002000285192856411610011000100010000210000100100000001312696497031319383619870314038211237412810010001519612537136211000200010002858528761285652862228660
630042869522201700100000070000476928325011654740061000200610001000200010005000500023854012274928510287603104000100020002000200028588285821161001100010001000021000050010002000131109544706531816351998332813818744412807910001496612675136221000200010002868528707287242866828733
6300428746223017001700000000004864283730016469400410002004100010002000100050005000238080622706285812874631040001000200020002000284832852711610011000100010000210000103100100201341394757009318183619980314838201540342815010001554812399134001000200010002866428693287132866328683
6300428407222013001600000200004854283810016698400410002000100010002000100050005000238060122681285252860831040001000200020002000284592860411610011000100010000210000100100020201348996527010322173220004307938221639392814510001515812536135521000200010002867228682286942865228641
63004287362210120017000001400004797283580016531400610002004100010002000100050005000238100022691284942864231040001000200020002000285912854811610011000100010000210000103100020201320195666979313753920066319638201335352807310001532312048136111000200010002861328717286442868228605
63004287632221170011000000000049452826100165084004100020041000100020001000500050002385405227232853828613310400010002000200020002856928614116100110001000100002100003001000002013279959270163184113319977324738181036372806610001543012323139001000200010002860628751286472857828659
6300428709220010001300000001004890282770016589400410002006100010002000100050005000238120922678285502853831040001000200020002000284782844511610011000100010000210000200100020001336996417096324383819973321538211137352810610001555712156134591000200010002864328641284672861128582

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.2s, v1.2s }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514004710490000001300140032139577259010350100300041000040100300001000012369035330042161127391400260140048140035130726031311568010030200100003000060200200803000014004714004711502011009910040100100001000001001000001100001001000011000032101801113972250000609100002000050100140036140036140036140051140048
70204140050108600110010114003513955225901005010030000100004010030000100001236921533082516112739140011014005014005013071103131141801003020010000300006020020000301211400501400501150201100991004010010000100000100100000110000003183100001100003210112111139717500002699100002000050100140036140150140051140036140048
702041400501086000000130014003513955225901035010030003100004010030000100001236903533120316112739140026014005014005013078703131194801003020010040300006020020000300001400501400371150201100991004010010000100000100100000010000100100000100003210212111139721500002000100002000050100140051140051140051140053140036
7020414013510860000001001400351395772590103501003000310000401003000010000124478653300421612032414002601400501400501307230313119480404302001000030000602002000030000140035140047115020110099100401001000010000110010000001000010010000110000321018011139722500002690100002000050100140051140051140051140051140036
70204140098108600101022001400351395522590120501003000310000401003023210118124478653312031611410914002601400501401501307290313119480100302001004130000602002000030000140050140047115020110099100401001000010000010010000011000014010000110000321018011139713500002660100002000050100140038140036140048140038140051
7020414005010860001001001400361395522590103501003000310000401003000010000124478653300421611273914001101400501400501307260313113880100302001000030000602002008230000140050140050115020110099100401001000010000010010000011000000010000110000321011211113972250000669100002000050100140048140051140051140037140051
7020414004710850000001450014003513955225901005010030000100004010030000100001244786533004216110398140097014005014005013072603131194801003020010000300006020020000300001400501400471150201100991004010010000100000100100000110000100100001100003210180111397225000011566100002000050100140051140055140036140051140051
70204140047108500000013001400351395702590100501003000010000401003000010000123690353307461611273914002601401471400501307260313113880100302001000030000602002000030000140035140050115020110099100401001000010000010010000011000100010000110000321018011139722500002660100002000050100140055140036140052140036140036
7020414003510860000100011400351395525290103501003000310000401003000010000124478653300421611469114002901400351401431307260313115080100302001004130000602002000030000140050140035115020110099100401001000010000010010000011000000010000110000321018011139801500002666100002000050100140048140051140051140052140036
702041400411086000001600140037139552259012950100300031000040100300001003912447865330042161127391400260140050140140130723031311948010030321100003000060200200003000014004814003511502011009910040100100001000001001000001100002064551000211000033021118111398795022619069100002000050100140242140239140126140318140230

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0054

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140047112510100000040001400351396352590013500103000010000400103023610160124591653332851611477910140026014005214005013075031313558001030020100003000060020200003000014005014004711500211091040010100001000011010000011000000020410000100031400038733139722500004669100002000050010140051140051140052140053140051
700241400501125000000000400014003513965025900135001030003100004001030000100001245916533328516114896101400260140051140047130749313120980010300201000030000600202000030000140050140047115002110910400101000010000110100000110000000010000110031400048733139722500001169100002000050010140051140051140048140051140081
70024140050112500000000010001400351396502590013500103000310000400103000010000124590753332851611512100140028014005014005013075031312098001030020100003000060020200003000014005014004711500211091040010100001000001010000011000000031000010003140004874313972250000699100002000050010140052140051140051140051140051
700241400501125000000000430001400351396512590013500103000310000400103000010000124584753332851611517610140027014005314005113074931312098001030020100003000060020200003000014005014004711500211091040010100001000011010000011000000001000001003140003873313972250000609100002000050010140048140051140051140051140048
70024140050112400000000000001400351396472590013500103000310000400103000010000124590753332851611477910140024014004714005013074931312098001030020100003000060020200003000014005014004711500211091040010100001000001010000011000000031000011003140003873413973350000699100002000050010140037140051140052140052140036
70024140050112500000000040001400351396502590013500103000310000401513000010000124590753332851611477900140026014005014005013074931311948001030020100003000060020200003000014005014004711500211091040010100001000001010000011000000001000011003140004873313972250000660100002000050010140098140036140036140036140051
70024140050112500000000010101400321396502590013500103000310000400103000010000124590753327061611477900140034014005014005013074931312098001030020100003000060020200003000014005014005111500211091040010100001000001010000011000000001000011003140004874313972450000796100002000050010140052140051140051140048140051
7002414005011250000010004000140035139647259001350010300031000040010300001000012459075333285161150090014002901400501400501307493131209800103002010000300006002020000300001400501400511150021109104001010000100000101000001100000002410000010031400038733139722500009109100002000050010140146140051140139140051140051
70024140141112400000000110001401291396412590025500103000710000400103000010048124808053347391611991410140105014005014014413077715131214800103014310000301216002020080300001400471402152150021109104001010000100000101000301100000003185100021100316300310443139722500001009100002000050010140051140144140051140141140141
700241401251126001010001265000140042139599259004450020300031000440010302361004012483345333285161147790014017501400501402351307822913120680010301401004030238600202008030245140047140239261500211091040010100001000001010004211000200212810100031102321000311036140013500209911100002000050010140222140330140242140237140316

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.2s, v1.2s }, [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0075

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch call (8e) | inst branch taken (90) | inst branch indir (93) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70205140057105010001002000140060139630539010650100300061000040100300001000012371605332259161164801140034140075140075130751313117880100302001000030000602002000030141140058140068115020110009901004010010000100001100100013010001070110000010103210112111139745500001069100002000050100140076140058140076140076140076
70204140075108510101001400014006013961925901065012930013100004010030000100001237232533225916117505114003414007514007513071431311788010030200100003012160200200003000014005714006811502011000990100401001000010000110010003101000201032851000011111324121211113973850000900100002000050100140058140058140076140076140076
70204140057108610100001101114004213961725901065010030006100004024330000100001237178533225916117309114005114007514007513075131311788010030200100003000060200200003012414007514006811502011000990100401001000010000110010002111000300032311000011110321011211113972950000999100002000050100140076140076140077140122140058
702041400751086110000010001400601396202590106501003000610000401003000010000123717853320621611730901400521400751400751307513131180801003020010000300006020020000300001400571400752150201100099010040100100001000011001000110100010001100011111232102802113972950000969100002000050100140076140175140122140092140078
7020414007510861110000201014006213961725901065010030006100004010030000100001237178533297916117440014012214008014007513075131312018010030200100003000060200200003014214011614007811502011000990100401001000010000110010001111000200111000011111321011211113974550000999100002000050100140058140160140069140059140076
70204140075108510000002000140060139611259010350100300031000040100300001000012392135332297161173090140051140076140124130751341312688010030200100003000060200200003000014005714007021502011000990100401001000010000110010002111000201241000011010321011211113974550000099100002000050100142768144093143558140757140076
7020414007810851200000200014010913963025901035010030006100004010030000100001239968533237616116480014004414007514007513075131311788010030200100003000060200200003000014007514006811502011000990100401001000010000110010002111000202171000001110321011211113974550000969100002000050100140076140079140076140076140167
70204140075108611100001301014004213963025901215010030006100004010030000100001237108533225916116480114003314007514007513075131311788010030200100403000060200200003000014007514006811502011000990100401001000010000010010002211000100111000011011321011211113974550000979100002000050100140058140076140076140076140076
702041400751125100001010001400621396172590106501003000610000401003000010000123717853322981611753701400511400781400761307513131178801003020010000300006020020000300001400751400761150201100099010040100100001000011001000221100010312210000111103210112111139745500009610100002000050100140077140076140058140076140076
702041400751125101000019000140053139617259010650100300061000040100300001000012372235333359161175441140052140166140257130796321313078039833288110903011960696200823024514025414017521502011000990100401001000010000010010004111000222397521000211110325621252114179250036969100002000050100140250140361140344141205140162

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0052

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140057104900000001300140037139652259001350010300031000040010300001000012459255333361161135770140028014005214005213075131312188001030020100003000060020200003000014005214004911500211091040010100001000001010000001000013001000011314003874313972450000006100002000050010140054140053140053140053140053
700241400361085000000010014003913964925900105001030003100004001030000100001245925533336116115004114002801400361400361307513131195800103002010000300006002020000300001400521400521150021109104001010000100000101000001100000061000001314003873313972450021969100002000050010140057140054140053140037140053
700241400521086000000010014003913958925900135001030003100004001030000100001245925533274516120611014002801400361400521307513131211803093002010000300006002020000300001400521400491150021109104001010000100000101000001100000031000011316305874313972450000090100002000050010140053140040140053140053140053
7002414005210850010000120014003713965225900135001030007100004001030000100001245925533336116115004014002801400551400491307553131211800103002010000300006026620000300001400521400521150021109104001010000100000101000000100001031000011314003875413972450009699100002000050010140053140053140053140059140053
7002414005710850000000188014003713965225900135001030003100004001030000100391245925533340116115004014002801400521400521307513131211800103002010000300006002020000300001400361401411150021109104001010000100000101000201100000031000011314003874413972450000069100002000050010140151140053140037140053140055
700241400521086000100010014002113965225900135001030003100004001030000100001245934533507316115004014001201400521400521307513131260800103002010000300006002020000300001400521400502150021109104001010000100000101000001100001001000011314003914413970850010960100002000050010140053140053140146140037140041
7002414005210860100100130014003913963625900305001030003100004001030000100001245925533336116115004014002801400521400521307513131211800103002010000301196002020000300001400521400521150021109104001010000100000101000000100000001000011314003874313972450000069100002000050010140053140037140053140037140053
70024140053108500000001300140037139652259001350010300031000040151300001000012459255333361161150040140028014005214005213073531312118001030020100003000060020200803000014005314005211500211091040010100001000001010000011000020310000113140041094413972450000660100002000050010140053140053140053140053140053
70024140144108600000001330014003713963625900135001030003100004001030120100001245856533336116115004014002501400521400521307513131211800103002010000300006002020000301221400521400491150021109104001010000100000101000001100001032601000010314004873313972650000969100002000050010140053140058140053140053140053
7002414014810860000000088014003713963625900135001030003100004001030118100001245943533336116115004014002801400521400521307533131211800103002010000300006002020000300001400521401451150021109104001010000100000101000120100040096701000401321005963513993450018909100002000050010140313140233140342140335140336

Test 4: throughput

Count: 8

Code:

  ld2 { v0.2s, v1.2s }, [x6], x8
  ld2 { v0.2s, v1.2s }, [x6], x8
  ld2 { v0.2s, v1.2s }, [x6], x8
  ld2 { v0.2s, v1.2s }, [x6], x8
  ld2 { v0.2s, v1.2s }, [x6], x8
  ld2 { v0.2s, v1.2s }, [x6], x8
  ld2 { v0.2s, v1.2s }, [x6], x8
  ld2 { v0.2s, v1.2s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2402058004162021200011147880008002616644683204668019216021680091802381602658026844104513762093983488000801288018180320499482457500803211572028013016026020016026216000080041801802180201100991001008000080000010081058214810591141169800966110140515313312801441801856680000160000801008018380042801828032480182
24020480181621011100211600008002616602532014080100160032800008010016000080000440821537583749826026008002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000010080000014800130001080009609180511011611800381800006680000160000801008004280042800428004280042
240204800416430010110019000080026166025320130801001600328000080100160000800004408199375837298262900080022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800000168001200016800096110140511011611800381800007780000160000801008004280042800428004280042
24020480041643000000003401008002616602532014080100160032800008010016000080000440821537583039826026008002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000110080000014800100001480009600140511011611800381800006680000160000801008004280042800428004280042
240204800416430010000016000080026166025320132801001600408000080100160000800004408215375826398263080080022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800000148001002010800116010180511011611800381800006680000160000801008004280042800428004280042
2402048004164300101000270000800261660253201308010016004080000801001600008000044082153758370982651900800228004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000001008000000800000001480010500140511011611800381800006680000160000801008004280042800428004280042
24020480041643001011002700008002616602532013280100160040800008010016000080000440821537583469826135008002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000010080000014800100001380010601200511011611800381800009680000160000801008004280042800428004280042
24020480041643000000001800008002616602532014080100160032800008010016000080000440821537583619825990008002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000010080000014800000001380010609140511011611800381800000980000160000801008004280042800428004280042
24020480041643001000001500008002616642532013880100160032800008010016000080000440821537583689826154008002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000010080000014800090001380010009140511011611801631800008680000160000801008004280042800428004280042
24020480041642001000001600008002616602532013080100160040800008010016000080000440821537583729826022008002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000010080000014800100001080012009140511011611800381800009680000160000801008004280167800428016680042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 24 | 3f | 43 | 46 | 49 | 4c | 4f | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2400258004162000000023008002610600025320010800101600488000080010160000800004407696375837898259798002208004180041499470350021320010208000016000020160000160000800418004111800221091010800008000001080000008001800178001361182205020415458003818000013080000160000800108004280042800428004280042
240024800416200010002400800261660002532005880010160048800008001016000080000440769637583779825741800220800418004149947035002132001020800001600002016000016000080041800411180021109101080000800000108000001880018101880018611422054555163380038180000131480000160000800108004280042804348004280042
24002480041643000000540080026166155302532005080010160038800008001016000080000440768337583769825739800220800418004149947035002232001020800001600002016000016000080041800411180021109101080000800000108000000800180014800000102205020415578003818000001080000160000800108004280042800428004280042
240024800416210000002400800261600002532001080010160000800008001016000080000440769637583789825741800220800418004149947035002232001020800001600002016000016000080041800411180021109101080000800000108000001880018003580014611822050204153580038180000101080000160000800108004280042814968442180763
2400248004162100000012018002616603025320050800101600388000080010160000800004407684376229698348098013108004180480499474635002232001020800001600002016000016000080041800411180021109101080000800000108571001884749006149385723611822050205153380038180000131080000160000800108004280042800428004280042
24002480041599000000240080026166000253200588001016004680000800101600008000044076963758376982574180022080041800414994703500223200102080000160000201600001600008004180041118002110910108000080000010800000188001800148001360022050204153380038180000101380000160000800108004280042800428004280183
24002480041599000000230080026106013053320010800101600488000080010160000800004407684375837498257688002208004180041499810350022320010208000016028520160000160000800418004111800211091010800008000001080000018800000013800146113005020315338003818000010080000160000800108004280042800428004280042
240024800416000000002400800260600002532005080010160048800008001016000080000440769637583619825741800220800418004149947035002232001020800001600002016000016000080041800411180021109101080000800000108000001880017001880014000005020315358003808000001380000160000800108004280042800428004280042
2400248004160000000000080026166000253200568001016004880000800101600008000044076963758382982575180022080041800414994703500223200102080000160000201600001600008004180041118002110910108000080000010800000188000000080014611822050203154280038180000131080000160000800108004280042800428004280042
2400248004162000000024008002610600025320058800101600468000080010160000801284407696375837198257418002208004180041499470350022320010208000016000020160000160000800418004111800211091010800008000001080000018800180008001400018050203154480038080000101080000160000800108004280042800428004280042