Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2R (post-index, 4S)

Test 1: uops

Code:

  ld2r { v0.4s, v1.4s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.004

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.004

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | 0e | 0f | 18 | 1e | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6300529291219360210102046192884200172174004100020041000100020001000500050002386202272829073293333104000100020002000200029143291211161001100010001000210000010002021305695187027318204320612313338211551452841710001616813367142011000200010002933829302292982933329291
6300429340219000000002046682887800171484004100020041000100020001000500050002386622269429094292633104000100020002000200029186292011161001100010001000210000010002021318092776864318204920606322538241550542860410001621313260145761000200010002925429243291942926429198
63004292672201000000011046402877400171194004100020041000100020001000500050002386402274029055292453104000100020002000200029130291151161001100010001000210000010002021321193956947319504520534305038231054562858010001635113273144041000200010002926129182291982927629241
6300429301219000000000047242876000172384004100020041000100020001000500050002384902271229090292963104000100020002000200029153292961161001100010001000210000010002021286291786988310614420579312138231751462837210001593913348144951000200010002936729308292412929229346
6300429293220000000100047642876300171274004100020041000100020001000500050002386802270929056291923104000100020002000200029122291241161001100010001000210000010002021290691456870320905620637309138251054512852010001625413284144821000200010002918729262292502926029329
6300429300219100001110045872878700171404004100020041000100020001000500050002385802273228994293223104000100020002000200029137292611161001100010001000210000010002021291790976950310204820677310738171348552835910001627613346144281000200010002933129302292082927529256
6300429321219100000000046312878600171544004100020041000100020001000500050002386002275629066292913104000100020002000200029144291991161001100010001000210000310002021290892326884314704420562311538241254522851310001631113044143111000200010002924229242292232932529297
6300429262219000000002045582876700171104004100020041000100020001000500050002385602275129055292303104000100020002000200029211291811161001100010001000210000010002021301991086923319804520572309438151244462837010001610013110143191000200010002930629331292182924529256
6300429309227000000000045472877800171904004100020041000100020001000500050002385602268929099293673104000100020002000200029106290941161001100010001000210000010002021280593846857309104720524308938241046772852810001628013309143631000200010002929429327293182934929233
6300429247219000001000046692877500172194004100020041000100020001000500050002384802272229067291963104000100020002000200029103292281161001100010001000210000010002021317291856908299435020586308038221257462836910001632013282144341000200010002936329341292322931029265

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.4s, v1.4s }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0054

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 24 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
702051401981087100000032503176001402371396028390121501003000710002401003023010040124059553315491612034501415501421801419371313930431312918101830567101213023461180200803036614025014022731502011009910040100100001000001001000321100041398501000310100321011211113973050000151013100002000050100140058140058140058140061140036
7020414005410850100000001001140045139603549010650100300031000040100300001000012369905331699161153051140030140060140063130736031311578010030200100003000060200200003000014006014005711502011009910040100100001000001001000001100020011000011111321011211113973050000131313100002000050100140063140061140061140061140060
7020414006010850000011001400014004513960225901065010030006100004010030000100001236903533147116115305014003014006014006213073603131157804453020010000300006020020000300001400601400601150201100991004010010000100000100100000010001000100000110032101121111397245000013130100002000050100140061140061140061140061140055
702041400561085011011000100014004513960225901065010030006100004010030000100001236990533147116114560014003014006014006013073603131158801003020010000300006020020000300001400601400571150201100991004010010000100000100100022110000101100001011132101121111397305000001213100002000050100140061140062140061140061140055
702041400541085000000000100014004513960225901065010030006100004010030000100001236990533147116114560014003014006014004113071703131185801003020010000300006020020000300001400611400601150201100991004010010000100000100100000110001101100001110032101121111397245000013150100002000050100140055140061140042140042140042
7020414005510860000000002001140020139604259010350100300001000040100300001000012370445332385161153050140036140060140060130730031311578010030200100003000060200200003000014003514005111502011009910040100100001000001001000221100010001000010011321011211113970550000101013100002000050100140055140055140058140065140061
70204140051108600101000013001140045139596259010350100300031000040100300001000012370445331471161145600140036140035140054130736031311608010030200100003000060200200003000014005414005511502011009910040100100001000001001000201100000001000000100321011211113973050000131013100002000050100140055140055140055140061140055
7020414005410850000000001300114002013959625901035010030003100004010030000100001239824533169916114560014003614005514005413073603131163801003020010000300006020020000300001400601400571150201100991004010010000100000100100022110001101100001110032101121111397305000001013100002000050100140243140430140239140246140333
702041403771087000000023398176001400451397031129013050137300161000340527302361011612443345336145161243190140258140392140253130792029131329807143044410120303666069020244304871402241403352150201100991004010010000100000100100001110001003100001011132101121111397245000013120100002000050100140061140061140061140061140055
7020414006110860110000001000140046139602259010650100300031000040100300001000012369995331471161153050140011140041140060130736031311578010030200100003000060200200003000014006014005211502011009910040100100001000001001000001100020164351000111100321011211013972450000141313100002000050100140055140055140055140055140061

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0061

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7002514006110860101001100150000014004601139653259001650010300061000040010300001000012460155333251161182330014004314006114006113075831312178001030020100003000060020200003000014006214005311500211091040010100001000001010002121000300210001120110314003873313972850000066100002000050010140054140057140057140057140054
700241400561086010011110020000014004800139663259001650010300061000040010300001000012460065333251161163090014002514004914005613076031312158001030020100003000060020200003000014004914006111500211091040010100001000001010001221000200210000021110314003873313972950000066100002000050010140062140050140050140050140062
7002514006110860100000000201001140034001396612590016500103000610000400103000010000124589853332511611502000140032140061140056130748313121580010300201000030000600202000030000140056140049115002110910400101000010000110100031210003001100011201003140028743139733500009100100002000050010140062140057140050140050140050
7002414004910860101010000300000140042001396612590016500103000610000400103000010000124593453335201611541500140025140056140056130748313121580010300201000030000600202000030000140063140061115002110910400101000010000010100032110002002310000011110314004874313973350000069100002000050010140050140062140062140062140062
7002414006110850100001000140000014003400139649259001650010300061000040010300001000012459345333251161154150014002914004914005613076031312088030930020100003000060020200003000014006114006311500211091040010100001000001010002221000312510001111110314002872213973350000666100002000050010140062140050140050140062140062
7002414006110850101000000150000014004610139651259001950010300091000040010300001000012459885333707161150200014003714006114004913074831312208001030020100003000060020200003000014005614006111500211091040010100001000011010002101000201210001121100314002872213972850000006100002000050010140050140062140062140144140057
700241400491085010210010030010014004410139649259001650010300091000040010300001000012459615333290161150200014002514015414006113075531312208001030020100003000060020200003000014004914006111500211091040010100001000001010002101000201210000121100314002872213973350000906100002000050010140062140054140054140062140063
700241400591086010000100020000014003711139661259001650010300061000040010300001000012458985333517161154150014003214014914006113074831312158001030020100003000060020200003000014004914004911500211091040010100001000001010002221000602210001121130314002874213973350000066100002000050010140057140050140062140062140054
700241400491085010011001020000014004611139661539001950010300061000040010300001000012460065333290161154150014003714014414006113074831312208001030020100003000060020200003000014006114005311500211091040010100001000001010002111000101110000021100314002873213973450000069100002000050010140062140057140063140062140062
70024140056108601002100001348800001401331013963883900345002330014100014001030120100791269583538095716278069001401751403461402381308272913140780909305071008030362607502008030365140334140335315002110910400101000010000010100341010008039707100051211003233051043413988150000606100002000050010140062140062140063140050140062

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.4s, v1.4s }, [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0063

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514005510860100010100014004813961353901035010030003100004010030000100001244902533149916116236140042140064140156130739031312078010030200100403000060200200003000014009914009311502011009901004010010000100000100100000110000010310000110321011211113973550000969100002000050100140065140064140064140064140064
70204140063108600100002500014005113956525901035010030003100004024130000100001244911533064716116236140039140063140063130739031311698010030200100003000060200200003000014017314006511502011009901004010010000100000100100030010000010310000110321011211113972550000999100002000050100140067140064140066140064140056
70204140066108600000004300014005113956525901035010030007100004010030000100001244911533064716116236140041140066140063130731031312078010030200100003000060200200003012314012114006611502011009901004010010000100000100100000110000000010000110321011061113973550000969100002000050100140064140064140064140064140067
70204140063108600000001300014004813956525901035010030003100014010030000100001244902533053016107932140039140063140063130739031312078010030200100003000060448200003000014015314006911502011009901004010010000100000100100000110000000010000110321011071113973550000999100002000050100140064140064140064140064140067
702041401121085000000013300014004813956525901035010030003100024010030000100001244902533053016116236140039140065140055130739031312098010030200100003000060200200803000014014614006511502011009901004010010000100000100100000110000000310000110321021211113973550000999100002000050100140064140065140064140064140072
7020414006310860000000101014004913959725901035010030006100004010030000100001237096533149916114980140039140063140063130754031312078010030200100003000060494200003000014013914007411502011009901004010010000100000100100000110000000010000110321011211113972550000969100002000050100140064140064140064140064140067
7020414006310860000000100014004813956525901205010030003100004010030000100001244902533053016116236140031140063140063130739731311698010030200100003000060200200003000014009914043411502011009901004010010000100000100100000110000000010000110321011211113972550000960100002000050100140064140065140056140064140056
702041400631085001014239617600140328139785829010350110300041000240243302381000012502245333267161224681401801403211403151307810451312628071330321101603035860690202443048214029114034241502011009901004010010000100000100100010010003200319410000110325611211113973650000900100002000050100140102140064140056140064140064
7020414006710860000000100014004813956525901035010030003100004010030000100001244902533053016116236140039140055140063130739031312078010030200100003000060200200003000014008514006311502011009901004010010000100000100100000110000020010000110321011211113973650000969100002000050100140056140067140056140064140067
7020414006610850000000000014004813956525901035010030003100004010030000100001237096533053016116236140031140063140071130739031312078010030200100003000060200200003000014013814007611502011009901004010010000100001100100000110000000310000010321011211113973650000969100002000050100140067140064140064140064140095

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0063

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140046108510001100140000140048013966325900165001030006100004001030000100001246024533378216116528014003701400631400631307303131223800103002010000300006002020000300001400631400601150021109104001010000100000101000311100011011000011110031405914413973250000101313100002000050010140045140064140064140064140064
700241400631085101000001000014004801396442590016500103000610000400103000010000124602453337821611652801400360140063140044130730313119980010300201000030000600202000030000140060140044115002110910400101000010000010100031110001001100001101003140591541397165000013100100002000050010140045140064140064140045140061
700241400441085111200002000014004901396632590016500103000610000400103000010000124602453337821611652801400390140044140063130762313121980010300201000030000600202000030000140047140060115002110910400101000010000110100041010002011100000111203140487441397375000001013100002000050010140045140045140045140064140061
70024140066108610010000140000140049013963025900165001030006100004001030000100001246789533366816182519014003601405121404551311613131199800103002010000306106050220000300001400631400441150021109104001010000100000101000111100020011000011010031404874413973550000131313100002000050010140064140065140064140064140061
7002414004510851011000011000014004801396442590013500103000610000400103000010000124602453343171611617701400390140063140063130762313122280010300201000030000600202000030000140064140061115002110910400101000010000010100011010001107100001111003140487551397355000013013100002000050010140064140064140045140065140045
70024140044108610010000200001400450139644259001650020300061000040010300001000012460245333782161161770140039014006014006313076831312228001030020100003012460020200003000014006314004411500211091040010100001000001010001111000102110000111100314058754139816500001300100002000050010140061140064140064140064140061
7002414005410861002010120000140048013966325900335001030003100004001030000100001246024533378216116294014002001400441401621307593131222800103014310000300006002020000300001400441400631150021109104001010000100001101000121100022111000011110031404875513973550000131313100002000050010140066140064140064140064140061
70024140063108510000000140000140048013966325900135001030006100004001030000100001246789533366816116177014003901400631400631307623131222800103002010000300006002020000300001400631400601150021109104001010000100000101000121100010111000011111031404914513973550010131313100002000050010140068140064140164140062140065
70024140063108510000000110000140146013962425900165001030006100004001030000100001246024533378216119423014003901400631400641307623131222800103002010040300006002020000301221401581400632150021109104001010000100000101000211100010011000011113031645107441399475002201013100002000050010140162140343140165140155140247
70024142167110310120143398176000140333013966325900135001030006100004001030000100001246024533172516116294014002301400451400601307623131224800103002010000300006002020000300001400441400631150021109104001010000100000101000121100020111000011111031404875513973650000131013100002000050010140064140045140064140045140061

Test 4: throughput

Count: 8

Code:

  ld2r { v0.4s, v1.4s }, [x6], x8
  ld2r { v0.4s, v1.4s }, [x6], x8
  ld2r { v0.4s, v1.4s }, [x6], x8
  ld2r { v0.4s, v1.4s }, [x6], x8
  ld2r { v0.4s, v1.4s }, [x6], x8
  ld2r { v0.4s, v1.4s }, [x6], x8
  ld2r { v0.4s, v1.4s }, [x6], x8
  ld2r { v0.4s, v1.4s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2402058004162110110000020010180026166425320140801001600408000080100160000800004407946375836298262000800228004180041499240349999320642200800001600002001600001600008004180041118020110099100100800008000011008000001580013001488001461101825112051675800380800009680000160000801008004280042800428004280042
240204800416201001100001900018002606602532014080100160176800008010016000080000440821537583769826353080022800418004149924034999932010020080000160000200160000160000800418018111802011009910010080000800000100800000148001000171800006010005112051655800381800009680000160000801008004280042800428004280181
2402048004162010010110000101800261600253201008010016004080000801001600008000044081723758361982635508002280041800414992403249999320100200800001600002001600001600008004180041118020110099100100800008000001008000001480110001848000061018051120316548003808000010680000160000801008004280042800428004280042
240204800416201001001003100018002610602532010080100160040800008010016000080000440820337583609825411080022800418004149924034999932010020080133160000200160000160000800418004111802011009910010080000800000100800000148001310178800136091805113051655800381800009980000160000801008018480042800428004280042
24020480041624100101000190101800261000253201408019616004080000801001600008000044082153758375982635308002280041800414992403499993201002008000016000020016000016000080041800412180201100991001008000080000110080000008001410188001301101805112041655800380800006680000160000801008004280042800428004280042
2402048004162010010110019010180026066025320100801001600408000080100160000800004408194375837198263370800228004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000001008000001480000001378001400141805112051635800380800006680000160000801008004280042800428004280042
2402048004162210010000031000180026066025320140801001600388000080100160000800004408199375835998263530800228004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000001008000001880000001878000961141805112051655800381800009680000160000801008004280042800428004280042
240204800416211101000001200018002610644253201388010016003280000801001600008000044081943758361982541108002280041800414992403499993201002008000016026920016000016000080041800411180201100991001008000080000010080000008001400145800006091805112031665800380800006680000160000801008004280042800428004280042
2402048004162010010000031000180026166025320140801001602168000080100160000800004408212375836198254110800228004180041499240349999320100200800001600002001600001600008004180181118020110099100100800008000001008000001480013001568000001101805112041635800381800006980000160000801008004280042800428004280042
2402048060362010010000012001180026166025320140801001600008000080100160000800004408215375837698261810800228004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000001008000001480010701558001001141805112051635800381800009680000160000801008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d0 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2400258004162010001003200080182166122532007280010160062800008001016000080000440769237583739826285080022800418004149947035002232001020800001600002016000016000080041800411180021109101080000800000108000772380026012980018612623600502000115118003808000010980000160000800108004280042800428004280042
24002480041620110100032000800261661225320072800101600648000080010160000800004407692375837498272180800228004180041499470350022320010208013316000020160000160000800418004111800211091010800008000001080008723800271011908001861252370050200011531800381800009980000160000800108004280042800428004280042
240024800416201001000310008002616616253200728010216006480000800101600008000044076923758377982721608002280041800414994703500223200102080000160000201600001600008004180041218002110910108000080000010800067238002500268001961262360050200011711800380800009980000160000800108004280042800428004280042
2400248004162110000003200080026166110253200728001016006480000800101600008000044076923758374982721608002280041800414994703500213200102080000160000201600001600008004180041118002110910108000080000010800076238002500258001961252371050200011511800381800009980000160000800108004280042800428004280042
240024800416201100000320018002616612253200728001016006280000800101600008000044076923758377982706108002280044800414994703500223200102080000160000201600001600008004180041118002110910108000080000010800067238003210298001861262360050200021511800381800009980000160000800108004280042800428004280042
2400248004162010000003100080168166157703208028019716024280090806801650588240644222023761944985374908013680179803334997520315010332054120802691602652016053816026580183803223180021109101080000800001108000713238002600268010761262363050390013711801440801969980000160000800108004281778800428004280182
24002480181621110001016388008016616612253200748010216006280000800101600008026344076923761830982617308002280041800414994703500223200102080000160000201600001600008004180041118002110910108000080000010800077238002601268001861262370050200021511800380800009980000160000800108004280042800428004280042
240024800416211101000320008002616612253200748001016006480000800101600008000044076863758373982721608002280041800414994703500223200102080000160000201600001600008004180041118002110910108000080000010800086238002600268001861262370050200011511800381800009980000160000800108004280042800428004280042
240024800416201000000420008002616610253200728001016006280000800101600008000044076923758377982721608002280041800414994703500223200102080000160000201600001600008004180041118002110910108000080000010800077238002611268002361252370050200011511800381800009980000160000800108004280042800428004280042
240024800416211100000310008002616612253200728001016006480000800101600008000044076763758377982721608002280041800414994703500223200102080000160000201600001600008004180041118002110910108000080000010800068238002501258001961262361150200011511800380800009980000160000800108004280042800428004280042