Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2R (post-index, 8B)

Test 1: uops

Code:

  ld2r { v0.8b, v1.8b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.004

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.006

retire uop (01)cycle (02)03l1i tlb fill (04)mmu table walk instruction (07)l2 tlb miss instruction (0a)0e0f191e22233a3f464951schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)5f696b6d6emap rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map simd uop (7e)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst simd load (98)inst ldst (9b)9dl1d tlb access (a0)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)acafb5bbl1d cache miss ld nonspec (bf)c9cfd0d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
63005293672283340003004682287890017200400610002000100010002000100050005000238760227210290962922931040001000200020002000290872876411610011000100001000210007410003030131219451696831190392064233263815631402838910001606213045143831000200010002933829178292142929329360
63004292242270100003004666287680017124400610002006100010002000100050005000238780227510290392922231040001000200020002000291422921211610011000100001000310001911000313174131319339686731481372072632813820935422843310001609913347142981000200010002932029155291992927529549
6300428664222123000000479928438001678640061000200810001000200010005000500223852022626028569291123104000100020002000200028621283731161001100010000100021000082100120001377910096721233860391950733073809632312788710001445512116129621000200010002836728352283672808428250
630042832321001000040050142805310161404006100020061000100020001000500050012386922227360282222836531040001000200020002000282462838411610011000100001000010006110002130135579993714333150341974132933811835382798710001409012209128891000200010002841728361285002840928281
6300428386212011000400513227976001613940061000200610001000200010005000500223854522774028299283973104000100020002000200028287282621161001100010000100021000071000212013974100837125335503419593323338061134352787610001414912040132191000200010002821627740283832846128555
630042816921100100020051612806200162854004100020041000100020001000500050002386615227910283072831731040001000200020002000282722834511610011000100001000210000410002020135789906707433610311944233223814537362785810001410611881131581000200010002834928452282622846828483
630042858421201000030050782796211161514004100020001000100020001000500050002384602270102801228324310400010002000200020002795528041116100110001000010002100004100000001401310448725634350381957634343820728352795910001484511981132481000200010002835928502282902817828386
63004284112110110000005068280700016330400010022004100010002000100050005005238588227650284362831631040001000200020002000283252832911610011000100001000210001910011104501323795597019317203320046320838161234412825610001565812698138081000200010002880328733290782882428265
63004283412200210002700486528039001627140041000200610001000200010005000500023864522809028095282773104000100020002000200028269283901161001100010000100021001031000000013790103087175334603419693323638131137402785410001451511869130001000200010002818828339284302815028284
63004282322140100000004997279660016154400410002004100010002000100050005000238641022738028127284343104000100020002000200028275282511161001100010000100021000031000202013844103197231336714019765333538081035352794810001464311835126681000200010002830928199281972830828279

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.8b, v1.8b }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0057

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f18191e1f2223243a3f4d51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
702051405421090120010043140000014196413967879901235010030010100014025930000100791237044533135016119601214024714035714025513079957131304810023044210122301246117420164303631403401402474150201100991004010010000100000100100081110031550706561000211113000328211112113988350030131015100002000050100140061140061140061140066140110
70204140057108510001000011000011400451396022590106501003000610000401003000010000123704453317381611530511400391400601400601307173131165801003020010000300006020020000300001400601400571150201100991004010010000100000100100011110001137141000001110000321011211113973050000131713100002000050100140061140061140061140061140097
702041400601086110001100100000140045139603259010350100300031000040100300001000012370445331699161157261140036140060140060130717313116380100302001000030000602002000030000140060140060115020110099100401001000010000010010001111000283211000001110000321011211113973050000101313100002000050100140042140061140061140061140104
702041400571086110010000200000140045139602259010650100300061000040100300001000012370445331699161153050140036140060140060130736313116380100302001000030000602002000030000140060140041115020110099100401001000010000010010001111000199341000011111000321011211113973050000131313100002000050100140042140061140061140062140073
7020414004110861100110002000001400451395992590106501003000610000401003000010000123707153316991611530501400361400571400601307363131185801003020010047300006020020000300001400601400601150201100991004010010000100000100100011110002133111000011110000321011211113973050000101313100002000050100140058140042140061140042140110
702041400411085100010000200000140046139599259010650100300061000040100300001000012370575331585161153050140036140060140062130736313116080100302001000030000602002000030000140060140061115020110099100401001000010000010010002111000139041000011110000321011211113973050000131313100002000050100140061140058140058140042140079
70204140041108610001000020010014004513960225901065010030003100004010030000100001237044533169916115726014001714006014006013073331311638010030200100003000060200200003000014006014006011502011009910040100100001000001001000111100017011000011110000321011211113973050000131013100002000050100140042140061140061140042140106
70204140060108611001010020000014004513960225901065010030006100004010030000100001237044533238516115305014003614006014005713073631311638010030200100003000060200200003000014006014006011502011009910040100100001000001001000121100025611100001111000032101121111397325000013130100002000050100140064140043140061140042140082
7020414006010861100000002000011402801397315490138501493001410003403953024110079125258753327641612056701416741422991420981312733013124180996305691012230368609322016230364140236140332315020110099100401001000010000010010004411000670126361000311110000325611211113973050000131313100002000050100140063140061140062140042140073
70204140060108611001000010000014004713961425901065010030006100004010030239100781240757533478116107675014003614006014006013073631311638010030200100003000060200200003000014005714006011502011009910040100100001000001001000131100021291410000111110003210180111398135002813100100002000050100140061140061140061140042140129

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0048

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f2223243a3f4d51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)acafb5l1d cache miss ld nonspec (bf)c2cfd0d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
70025140053108600101000130000014003213964725900135001030003100004001030468100001245910533321316114779140011140174140394130746313120680010300201000030000600202000030000140047140048115002110910400101000010000010100000010000013610000110314003874413971950000606100002000050010140048140048140036140036140048
70024140050108600000100100000140032139647259001350010300031000040010300001000012458795333173161147791400231400471400471307463131206800103002010000300006002020000300001400471400471150021109104001010000100001101000001100000044410000100314004873313971950000600100002000050010140048140052140036140036140048
700241400611085000000001880000140032139647259001350010300001000040010300001000012458795333173161133461400111400471400471307463131206800103002010000300006002020000300001400471400471150021109104001010000100001101000001100000014810000110314003874313971950000667100002000050010140048140048140036140048140048
70024140049108600001100100000140032139647259001350031300031000040010300001016012458795333173161148951400231400471400471307343131206800103002010000300006002020000300001400351400351150021109104001010000100001101000001100000023110000110314003873313971950000666100002000050010140048140048140048140048140048
7002414009010860000000010000014003213963525900135001030003100004001030000100001245847533317316114779140023140036140047130746313120680010300201000030000600202000030000140047140035115002110910400101000010000110100000110000012710000110314004943313972250000666100002000050010140048140036140048140048140048
7002414008510860000000060100014003513964725900105001030003100004015130472100001245879533278416114779140023140144140035130746313120680010300201000030000600202008030000140035140047115002110910400101000010000110100000110000013221100001103140031024413972550000666100002000050010140050140054140051140048140048
7002414018010850000010013300100140032139647259001350010300031000040010301181000012458795333173161147791400261400481400471307463131206800103016410000300006002020000300001401231400471150021109104001010000100001101000000100000017510000110314003874313971950000676100002000050010140048140049140048140048140048
700241400791085000000001880100140033139647259001350010300031000040010301171000012458885333173161133461400231400531400501307343131206800103002010000300006002020000300001400501401381150021109104001010000100000101000001100000016810000010314004874413970750000060100002000050010140036140048140048140036140048
700241400941086000000001880100140032139729259001350010300031000040010300001004012458885333173161148991400261400491400351307463131206800103002010000300006002020000300001400351401261150021109104001010000100000101000201100030037021000211031630387131414012950030066100002000050010140225140238140335140229140297
70024140211108800000032330124640001140317139733809004550040300121000240436303551011812490655338535161199161400111400351400351307463131194800103002010000300006002020000300001400471400471150021109104001010000100000101000001100000028210000100314004873413971950000660100002000050010140048140048140036140036140038

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.8b, v1.8b }, [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0063

retire uop (01)cycle (02)03l1i tlb fill (04)mmu table walk data (08)l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f18191e1f233a3f434d51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch ret (8f)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
702051400591086000000001300014006001396172590103501003000010000401003000010000123717953323761611684111400510140075140076130751313116980100302001000030000602002000030000140075140065115020110009910040100100001000001001000000100000103100001010000321011211113973550000131013100002000050100140076140076140076140076140076
702041400751086000000001300014006001396172590103501003000310000401003000010000123708353322621611612301400510140075140076130751313115880100303451000030000602002000030000140055140065115020110009910040100100001000001001000001100000000100001010000321011211113972550000131613100002000050100140066140076140079140077140076
70204140075108600000001000014006001396162590120501003000310000401003000010000123717953322621611612301400500140074140167130751313117880100303251000030000602002000030000140075140065115020110009910040100100001000001001000001100010000100001010000321011211113974550000131013100002000050100140076140076140078140076140076
70204140076108500100001401014006001396192590103501003000310000401003000010000123717953322621611612301400510140078140075130751313117880100302001000030000602002000030000140075140065115020110009910040100100001000001001000001100000103100001010000321011211213974550000131013100002000050100140076140076140066140066140076
70204140078108500000000100014006001396172590103501003000310000401003000010000123717953314991611612301400510140075140075130751313117880100302001000030000602002000030000140075140065115020110009910040100100001000001001000001100000100100001002000321011211113974550000131313100002000050100140076140076140076140066140066
70204140075108600100010900014006001396172590106501003000310000401003000010000123717953322621611612301400310140075140075130751313115880100302001000030000602002000030000140055140075115020110009910040100100001000011001000001100000203100001010000323311211113974550000131313100002000050100140076140079140076140158140066
70204140075108600000000101014014601396742590103501003000310000401003000010000123717953314991612201901400720140084140075130751313117880100302001000030000602002000030000140055140055115020110009910040100100001000001001000021100000103100001010000321011211113974550000131010100002000050100140056140076140076140076140056
7020414007510860000000040880014004001396172590103501003000310000401003000010039123717953322621611612301400510140075140075130753313118080100302001000030000602002000030121140075140065115020110009910040100100001000001001000001100000003195100001010000321011211113974550000101013100002000050100140076140076140076140076140076
702041404051087000100202640001402200139708108901345014130011100034038230235101171247582533237316126224014022601402461402281307744213129180413305651012230245609422016430364140357140334315020110009910040100100001000001001000221100052109640100031002000328111211113974550000131315100002000050100140076140076140056140076140076
702041400751085000000001300014006001395972590103501003000310000401003000010000123717953314991611612301400510140055140075130732313117880100302001000030000602002000030000140055140075115020110009910040100100001000001001000001100000000100000010000321011211113974450000131313100002000050100140076140076140076140076140076

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0058

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f18191e1f233f4d51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)696d6emap stall dispatch (70)int prf full (71)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)acafb5l1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)cfd0d2d5map dispatch bubble (d6)d9ddfetch restart (de)e0e7? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
7002514005310850000000010014002113965825900135001030003100004001030000100001245979533366716116060140034140058140059130757031312178001030020100003000060020200003000014006014005411500211091040010100001000001010000011000023225100001100031400038704813973005000001113100002000050010140150140059140059140037140055
7002414005810850000000010014004313965825900135001030003100004001030000100001245979533359116115517140034140058140058130757031312178001030020100003000060020200003000014003914005411500211091040010100001000001010000011000013100001100031400048707413973005000013010100002000050010140059140059140059140059140059
70024140058108500000000130014002113965825900135001030000100004001030000100001245979533359216115517140034140058140058130757031312178001030020100003013960020200003000014005814005811500211091040010100001000001010000011000010100001100031400058706713973005000013130100002000050010140059140055140059140059140059
7002414005810860000000013001400431396584590013500103000310000400103000010000124597953337091611551714003414005914006313075703131217800103002010000300006002020000300001400581400581150021109104001010000100000101000000100001310000010003140006102077139726050000131313100002000050010140059140059140059140059140059
700241400581085000000001600140044139658259001350010300031000040010300001000012459795333591161155171400361400581400581307570313121780010300201000030000600202000030000140060140058115002110910400101000010000010100000110000001000010000314000387054139730050000131313100002000050010140064140109140037140257140062
70024140058108500000000900140021139658259001350010300901003740010300001000012459795333592161155171400351400581400581307570313121780010300201000030000600202000030122140058140058115002110910400101000010000010100000110000031000011000314000487044139850050000131313100002000050010140038140059140055140059140059
700241400581086000000001600140043139658259001350010300031000040010301181000012459795333591161155171400341400581400361307350313127180010300201000030000600202000030000140061140054115002110910400101000010000110100000110000031000011000314000587077139730050000131413100002000050010140061140059140059140059140059
70024140057112400000000250014003913965825900135001030003100014001030000100001245988533359116115517140034140107140060130757031312178001030020100003000060262200003000014005814005811500211091040010100001000001010000011000020100001100031400078707513973005000013100100002000050010140059140059140059140059140060
7002414005810860000100013300140043139636259001350010300031000040151300001000012459795333591161155171400401400581400581307570313119580010300201000030000600202000030120140058140036115002110910400101000010000010100000110000032651000011000316300412008813979205002001013100002000050010140060140248140059140158140154
7002414015510870001003226526401402291408438090044500503000810004404333042210040125537753390341612545414018814025414044613082004313128480908302641016330240602602016230479140159140354415002110910400101000010000010100000110000101000001000314000587067139730050000101013100002000050010140059140059140060140059140059

Test 4: throughput

Count: 8

Code:

  ld2r { v0.8b, v1.8b }, [x6], x8
  ld2r { v0.8b, v1.8b }, [x6], x8
  ld2r { v0.8b, v1.8b }, [x6], x8
  ld2r { v0.8b, v1.8b }, [x6], x8
  ld2r { v0.8b, v1.8b }, [x6], x8
  ld2r { v0.8b, v1.8b }, [x6], x8
  ld2r { v0.8b, v1.8b }, [x6], x8
  ld2r { v0.8b, v1.8b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f18191e1f22243a3f4346494e4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)simd prf full (72)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)c2branch call indir mispred nonspec (ca)branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
24020580041621000000000150000800261660425320140801001600308000080100160000800004408215375837698260750800228004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000001008000001480014000528001460918000512811611800381800009680000160000801008004280042800428004280042
2402048004162100000000020000080026166002532014080100160032800008010016000080000440821537583759826337080022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800000148001301016800106100000511011611800381800009680000160000801008004280042800428004280042
2402048004162000000000015200008002616600253201408010016004080000801001600008000044082153758372982635508002280041800414992403499993201002008000016054220016026616000080321800411180201100991001008000080000010080177014801010001186801896010189005146134118025208162081380000160000801008032480182803228018280465
240204801806230000000002000008002610604468320142801001619908000080100160536806674408215376186898351511802338004180181499510325007932063120080136160000200160000160000803238004111802011009910010080000800000100800902188001410011998000960018200511011611800381800000680000160000801008004280042800428004280042
240204800416200000000002000008002616600253201408012516000080000801001600008000044082113758376982596718002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000010080000014800130001680013611018000511011611800380800000980000160000801008004280042800428004280042
2402048004162000000000026000080026166032532049380100160040800008010016000080000440821537583769826353180022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800001100800000148001401039800136110180005110116418003818000012680000160000801008004280042800428004280042
24020480041621000000000290000800261660025320140801001600008000080100160000800004408215375837698263531800228004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000001008000001480000000178001360918000511011611800380800009980000160000801008004280042800428004280042
24020480041620000000000190000800261060025320140801001600388000080100160000800004408215375837598259691800228004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000011008000000800130001680013610180005110116118003818000010980000160000801008004280042800428004280042
24020480041620001001000200000800261600325320138801001600408000080100160000800004408215375837298263371800228004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000001008000001480013000168001461918000511011611800380800009980000160000801008004280042800428004280042
24020480041621000000000000008002616600253201408010016003080000801001600008000044082153758376982596518002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000010080000018800140960148001461018000511011611800381800009980000160000801008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f2223243a3f4346494e4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)simd prf full (72)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2cdcfd0d5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
240025800416211100000044000008002616615253200728001016006280000800101600008000044076923758374982523508002280041800414994703500223200102080000160000201600001600008004180041118002110910108000080000010800089238002500128800196125237105020341665800380800009980000160000800108004280042802018035680042
2400248004162011000000430000080026000132532002480010160064800008001016000080000440769237583739825235080022800418004149947035002232001020800001600002016000016000080041800411180021109101080000800000108000660800270016800190125237005020931564800380800000980000160000800108004280042800428004280042
2400248004162111000000600000800261601225320074800101600628000080010160000800004407692375837398272140800228004180041499470350022320010208000016000020160000160000800418004111800211091010800008000001080008723800270012880019612607005020661553800381800009980000160000800108004280042800428004280042
2400248004162010000000600000800261661125320072800101600628000080010160000800004407692375837498272140800228004180041499470350022320010208000016000020160000160000800418004111800211091010800008000001080006723800070002480019616237105020631547800381800009080000160000800108004280042800428004280181
24002480041620110100006000008002616616253200748001016006480000800101600008000044076963758373982721608002280041800414994703500223200102080000160000201600001600008004180041118002110910108000080000010800078238002600025800196125236205038361546800381800009080000160000800108004280042800428004280042
240024800416221101000031010008002616612253200748010216006480000800101600008000044076763758373982611808002280181800414994703500223200102080000160000201600001600008004180041118002110910108000080000010800071008002600026800186126237105020661645800380800009980000160000800108004280042800428004280042
240024800416201000000033000008016716612253200748001016001480000800101602658000044076863758373982616708012880041800414994703500223200102080000160000201600001600008018180041118002110910108000080000010800077278002900133800226170700502205164480038180000131380000160000800108004280042800428004280042
2400248004162011010000490000080026060432532008280010160068800008001016000080000440751937583649826666080022800418004149947035002232001020800001600002016000016000080041800411180021109101080000800000108000772780030001780022613027710502006168680038080000131380000160000800108004280042800428004280042
2400248004162011000000360000080026166072532007880010160072800008001016000080000440761737583659826287080022800418004149947035002232001020801361600002016000016000080041800411180021109101080000800001108000882780029001308002261292771050240516648003808000013080000160000800108004280042800428004280042
240024800416201001000048000008002616643253200788001016007280481834971672138347044585563773785982522508002280041800414994722350021320010208000016000020160000160000800418004111800211091010800008000001080007727800310003380022613027710502006165580038080000131380000160000800108004280042800428004280042