Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, post-index, 2 regs, 4S)

Test 1: uops

Code:

  ld1 { v0.4s, v1.4s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f18191e1f2223243a3f464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)5f696d6emap rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst simd load (98)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c9cfd0d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
62005288282231241129011002010048012840100238243000100020001000200050001000071605728302286453103000200030002862028801116100110001000020033020030012200000242101320694676966321017682011431833815186664281291000156681304714390200010002865128820287422871028748
62004287652221240129000006000046682844400236763000100020001000200050001000061605828287288263103000200030002860128612116100110001000020033420040002200060242001338895276952313113782020931323818106568282811000156661294114458200010002869428788287942883528791
62004288042231240128100003000048042841500237223000100020001000200050001000051606828251288363103000200030002866128488116100110001000020033420030012200020402201331593446972318712682015832153817146061281741000153691269614155200010002868428834287482870528767
6200428744223029112610000400004866283820023641300010002000100020005000100004160512827128730310300020003000286352858811610011000100002003202003001220024026200132609588695131799752007731533816156360281721000154901314414240200010002874428826287002870128672
62004287592231310121000003000047862837800236193000100020001000200050001000031607028286287163103000200030002870428597116100110001000020034420020002200000202101322995046936314418642010132013815156867281471000152031310714228200010002885328689289292881028759
62004287802241231128100003000048012837400236753000100020001000200050001000031604328260287823103000200030002859628725116100110001000020032020030002200000202001313695466974313516672021531933813186066282181000154171288114140200010002884928762288592866828694
62004288662231281127000003010047822838700235893000100020001000200050001000071605428292287993103000200030002875328744116100110001000020023420020012200040242101317696056971316511682013131373818146759281241000153721303714530200010002880628742287842878728731
6200428676222120102210000107000047852841300237113000100020001000200050001000031608028211287953103000200030002860428644116100110001000020023420040012200040242101323595506910318613702010431753814166266282761000152821278414030200010002872128863287472882028762
62004287482221261133000002000048192843600236333000100020001000200050001000071607628260287613103000200030002853928658116100110001000020033020030012200000202001316896236974316513692015331253810156463282151000153191289914339200010002870228726286692868328762
62004288112231200127100008010049002850500236323000100020001000200050001000031607328268287333103000200030002863328632116100110001000020023420030012200000242101334096286931323312752019431983816146867281621000155681285514316200010002864828762287902881128740

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld1 { v0.4s, v1.4s }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0057

retire uop (01)cycle (02)03mmu table walk data (08)l2 tlb miss data (0b)0e0f18191e1f2223243a3f4d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5l1d cache miss ld nonspec (bf)c2branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
602051200659320000002010001200409876710974425801035010210001200004010010000200001385158257335323465654012003112005512005511214331125197010030200200001000060200300001000012005112003811502011009910040100100001000001002000222200020152000022000321011611119828500021414132000050100120036120056120056120138120036
6020412003593101000014000011200409890510974525801035010210001200004010010000200001385475057327623465626012003212014612005511214331125137010030200200001003260200300001000012005512005211502011009910040100100001000001002000402200000002000022000321011611119828500021413132000050100120056120056120056120036120056
60204120055931000000200001120037987691097442580103501021000420000401001000020000138576665733628346524201200311200551200551121443112565701003020020000100006020030000100001200551200551150201100991004010010000100000100200000220000000200002200032101161111982850013010142000050100120056120056120057120059120036
602041200559310000002000011200419876710974425801035010210001200004010010000200001385532957339163465626012003312005512005511214531125137010030295200001000060200300001000012005512005511502011009910040100100001000001002000002200001002001222000321011611119828500021414132000050100120056120056120056120056120056
602041200559310000002000011200409890510974425801005010010001200004010010000200001385171157337243465626012003112005512005511214331125137010030200200001000060200300001000012014612005211502011009910040100100001000001002000002200002002000022000321011611119829500021410162000050100120036120057120056120036120036
602041200579300000002000011200409876710974425801035010010001200004010010000200001385941457341563465798012008912005112005511214331125137010030200200001000060200300001000012005512005111502011009910040100100001000001002000002200001002000022000321011611119833500021010132000050100120036120038120036120059120056
602041200359310000002000001200439880810974425801035010210001200004010010000200001385782857347803465771012003112005512013811213931125137010030200200001003160200300001000012005512005311502011009910040100100001000001002000002200000002000022000323211611119911500021410132000050100120056120056120056120056120056
60204120035931000000200001120040987671097442580100501131000020000401001000020000138536395734012346565401200111200351200521121433112513701003020020000100006020030000100001200551201391150201100991004010010000100000100200000220000000200000200032101161111980850002141402000050100120036120056120036120058120052
602041200559310000002000011200409890510974525801005010210001200004010010031200001386231757337243465742012003112005512005611214331125137010030200200001000060200300001000012005512003511502011009910040100100001000001002000000200000002001022000321011611119828500021414132000050100120056120036120061120052120056
602041200519310000422660000112013598645109789488011850122100042000440219100312005013868980573681834689460120113120241120316112257151126267029930394201901006460778300931009412021412022941502011009910040100100001000001002000622200100222952000622200325123212120066500331410132000050100120210120234120330120151120248

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f18191e1f22233a3f4d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2cfd2d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
60025120051930000000000020001120037965611097282580013500121000120000400101000020000138487875733436346523401200111200521200511121583112530700103002020000100006002030000100001200511200491150021109104001010000100000102000002200000100200002020031400192420131198105000010692000050010120052120036120116120175120145
6002412005193100001100001320000120020965051097452580013500121000120000400101000020000138487875733436346523401200271201441200511121643112530700103002020000100316002030000100311200511200471150021109104001010000100000102000002200000003200002020031400251719241199065000210092000050010120048120134120105120087120036
60024120051930000000001020001120036965301097452580013500121000120000400101000020000138487875733436346697901200271200511200511121623112526700103011620000100006002030000100331200351200511150021109104001010000100000102000002200000000200002020031610231723231219725000010092000050010120052120055120124120165120052
6002412005193100000000010000012003696504109741258001350012100012000040130100002000013848787573266634652340120027120051120051112162171125307021430020200001000060020300001000012005212003511500211091040010100001000001020000022000000002000020200314002126191911981050002121092000050010120149120048120111120055120129
6002412005193100000000002880011200369651910974548800135002310001200004001010000200001384878757334363465234012006612005112005111215831125307001030020200001000060020300001000012023312004711500211091040010100001000001020000022000001002000000200314002117221511982650002101092000050010120052120228120052120127120052
60024120051931000000001060001120020965051097402580010500121000120004400101000020000138469115736386346535001200341200471200361121623112530700103002020000100006002030000100001200511201361150021109104001010000100000102000002200000002250200002020031400211724201198995000210692000050010120052120036120104120093120052
60024120035930000000000020001120036964761097303680031500121000120000400101000020000138483235733772346537701200271200361200521121623112514700103002020000100006002030000100001200511200472515002110910400101000010000010200000220000002620000202003140024172122119826500021010112000050010120052120052120116120085120141
6002412003593000000000002880011200369655810974146800105001210000200004001010000200001384691157334843464880012001112005112005111214631125307001030020200001000060020300001000012013712004711500211091040010100001000001020000022000001032000020000314002317202011982650002101052000050010120036120142120113120078120052
60024120035930010000001020002120036965311097412580010500101000120000400101000020000138545545733436346523401200271200511200511122053112530700103002020000100006002030000100001200351200471150021109104001010000100000102000002200040000200002020031400221721201198265000210692000050010120052120052120120120084120142
600241200519310000000000688002120042964851097484880016500141000220000400101000020000138494835733724346558201200331200581200571122093112536700103002020000100006002030096100001201461200571150021109104001010000100000102000320200030005200002222031400131724211198325000401002000050010120143120058120134120426120042

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld1 { v0.4s, v1.4s }, [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0055

retire uop (01)cycle (02)03mmu table walk data (08)l2 tlb miss data (0b)0e0f18191e1f233a3f4d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5l1d cache miss ld nonspec (bf)c2branch cond mispred nonspec (c5)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
602051200579310011002000120036989051097242580103501021000120000401001000020000138485835733628346722011200291200551200351121393112515701003029620000100006020030000100001200551200511150201100991004010010000100000100200000220000003200000200323011611119828500021010132000050100120097120056120056120056120056
60204120151930000000200112004298763109744258010350102100002000040223100002000013851155573343634656541120031120057120055112143311249370100302002000010000602003000010000120035120035115020110099100401001000010000010020000022000000020000220032101161111982450012014132000050100120052120036120056120052120036
6020412005593000000014001120040987671097972580103501021000120000401001000020000138509235733628346562611200271200551200511121803112513701003020020000100006020030093100001200551200511150201100991004010010000100000100200040220000003200002200321011611119828500021614132000050100120052120052120056120056120052
60204120035930000000200112004198767109744258010350102100012000040220100002000013850574573362834651561120027120055120055112123311257070100302002000010000602003000010000120058120051115020110099100401001000010000010020000022000010320000220032101161111982450002111002000050100120052120052120056120060120036
60204120035930010000140011200409876710974025801035010210001200004010010000200001385092357336283467259112003112005312005511214331125137010030294200001000060200300001000012005512005111502011009910040100100001000011002000002200000002000022003210116121198085000214002000050100120059120059120056120056120056
6020412227793300110020011200409876710974425801035010210001200004022310000200001384858357326663465655112003112005112005111214431125137010030200200001000060200300001000012005512005111502011009910040100100001000001002000002200000002000022003210116111198285001300132000050100120056120036120056120052120056
602041200579300000002001120040987681097442580103501131000120000401001000020000138485835733436346562611200271200571200551121433112513701003020020000100006020030000100001200351200552150201100991004010010000100000100200000220000200200002200321011611119828500021414132000050100120057120056120036120057120056
602041200559300000001200112003698905109724258010050100100012000040100100002000013856835573266634651561120011120055120056112143311251370100302962000010000602003000010000120055120054115020110099100401001000010000010020006022000000020000220032102161111980850002010132000050100120056120056120056120057120056
6020412005293100000014881012002098767109740258012150102100012000040100100002000013851483573367634656261120031120139120216112139311251570100302002000010000602003000010000120136120056115020110099100401001000010000010020000022000010020000220032102161111982850000140132000050100120052120148120056120056120056
602041200359301000100011120240986711098324580179501131001320004405831003020102138639975737984347078011201781201451206991131731411269370509302982025410031609683009310127120186120258315020110099100401001000010000010020008222000470446820006220032532331111997050036101092000050100120142120409120227120329120211

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0047

retire uop (01)cycle (02)03mmu table walk data (08)l2 tlb miss data (0b)0e0f18191e1f22233a3f4d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5l1d cache miss ld nonspec (bf)c2cfd2d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
600251200489300010001401001200329647610973625800135001210001200004001010000200001384832357332443465234120023120049120035112158311253270010300202006610000600203000010000120051120047115002110910400101000010000010200000220000003200002203140051744119822500006992000050010120048120048120053120048120050
60024120047930000100200001200369647610973625800135001210001200004001010000200001384866857332443465234120023120048120053112162311252670010300202000010000600203000010000120047120047115002110910400101000010000010200000220000000200000203140041744119822500026652000050010120048120048120052120052120052
600241200359310000002000012003496483109736258001350012100012000040010100002000013848323573343634652341200231200511200511121583112526700103002020000100006002030000100001200511200471150021109104001010000100000102000002200001002000002031400517441198265000266102000050010120053120048120036120052120048
600241200509310000002000012003296477109740258001350023100012000040010100002000013849761573324434652341200231200471200471121581011252170010300202000010000600203000010000120047120047115002110910400101000010000010200000220000100200002203140051744119822500026692000050010120042120052120048120052120036
60024120051931000000134000012003296349109736468001350012100012000040010100002000013848323573324434652341200231200471200471121593112526700103002020000100006002030000100001200471201371150021109104001010000100000102000002200000032000022031400417441198105000261052000050010120048120051120048120150120048
600241200479310000002000012002096326109724258001350012100012000040010100002000013849577573324434652921200251200471201341121583112526700103002020000100006007630000100001200471200471150021109104001010000100000102000022200000032000022031400317431198105000060102000050010120048120048120048120052120052
6002412005193100000012000012003296476109737258001350012100012000040010100002000013854355573324434652341200941200511200351121593112526700103002020000100006002030000100001200471200351150021109104001010000100000102000002200000002007822031400317451198225000210652000050010120048120052120048120052120048
60024120047931001000200001200369651210973625800135001210001200004001010000200001384832357357143465234120023120048120047112158311252670010301142000010000600203000010000120035120047115002110910400101000010000010200000220000000200002203140041744119823500026602000050010120048120048120048120048120048
600241201409300010002000012003696476109736258001350012100012000040129100002000013848323573353234653771200231200511200351121583112526700103002020000100006002030000100001200351200351150021109104001010000100000102000002200000032000022031400417441198265000010692000050010120048120048120052120048120048
6002412005193100000012500001201229647610972525800135001210001200004001010000200001384691157332443465234120023120035120047112158311252670010300202000010000600203000010000120125120047115002110910400101000010000010200000220000003200000203140052444119822500026652000050010120048120049120125120052120048

Test 4: throughput

Count: 8

Code:

  ld1 { v0.4s, v1.4s }, [x6], x8
  ld1 { v0.4s, v1.4s }, [x6], x8
  ld1 { v0.4s, v1.4s }, [x6], x8
  ld1 { v0.4s, v1.4s }, [x6], x8
  ld1 { v0.4s, v1.4s }, [x6], x8
  ld1 { v0.4s, v1.4s }, [x6], x8
  ld1 { v0.4s, v1.4s }, [x6], x8
  ld1 { v0.4s, v1.4s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f23243a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
16020580041620000000003000008002521212542524010080100160000801001600002006800367595408001580040800405995335999824010020016000020024000080040800401180201100990100100800008000001001600000421600230032160024612427005110216228003718000066160000801008004180041800418004280041
1602048004062000001100290000800262121296252401008010016000080100160000203346336693330800158004080040599533599982401002001600002002400008004080040118020110099010010080000800000100160000027160024000160032612400051103162380037180000106160000801008004280041800418004180041
16020480040621000010002900008002521212145252401008010016000080100160000204011036698920800158004080040599533599982401002001600002002400008004080040118020110099010010080000800000100160000027160023003216002661232700511021633800370800001010160000801008004180041800418004180041
16020480040620000000003000008002521212148252401008010016000080100160000201948236726520800158004180043599543599982401002001600002002400008004080042118020110099010010080000800000100160000042160031003616003261313700511021643800371800001410160000801008004180041800448004480043
16020480043620101110007000028002936612225240100801001600008010016000020129233666126080017800428004259955360001240100200160000200240000800428004311802011009901001008000080000010016001312431600510052160039615143131511031632800390800001313160000801008004680043800448004580043
16020480042620110100005800028002726614025240100801001600008010016000019863933666133080018800408004059956360004240100200160000200240000800428004211802011009901001008000080000010016001213421600520051160038615243120511031622800390800001314160000801008004580045800448004380043
16020480042620110000005810800280027366432524010080100160000801001600001996353366618008001780042800435995636000124010020016000020024000080042800421180201100990100100800008000001001600141243160051015216003961520130511021633800400800001313160000801008004480043800458004380044
1602048004362011010000870002800282662925240100801001600008010016000020129743670735080017800438004259956360000240100200160000200240000800438004611802011009901001008000080000010016001213431600520052160039615243120511021632800390800001313160000801008004380044800448004580043
1602048004562011110000790012800282669925240100801001600008010016000020129233662807080017800438004759955360001240100200160000200240000800428004211802011009901001008000080000010016001414421600521051160039615242131511021623800400800001313160000801008004380042800448004580045
1602048004362011100000580002800323663125240100801001600008010016000019968923656146080017800428004259955360000240100200160000200240000800428004311802011009901001008000080000010016001413431600520155160039615243131511021622800390800001313160000801008004480043800438004380044

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)0e18191e1f3a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6061696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss ld (a3)a5ld unit uop (a6)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)c2branch cond mispred nonspec (c5)cfd0d2l1i cache miss demand (d3)d5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
1600258015762100000042028002520122025240010800101600008001016000020245703669355108001508004180040599763600202400102016000020240000800408004011800211091010800008000001016000035016003810351630586132400050200001016108800370800001410160000800108004180041801598004180041
16002480040620000100440280025201216252400108001016000080010160000203245136693550080015080040800415997536002024001020160000202400008004080040118002110910108000080000010160000350160036003516003661363500502000081699800370800001410160000800108004180041800418004180041
16002480040621000000420280025012121625240010800101600008001016000020245703656164008001508012080040599753600202400102016000020240000800408004011800211091010800008000001016000040016003600321600366131400050200001016812800401800001410160000800108004180043800418004180041
1600248004062000002242008002721212192524001080072160000800101600002001884367268500800150800408004059975360020240010201600002024000080155800401180021109101080000800000101600003501600380034160036613240005020040101689800370800001414160000800108004180041800418004280041
1600248004062000000042008002521212162524001080010160000800101600002024570368098200800150800408004059976360020240010201600002024000080041800401180021109101080000800000101600003501600360032160036613200050200001016812800370800001414160000800108004180041800418004180041
1600248004062000000042028002521212172524001080010160000800101600002031163366935500800150800408004059976360020240261201600002024000080040800401180021109101080000800000101600003501600380039160036610400050200001016109800370800001414160000800108004380041800418004180041
1600248004062100000042028002521212162524001080010160000800101600002039153367266200800150801578004059975360020240010201600002024000080040800402180021109101080000800000101600003501600360001600320136400050200009161011800370800001414160000800108004180041800418004180041
1600248004062100000042028002521212162524001080010160000800101600002024570366935500800150800408004059975360020240010201600002024000080040800401180021109101080000800000101600003501600361036160000613140005020000915129800370800001410160000800108004180041800418004180041
1600248015762000000038008002521212112524001080010160000800101600002039665367266200800150800408004059975360022240258201600002024000080040800401180021109101080000800000101600003501600360036160036613840005020540925109800370800001410160000800108004180042800418004180041
1600248004062000000017602800252121216252400108001016000080010160000201122036593501080016080040800405997636002024001020160000202400008004080040118002110910108000080000010160000350160036008511600386132350050200008161010800370800001410160000800108004280041800418004180041