Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (single, post-index, B)

Test 1: uops

Code:

  ld2 { v0.b, v1.b }[1], [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.006

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch ret indir mispred nonspec (c8) | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6300529608230122002311100501045652876501117190400610002006100010002000100050005000239232300226752913629328310400010002000200040002921229220116100110001000010022210020221001213110013052944368143128115320682315537952552542851810001605913235145251000200010002940929408292712936329314
6300429514229126002111000100046872894100017409400210002002100010002000100050005002239405002266729213294613104008100020002000400029355292091161001100010000100224100121185100032416001299493246894305184620805323637992344482863710001621913145144121000200010002933329420295992932729477
630042958922812510170000040004548288110011726140061000200610001000200010005000500123859200022699290422947531040001000200020004000292722922011610011000100001001141002001100101311001305193406877314285320962341238092547462870910001607113159143611000200010002986429615296332957529612
6300429702239124001910000300045992910700017326400010002006100010002000100050005000239186002274129179294153104000100020002000400029206292511161001100010001100124100103881001324110013061932169083105114720760324337972451422848610001620413368144961000200010002941429313293372930629497
63004295332281161010000003000465128886000174114006100020061000100020001000500050002379550522716291052942331040001000200020004000292622926811610011000100001002041002022100120010001294192406833305664720803323038092550422830910001628613261143771000200010002950729474293832948829459
63004294312281190021011003000462228894000172194002100020061000100020001000500050002397200022768293172942531040001000200020004000292312925011610011000100001002241001001100032411001312792646979313285320657320837952948482854910001602413036146051000200010002936329373294632923529369
6300429446228120102301100300046752877000117289400610002006100010002000100050005000238956002275029098294193104000100020002000400029296291481161001100010000100104100300257100021011001313092426879309974420684324138022849532850010051742714035144021000200010002947929384293722933429455
630042942322811610200000050004608288120001732640001000200610001000200010005000500123902110022752291882941031040001000200020004000292262917911610011000100001002101001101100001211001307293066912309494820671322638091948542857710001639513352145621000200010002948829352293692939129349
630042934122812700240000050004575289451101742840061000200210001000200010005000500323864000226902921829458310400010002000200040002934829254116100110001000010010410031225710010101008512774930668853141104420735325738041847452859210001651413621147131000200010002943729333293072937429460
63004295112321220019000001590004552288230111720540021000200610001000200010005000500023897500227462915029273310400010002000200040002924329272116100110001000010022410001101000310110013136924768703056114020736324038072748492852110001634313441146141000200010002934929337294702944629368

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.b, v1.b }[1], [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0056

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 4c | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514005310861001010000470011400380139598259010650100300061000040100300001000012370085331584161148120140029140053140056130807313115980100302001000030000602002009850000140041140041115020110099010040100100001000010010001111000110110000111110321011211113971750000690100002000050100140057140054140042140051140042
70204140035108510000000001100014004101395982590103501003000610000401003000010000123690353300421611572601400261400561400561307273131159801003020010000300006020020000500001400561400413615020110099010040100100001000010010001111000100110001111110321011211113972350009999100002000050100140057140045140152140042140042
7020414005610851011000000530001401374139649259010650108300061000040100300001000012370085330042161141091140029140117140058130736313116280400302001000030000602002000050000140047140047115020110099010040100100001000010010001111000201221000111110032101801113971350000960100002000050100140057140057140057140144140057
7020414005610861011000100100014013001397502590103501003000310000401003000010000123700853315471610682201400321400531400561307423131159803993020010000300006020020000500001400561400471150201100990100401001000010000100100021110001001610000111100321011211113972650000906100002000050100140054140057140057140157140054
702041400561085001101000020001401240139612259010350100300031000040100300001000012370085331625161204410140032140050140056130751313115980100302001000030000602002000050000140053140053115020110099410040100100001000010010002211000201110000111120323511211113972950000669100002000050100140057140057140057140057140054
7020414004111251010010010100014004201395522590106501003000610000401003023410000123700853338931611572601400321401541400501308333131138801003020010000300006020020000500001400561400531150201100990100401001000010000100100022110001001100001111003236112114139723500000010100002000050100140057140057140059140057140042
702041400531086101200100020001400390139615259010650100300061000040100300001000012370085331667161148120140032140112140074130757313115980100302001000030000602002000050000140056140056115020110099010040100100001000010010002201000210110000011100321011211313972550000966100002000050100140057140052140051140149140054
70204140053108510000000003980001400360139598259010650100300061000040100300001000012398245331547161196840140032140056140056130718313115980100302001000030000602002000050000140056140053115020110099010040100100001000010010001111000400110000111101321001211113970550010969100002000050100140409140152140143140048140057
70204140145108711100200223988800140331013974982901375014830011100044052530357100801252423533668116119974014021914015614034113083836413256788517304451004030358606962016250402140323140243415020110099010040100100001000010010005301000700644110003111000321011211113972650000969100002000050100140057140057140054140042140042
702041400571086101001000020001400410139595259010650100300061000040100300001000012370085331547161157260140032140056140056130746313115980100302001000030000602002000050000140050140053115020110099010040100100001000010010001111000110110000111000321011211113971350000999100002000050100140058140042140042140057140057

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140047112400100000016000014003513965025900135001030003100024001030000100001245916533317316114779101400261401341400821307463131209800103002010000300006002020000500001400501400471150021109104001010000100001101000001100000001000011031403873213972250021969100002000050010140036140048140051140036140039
700241400501125000000000130100140036139650259001350010300031000040010300001000012459075332706161143990014002314010714008913074931312098001030020100003000060020200005000014005014004711500211091040010100001000001010000011000000010000010314048744139722500009119100002000050010140036140051140051140051140051
70024140047112400000000013000014003513965025900135001030003100014001030000100001245907533501316114779001400261400471401601307493131206803093002010000300006026620000500001400501400501150021109104001010000100000101000001100000032251000011031643873313978350000099100002000050010140051140051140052140051140051
7002414005111250000000000001014003513965325900135001030003100004001030236100791245907533328516119238001400241402041400541307813131194800103002010000300006026020000500001400501401311150021109104001010000100000101000001100000031000011031403872213972250000909100002000050010140051140051140037140036140238
70024140052112500000001010000140032139650259001350010300031000140161300001000012458985333096161147790014002314013114005613073431312068001030020100003000060020200005000014005014003511500211091040010100001000001010000011000020121000011031402873313972250000666100002000050010140142140149140054140140140053
7002414014111250000011001000014013413965354900135001030003100004001030000100391245907533328516114779001400261401231400531307513131215800103002010000300006002020000500001400511400511150021109104001010000100000101000001100001031000111031403873313972350000941696100002000050010140051140098140145140054140051
700241400501125000000010112000014003513964725900135001030003100004001030000100001245879533328516114779001400231401881401461307343131209800103002010000300006002020000500001400501400471150021109104001010000100000101000001100000001000011031403872213971950000969100002000050010140051140051140148140051140049
70024140047112400000000013000014003313965025900135002030003100004001030000100001245925533569416114779001401051401511400401307493131209800103002010048301246002020000500001400471400471150021109104001010000100000101000001100001001000011031402873213972250000969100002000050010140051140052140152140052140051
7002414005011250000011011000014003313965125900135001030003100004001030000100001245906533332716114779001400111401001401071307513131212803043002010000300006002020000500001400511400471150021109104001010000100000101000001100001063501000001031403873213972250000709100002000050010140051140051140036140051140036
7002414010611250000000001000014003513965025900135002130000100004001030000100001248314533317316114779001400111401401400501307493131208800103002010041300006002020082500001400501400501150021109104001010000100000101000001100002091000011231863873414193050031969100002000050010140245140329140232140328140428

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.b, v1.b }[1], [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0066

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514006311251101000088001400601395972590103501003000310000401183002710010123138453327631611883801400501400551400751308308131270801553022710010300276025420020500451400751400551150201100991004010010000100001100100000110000009100001100321011211113974550000131010100002000050100140076140066140079140066140066
702041400751086000010010001400601396182590100501003000310000401003000010000123720653322621611731201400311400751400751307523131178801003020010000300006020020000500001401771400651150201100991004010010000100000100100000010000100100001100321011211113974550000101013100002000050100140066140076140076140076140076
702041400751086000000013000140040139617259010350100300031000040100300001000012371795332262161161230140031140075140075130751313117880100302001000030000602002000050000140075140055115020110099100401001000010000010010000011000010010000110032101121111397455000013013100002000050100140076140076140076140076140056
702041400751125000000070001400501396172590103501003000310000401003000010000124592453314991611648001400311400751400751307313131178801003020010000300006020020000500001400751400751150201100991004010010000100000100100000110000100100001000321011211113974550000131010100002000050100140076140066140076140076140076
7020414007511250000000130001400601396172590103501003000310000401003000010000123717953322621611612301400311400751400751307513131178801003020010000300006020020000500001400751400651150201100991004010010000100000100100000110000003100001100321011211113973550000131413100002000050100140076140080140066140076140066
7020414007511250000000130001400601396172590103501003000310000401003000010000123717953315771611612301400511400751400651307313131178801003020010000300006020020000500001400751400651150201100991004010010000100000100100000110000003100000100321011211113974550000131013100002000050100140076140076140075140076140076
7020414005511250000000120001400601395972590103501003000310000401003000010000123717953322621611727101400511400651400751307513131178801003020010000300006020020000500001400651400671150201100991004010010000100001100100000110000001810035100032101121111397455000013130100002000050100140076140066140076140078140076
70204140075112500000001688101400601396204390103501003000010000402433000010000123721453322621611612301401171400761400751307513131178801003020010000300006020020000500001400751400751150201100991004010010000100000100100002110000000100001100321011211113972550000131313100002000050100140076140076140076140076140076
70204140065112501000004000140069139617259010350100300001000040100300001000012370885333777161161230140041140157140055130751313117880100302001000030000604422000050000140075140055115020110099100401001000010000010010000011000010310000100032101121111397465000001013100002000050100140076140076140076140076140076
702041400761125000000010001400601396172590103501003000310000401003000010000123708353322211611612301400311400751400651307593131169801003020010000300006020020000500001401641400721150201100991004010010000100000100100000110000000100000100321011211113974550000131013100002000050100140076140056140076140076140076

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0058

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
700261402531085111000322652641014042613974425900455002030007100014015130000100801245979533431716125796140175140058140158130757441312178001030143100403024360020201625020514014714005531500211091040010100001000001010004011000200232801000211031425874613970850000131113100002000050010140059140063140059140059140059
7002414005810861000010010001400431396582590010500103000310000400103000010000124597953335911611570814003414005814005813075731312178001030020100003000060020200005000014005514005511500211091040010100001000001010000011000000001000011031425873613973050000131114100002000050010140059140060140059140059140059
7002414005810861000000010001400431396582590013500103000010000400103000010000124597953335911611597414003414005814005813075831312178035730020100003000060020200005000014005814005811500211091040010100001000001010000011000000001000011031425876413970850000131113100002000050010140059140059140061140037140059
7002414005810851000100010001400431396362590013500103000310000400103000010000124595253336691611570814001214005814005813075731312178001030020100003000060020200005000014005814005511500211091040010100001000001010000011000000001000001031424874513972950000131713100002000050010140059140059140037140037140059
7002414005810851000000010001400431396582590013500103000310000400103000010000124597953327451611570814001214005814005913075431312178001030020100003000060020200005000014005814005511500211091040010100001000001010000011000001031000011031425874413973050000131113100002000050010140059140059140056140062140059
7002414005810851000000060101400431396582590013500103000310000400103000010000124597953335911611594214003414005814005813075731312178001030020100003000060020200005000014005814005811500211091040010100001000001010000011000000001000010031425875613970850000131313100002000050010140059140061140059140059140059
7002414005910861000000010001400431396582590010500103000010000400103000010000124597953335911611570814003414005814005813075731312178001030020100003000060020200005000014005814005511500211091040010100001000001010000001000008001000011031425876513972750000111413100002000050010140059140059140059140059140059
7002414005810861000000010101400431396382590013500103000310000400103000010000124585653335911611346014001214003614005813075731312178001030020100003000060020200005000014005814005711500211091040010100001000001010000011000000001000011031424873413973050000131313100002000050010140154140136140153140132140292
7002414034110861001003240526400142392139811109900435003030012100064029230236100781255247533885516126496140318140251140340130832431313658090730143101213035660508202405060414023014033321500211091040010100001000001010000011000000091000011031426874413972750000131113100002000050010140059140059140059140037140059
700241400601085100011001000140043139658259001350010300031000040010300001000012458565332745161157081400341400581400581307593131217800103002010000300006002020000500001400581400581150021109104001010000100000101000001100000000100021003142487461397305000013016100002000050010140059140037140037140059140038

Test 4: throughput

Count: 8

Code:

  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.b, v1.b }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.b, v1.b }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.b, v1.b }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.b, v1.b }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.b, v1.b }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.b, v1.b }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.b, v1.b }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.b, v1.b }[1], [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst simd alu (9a) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002058004262101011002200008002716604263201488010016000080000801001600008000044471333757696998359100800230800428004229924032999932010020080000160000200160000320000800428004211240201100991001008000016000080000010080000020800170101780016611523015110031633800391800000080000320000801008004380043800438004380043
400204800426200001100350002280027166002632014280100160045800008010016000080000444714237576979984206008002308004280042299241333000032010020080000160000200160000320000800428004211240201100991001008000016000080000010080000021800180000800176117001511003163380039080000131080000320000801008004380043800438004380043
400204800426200001100220009980027166002632016180100160045800008010016000080000444712637576769983185008002308004280042299240329999320100200800001600002001600003200008004280042112402011009910010080000160000800000100800000198001700020800160115210151100316338003918000010080000320000801008004380043800438004380043
400204800426200001100220003800271600426320145801001602558000080100160000800004447142375768099831280080023080042800422992419329999320100200800001600002001600003200008004280042112402011009910010080000160000800000100800000238001700017800176102101511003163380039180000131380000320000801008004380043800438004380043
4002048004262000010003400008002716600263201618010016010280000801001600008000044471423757692998422100800230800428004229924027299993201002008000016000020016000032000080042801861124020110099100100800001600008000001008000001980017000178001660152101511003163380039080000131080000320000801008004380043800438004380177
40020480042621000100000100800271660026320165801001600628000080100160000800004446269375769799836320080023080042801842990503299993204412008000016000020016000032000080042800421124020110099100100800001600008000001008000000800170101480015610214151100316338003918000001080000320000801008004380043800438004380043
4002048004262000011002200038002716600713201458010016000080000801001600008000044471423757669998318505800230800428004229924032999932010020080000160000200160166320000800428004211240201100991001008000016000080000010080000019800170000800170102101511003163380039180000131080000320000801008004380043801858004380043
400204800426200001000220001680169060002632010080100160045800008010016000080083444661637576769983574158002308004280042299240329999320100200800001600002001600003200008004280042112402011009910010080000160000800000100800000198001700017800166102101511003163380039080000131080000320000801008004380043800438004380043
4002048004262000011002500008002716605263201478010016000080000801001600008000044471423757676998416000800230800428004229924123329999320100200800001600002001600003200008004280042112402011009910010080000160000800000100800830198001700020800166102101511003253380159080000131380000320000801008004380043801878004380043
40020480183620000111115488001880170166044263205608018416046580000801001600008008444471423757479998095100801330803288018329868224329313863270822028141116277120016149432033280042801842124020110099100100800001600008000001008016921980100002119680182600021513604233380039180166131080000320000801008004380043800438004380043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 37 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst simd alu (9a) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400025801916212010110213217616612800270660442632074880509163401814118142416281381329441215737679819981601121080133080187803282989022027301843203432080166160166201605003203308018380467212400211091010800001600008000001080083019802670023628008361002121511863391944284263010444480152180084249080000320000800108018580184804738018680325
40002480478621101100224188801971480027166042632007180010160060800008001016000080000444666937576959982800111580023080042800422994703300213200102080000160000201600003200008004280042112400211091010800001600008000011080000008001700080017610000150736343264717332311484680039080000247080000320000800108004380043800438004380043
400024800426210000000012001610800271660026320052800101600448000080010160000800004446669375769799836881115800230800428004229947033002132001020800001600002016000032000080042800421124002110910108000016000080000110800000198018400080000011521001506063371847172724114628800390800002461080000320000800108004380043800438004380043
40002480042621101000002200088002700600263200778001016000080000800101600008000044466513757703998329111158002308004280042299470330022320010208000016000020160000320000800428004211240021109101080000160000800001108000001980016002080016610000150586338194617282512454480039080000302080000320000800108004380043800438004380043
40002480042620101000002100088002716600263200108001016006080000800101600008000044466523757676998398391580023080042800422994703300213200102080000160000201600003200008004280042112400211091010800001600008000001080000008001500080000611521001506263361843172824114745800390800002361380000320000800108004380043800438004380043
40002480042620100000002300010800271060426320075800101600478000080010160000800004446669375767699832901015800230800428004229947033002132001020800001600002016000032000080042800421124002110910108000016000080000010800000198001700168000061152100150626337194517282411432580039080000230080000320000800108004380043800438004380043
400024800426210010000000010980027166042632007780010160045800008001016000080000444666937576769982624815800230800428004229947033002132001020800001600002016000032000080042800421124002110910108000016000080000010800000198001500268001961152100150657036194617292511444480039080000252080000320000800108004380043800438004380043
400024800426201000000023000108002716600263200698001016005380000800101600008000044466693757695998391810158002308004280042299470330021320010208000016000020160000320000800428004211240021109101080000160000800000108000002180016001780000611521001506260442445172724142343800390800002341080000320000800108004380043800438004380043
400024800426200010000000001480027166002632005480010160048800008001016000080000444666937577039982662121580023080042800422994703300213200102080000160000201600003200008004280042112400211091010800001600008000001080000008001700168000060152100150606340214917282612264880039080000255080000320000800108004380043800438004380043
40002480042621100000002300098002716600263200108001016000080000800101600008000044466693757676998400312158002308018380042299470330021320010208000016000020160000320000800428004211240021109101080000160000800000108000002180017001780017601719001506263361943172624114642800391800002301080000320000800108004380043800438004380043