Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (single, post-index, D)

Test 1: uops

Code:

  ld2 { v0.d, v1.d }[1], [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.006

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.003

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | df | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
63005288282311241019100004000475728458010016812400210002002100010002000100050005001239521222708286692873331040001000200020004000286102866311610011000100001002221003001821000213100131419405687931601052200713159381019545612818710001549812709139311000200010002879628907288862884028813
630042884123002110170000050004683284090000167274008100020081000100020001000500050002385611227772860528742310400010002000200040002870328738116100110001000010012010040029710012101114913253941969423106855201873310380218545312820210001549012584136041000200010002885528811287772889928777
6300428897231121001510000100047312838000101676040061000200010001000200010005000500023932822680287232883631040001000200020004000287132872811610011000100001002231003001821003320110131259423691331261251201223226380817565312826210001574013004138681000200010002884628805287922895628973
63004288352310201022110003000472028414110016826400810002006100010002000100050005036240015227582869728809310400010002000200040002871528680116100110001000010022310020004100111011013197933769763204854201163197381624565822822310001577412407139861000200010002867128795287482890628830
63004287962311221019100001301047052849200001670940021000200610001000200010005000500223888222650287942870831040001000200020004000286402873111610011000100001000241001010971003242110134699414695732211060203283245380816585612821110001559012531138961000200010002873928841288362887328899
6300428912231122101600000100047112851001101673740021000200610001000200010005000500023820322741286572894731040001000200020004000287552881911610011000100001002031002002106100021001013014942469333134449201773158381819515112825210001542912550138761000200010002881328821289602889228829
6300428856232113002200000100046692852100001672640031000200610001000200010005000500023904022734286542880531040001000200020004000287162876611610011000100001002231003001971000323100132919435692931231254203083183381314515512836310001539712633138171000200010002883228730288082884428828
6300428841232120002010000300048192838500001667640061000200610001000200010005000500223900522721286402879531040001000200020004000287472877011610011000100001002201001000113100021011013178944069043140954201453221381318495222812310001576812564138561000200010002867128767288502881328695
63004289452301221019100004000466128328001016615400610002002100010002000100050005089238531622685287002903531040001000200020004000287552874911610011000100001002201000000141100031010013074954468983118854202223229381015595212820110001565912636138021000200010002879528830288952885228804
630042878623111910190000010004756284730000167634002100020081000100020001000500050012389412271028652288553104000100020002000400028835286421161001100010000100720100500189100221412013166930969433164946202423172380720544912819410001581112672137051000200010002870428848287542871928923

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.d, v1.d }[1], [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0057

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
702051401511085001000001880000140040001410342590103501003000310000401003000010049123730053309021611490814003014003514005613071131311578039930200100003000060200200005000014005414003511502011009910040100100001000001001000000100000031000010000321011211113978750000131313100002000050100140036140036140055140055140036
702041400351086000000001300000140039001395962590103501003000310000401003000010000123699053314711611456014003014005414003513073031311578010030200100003000060200200005000014005414005111502011009910040100100001000001001000001100000001000010100321011211113974450000131313100002000050100140055140055140055140388140055
70204140054108600000000100000140036001396412590103501003000310000401003000010000123699053314711611456014007914005114003513071131311578039630200100003000060200200965000014005414007111502011009910040100100001000001001000000100010011000111100321011211113971450011131013100002000050100140036140055140148140052140056
7020414005410850000000010000014013300139620259010050100300031000040100300001000012369905331509161139681401021400351400541307123131157801003020010000300006020020000500001400541400542150201100991004010010000100000100100032110003003255100011010732571136131418655002001013100002000050100140225140334140239140215140248
702041403351087001200223968800021403110013964511190151501333001610002404083024110078124872053335591611893514009914003814005113073031312608010030200100003000060200200005000014005414005411502011009910040100100001000001001000001100001001000010100321011211113972950000111313100002000050100140055140055140055140055140055
70204140054108600000000100000140039001395932590103501003000310000401003000010000123699053314711611456014003014005414005413073031311578010030200100003000060200200005000014005414005111502011009910040100100001000001001000001100000001000010100321011211113977250000101313100002000050100140053140055140036140052140055
702041400551085000000001300000140039001395962590103501003000310000401003000010000123699053313571611456014003014003614005413071131311388010030200100003000060200200005000014005414003511502011009910040100100001000001001000000100000001000010100321011211113988350000111010100002000050100140060140062140060140060140044
70204140059108600000000200000140039001395962590100501003000310000401003000010000123697253307461611456014003014006014005413073031311588010030200100003000060200200005000014005414005111502011009910040100100001000001001000001100000031000010100321011211113972850000131010100002000050100140055140055140055140055140055
7020414005410860000000010000014003700139593259010350100300031000040100300001000012369995330746161145601400271400541400541307303131138801003020010000300006020020000500001400551400511150201100991004010010000100000100100000110000000100001010032101121111397285000013100100002000050100140036140036140055140055140036
70204140054108600000000130000014003900139596259010350100300031000040100300001000012369905331471161145601400301400541400551307113131157801003020010000300006020020000500001400541400511150201100991004010010000100000100100000110000103100001010032101121111398065000001310100002000050100140055140055140055140055140055

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cd | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140051108501000001000001400391396352590013500103000310000400103000010000124594353335611611518110140030140054140054130753313121380010300201000030000600202000050000140051140051115002110910400101000010000010100000110000000100001100031400048744139726500001000100002000050010140055140036140055140055140055
700241400511085000000010000140036139635259001350010300001000040010300001000012459435332706161151810014003014005414005413075331312138001030020100003000060020200005000014003514005411500211091040010100001000001010000011000000010000010003140004873413973850000151313100002000050010140036140055140055140055140036
700241400351086000000010000140039139635259001350010300031000040010300001000012459435333327161136970014003014005414003513073431312138001030020100003000060020200005000014005414005211500211091040010100001000001010000011000010010002110003140003874413972650000131313100002000050010140058140036140056140055140037
700241400541085010000010000140040139654259001050010300031000040010300001000012459435333441161192981014003014005414005413075531312138001030020100003000060020200005000014011114004411500211091040010100001000001010000011000000610000110003140005873513972950000131013100002000050010140036140055140055140055140036
700241400541086010000019000014003913965525900105001030003100024001030000100001245961533344116115181001400301400541400541307343131213800103002010000300006002020000500001400541400351150021109104001010000100000101000001100001001000011000314000487121513972650000131313100002000050010140055140055140055140053140052
700241400541086000001013010014003913965725900255001030000100004001030000100001245943533344116115181101400301400351401501307533131213800103014310000300006002020000500001400541400541150021109104001010000100000101000001100000001000011000314000487551398015000001313100002000050010140055140055140055140058140055
700241400541086000000097010014003913965425900135001030007100004001030000100001245847533344116115181001400301400541400351307773131213800103002010000301216002020000500001400541400541150021109104001010000100000101000001100001001000010001314000487561397295000013013100002000050010140057140055140055140055140149
7002414005410860000000100001400201396352590013500103000710000400103000010000124594353334411611518100140030140054140054130753313121380010300201000030000602662000050000140054140054115002110910400101000010000010100020110000009100001100031400048744139726500000100100002000050010140052140055140037140036140135
7002414005110860000100100001401331396541349001350020301301002240292300001019712502895335263161151810014003014015914022913077732013232580917302661012130362605062016250805140251140323415002110910400101000010000010100052110001023215100021120031860051105413994350010131013100002000050010140150140414140240140242140307
700241400381086010000010000140040139654259001350010300031000040010300001000012459435333517161151811514003014005414005413075331312148001030020100003000060020200005000014005414005411500211091040010100001000001010000011000000010000010003140505875313972650000131313100002000050010140056140055140055140055140036

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.d, v1.d }[1], [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0075

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514007010860100000100014005313956525901035010030007100004025430120100001244902533157916116236114004201400631400661307393131207801003020010000300006020020000500001400631400661150201100991004010010000100000100100000110000003100001100321011211113973550000990100002000050100140064140064140066140064140064
7020414005510860000002541000140040139565259010350100300031000040100300001000012449025331917161162361140031014006314006313073931312078010030200100003000060200200005000014006314006711502011009910040100100001000011001000001100001503100001100321011211113973550000999100002000050100140064140067140064140057140064
7020414006310860000000130001400481395655490103502703016410001401003000010078124493853314991611623601400420140063140063130809313120780100302001004030000602002000050000140063140067115020110099100401001000010000110010000011000000310000100032101801113979950000999100002000050100140070140064140067140064140064
7020414006310850000000000014004813956510990103501003000010000401003000010000123709653314991611498001400310140056140063130739313120780100302001000030000602002000050000140063140063115020110099100401001000010000010010000011000600010000110032101801113980250000069100002000050100140065140056140064140064140064
70204140063108500000101300014004813956553901005010030000100004010030000100001237083533053016116236014003901401501400631307393131207801003020010000300006020020000500001400631400551150201100991004010010000100001100100000110000000100001100323611211113973550000969100002000050100140064140064140064140064140057
702041400631086100000010001400481395632590103501003000310000401003000010000124490253305301611623601400400143390141010130739313120780100303251000030000602002000050000140055140066115020110099100401001000010000110010000011000100010000010032101801213973550000969100002000050100140056140064140064140064140064
7020414006310860001010100014004813956525901035010030003100004010030000100001237088533053016114980014003901400661400551307393131207801003020010000300006020020000500001400631400551150202100991004010010000100001100100000010000000100001100321011211113973850000966100002000050100140064140064140064140064140056
702041400631086000000010001401331395652590103501003000310000401003000010000124490253320561611647001400450140063140066130739313123780100302001000030000602002000050000140055140063115020110099100401001000010000110010002011000010010000110032101801113973550000999100002000050100140064140119140107140064140064
70204140066108500010001300014005113956525901035011430003100004010030000100001237083533053016116236014004101400631400661308133131208804373047010000300006020020000500001400631400661150201100991004010010000100001100100000010000000100001100325711082113980050060999100002000050100140256142584142779142392140246
702051403311087000003213335801142406139702140901305014730011100044054430232101181250318533249716127126014026501402731403431308292913133780721305621008130241609402015850603140157140346315020110099100401001000010000110010000011000010010000110032101801113973550000909100002000050100140064140066140067140066140066

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0054

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 4d | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
700251400561086000000001000014004013965202590013500103000310000400103000010000124592553327451611500414002514005214005713075103131198800103002010000300006002020000500001400491400491150021109104001010000100000101000001100000001000010100000314068710913972450000000100002000050010140053140050140053140037140050
700241400521085000000000000014003713964902590013500103000310000400103000010000124592553333611611500414002814005214005213075103131208800103002010000300006002020000500001400521400491150021109104001010000100000101000001100000001000000000000314010877913972150000009100002000050010140053140053140053140053140053
7002414005210850000110010000140037139652025900135001030003100004001030000100001245925533324916113460140028140052140052130751031312118001030020100003000060020200005000014005214004911500211091040010100001000001010000011000000010000101000003140118710713972450000660100002000050010140050140053140053140053140037
70024140036108500000000100001400371396520259001350010300031000040010300001000012459255333553161197471400281400571400521307350313121180010300201000030000600202000050000140052140036115002110910400101000010000010100000110000000100001000000031408876913972450000669100002000050010140053140053140053140053140050
700241400491086000000001000014003713965202590013500103000310000400103000010000124592553333611611500414002814005214005313074803131198800103002010000300006002020000500001400521400361150021109104001010000100000101000001100000001000010100000314010878813972150000066100002000050010140050140050140050140050140192
700241400491085000000001000014003713965002590044500103002410014439663165010000124592553339961611628814007014004914005213075103413121880010300201000030000600202000050246140103140175415002110910400101000010000010100000110000000100001010001031409879913972550000669100002000050010140050140037140050140037140050
700241407331089000000016000014003713965202590013500103000310000400103000010000124589753327451611500414010314007914005013075103131211800103016110000300006002020000500001400521400491150021109104001010000100000101000021100023091000010100000314010879913972650000989100002000050010140053140054140053145094145300
70025145807114201011171735809000014003713965202590010500103000010000400103000010000124592553333611611500414002814004914010513079803131211800103002010000300006002020000500001400521400491150021109104001010000100000101000001100000091000010000000314078761013972550000999100002000050010140050140053140053140053140050
7002414005210490000110013000014003413965202590013500103000010000400103000010000124589753358161611535214002814010814005913074803131211800103002010000300006002020000500001400591400491150021109104001010000100000101000001100000031000010100000314068710713972450000996100002000050010140038140041140050140050140053
70024140052104900000000100101400371396490259001350010300031000040010300001000012458975333361161150041400281400491401261307510313121180010300201000030000600202000050000140050140049115002110910400101000010000010100000010000106100001000000031409879913972450000009100002000050010140053140037140053140098140050

Test 4: throughput

Count: 8

Code:

  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.d, v1.d }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.d, v1.d }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.d, v1.d }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.d, v1.d }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.d, v1.d }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.d, v1.d }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.d, v1.d }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.d, v1.d }[1], [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst simd alu (9a) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002058004262010101100310000270800271660426320161801001600778000080100160000800004446566375748499844650800238004280042298932032999932010020080000160000200160000320358800428004211240201100991001008000016000080000010080007723800250112580019612523711511011621800390800009980000320000801008004380043800438004380043
4002048004262010010000330000270800271660026320160801001601268000080100160000800004446566375735499845480800238004280042299202632999932010020080000160000200160000320000800428004211240201100991001008000016000080000010080007723800250103080018612823711511011621800390800009080000320000801008004380043800438004380043
4002048004262010010000310000100800271600426320112801001600908000080100160000800004446522375705999844080800238004280042299242432999932010020080000160000200160000320000800428004211240201100991001008000016000080000010080008823800250102680000002623601511021612800390800009980000320000801008004380178800438004380043
400204800426211010000031010047080027106015526320190801001602988000080100160000800004447142375731499831970800238004280042299202432999932010020080000160000200160000320000800428004211240201100991001008000016000080000010080007723800070002680018612623701511011612800390800000980000320000801008004380043800438004380043
400204800426201000000060100108002716604263201798010016007780000801001600008000044471423757677998431308002380042800422992424329999320100200800001600002001600003200008004280042112402011009910010080000160000800000100800087238000701126800186170701511011621800390800000980000320000801008004380043800438004380043
4002048004262111100000310001670800271600426320170801001600928000080100160000800004446076375707599845770800238004280042299042032999932010020080000160000200160000320000800428004211240201100991001008000016000080000010080008823800250012580019612623701511011611800391800000980000320000801008004380043800438004380043
40020480042620100001003100010080027006002632018780100160014800008010016000080000444652237574779982757080023800428004229924032999932010020080000160000200160000320000800428004211240201100991001008000016000080000010080007723800270112680018012523601511011621800391800009980000320000801008004380043800438004380043
40020480042620101000003100001618002710000263202128010016012780000801001600008000044464443757551998356008002380042800422990020329999320100200800001600002001600003200008004280042112402011009910010080000160000800001100800088238002600268001861260701511011621800391800009980000320000801008004380043800438004380043
400204800426201001000031000101800270660026320197801001600908000080100160000800004447132375767899845430800238004280042299202032999932010020080000160000200160000320000800428004211240201100991001008000016000080000010080006723800260102580018612623701511021621800391800009980000320000801008004380043800438004380043
40020480042621101000004300010080027166002632011480100160077800008010016000080000444713537577109984547080023800428004229904432999932010020080000160000200160000320000800428004211240201100991001008000016000080000010080007723800250002580018612523701511021622802651801669980000320000801008032780330801868033080325

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst simd alu (9a) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400025800426201110101100360003798002710000263201178001016010780000800101600008000044463343757460998105341580023800428004229947033002132001020800001600002016000032000080042800421124002110910108000016000080000010800067288003000033800230130277201505359291402717302513262580039180000215080000320000800108004380043800438004380043
400024800426201121110100370000780027166046932002480010160101800008001016000080000444666937576809984763515800238004280042299150330021320010208000016000020160166320000800428004211240021109101080000160000800000108000772880031002318010760302860015052532817027162620101329800391800001851880000320000800108004380043801858004380043
400024800426201110101100190003478002706600263201068001016010580000800951600008000044452373757180998446461580134801838004229908033002132001020800001600002016000032000080042800421124002110910108000016000080000010800067288003000048800246130277101505253291602817241910132780039180000185080000320000800108004380043800438004380043
400024800426211100111100700007801690000026320022800101600148008380010160000800004446649375772599848305158002380042800422992203300213200102080000160000201600003200008004280042112400211091010800001600008000001080008708000700136801070131287001505253291602717241910272680039180000185080000320000800108004380043800438004380043
4000248018662111021011013788102878002700008263201188001016008980000800101600008000044466403757680998350951580023800428004229947533002132067720800001600002016000032000080042800421124002110910108000016000080000010800066288003000033800006130286101505253291602726241910261480039180000213180000320000800108004380043800438004380043
4000248004262011101011001680004078016710600263200228001016010580084800101600008000044466693756673998273151580023800428004229947203300223200102080000160000201600003200008004280042112400211091010800001600008000001080008627800070003080023003007101505268361602516241910252680039180000198080000320000800108004380043800438004380043
4000248004262011011011003600012078002706600713200818001016001480083800101601668000044445583755698998155941580133801988018429860132263002232034320801671600002016033232000081642826431812400211091010800001600008000001080090102780198203122080107616286301508056291701525262011282780262180165234080000320000800108020480328801858032780473
400024801866451102101022271000216780332166454411932051080176160539801668009416016580166444134237570309985525515801348032480184298182172730184320341208016616033220160332320662801838032821240021109101080000160000800000108009382880007013308002361302860015053562917016162620112728800391800002331380000320000800108004380043800438004380043
40002480042620110110110019000387800271060026320024800101600998000080010160000800004446149375768099834275158002380042800422992920330021320010208000016000020160000320000800428004211240021109101080000160000800000108000780800310107800230172870015053562917015172620122615800391800002171880000320000800108004380043800438004380043
400024800426211111111000700007800270660426320024800101601168000080104160000800004446669375772499848284158002380042800422992720330021320010208000016000020160000320000800428004211240021109101080000160000800000108000662780030001780024613007101505356291701416262011262880039080000205080000320000800108004380043800438004380043