Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, post-index, 2 regs, 2S)

Test 1: uops

Code:

  ld1 { v0.2s, v1.2s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | branch mispred nonspec (cb) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | df | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6200529676237111102000111047812913901124423200010001002100010005000500022001603628802295938292002200230032917629255216100110001000110024010050033421005024110013238936668903171056208603380380016626422887410001636813790149271000100010002965429571295982958029701
620042955223801120221122136884634292520002453520041001100010011000501050004001603729085294673102000200030002956929493116100110001000410022010020111000013110013168916269083116062209443411380913616132874710001647013322150021000100010002943029372293532930829353
620042940823501110110000404615287720002432520001000100010001000500050004001605128846294943102000200030002914029144116100110001000110032310021041000323110013143918669523148055208493191380014595842855810001653413858148601000100010002941029392293102936929474
62004295492380100000000019046562865900026466200010001000100010005000500320016070287752932531020022000300029221291941161001100010000100213100101476100031310432013188952469613192154208063364380714595642857810011628113731150651000100010002938429489294462942529452
6200429565238011102000101904644288290002431120001000100010001000500050006001605528674294083102000200030002935929388116100110001000010022310020211000010100013145933269503110059208873299381212646132871910001621013833150211000100010002949529754295622934629479
62004297662350122021000090104677292420002436620001000100010001000500052914001607128816294093102000200030002952929495116100110001000010021010020111000013100013220933370073071161207673193380918616222856410001653313619151631000100010002935029459292882937129407
620042925922601200110010404620289170002430320001000100010001000500050004001605428955294593302000200030002935329410116100110001000110012410022151001222110013231939069383151161206563154381418606142860310001595713773151621000100010002931729358293942940829345
620042939222701210100000404762290100112439620001000100010001000500050002001604828827294393102000200030002930629230116100110001000010022310020121001313110013066944369133168056207993227380419566632865610001622913319148541000100010002930229416293272939129323
620042940822801201210000504684289010102431820001000100010001000500050007001605428830293533102000200030002919029225116100110001000010052310010311001213110013070951969273215064206013241381020646132855910001597513683148591000100010002943529340293822936429408
620042938922701511410000404760287440112430720001000100010001000500050005001606728810293133102000200030002927429225216100110001000010022410030221001313110013371938469403184152205943301380823605732859910001611313525150251000100010002935929461293252940329337

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld1 { v0.2s, v1.2s }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60205120051931000101000100012004611971225701035011410001100004010010000100001062221454608345730440120030012005412005411202631124886010030200200001000060200300001000012005412005111502011009910040100100001000001001000001100001001000010100003212683361197575000213012100001000050100120055120055120055120036120055
6020412005493000001100110001202281198285907044450337100131000440384101201007910685104546963458043501202440120242120335112124251125396076330569201621012360684303661012212022012025031502011009910040100100001000011001002570100040051401000510100003235171441291203085006013130100001000050100120055120058120145120036120036
60204120057930000000000000112018511963925701035010010001100004010010000100001061914454608345746210120030012005412005411204431124456010030200200001000060200300001000012005112003511502011009910040100100001000011001000001100000031000010100003212683661197575000213130100001000050100120036120058120055120055120055
602041200549300000000001000120040119637257010350102100001000040100100001000010619144545969457462101200330120054120054112044311244560100302002000010000602003000010000120054120051115020110099100401001000010000010010000011000000310000101000032126836611975550002131312100001000050100120055120060120055120059120055
602041200549310000000001000120055119635257010050102100001000040100100791011810622214546083457462101200300120054120035112026311244560100302002000010000602003000010000120051120051115020110099100401001000010000010010000011000000010000100000032123836611975750002131312100001000050100120055120036120055120052120036
602041200519310000000001000120039119712257010350102100011000040100100001000010622214546083457473801200110120035120051112045311244260100302002000010000602003000010000120051120052115020110099100401001000010000010010000011000000310000101001032126836611975750002101312100001000050100120055120036120055120055120052
602041200549310000000001301012004411963725701035010210001100004010010000100001062221454429345746210120030012005412005411204631124456010030200200001000060200300001000012003512005411502011009910040100100001000001001000001100001001000010000003212683661197485000211100100001000050100120055120036120056120055120055
6020412003593100000000018000120039119637257010350102100011000040100100001000010622214546083457462101200300120054120054112044311244560100302002000010000602003000010000120054120051115020110099100401001000010000110010000011000010010000001000032123836311975450002131012100001000050100120036120052120055120055120055
6020412005493100000100013000120039119634257010050102100001000040100100001000010622214546083457462101200300120054120054112044311244460100302002000010000602003000010000120054120051115020110099100401001000010000010010000001000200310000001000032126836611975750002101012100001000050100120055120055120055120055120052
60204120054931000001100001012004611963725701035010210001100004024210000100001062221454608345746210120011012005412005411204631124456010030200200001000060200300001000012003512005111502011009910040100100001000001001000001100001001000010100003212683361197485000213169100001000050100120056120055120055120055120055

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60025120051930010000010000120039119708257001350012100011000040010100001000010647414548728457797901200271200541200541120783112469600103002020000100006002030000100001200511200511150021109104001010000100000101000001100001027931000011031409787101197635000013100100001000050010120052120036120060120037120055
600241200549310000000130000120039119708257001350022100011000040010100001000010622644548728457760501200301200611200511120763112453600103002020000100006002030000100001200541200542150021109104001010000100000101000001100002001000011031409956911976350002131412100001000050010120036120146120055120036120055
600241200549310000000130000120036119708507001350022100021000040296100001007610651274548728460182811202451201481203391121681211264465084301442032210042607663012010123120214120334315002110910400101000010000010100270110003005605100051123211910691012006650230131412100001000050010120242120333120238120210120265
6002412023093212000323971760001202281197082570013500121000110000400101000010000106226445487674577979112003012005412005411207631124556001030020200001000060020300001000012003512003511500211091040010100001000001010000011000000010000110314087871011976650000101012100001000050010120057120052120055120055120055
60024120054930000110010000120042119708257001350012100011000040010100001000010622734548728457797901200311200551200541120763112472600103002020000100006002030000100001200351200541150021109104001010000100000101000001100000031000011031401078961197645000213139100001000050010120055120055120055120055120055
600241200549310000000100001200391197082570010500121000110000400101000010000106216845487284577979012001112005812003511207631124726001030020200001000060020300001000012005412005111500211091040010100001000001010000011000000010000110314067810711976350002131312100001000050010120055120055120055120055120055
600241200549310000000130000120037119708257001350012100011000040010100001000010622644548728457809601200301200551200541120773112472600103002020000100006002030000100001200541200511150021109104001010000100000101000001100000001000011031409786101197445000013130100001000050010120058120055120052120036120055
600241200519310000000100001200391197082570013500121000110000400101000010000106226445487284577979012003012005612003511207631124726001030020200001000060020300001000012003512005411500211091040010100001000001010000001000000010000110314097810711977550002101313100001000050010120055120055120056120055120055
60024120054931010000060000120039119708257001350012100011000040010100001000010622644548728457797911200301200371200351120733112453600103002020000100006002030000100001200541200551150021109104001010000100000101000001100000031000011031409786911976050002131312100001000050010120055120055120055120055120055
60024120055930010110013000012003911970825700135001210001100004001010000100001062291454872845779790120030120054120051112076311247260010300202000010000600203000010000120054120035115002110910400101000010000010100000110002000100001103140978691197635000213100100001000050010120055120055120036120036120055

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld1 { v0.2s, v1.2s }, [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6020512005193000000001001200391196372570103501021000110000401001000010000106227545460834574621012003012010812005411203331124516010030200200001000060200300001000012005412005111502011009910040100100001000001001000001100000101000011000003210183111197575000201012100001000050100120055120055120055120056120059
602041200549300000013000012002011963725701035010210001100004010010000100001062230454608345730441120032120115120058112044311244860100302002000010000602003000010000120054120051115020110099100401001000010000010010000011000000010000110000032101831111975750002131012100001000050100120055120055120036120052120055
60204120054931000003000012003911963425701035010210001100004010010000100001062221454620345746210120030120110120253112065311244560100302002000010000604403000010000120035120051115020110099100401001000010000010010000001000000010001000000032101831111975750002131312100001000050100120055120055120055120144120055
60204120054931000000000012003911969125701035010210001100004010010000100001062221454608345746210120030120081120093112044311241360100302002000010042602003000010000120054120052115020110099100401001000010000010010001011000000010000010000032101831111975750002131412100001000050100120055120055120055120057120055
60204120054930000001800001200391196372570103501021000110000402431000010000106222145460834574621012003012016312004711204431124456032330200200001000060200300001000012005412005111502011009910040100100001000001001000001100000001000011000003210296111197585000213139100001000050100120056120055120055120055120055
60204120035931000001000012003911963725701035010210001100004010010000100001062194454608345746210120030120101120082112089311244560100302002000010000602003012310000120055120052115020110099100401001000010000010010003001000000310000110000032101831111975750002131012100001000050100120036120055120055120055120055
602041200549310000013010012003911963725701035010210001100004081511424115591115943454547345746210120030120054120107112044311244560100302002000010000604823000010000120066120035115020110099100401001000010000010010000011000001010000110000032101831111975750002131312100001000050100120052120055120055120057120055
602041200549310000010000120039119637257010350114100011000040100100001000010622214546083457462101200301200961204421120453112445601003020020000100006020030000100001200541200512150201100991004010010000100000100100000110000000100001100000321011141111975750002131312100001000050100120055120055120128120052120055
60204120035931000010000012003911963725701035010210001100004010010000100001062221454700545746210120030120103120075112044311244560100302002000010000602003000010000120051120053115020110099100401001000010000010010000011000001279510000110000032101831111975750002131012100001000050100120055120055120055120055120039
6020412005493100000100001200391196372570116501021000110000401001000010000106222145460834576379012003012009112005911204431124456010030200200001000060200300001000012005412005111502011009910040100100001000001001000000100000031000011000003210283121197575000213130100001000050100120149120055120055120055120055

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60025120146931100000001300000120020119714257001650024100021000040010100401000010623274548956457821312003612006012006011208231124786001030020200001000060020300001000012006012004111500211090104001010000100001101000001100000027881000010100031405784511976350002131012100001000050010120061120061120042120061120061
60024120041931101000001100001120045119708257002750012100011000040010100001000010622644548728457797912003012005412005411207631124726001030020200001000060020300001000012005412005121500211090104001010000100000101000211100030011000001111031405784411976950004131012100001000050010120052120052120055120147120055
6002412003593100000001100000120043119711257001650014100021000040010100001000010622914548956458011512003612005712006011208531124756001030143200001000060020300001000012006012005711500211090104001010000100001101000001100001001000010100031634783411976350000101012100001000050010120061120061120061120061120061
600241200579311000000020000012004511970825700135002210001100004001010000100001062282454872845779791200271200351200551120763112453600103002020000100006002030000100001200541201411150021109010400101000010000110100011110001011100001111103140478431197695001414100100001000050010120055120052120055120055120055
60024120056930000000001300000120040119827767001650014100021000040010100001000010623184548956457827412003812015612004111208231124766001030020200001000060268300001000012004112005711500211090104001010000100001101000001100001031000110000031404785411983850002131012100001000050010120061120072120061120061120061
60024120152931101100001300000120045119708257001350012100031000040010100001000010622734548614457809612003012005412005411207631124726001030020200001000060020300001000012005412005421500211090104001010000100001101000211100010011000011110031403784411976950004131312100001000050010120055120141120055120036120055
60024120035930000000001300000120039119698257001650014100021000040010100001000010623184548956457821312011512006012006011207931124786001030020200001000060020300001000012004112006011500211090104001010000100001101000001100000001000010100031404784311976350002131012100001000050010120042120061120043120059120042
6002412006093110000100160000001200451196862570013500121000110000400101000010000106226445487284578057120030120054120057112073311246960010300202000010000602683000010000120054120054115002110901040010100001000011010005101000201110001111100314047855119922500121009100001000050010122347120402120323120129120146
600241201509320010004139717600001221901197637770044500231000610006402961008010078106477345546344584552120167120149120248112215131126406067530262201621008260762302401016612014812034231500211090104001010000100001101000001100000001000010100031405783411974450002131312100001000050010120061120062120061120061120061
600241200619311011000020000012004511970825700135003210001100004001010000100001062264454872845779791200301200351200541120763112469600103002020000100006002030000100001200541200521150021109010400101000010000110100011110002001100001111103140378541197695000413012100001000050010120036120036120036120055120055

Test 4: throughput

Count: 8

Code:

  ld1 { v0.2s, v1.2s }, [x6], x8
  ld1 { v0.2s, v1.2s }, [x6], x8
  ld1 { v0.2s, v1.2s }, [x6], x8
  ld1 { v0.2s, v1.2s }, [x6], x8
  ld1 { v0.2s, v1.2s }, [x6], x8
  ld1 { v0.2s, v1.2s }, [x6], x8
  ld1 { v0.2s, v1.2s }, [x6], x8
  ld1 { v0.2s, v1.2s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020580040620100000700008002516072516010080100800008010080000435896237588230080015800408004059924359998160100200160000200240000800408004011802011009910010080000800001008000882780029003580022612927610511000216228003718000013148000080000801008004180094800418004180041
160204800406200000000000080025166102516010080100800008010080000435898637588230080015800408004059924359998160100200160000200240000800408004011802011009910010080000800001008000001980017101580017611621000511000216338003708000010108000080000801008004180041800418004180041
160204800406200000004100008002506682516010080100800008010080000435896637599011080015800408004059924359998160100200160000200240000800408004011802011009910010080000800001008000001980017001580017611423000511000216228003708000013138000080000801008004180041800418004180041
160204800406210000002200008002516611251601008010080000801008000043589943758823008001580040800405992435999816010020016000020024000080040800401180201100991001008000080000100800000198001810148001761150000511000316228003708000013138000080000801008004180041800418004180041
16020480040621000100220000800251666251601008010080000801008000043589783758822008001580040800405992435999816010020016000020024000080040800401180201100991001008000080000100800000198001700168001660021000511000316228003708000013138000080000801008004180041800418004180041
160204800406200000002301008002516062516010080100800008010080000435901837588221080015800408004059924359998160100200160000200240000800408004011802011009910010080000800001008000001980016001780017611521000511000216338003718000013108000080000801008004180041800418004180041
160204800406200000002000008002516010251601008010080000801008000043589943758822108001580040800405992435999816010020016000020024000080040800401180201100991001008000080000100800000080015002280016611521000511000216328003718000013138000080000801008004180041800418004180041
160204800406200000003400008002516602516010080100800008010080000435899037588241080015800408004059924359998160100200160000200240000800408004011802011009910010080000800001008000001980017001780017611521000511000316328003708000013108000080000801008004180041800418004180041
16020480040620000000230000800251667251601008010080000801008000043589783758821108001580040800405992435999816010020016000020024000080040800401180201100991001008000080000100800000198001740178001761150000511000216228003708000013138000080000801008004180041800418004180041
16020480040621000000220000800251607251601008010080000801008000043589943758823008001580040800405992435999816010020016000020024000080040800401180201100991001008000080000100800000198000010168000060152100051100031623800371800001308000080000801008004180041800418004180041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002580040620100000190008002506642516001080010800008008680000435841737588241080015800408009559946360020160010201600002024000080040800401180021109101080000800000108000001480013008800006110170050202162280037180000908000080000800108004180041800418004180041
16002480094621000001180008002516692516001080010800008001080000435841737588240080015800408004059946360020160010201600002024000080040800401180021109101080000800000108000001480012101380013010170050202162280037080000968000080000800108004180041800418004180041
16002480040620000000190008002510652516001080010800008001080000435841737588240080015800408004059946360020160010201600002024000080094800401180021109101080000800000108000001480013001280000619190050202162280037080000968000080000800108004180041800418004180041
160024800406210011001800080025106112516001080010800008001080000435842137588231080015800408004059946360020160161201600002024000080040800401180021109101080000800000108000001480012701380000610170050202162280037180000068000080000800108004180041800418004180041
16002480095621000000150008002516642516001080010800008001080000435842137588240080015800408004059946360020160155201600002024000080040800401180021109101080000800000108000000800000012800136010200050202162280037180000998000080000800108004180041800418004180041
160024800406200010006880080025066502516001080010800008001080000435841737588241081108800408004059946360020160010201600002024000080040800941180021109101080000800000108000001480013001380013609170050202162280037180000998000080000800108004180041800418004180041
160024800406200010001901080078160525160010800108000080010800004358417375989300800158004080040599463600201600102016000020240000800408004011800211091010800008000001080000014800121019800126110170050202162280037180000968000080000800108004180041800418004180041
160024800406200010101900080025160102516001080010800008001080000435841737588241080015800408004059946360020160155201600002024000080040800401180021109101080000800000108000001480036000800130111200050202252280037180000098000080000800108004180041800418004180041
16002480040620000000151000800251669251600108001080000800108000043584333758824008001580040800405994636002016001020160000202400008004080040118002110910108000080000010800000080013101380013019172050202162280037180000998000080000800108004180041800418004180041
1600248004062001110019000800251664251600628001080000800108000043584173758824108005480040800405994636002016001020160000202402588004080040118002110910108000080000010800000168001200980013611300050202162280077080000998000080000800108004180041800958004180041