Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (multiple, 16B)

Test 1: uops

Code:

  ld2 { v0.16b, v1.16b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.004

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.004

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
64005294442201190120100610046152887301017108400620062000200020001000023819422831029111293133104000200020002000400029200290901161001100010000200446200410122002404013148915468573083956204833073381526495228722162151337314905200020002936329340294012945229350
64004293432201171120100510046742886000217083400420002000200020001000023852122836029086293323104000200020002000400029196291831161001100010000200000200000032000400013044928768473056550202963081381824504328476162431341014922200020002940229422293512941629367
64004293222200210019000510046932885500017040400420042000200020001000023806422851029405294843104000200020002000400029283293221161001100010000200004200000042000000012931916968413138646205263277381120485028455163411352314784200020002928629296292922935829423
64004292652190210020000510045982886100017159400420042000200020001000023812522848029166292253104000200020002000400029158291711161001100010000200004200000002000444013117933568603093945203033171381214464428421162601340814649200020002930429383293162935329349
64004292952200200022000510046482880400017180400420002000200020001000023816322808029122292463104000200020002000400029117290801161001100010001200004200000002000400013095933868393092652203533067381418535028453163201333214967200020002939529367293462929829403
64004292772200220026000600046362903300017173400420042000200020001000023862322809029047292843104000200020002000400029096290591161001100010000200004200000002000404013021917868433090850202723086381623514828424162341340014645200020002930929218293372934229299
64004293182200220019000010046332881900017109400420042000200020001000023792222777029085292373104000200020002000400029221292121161001100010000200004200000032000400012950923668863109744202783107381015454028497162301323114954200020002927729318292732917829335
64004293672190220022000500045662880700017113400420042000200020001000023847222804029111293083104000200020002000400029149291971161001100010000200004200000002000404012950932469413061642202923090381221494528377162761334215067200020002935529326293582937629319
64004292942200200017000510046192883600017194400420042000200020001000023864422814029028292893104000200020002000400029188292451161001100010000200004200000042000404013001912468723082848203523112381721434728409162921340015086200020002928729361293502932129330
64004293442200250019010510045972890800017140400420042000200020001000023808522811029064292453104000200020002000400029191291271161001100010001200004200000002000004013228927768353150548202493146382117384628405163351342015037200020002932729366293082930729390

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.16b, v1.16b }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0055

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205140058104910000020101140048847841296762590106401003000620000301003000020000120450556692548121817941140037014006114006112951531299408010030200200003000060200200005000014006114005711502011009910040100100001000011002000332200050122000022220032101161113980140000141014200002000040100140042140062140062140062140058
80204140061104910100040102140046847851296962590106401003000620000301003000020000120450556692548121832271140037014006114006112951531299408010030200200003000060200200005000014006114005711502011009910040100100001000001002000332200040122000022221032101161113977540000141014200002000040100140056140056140056140060140052
80204140069104900000120001140040847751296902590103401003000320000301003000020000120445336692260121812420140031014005514005512948931299388010030200200003000060200200005000014005514005111502011009910040100100001000001002000002200000002000000200032101161113979140000141414200002000040100140056140056140036140052140052
80204140051104900000020001140040847751296902590103401003000320000301003000020000120445336692260121826931140031014005514005512950931299388010030200200003000060200200665000014005114005111502011009910040100100001000001002000000200000002000020200032101161113979540000141014200002000040100140085140056140056140056140056
80204140035104900000020001140040801301296732590103401003000320002301903009020000120530806692260122020670140193014003514005512950931299388010030200200003000060200200005000014005514005111502011009910040100100001000001002000002200004002000020000032101161113979540000141414200002000040100140056140056140121140086140036
802041400551049000000200011400408477512969325901034010530003200003010030000200001204453366922121218727011400140140057140139129572521300448056230200200003000060200200005000014005114005111502011009910040100100001000001002000002200000002000020200032101161113979540000141010200002000040100140056140053140102140065140058
802041400521049000000140001140043847781296702590100401003000320000301003000020000120441856692260121830491140031014005514005512950931299348010030200200003000060200200005000014005514005111502011009910040100100001000001002000002200000002000020200032101161113979540000101014200002000040100140052140052140097140410140056
80204140051104900000020001140036847851296902590103401003000320000301003000020000120445336692260121826931140031014005514005512950931299388010030200200003000060200200005000014003514005111502011009910040100100001000001002000002200000002000020200032101161113979540000141414200002000040100140056140136140057140052140052
8020414005510480000002010114004084801129686259010340100300032000030100300002000012044533669226012182693114003101400551401041295093129938801003020020000300006020020000500001400351400511150201100991004010010000100000100200000220000000200002020003210116111397954000001014200002000040100140036140052140121140054140056
80204140051104900010020101140036801521296862590103401003000320000301003000020000120448816692260121826931140031014005514005512950931299348010030200200003000060200200005000014005514005111502011009910040100100001000001002000002200000032000000200032101161113979540000141410200002000040100140056140114140073140056140056

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0053

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025140057104911100410114004284792129684259001640010300062000030010300002000012043909669216412181974014002914004114004112960131300268001030020200003000060020200005000014005714005311500211091040010100001000001020003202000300222000022223031403160221397934000010010200002000040010140109140120140067140063140042
80024140057104910100410114003880150129692259001640010300062000030010300002000012044257669235612183407014001714004114005712959731300148001030020200003000060020200005000014004114004111500211091040010100001000001020003222000400122000022220031402160221397814000031610200002000040010140097140060140080140072140058
8002414005310491010041021400268480112969025900164001030006200003001030000200001204390966915881218305111400171400571400571296153130030800103002020000300006002020000500001400411400571150021109104001010000100000102000322200030012200002202103140216022139797400000100200002000040010140042140054140093140420140042
800241400411049101004101140026801391296882590013400103000620000300103000020000120442576692356121830510140033140057140041129585313001680010300202000030000600202000050000140041140053115002110910400101000010000010200032220004000220000020210314021602213979740000666200002000040010140054140054140094140070140042
800241400531049101004101140026847781296762590013400103000620000300103000020000120428626691588121830510140033140053140053129601313002680010300202000030000600202000050000140057140053115002110910400101000010000010200042220003000220004020220320121602213985540000666200002000040010140054140054140085140074140058
8002414004110501000021011400428479012968840900134001030003200003001030000200001204425766924521218305101400171400411400571296023130026800103002020000300006002020000500001400571400531150021109104001010000100000102000322200020012200002222003140216022139793400001066200002000040010140042140042140171140058140054
80024140057104910000400014004280166129691259001340010300062000030010300002000012042862669235612181974014003314004114005312960131300268001030020200003000060020200005000014004114005311500211091040010100001000001020002222000300122000022222031402160221397974000010634200002000040010140061140102140055140054140042
8002414005310491100041021400388478012968825900164001030006200003001030000200001204425766923561218305101400171400411400571295853130030800103002020000300006002020000500001400571400531150021109104001010000100000102000322200020122200002222203140216025139797400000010200002000040010140058140100140043140398140042
800241400411049100002100140039847971296882590016400103000620000300103000020000120442576692356121819740140033140053140041129597313001480010300202000030000600202000050000140057140041115002110910400101000010000010200022220002001220000022200314021602213978340000000200002000040010140042140054140121140064140054
8002414005710491010020001400428480612969225900164001030006200003001030000200001204286266921641218305101400171400411400571295973130030800103002020000300006002020000500001400411400411150021109104001010000100000102000422200030022200000222003140216022139795400001000200002000040010140054140121140059140057140054

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.16b, v1.16b }, [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0055

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802051400511049100020100140043847791298802590103401003000320000301003000020000120491346717634121826931140031140055140055129489312993480332302002000030000602002000050000140035140051115020110099100401001000010000110020000022000002002000022003210116111397954000010014200002000040100140052140056140056140056140056
8020414003510490000200011400208477912969025901204010030003200003010030000200001204418566922121218295111400271400551400511295093130147801003020020000300006020020000500001400551400511150201100991004010010000100001100200000220000000020000220032102161113979140000101014200002000040100140056140052140056140056140052
80204140055104900002000114004084779129690259010340132300032000030100300002000012044701669226012183037114003114005514005512950944129939801003020020000300006020020000500001400551400525150201100991004010010000100001100200000220011000020000020032101161213979540000141414200002000040100140056140056140055140434140056
802041400571052000020000140410847791296702590103401003000320000301003000020000120441856692068121826930140031140051140055129509312993880100302002000030000602002000050000140413140051115020110099100401001000010000010020000022000000002000022003210116101397954000001414200002000040100140055140036140052140052140056
8020414005110490000200001400208477912968625901034010030003200003010030000200001204283266912901218269311400311400351400351295093129934801003020020000300006020020000500001400551400351150201100991004010010000100000100200000220000000320000200032101161113979140000101010200002000040100140052140056140056140056140056
802041400581049000028700111400409264112968625901034010030003200003010030000200001204715066922601218277911400311400351400511295053129938801003020020000300006020020000500001400511400511150201100991004010010000100001100200000020000000020000220032101161113979540000141014200002000040100140037140421140059140052140052
8020414003510480110200001400408477912968625901034010030003200003010030000200001204453366922601218269311400271400411400551295053129938801003020020000300006020020000500001400351400351150201100991004010010000100001100200000220000000020000220032101161113979540000141414200002000040100140052140052140056140056140056
8020414005110490000200001400408477912968625901034010030003200003010030000202001204453366922601218269301400311400551400551295093129918801003020020000300006020020000500001400351400521150201100991004010010000100001100200000220000000020000200032101161113979540000141014200002000040100140036140056140052140056140056
8020414005510490000000111400408477912968625901004010030003200003010030000200001204453366920681218269311400311400351400511295093129934801003020020000300006020020000500001400511400511150201100991004010010000100001100200000220000000020000220032931161113979140000141014200002000040100140053140086140052140052140056
80204140055104900001643520011400408477912969025901034010030003200003010030000200001204283266912901218269311400311400551400551295093129938801003020020000300006020020000500001400551400511150201100991004010010000100001100200000220000000020000220032101161113979540000141410200002000040100140056140056140056140052140056

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0057

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d0 | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800251400531049101011000401031400328013012967025900104001030003200003001030000200001204347166918761218251711400270140035140051129614313002480010300202000030000600202000050000140035140047115002110910400101000010000010200000220000000200002020000357619288000161014218240209066200002000040010142652143088142831142994142602
80024143203107301120003633462211441011436169668413116193590519402583014720064337023369021650122015866774372123551690143640014458614385813089448313212489900342982291434743680182315256510144419140946115002110910400101000010000010200794020000209865520060222040036310134010007513978840000000200002000040010140058140058140058140058140058
80024140057104910100010040001140026801381296922590013400103000320000300103000020000120442576692356121834071140033014005714005712960131300148001030020200003000060020200005000014006114004111500211091040010100001000011020003202000200220000220210031400716000571398924000014014200002000040010140062140062140062140062140062
8002414006110491000100002001014004280139129696259001340010300062000030010300002000012044257669235612183763014003731400611400411296052813003480010300202000030000600202000050000140061140057115002110910400101000010000010200033020002012200002222200314007160305713984240000101010200002000040010140042140058140062140062140062
800241400611049101000000800011400428478512969225900164001030006200003001030000200001204460566925481218197401400370140061140041129605313003480010300202000030000600202000050000140057140057115002110910400101000010000110200033220004012200002222100314007160005714008240000141414200002000040010140062140042140042140062140042
80024140057104910100000020010140046847851296962590013400103000620000300103000020000120428626691588121819741140033014005714004112958531300348001030020200003000060020200005000014005714004111500211091040010100001000001020003322000400220000222220031400716000571398424000001010200002000040010140058140058140102140042140062
80024140057104910101000013001114004284781129696259001640010300062000030010300002000012044257669158812183407014003701400611400571295853130034800103002020000300006002020000500001400571400571150021109104001010000100000102000440200030122000002020003140051600057139800400001000200002000040010140058140058140058140058140042
80024140057104910000000020011140042801381296962590016400103000320000300103000020000120442576692548121819741140033014004114006112958531300348001030020200003000060020200005000014004114005711500211091040010100001000001020002222000301220000022200031400516000571397934000014014200002000040010140058140042140062140058140062
80024140106104910100000080000140026847851296762590016400103000620000300103000020000120446056691588121834071140017014006114004112960531300308001030020200003000060020200005000014005714005721500211091040010100001000001020003302000301220000220200031400516000751398064000010100200002000040010140042140058140062140062140058
800241400411049100000000700101400468478112969625900164001030006200003001030000200001204286266925481218197411400170140041140061129585313001480010300202000030000600202000050000140041140057115002110910400101000010000010200033220003022200002222100314007160005713984240000101414200002000040010140062140049140063140062140062

Test 4: throughput

Count: 8

Code:

  ld2 { v0.16b, v1.16b }, [x6]
  ld2 { v0.16b, v1.16b }, [x6]
  ld2 { v0.16b, v1.16b }, [x6]
  ld2 { v0.16b, v1.16b }, [x6]
  ld2 { v0.16b, v1.16b }, [x6]
  ld2 { v0.16b, v1.16b }, [x6]
  ld2 { v0.16b, v1.16b }, [x6]
  ld2 { v0.16b, v1.16b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320205800555991011000570102800262553253201521001600541600001001600001600005008013861922548080022080041800410323320100200160000160000200160000320000800418004111802011009910010080000800000100160013144316005200151160039615243131510911711800380131321600001600001008004280042800428004280042
320204800415991001000580102800260553253201541001600561600001001600001600005008013721922540180022080041800410323320100200160000160000200160000320000800418004111802011009910010080000800000100160014134316005200052160039615243130510911711800380131321600001600001008004280042800428004280042
320204800415991110000580102800262553253201541001600541600001001600001600005008013801922554080022080041800410323320100200160000160000200160000320000800418004111802011009910010080000800000100160012144316005100112160039615143132510911711800380131321600001600001008004280042800428004280042
320204800416001011100570102800262553253201561001600521600001001600001600005008013871922528080022080041800410323320100200160000160000200160000320000800418004111802011009910010080000800000100160013134316005200152160039015243130510911711800380131321600001600001008004280042800428004280042
320204800416001001000130102800262053253201561001600881603801001600001600005008013861922582080022080041800410323320100200160000160000200160000320000800418004111802011009910010080000800000100160014124316005300052160039615243131510911711800380131321600001600001008004280042800428004280042
320204800415991101000130102800262553253201541001600541600001001600001600005008013801922554080022080041800410323320100200160000160000200160000320000800418004111802011009910010080000800000100160013144316005201151160039615143131510911711800380131321600001600001008004280042800428004280042
320204800416001011000580102800262053253201541001600541600001001600001600005008013801922554080022080041800410323320100200160000160000200160000320000800418004111802011009910010080000800000100160013144316001300152160039615243120510911711800380131321600001600001008004280042800428004280042
320204800415991110000570102800262553253201541001600541600001001600001600005008013801922554080022080041800410323320100200160000160000200160000320000800418004111802011009910010080000800000100160012134316005400152160039615243120510911711800380131321600001600001008004280042800428004280042
320204800415991001000570102800262553253201561001600561600001001600001600005008013861922528080022080041800410323320100200160000160000200160000320000800418004121802011009910010080000800000100160014144316005300152160039615243121510911711800380131301600001600001008004280042800428004280042
32020480041600111100058010280026255325320154100160054160000100160000160000500801380192254008002208004180041032332010020016000016000020016000032000080041800411180201100991001008000080000010016001513016005401155160039615243131510911711800380131321600001600001008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3200258006860000010000003801000800260550253200241016005416000010160000160000508013861920154080022800418004103233200102016000016000020160000320000800418004111800211091010800008000010160000035160032000016003261035000502013017001717800380000160000160000108004280042800428004280042
3200248004160010010000004200002800262003253201661016005216000010160000160000508013831922528080022800418004103233200102016000016000020160000320000800418004111800211091010800008000010160014144316005100012160000615143132050201321700141780038014100160000160000108004280042800428004280042
32002480041599100100000038000028002620120253200521016000016000010160000160000508008531921728080022800418004103233200102016000016000020160000320000800418004111800211091010800008000010160000001600000000160036603600005020127170017178003801400160000160000108004280042800428004280042
320024800416001001000000000000800262121202532005210160038160000101600001600005080085319210520800228004180041032332001020160000160000201600003200008004180041118002110910108000080000101600000351600320003216000061364000050201191700161480038014140160000160000108004280042800428004280042
3200248004160010010000004200000800262121202532005210160042160000101600001600005080000019217600800228004180041032332001020160000160000201600003200008004180041118002110910108000080000101600000351600000003616000060364000050201281700171680038014140160000160000108004280042800428004280042
32002480041600100100000042010008002625032532006410160054160000101600001600005080138019225540800228004180041032332001020160000160000201600003200008004180041118002110910108000080000101600131343160053000131600000152012105020130170014178003810140160000160000108004280042800428004280042
3200248004159910010010004200003800262500253200661016005616000010160000160000508000481920152080022800418004103233200102016000016000020160000320000800418004111800211091010800008000010160012134316005300113160039015143131050201271700171480038013132160000160000108004280042800428004280042
32002480041600110200110058000028002605032532006210160052160000101600001600005080004219201440800228004180041032332001020160000160000201600003200008004180041118002110910108000080000101600131443160055001551600406013012105020126170016178003800100160000160000108004280042800428004280042
3200248004159910010000004200002800262000253200521016004216000010160000160000508008531921760080022800418004103233200102016000016000020160000320000800418004111800211091010800008000010160013124316005100151160039005243132050201311700181880038014100160000160000108004280042800428004280042
32002480041599100100000038010028002620120253200101016004216000010160000160000508008531921076080022800418004103233200102016000016000020160000320000800418004111800211091010800008000010160000001600360000160036013240000502012517001717800380000160000160000108004280042800428004280042