Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (multiple, post-index, 4S)

Test 1: uops

Code:

  ld2 { v0.4s, v1.4s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.006

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
640052962923711610180110080047002907800017296500810002008200010002000200050001000023916522862292702946231050002000200030034000292792943721610011000100002010202004004922004440200131679371698230746382056433163813936392884810011582913219144642000200010002961829501294912952029488
640042956623619221411011039200453229071000171355006100120062000100020002000500010000239205229392934329587949500520022000300040042944329360216100110001000020053620060640420026242301322793866941315834120650337138201440372878710001618513215145142000200010002934029286296962945729487
6400429739239113111300000110046202905802017442500810002006200010002000200050001000023871122860292012936331050002000200030004000291842925411610011000100002003270020073142000244211816134189606692931596392050733273815337402867710001645813114144162000200010002926729313293912941229309
6400429330236114011710000201473728923002171495008100020022000100020002000500010000238723228892922529366310500020002000300040002917429275116100110001000020043420070142000624210131439530690931307352039632983820740372859510001597513046144212000200010002940529405293572931429463
6400429357237114108100008004600289452001722850021000200220001000200020005000100002382842286329171293123105000200020003000400029309292591161001100010000200334200301420004242201319394556885315263820321323238191035342868010001626513030142582000200010002933729339293662931529406
640042940223611001120000014004777288770201708150081000200620001000200020005000100002390222289929343294213105000200020003000400029208292211161001100010000200330200500220004242101307194956919313073420531327438181240372862510001625413138144982000200010002939129432295042937529404
6400429361236112001010000900460828977000172305002100020022000100020002000500010000239114228682921429287310500020002000300040002939629268116100110001000020023620032122002024210133289478697231644362043232353820733312866710001615513131144372000200010002939529428293982942329377
6400429434236190112100009004684288710101729250021000200220001000200020005000100002391402289629229294633105000200020003000400029147292661161001100010000200224200301520004242101300695176920316343320401329938161037372861010001608113376144902000200010002946829437293892941829374
640042948923611111131110014004711290820001719050061000200820001000200020005000100002389222285729308293193105000200020003000400029387292911161001100010000200434200311220000242101314193906912318263420469335038191034372864210001601813118143852000200010002938229398293762940529351
6400429386236115001110000900463829035000171365002100020022000100020002000500010000238164228712926129513310500020002000300040002928829270116100110001000020033420040022000424210132709469698131149342037733283819532392863410001616813187141152000200010002939929436293452945229425

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.4s, v1.4s }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205140057108610000000110011400361273001296862510010350100300032000040100300002000016078646669206812182337114002714005114005212950531299349010030200200003000060200300005000014005114004711502011009910040100100001000001002000002200000000200002020003210116111397915000010610200002000050100140052140052140052140052140052
802041400471086000000002001140039127300129686251001065010030006200004010030000200001607853466920681218233701400271400511400511295063129934901003020020000300006020030000500001400521400471150201100991004010010000100000100200000220000000320000200000321011611139792500001060200002000050100140052140055140048140053140052
8020414004710860000000060011400361273001296862510010050110300112000040100300002000016078650669206812182337014001114005114005112950631299369010030200222323167260200300005000014005414004711502011009910040100100001000001002000002200000023200002000003210216111397955000010010200002000050100140052140036140052140052140052
802041400511086000000001460011400201273001296861251001065010030006200004010030000200501607853466912901218233701400281400511402601295483129918901003020020000300006020030000500001400511401211150201100991004010010000100000100200040220000000020000000000321011611139797500096612200002000050100140036140036140131140064140052
802041400371086100001001600014003212730012967025100103501003000320000401003009020000160768646692068121823370140027140051140051129501131299349010030200200003000060200300005000014003514014211502011009910040100100001000001002000002200020100200002022003210116111397915000010010200002000050100140052140053140052140147140052
80204140051108500000000200114003612730012972325100106501003000620000401003000020000160785346691434121823370140027140035140047129501312993490100302002000030000602003000050000140052140052115020110099100401001000010000010020000002008801032000020000032101161113979150000111010200002000050100140052140052140052140052140053
80204140035108600000000380011400361273001296862510010350100300002000040100300002000016078534669216412182337114002714005114004712950131299349010030200200003000060200300005000014014514003511502011009910040100100001000001002000000200000003200000020003210116111397755000010612200002000050100140147140052140052140052140052
802041400471085000000000001140036127300129686251001065010030003200004010030000200001607807066945181218233701400281400511400511295053129934901003020020000300006020030000500001400511400351150201100991004010010000100000100200000220000000020000202000321011611139775500101060200002000050100140048140053140090140036140036
802041400511086000000006001140036127300129727751001035010030003200004010030000200001607853466912901218269301400231400511400511295093129934901003020020000300006020030000500001400511400471150201100991004010010000100000100200000220000000320000202000321011611139791500006610200002000050100140036140052140053140052140052
80204140035108600010100217601140321126287129799102100138501313001420002404603036020100162239806765336123071010140186140137140242129593241300849062430481202483009360946302795031014031914021941502011009910040100100001000001002005802200040077660200082000203293132311398505000010100200002000050100140036140048140052140052140036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002514006111251110000003400011401321273061296962510001350010300032000040010300002000016078712669235612183407114003414005814005812960131300319001030020200003000060020300005000014005714005711500211091040010100001000001020003322000001232000022221031401216121213980150000141010200002000050010140063140058140058140062140052
8002414005511250000010102000014003612730012973225100013500103000320000400103000020000160782526692260121828730140027140056140055129599141300089001030020200003000060020300005000014005114005211500211091040010100001000001020000022000000025502000020200031401116121013979150000101010200002000050010140053140151140052140052140052
8002414005511250010000005000014003612730012968625100016500203000620000400103000020000160786006692356121819740140033140057140057129601313003090010300202000030000600203000050000140061140057115002110910400101000010000010200023220000000020000202012031401116111213979250000101310200002000050010140056140052140052140052140052
80024140051112400000000017880001400401273011296862510001050010300032000040010300902000016077904669226012182873014010714005514005512959531300299001030020200003000060020300005000014005814005111500211091040010100001000001020004022000001002000020200031401316131013979150010101010200002000050010140052140052140052140053140052
8002414005511250000000002000014003612730112968712610001350010300032000040010300002005016077904669206812182873014003114005114005512959931300249001030020200623000060020300005000014005714005111500211091040010100001000001020000022000000032000020200031401216121313979150000101010200002000050010140053140053140052140052140139
80024140057112400000110080000140036127300129686251000135001030003200004001030000200001607835266944221218287301400311400561400511295953130024900103002020000300936002030000500001400521400511150021109104001010000100000102000002200000000200002020003140121691313977550000101010200002000050010140052140053140052140052140052
800241400511124000000000110000140036127300129691251000135001030003200004001030000200001607790466920681218670801400291419741439291298773130024900103002020000300006002030000500001400361401441150021109104001010000100000102000002200000306200022020003140111691313979150000101010200002000050010140052140058140053140052140057
80024140051112500000000020001140036127300129686251000135001030003200004001030000200001607790466920681218287301400271400511400981295993130028900103002020000300006002030000500001400521400551150021109104001010000100000102000002200000009200002020003161816111113979550000101010200002000050010140052140056140052140052140052
800241400511125000000001500001400401273051296892510001350010300032000040010300002000016077904669206812182873014003114005114005112959831300249001030020200003000060020300005000014005114005111500211091040010100001000001020000022000001002000020200031631316121113979150000121410200002000050010140054140052140052140052140052
800241400511125000000001200001401241270931296902510009350020300032000040010300902000016084038669226012182962014003114005114005112959931300699027030020201243000060206300005000014014214005421500211091040010100001000001020004022000200022000022223031701316121213986550010101014200002000050010140062140043140150140059140137

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.4s, v1.4s }, [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020514004710490000006000114002012729712967025100103501003000320000401003000020000160785346692116121823370140027140051140052129505312993590100302002000030000602003000050000140051140035115020110099100401001000010000010020002002020000132000020003210216111397915000010610200002000050100140052140054140052140052140052
80204140047108500000028800014002012730112968625100103501003000320000401003000020000160766406692212121823370140027140051140051129505312991890100302002000030000602003000050000140056140051115020110099100401001000010000010020000002020000102000022003210116111397915000010010200002000050100140036140048140036140052140052
80204140035112500000020000140036127297129686251001035010030003200004010030000200001607664066920681218124201400271400351400351295053129934901003020020000300006020030000500001401371400511150201100991004010010000100000100200000020200000020000220032101161113979150000600200002000050100140052140038140141140048140052
802041400511125000100600011400201272971296862510010350100300032000040100300002000016078534669206812181242014002814003714014312948931299189010030293200003000060200300005000014005414005111502011009910040100100001000001002000000202000043200002200321011611139775500001066200002000050100140054140052140065140052140052
8020414005111240001000000014003612730012968225100103501003000320000402203000020000160766406692068121823370140027140035140055129505312993590100302002000030000602003000050000140140140051115020110099100401001000010000010020000002020000025132000022003210148111397755000010610200002000050100140128140052140052140052140141
802041400381125000000200001400361273021296862510010050100300002000040100300002000016076752669206812182337014002714005314005112950531299389010030200200003009860200300005000014005814005211502011009910040100100001000001002000000002000000200000000321011611139791500006610200002000050100140052140048140055140052140052
8020414005311250000912010114003612730212967050100103501003000320000401003000020000160766406692068121812420140027140051140056129489313003890100302002000030000602003000050000140144140051115020110099100401001000010000010020000002020000002000022003210216111397755000013610200002000050100140053140052140052140053140036
802041400371125000000140101140036127300129686251001035010030007200004022030000200001607697666912901218233701400271400521400511295053129949901003029320000300006020030093500001400511400471150201100991004010010000100000100200120020200131127032001022003230156121402875007210010200002000050100140676140578140447140650140134
80204140733113000000391471210114003612730712972725100119501003000320000401003000020000160786506692068121823370140028140054140051129506312994090100302932000030000602003000050000140041140052215020110099100401001000010000110020000002020000002000002003210116211397915000010010200002000050100140048140054140036140139140054
8020414005311240000101400011401231261591296872510011950100300032000040100300002000016078070669206812186377014002714005114003712954131299669036030293206823009360762301865232514023614032641502011009910040100100001000001002001100202000072550200022200322912420139907500000810200002000050100140230140052140055140054140052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0055

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002514005110850010000170001140020127300129686251000135001030003200004001030000200001607790466922601218287301400310140057140055129601313002890010300202000030000600203018650000140055140051115002110910400101000010000010200000020000000200002020003142503162213979550000141010200002000050010140056140036140053140052140056
80024140148108500000001340000140040127303129692251000135001030003200004001030090200001607836866922601218287301400310140055140055129595313002890010303922012430000600203000050000140055140055115002110910400101000010000010200000220000100200002020003144503163213979150000141014200002000050010140056140056140052140052140052
800241400551085000000020001140036127302129690251000135001030003200004001030000200001607601066924041218287301400370140061140061129606313003490010300202000030000600203000050000140064140064115002110910400101000010000010200022220003105200002202003142503163313980250000141414200002000050010140062140042140062140062140062
8002414006110851000000160000140046127310129696251000165001030006200004001030000200001607906466923561218340701400390140041140061129601313003490010300202000030000600203000050000140061140057115002110910400101000010000010200022220002012200002020003140503163313979150000141014200002000050010140056140056140056140036140056
80024140056108600000002000114003612730412969225100013500103000320000400103000020000160783686692260121828730140034014005514005512959931300289001030020200003000060020300005000014005614005611500211091040010100001000001020000022000000020000002000314050316331397955000014014200002000050010140056140056140058140057140056
800241400551086000000020001140036127305129691251000135001030003200004001030000200001607836866922601218287301400320140055140055129599313003190010300202000030000600203102356045143905141449115002110910400101000010000010200000220000002530200002020003140502243213979550000141012200002000050010140056140056140036140056140056
80024140058108500000005000114004012730012969125100013500103000420000400103000020000160783686691290121828730140031014005514005812959931300289001030392200623000060020300005000014005514005511500211091040010100001000001020000022000000020000002000314050316231397955000001414200002000050010140052140053140052140056140036
800241400511086000000014000114004012730412972825100010500103000320000400103000020000160783686691290121832290140031014005514005512959931300289027030020200003000060020300005000014005514003611500211091040010100001000001020000002000020020000202000314050316331397955000001014200002000050010140134140036140056140056140052
80024140055108600001002000114004012730812968625100013500103000320000400103009020000160760106692260121839880140027014003514005512959931300289001030020200003000060020300005015514005914005211500211091040010100001000001020000022000010320000202000314250324321398475002001014200002000050010140150140148140056140134140142
800241401281086011101913988800014032412491112978812810004250050300112003843252326102135016238894676619212301058014021501403171403281296353413008091052302992012430279602063037250465140249140327415002110910400101000010000010200000220000006200002020003140502164313979150000141010200002000050010140036140058140056140036140052

Test 4: throughput

Count: 8

Code:

  ld2 { v0.4s, v1.4s }, [x6], x8
  ld2 { v0.4s, v1.4s }, [x6], x8
  ld2 { v0.4s, v1.4s }, [x6], x8
  ld2 { v0.4s, v1.4s }, [x6], x8
  ld2 { v0.4s, v1.4s }, [x6], x8
  ld2 { v0.4s, v1.4s }, [x6], x8
  ld2 { v0.4s, v1.4s }, [x6], x8
  ld2 { v0.4s, v1.4s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst simd alu (9a) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32020580070643000000360000817942121227794004328016016014616011680216160130160124481182100372121553160800238026280153482760400410200160124160124200240186320248802658015531802011009910010080000080000100160104331601360010991601416137410111514402500801351800601010160000160000801008026880160802448015582116
32020482022659011011168176000801402121254514006888022316025416011680108160124160016480860960415208520808011580371801546927604004102001600161600162002400243200328004380043118020110099100100800000800001001600024116003110371600386029410111511601600801351800061010160000160000801008004580045800458063880044
32020480044643000000350100800282121202640015480106160046160008801081600161600164805389607342080136080023800438004406144001402001600161600162002400243200328004480043118020110099100100800000800001001600023316003900351600396130410111511701600800400800061010160000160000801008004480044800448004480045
320204800436420000005500008002921212026400154801061600401600088010816001616001648053896073620811460800238004480043697144001402001600161600162002400243200328004380044118020110099100100800000800001001600023316003900301600316130330111511701600800410800061410160000160000801008004480044800458004580044
32020480043643000000420002800292121202540015680106160042160000801001600001600004804999603312080976080023800428004203244001002001600001600002002400003200008004280042118020110099100100800000800001001600003316003700301600306130330000510911711800390800001014160000160000801008004380043800438004380043
3202048004264300010035000280027212120254001368010016004216000080100160000160000480499960328208096008002380042800420324400100200160000160000200240000320000800428004211802011009910010080000080000100160000411600291001600296130330000510911711800391800001410160000160000801008004380043800438004380043
3202048004264300000035000280027212120254001388010016004416000080100160000160000480499960361208097608002380042800560324400100200160000160000200240000320000800428004211802011009910010080000080000100160000331600371037160029613741000051091171180039180000010160000160000801008004380043800438004380043
32020480042643000000360000800272121202540013680100160044160000801001600001600004804999608522080972080023800428004203244001002001600001600002002400003200008004280042118020110099100100800000800001001600003316003000301600006130410000510911711800390800001010160000160000801008004380043800438004380043
3202048004264300000035001080027212002540014280100160036160000801001600001600004804999606602080960080023800428004203244001002001600001600002002400003200008004280042118020110099100100800000800001001600003316003000371600296030330000510911711800390800001410160000160000801008004380043800438004380043
320204800426430000000000280027212002540014280100160042160000801001600001600004804999603302080982080023800428004293244001002001600001600002002400003200008004280042118020110099100100800000800001001600003316003700371600376130420000510911711800390800001010160000160000801008004380043800438004380043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320025800566420000000000570000280027201502540005680010160028160000800101600001600004800499641772080512080023800428004203244000102016000016000020240000320000800428004211800211091010800008000001016001110401600470004616002261472510005019217228003908000099160000160000800108004380043800438004380043
32002480042643100001110011000008002730140254000228001016001016000080010160000160000480047960922208181008002380042800420324400010201600001600002024000032000080042800421180021109101080000800000101600111001600470011016003661114010105019217228003908000099160000160000800108004380043800438004380043
32002480042643100100000054000008002721315025400062800101600461600008001016000016000048004296093120821460800238004280042032440001020160000160000202400003200008004280042118002110910108000080000010160010114016001200050160037614640110050192172280039080000912160000160000800108004380043800438004380043
320024800426431000000000520100080027014002540006280010160048160000800101600001600004800499610262081872080023800428004203244000102016000016000020240000320000800428004211800211091010800008000001016001211401600460004616003661494010105019217228003908000099160000160000800108004380043800438004380043
3200248004264310110000001000012800270000254000628001016004816000080010160000160000480049972999208183408002380042800420324400010201600001600002024032432000080042800421180021109101080000800000101600111240160011010471600366110010105019217228003908000000160000160000800108004380043800438004380043
3200248004264410010100001100000800273141402540005880010160052160000800101600001600004800499659042081986080023800428004203244000102016000016010820240000320000800428004211800211091010800008000011016001311401600460114716003760104011005019217228003908000090160000160000800108004380043800438004380043
320024800426431001000000590000280027214140254000588001016005216000080010160000160000480371960945208196808002380042800420324400280201600001600002024000032000080042800421180021109101080000800001101600111040160046001481600360147010105019217228003908000090160000160000800108004380043800438004380043
3200248004264310010110005600002800273141405140006280010160048160000800101600001600004800499683522082000080023800428004223324400010201600001600002024000032000080042800421180021109101080000800000101600121042160047000491601386147010005019217228003908000099160000160000800108004380043800438004380043
32002480042643100001100058000028002731414025400058800101600521600008001016000016000048004996730020818920801158004280042032440001020160000160000202400003200008004280042118002110910108000080000010160011104016001102014160037614743111050192172280039080000119160000160000800108004380043800438004380043
320024800426431001010000160000280027214002540005880010160044160000800101600001600004800499656372086142080023800428004203244000102016000016000020240162320000800428004211800211091010800008000001016001210401600490004716003661474010105019217228003908000099160000160000800108004380043800438004380043