Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (multiple, post-index, 8H)

Test 1: uops

Code:

  ld2 { v0.8h, v1.8h }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.004

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.004

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6400528951232023012000100601047642862600216805500410002004200010002000200050001000023814022801028791288703105000200020003000400028709288821161001100010001200006200200220044240001305094136929316195819824324838233158552836010001584912803140122000200010002886228854288562892228892
64004289482310190022000004000477728442020166325004100020042000100020002000500010000238083228470287782896331050002000200030004000287472880511610011000100002000042002002200420400013201943569043165105419876329938222658502835010001563912708138082000200010002878728918288122887028808
6400428953231017001600000401046982851200016734500610002004200010002000200050001000023807522846028733287473105000200020003000400028622287171161001100010000200004200000020000040001313792516927309775119881326038032554482820810001560212748140152000200010002878228782289392880028833
64004288162310180019000000000483728566020166865006100020062000100020002000500010000238501522853028691288173105000200020003000400028785289281161001100010000200004200200020004260001329896116970312175719979330438242365562834310001584912682139612000200010002882728904290082898928968
6400428811232023001800000400048202860402016761500610002000200010002000200050001000023888222925028677289523105000200020003000400028774287301161001100010000200004200200220024240001326695266937321096019883323838222255562835610001585012704137552000200010002881628916289182888828861
64004287932320170018000004000465828564000167385006100020042000100020002000500010000238121422898028686289253105000200020003000400028849287521161001100010001200004200200420004240001318394866992320566119707323238131646522830910001579112803139292000200010002894928858289482896528735
6400428822232024002000000500046122852202016723500610002000200010002000200050001000023884422819028656289213105000200020003000400028815288631161001100010001200004200200220002000001323295116937314355819841327538191752572840310001562012795140182000200010002895828880288732887528980
64004289342320160016000000000485128467000167135000100020062000100020002000500010000238060228700286652887331050002000200030004000287502868711610011000100002000002000000200040000013188950170053152135319833327738211653512823510001582712737136912000200010002888928914288592888128910
640042875623101200220000040004750285802021672750061000200620001000200020005000100002387002287702872528869310500020002000300040002890228757116100110001000020000420000022000364000131519430696431357631999231683819953512829810001573812825137842000200010002884928992288592890528964
6400429009232016001600000000046702863202216700500610002004200010002000200050001000023844022790028731288503105000200020003000400028756288111161001100010000200004200000420020000001317195686938314365919904316638172854532830010001552212501138542000200010002887728829289372887828874

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.8h, v1.8h }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0055

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 7a | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020514005711251121000040000140037127260129671251001035010030003200024010030000200001608607066923561218269301140031140035140055129509312994090100030293200003009360200300005000014003514005221502011009910040100100001000001002000002200000002000220200032100116111397755000001014200002000050100140036140036140036140058140058
8020414014711250000000017000014004012730412969225100103501003000320000401003000020000160785346692260121828710014001114005714005512950931299389010003020020000300006020030000500001400551400871150201100991004010010000100000100200000220006021770820010202000337301241213985550074141014200002000050100140879140886140794141051140899
8020414078311320001001023520011400451249801297285010010350100300072000240220300002000016084460669226012182693001400311401531401351295143129938903600302932000030000603863000050000140151140114115020110099100401001000010000010020000002000000020000202000321001161213979650010101010200002000050100140056140056140159140058140057
802041401401086010000000880011400401272971296915210010350110300032000040220300002005016078998669480612182693001401071401421400551295383129940901000302002000030093602003009350000140139140051115020110099100401001000010000010020000222000000244520002202000321001161113987550000161014200002000050100140056140056140056140056140056
802041400551085000000001400011400361273041296902510010350100300032000040100300002000016079114669226012182693001400311400551400581295093129938901000302002000030000602003000050000140055140051115020110099100401001000010000010020000022000000020000202000321001162113979550011141014200002000050100140152140056140056140114140052
802041405061127010000102880011400411272971296905110010350110300032000240100300002005016076640669412612181242001400291401401400571295093129994903620302002006230000603863000050000140131140051215020110099100401001000010000010020000022000200020000000000321001161113986250010141014200002000050100140153140056140146140056140056
802041400561086010000102880021401361272971297375010010350112300002000040220300002005016078998669461412182693001400311400551400551295123129965903600302002000030000603863000050155140055140142115020110099100401001000010000010020000022000000020000202000321001161113979150000141014200002000050100140057140052140056140056140056
80204140051108600000000200011400361272971296902510010350100300032000040100300002000016079222669226012182693001400311400581400551295093129934901000302002000030000602003000050000140055140152115020110099100401001000010000010020000022000000620000202000321025116111397935000010610200002000050100140058140059140058140058140060
802041400421131100000012300011400241273001296865010010350112300032000240100300002000016084126669206812186860001400271400511400511295453129989901000302002000030000602003316250000140051140226115020110099100401001000010000110020004222000410520000222200321001241113979850000141010200002000050100140062140044140157140159140058
802041400571135110100001480001140040127305129690251001035011130007200004010030000200501607899866950461218269300140109140051140150129533312993490100030200200003000060386300005015514042214005711502011009910040100100001000001002000002200001062000020200032100232111397915000010610200002000050100140048140052140036140040140138

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800251400481125000000000200114003612729712968625100013500103000320000400103000020000160779046691290121825170140027014005414005112958031300209001030020200003000060578300005000014005114004711500211091040010100001000001020000022000000020000020031400210168131397915000010610200002000050010140052140056140052140052140048
80024140035112500000000050011400361273021296862510001350010300032000040010300902000016076010669206812182517014001101400511400511295953130008900103002020062300006002030000500001400351400511150021109104001010000100000102000002200000002000022003168051216121313979150000060200002000050010140052140048140052140055140055
8002414005411240000000005001140036127300129690251000105001030003200004001030000200001607801666920681218712601400130140131140047129595313002490010300202000030000600203000050000140035140035115002110910400101000010000110200000220000003200002200314000111612121397935000010610200002000050010140052140052140036140053140052
8002414014411250000000003001140036127306129686251000135001030003200004001030000200001607790466922121218251701400460140051140054129595313002490010300202000030000600203000050000140051140146115002110910400101000010000010200000220000000200000200314000141611121397925000010611200002000050010140052140052140054140048140052
80024140051112400000000020011400361273001296862510001350010300032000040010300002000016077904669211612182517014003001400471400351295793130026900103002020000300006002030000500001400511400471150021109104001010000100000102000002200000002000022003140001416912139791500006100200002000050010140052140053140052140052140036
80024140035112500000000030001400361272971296872510001350010300032000040010300002000016076010669206812182689014002701400511400521295953130027900103002020000300006039230093500001400471400511150021109104001010000100000102000000200000002000022003140001216141513979250000101010200002000050010140036140052140052140052140052
80024140051112500000000080011400361286721296702510001350010300032000040010300002000016077904669206812182517014002301400511400511295953130020900103002020000300006002030000500001400471400501150021109104001010000100000102000002200000002000022003140001024101313979150000101011200002000050010140048140048140052140052140052
8002414004711240000000005001140039127300129687251000135001030003200004001030000200001607790466912901218251701400120140035140051129595313002790010300202000030000600203000050000140051140047115002110910400101000010000010200000220000102550200002000314000131610131397915000010613200002000050010140048140052140052140053140054
80024140051112500000000050011400361273001296862510001350010300032000040010300002000016077904669206812181422014002701400511400511295793130024900103002020000300006002030000500001400511400471150021109104001010000100000102000002200001002000022003140001324101313986350000151011200002000050010140052140185140048140141140155
800241400491126000011010134176011400201251121298001291000455004030016200064025030270201001609195066985921219454131402450140232140421129727431301789079030392201863018660206302795046514023814049931500211091040010100001000001020006222001002756820008220032253201138131413987350010141014200002000050010140153140057140056140146140063

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.8h, v1.8h }, [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0057

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020514005710851100004021400591273061296922510010650100300062000040100300002005016079230669250012182871014003314005814005712951131299899010030200200003000060200300005000014005714005321502011009910040100100001000001002000422200020022000022221000321011611139781500009010200002000050100140042140058140061140058140042
802041400571086100000402140061127308129692251001065010030006200004010030000200001607945466915881218287101400331400411400571295193129940901003020020000300006020030000500001400411400531150201100991004010010000100000100200023220002005200002202100032101161113980050000101010200002000050100140058140042140058140058140058
80204140057108610000040214004312730612969525100103501003000620000401003000020000160792306691588121829600140037140057140057129513312994090100302002000030000602003000050000140057140054115020110099100401001000010000010020002422000201220000222200003210124111397975000011610200002000050100140058140059140042140058140042
802041401281086110100400140058127302129692251001065010030006200064034230090200001608517566923561218296001400331400571400571295111412992490100302002000030000602003000050000140057140057115020110099100401001000010000010020002222000200220002222200003210116111397975001110010200002000050100140061140058140236140058140058
80204140057108610000040014013612730212972925100106501003000620000402203000020000160792306692356121828710140033140058140058129511312994090100302002000030093602003000050000140057140053115020110099100401001000010000010020003322000310220000222200003210116121398005000010610200002000050100140139140058140058140058140042
8020414005710861100004882140047127306129692511001065010030010200004010030000200001607923066923561218287101400351400531401451295113129940901003020020062300006020030000500001400611400571150201100991004010010000100000100200032220004001720000222210003230116111397975000010010200002000050100140058140043140058140058140058
80204140141108610110013402140043127306129692251001065010030006200004010030092200501608516466923561218287101400331400581400571295112312994090100302002000030000602003000050000140156140053215020110099100401001000010000010020002222000301220002020200003210116111397815000015610200002000050100140059140147140042140059140058
80204140057108511001040214014112712512969225100106501003000620000401003000020000160793466691588121871220140033140057140057129511312999990100302002000030000602003000050000140057140143115020110099100401001000010000010020003322000300520000220210003210116111397975000010010200002000050100140061140061140058140138140058
80204140053108512201010021400551273061297162510010350100300102000040100300002000016079230669158812186059014009814005714005712949531299409010030200200003000060200300005000014005714013911502011009910040100100001000001002000222200030025022000022021000323911611139797500001000200002000050100140058140060140058140058140058
8020414005710851100102011400431273081296922510012250100300062000240100300002000016079230669235612182871014003314005714014812951113129940903603020020062301866020031581551151402341402373150201100991004010010000100000100200465220014031019220008222200003269224341398695007010010200002000050100140313140338140236140338140200

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0055

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d2 | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | df | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025140047112500000000170001140036127300129687251000135001030003200004001030000200001607790466920681218251711400271400351400481295953130024900103002020000300006002030000500001400511400351150021109104001010000100001102000000200000032000020200314005160440139787500001006200002000050010140052140036140036140048140532
800241405801128000000446873680011404731272971301121541000935007030027200104073030540203001610935866961421218798611400721404881400521295963130008900103002020000300006002030000500001401441400351150021109104001010000100000102000812200000032000000000314004160550139791500006610200002000050010140052140052140052140052140052
800241400511125000000002000114003612867112968225100013500103000320000400103000020000160780166692068121825171140027140051140051129596313002090010300202000030093600203000050000140051140047115002110910400101000010000010200000220000000200000020031400516043013979150000101010200002000050010140055140052140052140052140053
8002414005111240000000050001140036127300129670251000135001030003200004001030000200001607790466920681218251701400111400351400351295983130024900103002020000300006002030000500001400471400471150021109104001010000100000102000002200000032000020201316005160550139791500001060200002000050010140149140054140053140054140052
800241400511124000010002000114003612730112968725100013500103000720000400103000020000160779046691924121946590140027140035140051129595313001390010300202000030000600203000050000140035140051115002110910400101000010000010200000220000100200002000031400416054013979150000101010200002000050010140053140052140036140053140053
800241400471125000000001700011400391273001296872510001350010300032000040010300002000016077904669206812181422014002814005314005112959531300249001030020200003000060020300005000014014914005111500211091040010100001000011020000022000000020000202003140051605401397915000010610200002000050010140036140052140052140052140052
800241400511125000000101700011400201273001296872510001350010300032000040010300002000016078016669206812182603014002714003514005112959531300219001030020200003000060020300005000014005414004711500211091040010100001000001020000022000010020000002003140051605401397915000001010200002000050010140052140052140052140098140054
8002414005111250000000000000140020127297129686251000135001030003200004001030000200001607802066920681218251711400271400521400511295793130024900103002020000300006002030000500001400471400471150021109104001010000100000102000002200000062000020200314005160550139791500006610200002000050010140052140052140048140036140052
80024140051112500000000500011400361273011296862510001350010300032000040010300002000016077904669216412182606014002814005114005112957931300249001030020200003000060020300005000014005214005111500211091040010100001000001020000002000010020000200003140041604401397785000010010200002000050010140055140052140052140052140053
80024140035112400000000500011400361273021296875110001350010300032000040010300002000016077904669274212182517014001114004714005112959831300249001030020200003000060020300005000014005114005111500211091040010100001000001020004022000000020000202003140051604501397915000061211200002000050010140052140053140053140052140053

Test 4: throughput

Count: 8

Code:

  ld2 { v0.8h, v1.8h }, [x6], x8
  ld2 { v0.8h, v1.8h }, [x6], x8
  ld2 { v0.8h, v1.8h }, [x6], x8
  ld2 { v0.8h, v1.8h }, [x6], x8
  ld2 { v0.8h, v1.8h }, [x6], x8
  ld2 { v0.8h, v1.8h }, [x6], x8
  ld2 { v0.8h, v1.8h }, [x6], x8
  ld2 { v0.8h, v1.8h }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 67 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32020580071599100000006400028002700140254001548010016000016000080100160000160000480499960920208181600800230800428004203244001002001600001600002002400003200008004280042118020110099100100800008000001001600111040160032000531600366146401010005109117118003908000096160000160000801008004380128800438004380043
320204800426201000100010000280027214150254001528010016004816000080100160000160108480499960917208183200800230800428004203244001002001600001600002002400003200008004280042118020110099100100800008000001001600121240160021110471600366148400100051092171180039080000010160000160000801008004380043800438004380043
3202048004262000000000540002800273014025400148801541600101600008010016000016000048049996003020819220080023080042800420324400100200160000160000200240000320000800428004221802011009910010080000800000100160011110160048001101600376122401100005125117118003908000099160000160000801008004380043800438004380043
32020480042620000000015100018002730140254001528010016004616000080100160000160000480499960887208011400800230800428004203244001002001601081600002002400003200008004280042118020110099100100800008000001001600121140160046000111600366130401110005109117128003908000090160000160000801008004380043801548004380043
3202048004262010000000340002800273141402540014680100160048160000801001600001600004808219609052081758008002308004280042233244001002001600001600002002400003200008004280042118020110099100100800008000001001600101240160048000481600376146401110005223117118003908000099160000160000801008004380043800438004380043
3202048004262110010000650002800272000254001488010016001216000080100160000160000480499960884208177200800230800428004203244001002001601081600002002400003200008004280042118020110099100100800008000001001600111140160012011471600366110011100051091172180039080000010160000160000801008015280043800438004380043
3202048004262010000000530102801372141402540015080100160010160000801001601081600004804999608872081760008002308004280042032440010020016000016000020024000032000080042800421180201100991001008000080000010016011611401600121004716000061474011000051091171180039180054100160000160000801008004380043800438004380043
32020480152621100000005200028002721415025400148801001600481600008010016000016000048049996088220806240080023080042800420324400100200160000160000200240000320000800428004211802011009910010080000800000100160011042160046000291600296148421100005109117118003918000099160000160000801008004380043800438004380043
3202048004262110010000540002800270014225400146801001600521600008010016000016000048049996096220818080080023080152800420324400100200160000160000200240000320000800428004221802011009910010080000800000100160011110160048000471600366148401100005109117118003908000009160000160000801008004380043800438004380153
320204800426201001000010000280027214140254001368010016004616000080100160000160000480499960878208177200800230800428004203244001002001600001600002002400003200008004280042118020110099100100800008000001001600111240160104000461600366147401000005109217118003908000009160000160000801008004380043800438004380043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 67 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3200258005764310000110079002800272141402540002080010160048160000800101600001600004800499608842081772008002308004280042032440001020160000160000202400003200008004280042118002110910108000080000010160010114016004801048160036614740103501921723800398000099160000160000800108004380043800438004380043
32002480042643101010100710028002721514025400056800101600481600008001016000016000048004996087420801260080023080042800420324400010201600001600002024000032000080042800421180021109101080000800000101600111240160048011541600366146401105021117218003980000109160000160000800108004380043800438004380043
32002480042643100110100552002800272151402540005680010160048160000800101600001600004800499608742081796008002308004280042032440001020160000160000202400003200008004280042118002110910108000080000010160010114016004600048160036614640100501911711800398000099160000160000800108004380043800438004380043
320024800426421010001005500380027201402540006280010160048160000800101600001600004800499608822081944008002308004280042032440001020160000160000202400003200008004280042118002110910108000080000110160011114016004913046160036614740110501921721800398000099160000160000800108004380043800438004380043
3200248004264210110012645200380027313140254000628001016004616000080010160108160000480049960887208177000800230800428004201324400010201600001600002024000032000080042800421180021109101080000800000101600111340160048002471600366148401005021217128003980000910160000160000800108004380043800438004380043
3200248004264210010010093003800272141405140005680010160050160000800101600001600004800499608742081772008002308004280042032440001020160000160324202400003200008004280153118002110910108000080000010160011114016004600151160037614940100502111722800398000099160000160000800108004380043800438004380153
3200248004264310110010053002800272141402540005480010160052160000800101600001600004800499608742081786008002308004280042032440001020160108160000202400003200008004280042118002110910108000080000010160113114016004700148160036614801115019217118003980000917160000160000800108004380043800438004380043
3200248004264310001010064003800273141402540005880010160052160000800101600001600004800499608842081972008002308004280042032440001020160000160000202400003200008004280042118002110910108000080000010160011114016004700147160036614740111501911712800398000099160000160000800108004380043800438004380043
3200248004264310001110071012800271141102540005680010160048160000800101600001600004800499609142081934008002308016280042032440001020160000160000202400003200008004280042118002110910108000080000010160011114016004800047160037614840110502121721800398000099160000160000800108004380043800438004380043
3200248004264310010010159003800272141432540005880010160052160000800101600001600004800499608882081778008002308004280042032440001020160000160000202400003200008004280042118002110910108000080000010160013104016004700048160037614940110502121712800398000099160000160000800108004380043800438004380043