Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDP (post-index, Q)

Test 1: uops

Code:

  ldp q0, q1, [x6], #0x10
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0e1e202223293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst simd load (98)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
200510407000013443100112102511311627212530001000200010002000247004520511015104010407293773300020002000104010401110011000100002044142108210624032022802115771066711203731161110381100027182000100010411041104110411041
2004104171100125361004010252333228212530001000200010002000243444518011016104010417283774300020002000104010401110011000100002042135882074180140075209662789911214731161110383100027172000100010411041104110411041
20041040811101093610023210262733202115253000100020001000200024416451860101510411041728377330002000200010401040111001100010000203305105205912024018652122481062100000731161110370100026262000100010411041104110421042
2004104170000153551000410252532722172530001000200010002000243724513701015104010417283774300020002000104110411110011000100002036051052088281590078211442746110003731161110373100020182000100010411041104310411041
20041040800001283700014102616349222253000100020001000200024308451691101510401040728377330002000200010401040111001100010000202403972083250340127920955585455002731161110372100023302000100010411041104110411041
200410418000014144100101025031102202530001000200010002000243364518811015104010407283773300020002000104110401110011000100002029041152092171350129320965084694001731161110372100022222000100010411042104210411042
200410417000013024100141026253408162530001000200010002000244244519111015104110417283773300020002000104010401110011000100002036018120661303501286210472858106003731161110371100024182000100010411041104210411042
2004104180000972810004102625331017192530001000200010002000243444516211016104010407283773300020002000104010401110011000100002025027420881704701065211862136763001731161110370100020252000100010411041104210411041
2004104070000119570001121025413129152525300010002000100020002427245168110151040104072837743000200020001040104011100110001000020250284207022045018622098441062111001731161110372100020242000100010421041104110411041
2004104180000134201002410252031721172530001000200010002000245404520411015104110417283773300020002000104010401110011000100002030026320592502301010221004794495001731161110380100027242000100010411041104110421041

Test 2: Latency 1->3 roundtrip (from loaded register q0 back to base register x6)

Chain cycles: 3

Code:

  ldp q0, q1, [x6], #0x10
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1227

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f20222324293a3e3f404346494d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9abacafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3branch mispred nonspec (cb)cfd0d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
6022312122190310010000288502241100137611921212952200023423699952111135258040350278100842000040100100002000013987583578945235052831121240012138212140611346731137377010030200200001000060200200001000012120812142311502011009910040100100001000001002242003862273222024002272427240527226109619433500303210031633121071501864996092872000050100121325121325121370121196121309
602041215759071000000028200226110010481216121072220601662221001321109632580400502961008720000401001000020000139763335780956349800501213153121457121450113230291139127010030295201281000060200200001000012154012102331502011009910040100100001000001002241303842296221933991162436230499226089216428700003210031633120943502065714962452000050100120846121163121156121400121275
60204121203908000010002802022621001248221212108922070180152997011108882580430502681009420000401001000020000140064375780908349655201212540121111120426113002311367170100302002144010345602002000010000121274121225215020110099100401001000010000010022423037422252220940528524232046582422707132276401100039870113421413124290507288926894902000050100121500121272120832121307122010
60204121364909110110002226022470011328222012134722350218206998161110282580448503401010320000401001000020000140000155795980349707501212840121460121416113161311361370100302002000010000602002000010000120953121188115020110099100401001000010000010022418037022732224039711124161905552261289204275001132102431633121113502166186733342000050100121280121101121210121424121448
6020412166490800000000261802281001140811361212762229019020210014311134025804005035010091200004010010000200001399475157956443498441012097001213661212021133733113678701003029820000100006020020000100001211411213141150201100991004010010000100000100224188391226922219360113243152488226148519635000303210031633121238502047115383432000050100121204121400121336121399121114
6020412110890700000000231402259001127201841212082211023417810005211091125803825028210115200004010010000200001398763057808123499092012125501211621210491136423113616701003020020000100006020020000100001212541210971150201100991004010010000100000100223990343232622199401113242129269322608108178293001003210031633121010502007066293892000050100121289121942121249121465121328
602041211369080000013323840227600113440140121214221401822081000271109342580418502961010220000401001000020000140136195782492349964501212710121243121324113558311394070100302002000010000605802012810095121319124669271502011009910040100100001000011002244603602346222354021132424122558226169617020900003210031632120933501646035243622000050100121305120960121483121454121153
60204121385908000000002759022150011400037212207321910202216999451112422580448503201010920000401001000020000140023355797180350376801211120121177121418113325311374070100302002000010000602002000010000121300121109115020110099100401001000010000110022424038122772221440310824222824642262991192302001803210031633120829501686957023722000050100121507121353121336121428121589
602041215809080000000026210228201013840216121300220701901889981211114025804245030810101200004010010000200001401967957876283499445012132301211801213531135283113869701003020020000100006020020000100001212391210001150201100991004010010000100000100224160404228922235388110240952472226089316829900003210031632120923502147412133372000050100121210121284121410121291121284
6020412114490910000000263702293010129611201211822168016817010015211112625803075032810095200004010010000200001399089957922843499284012133001211431214171133103113743701003020020000100006020020000100001210931212772150201100991004010010000100000100224218387228022235346113244658558226128823233200003210031633121170502326596873852000050100121395121151121111121318121369

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1162

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f20222324293a3e3f404346494d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)5f60696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9abacafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3cdcfd0d2d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
600431208299042000000024310229120012401484121054225501682029727511082025803225018010092200004001010000200001397268157688603498473001209470120910121206113211311365370207300202000010000600202000010000121416121117115002110910400101000010000010224193352228322231388116241356651622592106019324220210314000111710022120825501985564522832000050010121076121055121107121198121225
600241211519072220000022280227120011201404121225225311762069725411079525802835019810106200004001010000200001400763357936763478298001211110121282120989113171311353870010300202000010000600202000010000121078121147115002110910400101000010000010224442335228022281377112243858253122590920171305000031403121711122120944501806935762222000050010121218121033121136120885121137
600241210979061010000014000226020011280352121204227312142009744211112025802925018810071200004001010000200001397636657785563494692001211470121277121133113362311368270010300202000010000600202000010000121271121408115002110910400101000010000010224401351234122246374112242961857322625900224320000031400021700022121076501665395363392000050010121216121156120870121153121113
600241211989061101000023730230520096004841211292210315419197380110944258025050194100962000040010100002000013998509578071634928880012113901211091214371131093113548700103002020000100006002020000100001210661211631150021109104001010000100000102240073432296222203661162439574525226351040213269000031400021700022120766502086075183652000050010121142121198121000121396121495
6002412118590822200000262702298200112003201211052247312418797403111009258027150172100932000040010100002000013991462577524435011690012107901211861204701132973113718700103002020000100006002020000100001212051210671150021109104001010000100000102240753582293222554371142421644561225871090192359003031400021700022120978501965696062912000050010120916121246121275120960121367
600241212179082010000017350230720045605401211852227417020497410110699258032850180100992000040010100002000013980606578378835056520012127501211111212191134013113661700103002020000100006002020000100001211691212081150021109104001010000100000102241953572240222223791172410632528225951030153275000031400021700022120872501746325322522000050010121321121220120985121071121259
600241212519082002000027540228120011681392121172223011902279725811099925803075017810106200004001010000200001394938857981883502727001213030121293121397113312311350470010300202000010000600202000010000121425121340115002110910400101000010000110224327362224722221380117241757653522636880192293061031400021700022120970501886144822352000050010121200120970121135121042121120
600241212369072010000016770224820011360392121060221322082329756211075925802745021810100200004013410000200001396657457821563487039001212570121198121196113140311351270010300202000010000600202000010000121012121333115002110910400101000010000010224297363227222215311112242063654022615890207323009031400021700022120744502006516202712000050010121181121270121088121421121387
6002412113590821000000246602286200111213161211282232317019997624110826258026850204101142000040010100002000013935056579540435015490012116301212621213091134133113790700103002020000100006002020000100001209981210971150021109104001010000100000102242433542221222103711182428608575226241030188299004031400021700022120943502145674212172000050010121104121153120896121169121310
60024121049909200000002480023082001136031612108922163178194973461108622580313501921008620000400101000020050140062625776156349994500121351312129112103811347431135297001030020200001000060020200001000012121112137411500211091040010100001000011022400534722582224337711724061330743022608910151278003031400021700032121058502126534482432000050010121120121047121226121201121026

Test 3: Latency 2->3 roundtrip (from loaded register q1 back to base register x6)

Chain cycles: 3

Code:

  ldp q0, q1, [x6], #0x10
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1969

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f20222324293a3e3f4043494d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9abacafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
60223121701912500000002664022191001656123612205121521010051311184425803645036010128200004010010000200001408481258193083521326112168701218951217231141633114978701003020020068100006020020000100001222791221091150202100991004010010000100000100224161945119902193847610424082585352240210511150010321011611121540502008957726722000050100121928121908121779122249121726
60204121653914440410002629021991001368322812174921740010080011166725805235036610129200004010010000200001408322258296283520019012193301215881217281139003114483701003020020000100006020020000100001219591215751150201100991004010010000100000100224121944319962193247911224832585642239214531360031321011611121494502829646919002000050100122101121989122006122085122333
60204122095915551000002660022011001640323612200021682010070111157125805145039010121200004010010000200001407668558369243526742112103101216901219801138843114418701003020020000100006020020000100001220261221031150201100991004010010000100000100224242045219822193847811024162586612238511211440530321011611121730502608397788462000050100121086122037122036121699121837
60204121878913404000002622022171001400326012211021691010071411170425804785035410128200004010010000200001408330458055323522711012167101220081220931140813114707701003020020000100006020020000100001218771221271150201100991004010010000100000100224252445419902235643110724212585752238312601390000321011611121628501929207099322000050100121993122232122223121975121546
60204121681912300300002187022261001464026812190821990010034811157925803975036210130200004010010000200001407962458028443528943112216801213071217491138123114878701003020020000100006020020000100301218591216211150201100991004010010000100000100224241838720272204642111024304886152242910901330000321011612122077502647887216542000050100121641121913121545121846121585
6020412199191130030000236802227100139222721216702172241004061114762580523503621011520000401001000020000140304955805196351623511218920121688121844113637311456270100302002000010000602002000010000121942121919315020110099100401001000010000110022414952720542200541810924225285532238610001140000321011611121576502046147868412000050100122023121892121470121757121979
602041218689112000100024070223610013841224122184219410100272111719258049650304101002000040100100002000014032347583709035227091122186012191212202211345731145447010030200200001000060200200001000012226412227311502011009910040100100001000001002243727445199521945477113240525853922397160115700203986232921125641506988708909812000050100122152121987122054121815122452
60204122200914411000413671072640212710021286152124306209800987771133865828107650714102172012241315100002000014119964584906835307721122026012295612236511417818411649074163337192157010861639902158010856124008125058321502011009910040100100001000001002246721560191421897521113237950261522387137214500003664323511121765502628036738272000050100122192122010122117121891121931
60204122049914404011002442022011001608225212185821642010042011178125804875035010112200004010010000200001407266158190203525931012194201220261218221141043114516701003020020000100006040420000100001221621222501150201100991004010010000100000100224201945419932195147310824122585432237814201251500321011611121542502349118487992000050100121876121915122131121935122041
60204121901912400010002698022010101640225212219821741010028611152825804935030610133200004010010000200001412009558305883522153112201501224861219181142143114752701003020020000100006020020000100001221771221771150201100991004010010000100000100223922344719912194153810724082586242238712411050000325211611121883502329006718562000050100121574121802121971122220122184

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1523

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f1e1f2022293a3e3f4043494d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)5f60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9abacafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3branch cond mispred nonspec (c5)cfl1i cache miss demand (d3)d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
6004312171290800000022350223511392016812131021971097885110976258029550174101022000040010100002000014043545581066834856200012157612157412182211378031141227001030020200001000060020200001000012212912150811500211091040010100001000011022421038020482201236011223966433122418108010800203140051711121200502188015527862000050010121697121830121790121924121551
600241218429131110002417022541152014321215742194109799911138025802745018210114200004001010000200001407970258063963512706001206951213771215111138193113959700103002020000100006002020000100001217471215531150021109104001010000100000102241603272108220773991142398563282242110728100003140011711121639502187836526662000050010121315121946121658121576121912
60024120787912000001245602285114481192121324223320981261114302580331502041008320000400101000020000140527135798620350790800121386121552120692113419311417670010300202000010000600202000010000121481122184115002110910400101000010000110224220333205421996357113244550326224328408900003140011711120501502206426636232000050010121877121538121496121395121900
60024121584912000000205002243113921132121374218400978881114702580340502121010320000400101000020000140379675806924352038600121295121687121538113752311422170010300202000010000602122012610000121666121715115002110910400101000010000010224060381205622026356109241623633922422111010500003140011718121422502107454424742000050010121766121057121617121656121529
600241217519120000002431022671140001801217112225109787411091025803225019410112200004001010000200001401584658030843507399001214691215121218741137073114114700103002020000100006002020000100001215811215801150021109104001010000100000102244303762056220274051092412643212241894111600003140011711121065501527025976572000050010121727121645121567121500121377
600241214339120000002429022521136801841216692204009795611162425803825020610107200004001010000200001405280657911323504225001215691213831217981135483113913700103002020000100006002020000100001216151217191150021109104001010000100000102243003402071220164101082431623242241199210400003140011711121054501728095156592000050010121688121726121531121785121989
600241216839110010002174022421143211881218512250219803611146625803075020410079200004001010000200001406545358085083514621001215941216321218471137323113866700103002020000100006002020000100001221631216971150021109104001010000100000102244803822054220224041092417603182243010009000203140011711121476502127626087742000050010121657121650121662121376121469
6002412144491110000023300226811368020412089622261097907111547258035850204100942000040010100002000014021936578854035151150012160412144512146411407431144777001030020200001000060020200001000012149412160311500211091040010100001000001022430038820492205834611224142123372240693211300003140011711121334502282926847962000050010121643122037121999121727121837
60024121506910101010242302238112401240121659225010978791114772580283502201011920000400101000020000139071915780236350705400121651121875121517113737311396470010300202000010000600202000010000121884121539115002110910400101000010000010224210350210322016396111242025031722411117311600203140011711121739502007125897792000050010121843121352121694121336121496
60024121147910000000220602257114000220121501222401980711113132580343501761008420000400101000020000140088585808028348666900121674121487121554113924311406770010300202000010000600202000010000121777121727115002110910400101000010000010224150381207322019402116241821433422417101210300003140011711121575501869326207332000050010121611121656121551121531121641

Test 4: throughput

Count: 8 (the number of LDP instructions in the code below)

Code:

  ldp q0, q1, [x6], #0x10
  ldp q0, q1, [x7], #0x10
  ldp q0, q1, [x8], #0x10
  ldp q0, q1, [x9], #0x10
  ldp q0, q1, [x10], #0x10
  ldp q0, q1, [x11], #0x10
  ldp q0, q1, [x12], #0x10
  ldp q0, q1, [x13], #0x10
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.7155

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss data (0b)0e0f181e1f2022293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9abacafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3branch cond mispred nonspec (c5)branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
16022357097441101000001233302288116083922457269213428719182310190125241732818081600008010016000040404125246730158557063565715734336996337153240100200160000200160000572085716011802011009921001008000080000010016291012226049091634542342107239730439921663046701363195499300000511021622572213181649346321107160000801005706557580574115750657455
160204575144462010000011241022301139244196571722184217217717101934252416628165016000080100160000403905252420711657572955736957290372733369702401002001600002001600005740057307118020110099110010080000800000100162420272229369916379523401112416244468216660871010439611785192410511021632572882981675315378110160000801005734057116571445701157481
160204573184362011300010656022281163242220572032163231192120861945252415888168916000080100160000403849251374411649574475739956941370283370502401002001600002001600005714857315118020110099110010080000800000100162424422214587164338233710623923124094166749748913876448100000511021622571012581785367316106160000801005755357035573205707057069
160204574754271000000010813022601159242272572552197330182017672140252416448180116000080100160000403877252431401581570855744057320372343371982401002001600002001600005718257058118020110099210010080000800000100162408822184559163909228810924143024131166129712141390839040000051102162257500268174733433992160000801005743357181571405739357385
1602045726443110000000115300220811408372205727121962551438206720032524170081826160520801001600004041602512118115575709857671572933746033708924010020016000020016000057240572891180201100991100100800008000001001623884223741141636382321111239524648891665036911073312437580000511021622573842881647378332101160000801005711857317571575718157362
16020457360428100000001094802203117284130457725214829918212060208725241803816821600008010016000040403625327481172857199570275718937210337034240100200160000200160000569235717711802011009931001008000080000010016240482201359316311523111082416504414616670669110335654157130000511021622572603081734401295104160000801005694557063573115712557135
16020457422428101000001183402233115764924857445217123518902379216325241739817531600008010016000040554025300081182557175573915728736995337269240100200160000200160000571335697011802011009911001008000080000010016240414221138451643082313110241026043991657236361304161422001000511021622571782281664259257112160000801005743857387571595704856972
16020457432429200220101214402219114644515657160220625220872198215125241828816921600008010016019240391025235840162857077572025728837337337177240100200160000200160000575505703911802011009921001008000080000010016240582189415316369423011092404504111166390817125315645240000051102162257103248170632928792160000801005707756944570615706757261
1602045714042720300000108580219211616425045715522272472149197519202524183181792160000801001600004039542519189016005701157369569543747833742624010020016000020016000057461571971180201100992100100800008000001001624001221943881163952232511224202444507165302752923913433703000511021622570823081632315360156160000801005719257405571415716457113
1602045718842820000000111750224211624432645713922102641994223119832524169381740160000801001600004041342508973116155697857267570863709533722324010020016000020016000057504569801180201100993100100800008000001001623964221944711641122304110241826236221658467931242600460600000511021622571732181629266358101160000801005742657359572545717457433

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.7161

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0f18191e1f2022293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)606167696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a5ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9abacafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3cfd0d2d5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
16004357144429666000011429022471142472232570882216245192618331923252416598159716000080010160000403654253811910158457427574775747137171337396240010201600002016000057148574481180021109210108000080000010162459532234461901635112340117243827438671664966841354598456419005020001116712574372181363398458130160000800105734657317575565723557531
1600245778243360020001192602260113848124857197222925318761939210625241687817031600008001016000040508325263590016515742157416572513736533737924001020160000201600005752257548118002110921010800008000001016245363222641020163600232911524202583910166564668108387043441960502000716914571372781441494444167160000800105717457418575625754757155
160024574234307077000108770228611480792805742722432612241205520412524156781798160000800101600004034742527978001592573585741257252373843372222400102016000020160000575375755911800211094101080000800000101624477222474241016409923381172438222459516631758011638314502310050200014161110572102581628537417126160000800105700557403572755743157301
1600245735343060061001089402248111689217657075225527720131672195125241731816381600008001016000040323525109310016505729057321574013713133744024001020160000201600005750357637118002110941010800008000001016244956225140410164289232311524402584354166728701117382040301960502000101597575602181602386524116160000800105714257112572145723057210
16002457174430500000011630022431124054148573262218235190319862172252416668162716000080010160000403330253246700158957339573355721737346337261240010201600002016000057325571681180021109110108000080000010162439522233420401640102356117242624245851673437141274258413631005020007161010570872081710457446110160000800105695357274573845716757100
16002457299430500500011534022481140073268572292255227195815911806252417318135516000080010160000403364252096400159257598573435736037503337197240010201600002016000057204574821180021109310108000080000010162467632202521801635252339118241620239371669268861073830461219005020006168957573268168946343394160000800105757157288574905763957584
16002457571430700000012169022511143285232574542188313180621722285252418068149016000080010160356403455253372000165057569574295736637311337199240010201600002016000057675574821180021109510108000080000010162429522215534001640742352116241631452301659948691314184519519005020001216108572752281655437359153160000800105764457376575775746657366
1600245734743180020001126202226114166924057308221827922142142228225241719817861600008001016000040361225380661015795721557510577343749333759624001020160000201600005733757447118002110971010800008000001016241960219643880164609230411524285104235166765719152442451401900502000121697576122281571504470160160000800105767457543573895742057634
1600245763943088880001137602240116889324857612221029822121904230425241742817861600008001016000040378525400340016955739857470577023768733769424001020160000201600005745257419118002110971010800008000001016244145226645920164365231612124262264111166261760114407338101910103350200071698572011981558527492131160000800105721757203573045747857376
16002457147430610600011542022261132876196572752258294223318382070252416038166516000080010160000403418252107410184557404571905703337028337347240010201600002016000057129572491180021109110108000080000010162461522214485101643872381113242024850961662036721534230473519005020001216119572462381483538446137160000800105742357462575525731157231