Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDP (pre-index, Q)

Test 1: uops

Code:

  ldp q0, q1, [x6, #0x10]!
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)0f1e20223a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst simd load (98)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafb5b6bbl1d cache miss ld nonspec (bf)c2c3cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
20051041800007920108102500011225300010002000100020002472445128110151040104172837733000200020001040104011100110001000020250054202730024030272049867135304732161110372100031272000100010411042104110411042
20041041800004615130102590201725300010002000100020002467245206110161041104172937733000200020001040104011100110001000020160053202924024020102028286146101731161110371100026302000100010421041104110411041
2004104070000732811401025300801725300010002000100020002462845234110151041104172937733000200020001040104011100110001000020280166204110023018242027284193700731161110380100025292000100010411041104110411067
20041144800006156120102591062152530001000200010002000242644521211015104010407283773300020002000104010401110011000100002023024520411002001615202940696900731161110371100028192000100010421042104110411041
200410408000010322100102598061525300010002000100020002459245188110161041104072837733000200020001040104111100110001000020520185204132041016282054579197701731161110370100029312000100010411041104110411041
200410408000060251301025210121125300010002000100020002464445121110151041104172837743000200020001040104011100110001000020260177202016026014162061526315300731161110370100016172000100010411042104110411041
20041040800003815110102613001122530001000200010002000248604520411015104010407293773300020002000104010401110011000100002034057720251613800132022346136100731161110381100019222000100010411041104110411041
20041040700005115120102510163101525300010002000100020002462445173110161040104072937733000200020001040104011100110001000020260195204421016016462034346203700731161110381100020172000100010411041104110421041
20041040700005316170102500051625300010002000100020002438845280110161040104072837733000200020001040104011100110001000020160078203911029018392043386232900731161110380100022202000100010411041104210411041
200410408000147131201026600012253000100020001000200024852451981101510401040728377330002000200010411040111001100010000201403622027220200209203334692900731161110380100017222000100010411041104110411042

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ldp q0, q1, [x6, #0x10]!
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1300

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f1e2022293a3e3f404346494d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9abacafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3branch cond mispred nonspec (c5)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
602231212479031011100025642223114160212121484219102322009991611090725804305030810108200004010010000200001399165257946363503488112111901211751212551135983113791701003020020000100006020020000100001215391213141150201100991004010010000100000100224040387228222244393116244976500226129317425700000321021611121059502087135143232000050100121296121234121537121326121336
6020412050890800000000278922261127201401214712217021819099901110937258044850344101122000040100100002000013987892578398035053470121129012120112110311325931135827010030200200001000060200200001000012132512106111502011009910040100100001000001002240403302277222083551152438744822259810920227200000321011611121061501826036282752000050100121180121297121233121394121316
6020412136290900000000255122621122401721211722264018623299968110900258043050316101172000040100100002000013980557578306835014670121402012157412115411356331137177010030200200001000060200200001000012112212125811502011009910040100100001000001002241203342268222394031142416242524226429919029900300321011611121250502426885863102000050100121313121405121394121197121298
60204121318907000000002762225711224022412106622210256212997671110752580394503441010120000401001000020000139769345791852349891701212950121170121146113488311386370100302962000010000602002000010000121307121377115020110099100401001000010000010022408036022762238042111524222165102259810622030200000321011611121118501706006203042000050100121466121406121519121441121236
602041213009070000000024452263113920196121278219602122281000401108512580442502821011320000401001000020000139900585792092349787801210730121232121378113470311370570100302002000010000602002000010000121069121241115020110099100401001000010000010022420038223002221735011624172445422261810320626600000321011611121127502287855833702000050100121374121086121218121432121391
602041212429091001000028442245113921116121323226302322361002151110142580466503201011920000401001000020000139763855792716349973201214310121291121473113526311359270100302002000010000602002000010000121282121004115020110099100401001000010000010022428838223032229040411724301505372264210421232601200321011611121384502407805342982000050100121307121417121406121436121201
60204121399908100000002820227811400150412132322280188192999381108942580466502961010620000401001000020000139863975797420350330401210180121175121198113470311379070100302002000010000602002000010000121559121320115020110099100401001000010000010022430137623002224840311524352565362262012221234100000321011611121152502006285983632000050100121245121205121219121310121123
60204121368909100000002504223811168111212144022160206234100030111055258046050322101132000040100100002000014010724578839635026600121046012119612118311356531139997010030200200001000060200200001000012145012154111502011009910040100100001000001002243283802328222664021162425505422262811116831400000321011611121053502467516084012000050100121349121283121323121393121460
6020412145890910000000256422251126411961212882184023223299775110809258043950276100882000040100100002000013976089578921234968710121255012127212130611328031137917010030296200001000060200200001000012136612137011502011009910040100100001000001002242393862223222753241142422505442263011320236500000321011611121267502347836442902000050100121179121298121343121215121087
6020412130190910100100253422351137612161210462213020619699861111077258036150322101152000040100100002000014017155579886034999160121413012135312121311351231136387010030200200001000060200200001000012125012129511502011009910040100100001000001002240683492335222783581122417565692263912019829000100321011611121193502207895993372000050100121272121312121413121164121270

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1249

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0f18191e1f2022293a3e3f404346494d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)6061696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9abacafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3branch cond mispred nonspec (c5)branch mispred nonspec (cb)cfd0d2l1i cache miss demand (d3)d5map dispatch bubble (d6)dbddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
600431214369071100000280802337114000148121271218502062429757111044925802955019210098200004001010000200001399274958032763500519001212870121164121421113234311380270010300202000010000600202000010000120983120754115002110910400101000010000010224881390229722212407113248112066622610108928300000314000018170145120956502106505713342000050010121372121428121348121328121195
60024121369909000000028180220911400123212130222160202180976901110322580337502081011020000400101000020000140177615784796347938510121341012066412114211348631129117001030204200001000060020200001000012140112113211500211091040010100001000001022437833321632227240211023945850822611402342930020031400004170145120904501946265623942000050010121202121275121233121296121071
600241212819111000000256002250114401276120612220002022629759411103425803315024610035200004001010000200001402272157921883501487101213480121269121262112748311395970010300202000010000600202000010000121315121119115002110910400101000010000010224210334231022221521113241666541226031111782870000031400006170617121194501644056622802000050010121200121128121242120437121505
60024121095908000000028290224311424226012146922392747296894110966258038850242101362001640496100002000013982088577015635001651012119301209641214231133233113888700103002020000100006002020000100001212081206421150021109104001010000100000102241003822323221924201122414264420226086320832300000314000017170176120341502105875683032000050010120679121310121149121159121369
6002412052490400000002537022881140021921212502212122417297681111055258038250222101262000040010100002000013957861579881235044950012126601214181212461136703113784700103002020068100006002020000100001213451211261150021109104001010000100000102241983652299222204171122432585622263295213362010003140000171701717121208502046041801422000050010120566120666121200121430121032
60024121536902210000024170233311216026412125822800218177977161108632580229500701009620000400101000020000139092745753212350037410121082012122712073111352131138497001030020200001000060020200001000012118412134511500211091040010100001000001022490163952257222294091092479605382262011622234704200314000041701717121224502568251702732000050010121257121120121436121666121369
6002412128690810010002806022741132021961213462202121219897558110230258037950222101192000040010100002000013989833578724434996191012118801213491214041136053113103700103002020000100006002020000100001211031211951150021109104001010000100000102248383672254222414021102424270536226229918026900000314000014170517120996502286125973122000050010120687121365121240121358120465
60024121492907100000028510225311248216812114422993762349704311113625803135019810103200004001010033200001401579957927163501744101212020121331121626113234311393770010300202000010000600202025010000121323121487115002110910400101000010000010224321041322822228740511324455053822646972103021000031400011517017712089950142225500832000050010121053120465121490121395120930
6002412111890800000002511022381139219612128522120941989756811114125803225022610101200004001010000200001398969057946843502047101211730121612121042113478311372570010300202000010000600202000010000121057121204115002110910400101000010000010224378375225322242407112241560492225909124115811300314000015170157121049501901274913242000050010120991121370121008121425121463
60024121333910000000028100233911232217212082522620194222978071103132580325502361009320000400101000020000139740955787388350191600121332012117812104211338631139377001030020200001000060020200001000012120012140511500211091040010100001000001022443838522832218739811524126649722636104253314000003140000151701717120949501607405723542000050010120758121215121238120509121229

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ldp q0, q1, [x6, #0x10]!
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1795

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e1e202224293a3e3f4043494d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9abacafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
60223121525913200002489222310142441841217062211111003281117792580475503041011520000401001000020000140513135775340349663001222010121769122045113206311404370100302002000010000602002000010000121119121887115020110099100401001000010000010022437333782077220304071142445132341223791101862620321021633121515502767596277722000050100121779120850121216121452121650
602041210919064400024232248101472517612181122160010040011161025805025032810103200004010010000200001404848258140763519496112061601215461215741137893114077701003020020064100006020020000100001218751220341150201100991004010010000100000100224514038120582206935811424336230022402211560620321031622121274501126366629032000050100120951121769122034121588121779
602041216179113003024622290101272318412153922911099438111776258046350282101142000040100100002000013946910583102034879580121819012165912156411377631137737010030200200001000060200200001000012207112189311502011009910040100100001000001002243317353210122029393115240864331223921050500050321021633121429502446147107542000050100122066121790121677121855121896
60204122010912330002432225510143221481215032223011003031117212580379503561011820000401001000020000140255825823724351186101215160121890121875113813311342670100302002000010000602002000010000121505120444115020110099100401001000010000110022399183792068223603651162485258327224049301010000321021622120698502065815707122000050100121751121836121726121709121577
60204120994911200202649234610115242121220142216001003431114342580454502281012420000401001000020000140400885788444351410401209330121963120800114083311418370100302002000010000602002000010000121806121162115020110099100401001000010000010022492193752081220223681112486500339224019101050000321031623121634502386237187642000050100121900121505121731121746121891
60204121118915330002545225710141631241218422297111004431115652580400502061009720000401001000020000140232725772892351568201216790121694121653114061311418970100302002000010000602002000010000121757120958115020110099100401001000010000010022446243382113220463231132427132336223971351691000321031622121367502628185959022000050100121049120925121736121903121776
6020412178691233040201222471013683192121048226100100197111663258049650328101112000040100100002000014051367580015635139080121809012165712450211460019111624275784329522207010883660702181610886124135124573301502011009910040100100001000001002246917523187721894567112240194686692245311911301010321033432122154502747613485952000050100121765121634121939121825121767
6020412155691230030335222361012484961223352214001005691117712580412503341013520000401001003120254140884455812876351875901214700121709121838113862311436370100302002000010000602002000010000121922121640215020110099100401001000010000010022406273932058220594021112396503062244610701141000323831622121513506939134199232000050100121770121766122033121925124115
6020412090191240011232322400114404324122008220700100413111445258042750292101212000040100100002000014070716581974035138930121769012190512163911366431142317010030200200001000060200200001000012200612205011502011009910040100100001000001002242932383206722081402117240458330223881170720000321031623121381502268007798292000050100122017122185121875121712121736
602041220029134401023962254011400425212082522280010044111145325803945028210137200004010010032200001402401158206043521014012182401218011220321142503114387701003020020188111076095020000100001214181222711150202100991004010010000100000100224223037520632206137611024432623232240211321301630321031633121668502467277108262000050100121557122288121637121795121698

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1522

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f2022293a3e3f4043494d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9abacafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
6004312060091530003100022170229621208230412147623460097811111157258028650230100852000040010100002000014004756581251835090811212880121684121346113422311375170220300202000010000600202000010000121631121053115002110910400101000010000010224265315208622126387110243420631122445943962020314071711121679501666385976472000050010121450121751121506121527121558
600241215309104040400002148023392103236361218802220119756011124125803795018010104200004001010000200001401123257904123514133121925012137412156611365731141867001030020200001000060020200001000012149812125711500211091040010100001000001022426143892065220974441112430648330224271123772050314011711121644501927123047272000050010121242121441121462121813121435
60024121355911200000000186102376210482308121838230100976551114912580304501761010820000400101000020000139794425807548349519912164401214471217681135883114154700103002020000100006002020000100001216451214311150021109104001010000100000102244417357207722077397109244262231622408854652300314011721121371502346516376662000050010121873121534121606120919121745
60024121477913404000000161902260294416121214602327119794311131325803135016610115200004001010000200001400991058134523509989121607012123512139111366831144927001030020200001000060020200001000012151612152711500211091040010100001000001022437163422071220803731122436622337224049631102000314011721121696502388844318352000050010121583121887121529121766121584
600241220299112000000001799882263292033641216062229119799411127925802745020810100200004001010000200001401350858113403502534121379012122312156911352931141917001030020200001000060020200001000012186012169111500211091040010100001000001022450534120752206139610624485743102243110409700230314021722121468501585274926092000050010121452120683121386121458121390
60024121197911200000000189202366210242456121556228000977401113332580328501941011320000400101000020000140502425813308351757512133901219041215281136513114075700103002020000100006002020000100001215341215591150021109104001010000100000102244815310208922080358107244256032822422923992900314021722121381502086275068272000050010121964121255121602121799121548
6002412181091230000000019690229821016246012164222941197730111522258029250164101112000040010100002000014031522580543635181971215470121762121943113571311413570010300202000010000600202000010000121620121460115002110910400101000010000010224476352207722100324110243021632022445682792000314021722121231502368215117322000050010121309121440121716121619121493
600241207469112010200001811023482117614681218882210009765911130525802475020610105200004001010000200001400959058095163515976121489012149012131011358731141527001030020200001000060020200001000012161312148811500211091040010100001000001022429173442092220813781072450664309224081113952000314021722121411501706985986482000050010121233121308121506121291121512
600241214959103020300002059023542130414081216092228119788011157925802655019410100200004001010000200001401760458010203507409121356012153012148311364131140557001030020200001000060020200001000012182612199811500211091040010100001000001022447173642073220893671092431618312224148738221440314221722122105501746476268232000050010121663121454121362121613122180
600241216299114030300002618023812107249041215852233129772411115925803165020810090200004001010000200001399763958143643514590121515012168012176411375231139977001030020200001000060020200001012512176312203311500211091040010100001000001022419173382084220623981112454652316224151213772020314021731121073502547235236462000050010121585121521121612121522121423

Test 4: throughput

Count: 8

Code:

  ldp q0, q1, [x6, #0x10]!
  ldp q0, q1, [x7, #0x10]!
  ldp q0, q1, [x8, #0x10]!
  ldp q0, q1, [x9, #0x10]!
  ldp q0, q1, [x10, #0x10]!
  ldp q0, q1, [x11, #0x10]!
  ldp q0, q1, [x12, #0x10]!
  ldp q0, q1, [x13, #0x10]!
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.7157

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0e0f181e1f20222324293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a5ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9abacafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
16022357161429700100108420224610014086518057568219826018111694225925241753817621600008010016000040399725234321164357185057111571803704233735624010020016000020016000057368572371180201100995100100800008000001001624362722593979016365623341072435258442116617665593330534451200511011611571542381558415406118160000801005717157303575245735857408
16020457391428505000115400221110016647117657364217523317551853226225241747817471600008010016000040377425262951166357536057380573383741133717624010020016000020016000057480572641180201100998100100800008000001001624312322244726016367923421102436258413516673374110132253944050511011611572862981620418348101160000801005742057263573665729357223
160204570664315500001110802245100153663184571642249250138616251936252417068173516000080100160000403885251007711621572510572775711937203337072240100200160000200160000573155729211802011009931001008000080000010016257744226539720163690241611224332504118165811548113345736621900511011611570651181971440456129160000801005729857281571915720257191
16020457255430500100112790223510014008816457254220932517551730202925241693816771600008010016000040403425186081169157283057304571953715333723624010020016000020016000057136572421180201100994100100800008000001001624454022653985016346823621102430232431616537364576344433514200511011611572782181818314402116160000801005726657044572105740357202
1602045747842950000011110022291001408891165715821832821734142519842524167381819160000801001600004039902515352116175729705730357390370503374802401002001600002001600005700257222118020110099101001008000080000010016244344231739140163364232111324322583685166121542129377637351900511011611572871981654448412113160000801005714057032571175716757300
160204571354285050001117102253200140071136572102220231176616862257252417238167716000080100160000404710251559311651573640568945737637025337052240100200160000200160000570335718211802011009971001008000080000010016245943227749740163968232811224142503922165416634117412437071910511011611572482081728544452116160000801005715857294572195732457178
160204572344295500001101802244100147283272573182202218205317091912252416408195716000080100160000404165251710911625571190571545723737153337126240100200160000200160000572115731811802011009931001008000080000010016245936223246440164216234611224082323675165792717125352432353050511011611571822281626449433118160000801005741757258570485686457441
16020457054428505000111610223610014005625257263220224117572081212425241771817411600008010016071240381825196561168057486057233573513711033730224010020016000020016000057138572701180201100992100100800008000001001624483422674085016359623661132420254338816614352514938283622190051101161157383188166649535693160000801005696557327572245726957036
160204570674285000001132702260100145662252573042203284188315572262252418168190216000080100160000404748252940001621572440573395713637195337143240100200160000200160000574205734311802011009931001008000080000010016243939226241990162876233711524392224012165660744104340745201910051101161157178328154638938793160000801005703557275572015724257063
160204571204284040001124702212100164063228572122204197188516602035252417788168216000080100160000403995252315611534574030571895710137121337194240100200160000200160000572625703811802011009931001008000080000010016251838226046360163686236111324281804251166419712100413536601900512611611572492181838456460107160000801005697957345569665730057171

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.7199

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f202223293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)5f6067696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a5ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9abacafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2c3cfd0d2d5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
160043571074326600000011614022181014161042445784821685722220198824462524174381571160000800101600004034062553141001633577440580155787237518337710240010201600002016000057686579591180021109610108000080000010162398402174471901642922349108239623055771668387661165096518200050200011151112572082881888374496142160000800105754457671574915773257462
1600245713943044400000118190223010168870260577712178432218319921889252420548166516000080010160000403299255090401157357703057672576473742033752624001020160000201600005731857525118002110941010800008000001016242926219446010164526232010723992584788167127820134349337882412050200010151111576732981542615479141160000800105773157399576535760157688
16002457428431400400001168602248101480721045740222084562276217727282524162681457160000800101600004034702534881011864572980574455765537329337698240010201600002016000057745576831180021109710108000080000010162416252138524601648142291109239124647531672438471323970387100050200012151313573422281810417569141160000800105746357573576265767057532
160024574914324404000012721021971014087226857835217346119122361232925241632815461600008001016000040339925284860117115726405762657595373563376152400102016000020160000566675776211800211098101080000800000101624143220944895016434022791112387232468616675976811947374529134050200015161412577263181556546489128160000800105735157249574465758057826
160024575844315500000011359022271014089824457826219256917522539228825241733816391600008001016000040354825396090115655744705766557698375343376352400102016000020160000577965747811800211098101080000800000101624124021844540016491422951082394510454316683777012139225428240050200014161412574202381619379487149160000800105753257468575535765857716
160024573004315000000011782022021016168524457371219251620792183254425241633815831600008001016000040346425296210116045749305803457670375623374542400102016000020160000575285783211800211092010108000080000010162423422208588301654402306109238327247791675077516758094839215650200012161311576482781646435482132160000800105760757587573335759657510
160024575684315000000012595022321016326419657640217955521392396231425241680816261600008001016000040529125595390013505723405754357568374473377472400102016000020160000573225768711800211095101080000800000101624383222055282016443923031112404254514416743174613839905667110050200011151214573952281664542523170160000800105730257333574775768157820
160024575784295050000012696022061015689050457569219947923842517227825241759817111600008001016000040343425392060115655774705722357716376533376622400102016000020160000565635749611800211096101080000800000101624013622134977016463622921132402254482616709787013347324673130050200010151414578581881604611452122160000800105764057403574155754957546
160024575934345000000011974021921011447725657733221148518752248202925241651816031600008001016000040360125328780018825750905758957919375193376222400102016000020160000577805735811800211094101080000800000101625473622534623016491422831072424266514216722780016952944583130050200011161013577642281648464543174160000800105775657545575045765557444
160024576084314000000012223022391014967125257391219644419072329220725241652816721600008001016000040336125354460116935775905696957663375553374852400102016000020160000574025743111800211090101080000800000101624223921694146016471723061072409132525016658889814843494637190050200012161410573313281678394429135160000800105747857506575945745057657