Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDP (pre-index, S)

Test 1: uops

Code:

  ldp s0, s1, [x6, #0x10]!
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 20 | 22 | 2b | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2005105081110039121004102560111425200010001000100010005284445824101510401040699377320002000200010401040111001100010001032014110262116801016580619610732162210371000262910001000100010411041104110411041
20041040800000191510081025230111325200010001000100010005286845824101510401040699377320002000200010401040111001100010001016001610160016801032350023000732162210371000322910001000100010411041104110411041
2004104080000041421010102590317252000100010001000100052852458241015104010406993773200020002000104010401110011000100010300042101430241601026190015000732162210371000262410001000100010411041104110411041
200410408111001816101010256012825200010001000100010005285245825101510401040699377320002000200010401040111001100010001024703410260027871026350631610732162210371000292510001000100010411041104110411041
20041040810100291610101025100138252000100010001000100052860458241015104010406993773200020002000104010401110011000100010257029103011261661016190739700732162210371000333310001000100010411041104110411041
2004104081010038161020102512011825200010001000100010005286445824101510401040699377320002000200010401040111001100010001023903110640017891016150719710732162210371000483210001000100010411041104110411041
2004104071010030221010102570134252000100010001000100052848458251015104010406993773200020002000104010401110011000100010247021102420178101024230619710732162210371000284210001000100010411041104110411041
200410407100004232101121025190115252000100010001000100052848458251015104010406993773200020002000104010401110011000100010238052102810171071016230623700732162210371000262910001000100010411041104110411041
200410407100003518102010258031725200010001000100010005286045824101510401040699377320002000200010401040111001100010001024805610293028861016270719700732162210371000453510001000100010411041104110411041
200410408110002832100010253012825200010001000100010005285645824101510401040699377320002000200010401040111001100010001024802410610017871022190639610732162210371000332710001000100010411041104110411041

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ldp s0, s1, [x6, #0x10]!
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1503

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 24 | 29 | 3a | 3e | 3f | 40 | 43 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60214120626906000000010021503522280101240213212155622170121156257032250254100991000040100100001000010741104590983458779011214201213051214621136993113985601003020020000100006020020000100001214401215891150201100991004010010000100000100124200339206112009347113243121831812432890101017000032444834412123250230885721739100001000050100121261121897121390121459121290
60204121415910000000000022050226910136829612178522490121037257042450270101081000040100100001000010782034609431463080401213111214101216221134013113595601003020020000100006020020000100001216611216601150201100991004010010000100000100124341331210112034354110242421832812426112110610000032335834412119550246825744613100001000050100121424121899121582121525121707
60204121730910100000000021396862257101344222812150721910121786257044150162101151000040100100001000010755034607175463162401216791212851216111138643114149601003020020000100006020020000100001216201218821150201100991004010010000100000100124171139020891208636311324221823311239610918419000032104834412114850164850838664100001000050100121212121560121408121317121375
602041211989060000000000242602239101248123612144222310121531257033450296100811000040100100001000010755504593158462437301215301214191214601132583113567601003020020000100006020020000100001216351213611150201100991004010010000100000100124352298208212070369114247986339124108509000000032104834412157250196832869764100001000050100121328121619121435121569121484
60204121570910000000000022470228710123211281214032191012112525703855031210101100004010010000100001079017460648446279311121505121749121678113383311406660100302002000010000602002000010000121328121419115020110099100401001000010000010012418835221011201735711124222263151243711208300100132104834412132450194832552855100001000050100121621121255121333121532122039
6020412135691100000000002227352226210124003161213382203012102225703345033410116100004010010000100001076297460231446244410121337121841121580113490311393660100302002000010000602002000010000121440121676115020110099100401001000010000110012433834320951205833311324131843241241910608500000032104834412143850218739728559100001000050100121574121434121354121603121539
6020412130091110100000002372022361013280236121575220801208922570428503321009810000401001000010000107449545995174626612012148412152512130911336031136466010030200200001000060488200001000012137312127511502011009910040100100001000001001240403472087120594031132427563301241111009500000032124834412100550190708783327100001000050100121441121376121352121208121288
6020412157291200000011002415022711012000176121531222101210012570403502841009410000401001000010000107449746071054631267012136912154712110211352231139856010030200200001000060200200001000012133312147111502011009910040100100001000001001240603302082120563591112403923301242295096030200032104834412091250224757776853100001000050100121321121587121279121329121515
60204121578910000000000023300226110135202361215812233012124825704575026410083100004010010046100001075739460946846215300121324121310121302113232311394460100302002000010000602002000010000121487121486115020110099100401001000010000010012429034021071209135610824152043271244498010400200032104834412103650236790928661100001000050100121575121487121600121452121694
602041216889110000000000234402251101208114412134121970121014257042150296100951000040100100001000010743084579451464115801214621217011217271134263113970601003020020000100006020020000100001212191215941150202100991004010010000100000100124340368210612004345111242217031812419118010500000032104834412134550198807818648100001000050100121314121741121321121495121324

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1574

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cd | cf | d0 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
600341203589072000000239402259112482200121854220460120442257037950270101331000040010100001000010758454613896463975611121844121876121538112860311343260010300202000010000600202000010000121728121740115002110910400101000010000010124251037120891208140511124382283111248010349200403140004782212137450236945783692100001000050010121587121618121812120963121624
6002412184091230000002279023011120011521215882197501204312570292502401011710000400101000010000107443246093044651330001213181216121216451135863114141600103002020000100006002020000100001216071214651150021109104001010000100000101243483802057120074021052424503181242211107700003140001782212109450250404860692100001000050010121823121584121452121506121712
600241216289091000000237602226114161204121556224960120928257040950214101041000040010100001000010768444605260459879901121501121670121046113787311427060010300202000010000600202000010050120968121385115002110910400101000010000110124281637220551207136610824161743301240790211900003172002782212116250174627756760100001000050010121512121361121700121359121662
600241216069121000000247502262112561280121537221602121144257028950130101231000040010100001000010763314610757464143301121423121937121863113401311396260010300202000010000600202000010050121570121861115002110910400101000010000010124369350209712019404107242325431912431126410700003140002782212154650200936795841100001000050010121717121588121493120744120994
600241210849101010000248135222551124831241214182238231213082570340502461010210000400101000010000106948246170744641255001214991213721215651134623114468600103002020000100006002020000100001215851215481150021109104001010000100000101248816333204612030372107244028232612424105210500003140002782212169550214917890917100001000050010121547121720121639121016121896
60024121558911200000022040230011424224812171822022112117725703525020610116100004001010000100001074693462035946518180012160112169612089311307831140016001030020200961000060020200001000012194212166611500211091040010100001000001012413183772062120163921102444502951242551610000003140002782212151850162882725732100001000050010121532121398121031121983121475
60024121634912220000024170229411416224012162522554212135025703825028410104100004001010000100001079985459920046482920012179412155912182411387931140416001030020200001000060020200001000012146012137811500211091040010100001000001012425163692054120334041092432254309124611111101001031400037822121613502107951078683100001000050010121572121378121560121415122003
60024121855912202000023540227011416342412107023413212130025701695027210132100004001010000100001076133460436146302180112091212188712123511371031140176001030020200001000060020200001000012191812162011500211091040010100001000001012420183682085120913991082414662961242710644700003140002782212072050228981483931100001000050010121714121757121445121296121589
6002412144291120000002349022751123221001210812202151214822570364502521009910000400101000010000107176846065484635800001216471213801217161134323114378600103002020000100006002020000100001217471215171150021109104001010000100000101243216336204812091361109242621032212404115510200003140002782212103850202396965714100001000050010121661121758121867121595120816
60024121564911100000024828822661124812921217202190601211462570358502301010610000400101000010000107627746108744645692011217671208381213791136193113899600103002020000100006002020000100001215251216231150021109104001010000100000101241613742101119903681082396563001239810429701803140002782212136250160822917782100001000050010121716121713121543121706121541

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ldp s0, s1, [x6, #0x10]!
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1614

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 19 | 1e | 1f | 20 | 22 | 24 | 29 | 3a | 3e | 3f | 40 | 43 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
602141212709130202200237902257101424050812153322190012033825704455031710113100004010010000100001075703457384146391980121660012149512172211378831139156010030200200001000060200200001000012076712132011502011009910040100100001000001001248353832116120443891102406563391240698011300032113833312130350254872330341100001000050100121471121666121689121613121781
60204121483911010000023290222310141602241214112235001213682570358503221005510000401001000010000107716146019714630496012162701214081216161126753113998601003020020000100006020020000100001217151218581150201100991004010010000100000100124142381215112012389110242476330124054209900032112832212135050188636270669100001000050100121695121507121589120655121492
60204121285909010000024710227310140012121215572197001212112570418501881009610000401001000010000107731445765514633266012071501216961217711134103114131601003020020000100006020020000100001214811213891150201100991004010010000100000100124181437020581201740411724345033612392111010000032113832312136650222802857682100001000050100120741121556121943121527121589
60204121727910020200025000225810119212281218102225001212952570292502741011210000401001000010000107621646057874635734012124801216551216741136753113727601003020020000100006020020000100001218001218051150201100991004010010000100000100123891438320631201340711324164632712487118010200032113173312130150206300830698100001000050100120807120499121410121588121447
60204121894911010000025140229410139202481214672205001211112570442503181010410000401001000010000107555046030764630927012144901214441206481134943113675601003020020000100006020020000100001214021214871150201100991004010010000100000100124212378209512063352113241952309124327908900132113833312031850214690736634100001000050100121699121632121661120669121687
6020412124590802020002414022621013680272120791218901123077736708915061110201100554395411109110631138154467754847035250124141012479012494511499529911580865643335962226011146670462218011145124538124415291502011009910040100100001000001001247233562134120014041102414643251248013811352043732819755123088506231255898911100001000050100124922124030124936124698124109
60204124717935120221166853164213910128822161251032215001226847447086350723101751004643675109911000010749744604953462862901216090121364121565114872311380860100316872072410245633922209410204123589122750215020110099100401001000010000110012458440121281200839911024343343341241510919810032113832212167750206857762687100001000050100121720121920121431121705121767
60204121855912030300025090225701125621881215662265001210872570466503761011710000401001000010000107710746019494636024012151231214561217061138532311386160100302002000010000602002000010000121647120795115020110099100401001000010000010012420113762140121093661112423503311241013419712032113833312145850236630800729100001000050100122013121611121334121919121647
602041217869120300300258102333011240220812200922150012142025702025032210100100024010010000100001087553459655746377730121699012155712163211262431139276010030200200001000060200200001000012155112139311502011009910040100100001000001001240711371205112009392112241564328123961181118110032112832212128050220727806735100001000050100121634121771121689121686121570
6020412158591403030002470022360112482480121528223000120884257044250336100981000040100100001000010752804604678463593301216050121668121521113550311430360100302002000010000602002000010000121477121447115020110099100401001000010000010012437113802056120073991102435502201240845111810032113832212150950232736588678100001000050100121818121877121475121635121045

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.1545

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | c3 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60034121235910220002262222811448620012143722071112120925702895021610115100004001010000100001074828460388146335271216170121151122065113957031139616001030020200001000060020200001000012134312134011500211091040010100001000001012424038620941201340710724207631512392130097000031403783312139350222760873877100001000050010121542121694121575121380121344
6002412191891110000235022631140063841212662194001212072570322501941010510000400101000010000107673645947464640435121715012170912162511324903114113600103002020000100006002020000100001218301216591150021109104001010000100000101241883472102120193971102403222319124101232117000031403783212095250266951669729100001000050010121654121893121428121573121641
60024121912909100002345225011328247212171222420012122125703375022010083100004001010000100001077177460537746398841214550121420121839113773031141916001030020200001000060020200001000012178812133811500211091040010100001000001012428838320391201240011024407433012416840106020031403783312145650204878859685100001000050010121724121871121636121605121825
600241215049111000023532240112082176121611224810121575257037950194101021000040010100001000010747024604637464427212168601216611217101134270311392860010300202000010000600202000010000121780121491115002110910400101000010000010124088352206612026395108242031834112431100196230031403783312105250238837758600100001000050010121366121755121861121592121727
600241218449080000022312251114721144121439220510120715257033750172101091000040010100001000010754584603633463396812113401216741213751136750311383060010300202000010000600202000010000121418121619115002110910400101000010000010124250388211312027403109243245831512420971115000031403782312043050202746645670100001000050010121607121343121577121718121442
6002412150691000000239422521141621321217012236101210332570151502041009910000401521000010000107686246139834639852121902012143612146911334903114028600103002020000100006002020000100001213511214291150021109104001010000100001101242203822066120393481082419523251241086190000031403783212127950188885809694100001000050010120635121626121421121549121175
6002412124090900000239422701124011321213882266101209042570295502201008910000400101000010000107192146098374623210121473012119412159411330103113717600103002020000100006002020000100001216771216941150021109104001010000100000101241203572065119994061112418134438124161151128020031403783212148350208661795647100001000050010121520121411121431121630121684
600251215459121000020852228110243224121759224000120870257033150242101141000040010100001000010749094606285463535212168501215511215801134500311403260010300202000010000600202000010000121950121516115002110910400101000010000010124368386206912018379107240742433312420761107010031403782312140250202981749691100001000050010121713121442121719121686121431
600241213989121000023172237112402961215112197011216332570340501661011810000400101000010000107868046155174630190121859012183612164011367003114061600103002020000100006002020000100001213391219001150021109104001010000100000101243503772062120074021132410184318124331091115000031403783312152450206901903645100001000050010121633121485121360121330121432
600241214479120000023222226114001244121152222912120898257028950172101191000040010100001000010758814608120462935112116701212051209901138530311409660010300202000010000600202000010000121391121843115002110910400101000010000010124300395206412007396111242325631912400951107000031403243312136750200734748553100001000050010121795121576121596121350121813

Test 4: throughput

Count: 8

Code:

  ldp s0, s1, [x6, #0x10]!
  ldp s0, s1, [x7, #0x10]!
  ldp s0, s1, [x8, #0x10]!
  ldp s0, s1, [x9, #0x10]!
  ldp s0, s1, [x10, #0x10]!
  ldp s0, s1, [x11, #0x10]!
  ldp s0, s1, [x12, #0x10]!
  ldp s0, s1, [x13, #0x10]!
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3784

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 62 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16021430036227330000102452205118727624830228219514991774421402516021380221800008010080000400949134525900118303363037530261102633101341601002001600002001600003012330143118020110099101001008000080000110082408162229310108252023081122353730338485071769122298230890025110116113030322801163263401108000080000801003041130394303723039030189
1602043013822730001110077220211832704883048721501388988242135251602028039680000801008000040101313462400011730310301813032210147310232160100200160000200160000304053021111802011009941001008000080000110082408921663573082752231210723807403469853787239729983088022511011611303782580122375349828000080000801003023630133300993022630229
1602043029922810001099602273117766555630268214115886099417672516020780212800008010080000400969133286710122301473016230262100513101151601002001600002001600003015530295118020110099810010080000800001100823929224730160827902371106240446228728512279511423732930002511011711301992380107340329948000080000801003016630002303703019230265
16020430070226101010983922401165674272301922164154746872173925160219802058000080100800004008941338298101113007130414302231011931025016010020016000020016000030144302341180201100995100100800008000001008240118222120190829492311109240624832338521671011424833383002511011611304592880109325219818000080000801003025130139299943024730301
160204302742261210101019721411165667340301512134169791724147925160219801948000080100800004010081340782101053021730260301601016831024516010020016000020016000030185300491180201100994100100800008000001008238424221731180823882364111240350437298485493013524252909002511011711302392980108340283958000080000801003012530355303173026930252
16020430283226300010105212246116488924030234211113386277519762516021980208800008010080000400980134288610107301653035530121101503102061601002001600002001600003046630171118020110099101001008000080000010082376242224319008238723221072404544393585252896122263933160125110116113031121801092513721108000080000801003016330100300593007630384
160204303312263000101058722031141674132302342150198908978191925160214802078000080100800004009201338039101013010130448303561008031038116010020016000020016000030295303021180201100998100100800008000001008241224220828720829632330113240023841688538383911631753023002511011611303282080136313350788000080000801003022630411304503030130206
160204304032273000109901220611824715043028721151789971079197325160208802078000080100800004010091342703101233029230210298961034531017016010020016000020016000030390304051180201100991110010080000800000100824193322243211082497236510724063183477849477218631713408001511011611303561680105334374958000080000801003034430288302783026430160
160204302612263000101061521661158470256302142155133107511092046251602308022780000801008000040094113464531010630359302443029810268310348160100200160000200160000302643031511802011009941001008000080000110082406272248389208251423541072404246405985408921133350031421502511011611304612280100324346448000080000801003044730421304493024430299
1602043031122833000010538220011848845163034621869492589420352516021680220800008010080000401051133333110130301923045330144102403103201601002001600002001600003044030156118020110099310010080000800000100824142722743962082618234411324122303501853308651172717357316015110116113011826801304182941238000080000801003032530503301683035030400

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3779

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600343015922730330098350221611376911323034322191728486961862251601188012280000800108000040059913355400011230424302773048810254310149160010201600002016000030275301471180021109181010800008000001082428202212304282455232111024277422920852357529524393004034502031621174303772780133429327918000080000800103031730198304043036330358
160024303042263000101047502181116487214830257220116070769020542516011780269800008001080000400564134417900903020730301301701024831021416001020160000201600003013830369118002110929101080000800000108241512225730678238523351132433506294584938725117256430590345020416000563036820801183843631008000080000800103031530146302423018130152
160024303192272102009495022361144879256301652220173716725194225160109801218000080010800004005051344211001053037430311303741029231035616001020160000201600003052430313118002110927101080000800000108241924228031988250723381132385506313884946775133214537150345020716000763048219801003863801048000080000800103028030185302763029630304
160024302202273000001033702202116567428030215219714970476919417116011480117800008001080000400493133481900108304173023830155100953103161600102016000020160000302443037611800211091510108000080000010824382021972787827312277108241775229398514569111821542629004502071600067301112980100321303918000080000800103027230199302723029630141
16002430154226100000975502237116168824430100219415874972019082516012280117800008001080000400510133682800119302783010530353101753100981600102016000020160000301843037911800211091510108000080000010824031622483357825902340114240975626188472472211823082688034502071600076303032780120381333748000080000800103021030127302393024630194
1600243025822510000097660222011424802523035022031875347791907251601278010980000800108000040044913410400013730453303913016010317310213160010201600002016000030242301091180021109101010800008000001082440162267248382474228511124067502843850527141042413298400450206160006630148980116301397878000080000800103021730029300873008630221
16002430150225100100958702185116085818430245219713139248616812516012880118800008001080000400528133975911123301713014730251102413101941600102016000020160000302793046811800211091610108000080000010824448225431118239123401122418788288584646810123241925380345020716000453026919801183903831148000080000800103050230401302533030130290
160024302252263003009988021991162499260302952168170591801200425160114801378000080010800004005581329080001093034430194302821012331020416001020160000201600003021530230118002110921101080000800000108242018222536178269323301102424506293685235737125236631500345020916000653013228801223723081048000080000800103035830241300813009330263
1600243018522620000010196022241164078260303992164174788747209225160126801328000080010800004005161348259001253030730278301691026731035116001020160000201600003037030318118002110919101080000800000108241922221429748272423591092417508322085073701121271329371534502071500077301981480089392333968000080000800103026930289303043030730198
16002430205225202001100240221411656742603029521812026847402040251601188010980000800108000040049313463160010630181301483035810296310300160010201600002016000030241302441180021109910108000080000010824171722503169824682294111240950630788479785211625182784044502071600037302152580112379425868000080000800103034130311301793028930169