Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PSTL1STRM)

Test 1: uops

Code:

  prfm pstl1strm, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)1e3a3f4f51schedule uop (52)schedule ldst uop (55)dispatch ldst uop (58)simd uops in schedulers (5a)606d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)92l1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)acbbl1d tlb miss nonspec (c1)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? ldst retires (ed)f5f6f7f8fd
100415951233193424801391597873251000100010006955211573160113053146010001000100015701573111001271226622623266024642283100073116111518100016251600159115961573
100416351234163424791181544916251000100010006822501580160613183145110001000100016011579111001269226222623261024612260100073116111485100015741579156516151596
100415951234173324491311559879251000100010006893701584159212923147510001000100015691570111001250226822593299024602270100073116111495100015931631158215901617
100416131233173324711261558866251000100010006923101542157412863142710001000100015961558111001238229023033286024452279100073116111513100015781610157116021575
100415981232183324601211585902251000100010006953811592160213253145910001000100015671595111001259225922903264024782257100073116111486100015941569158716271618
100416291234163524391181585870251000100010007023911600161712823143210001000100015771569111001258226522653236024412266100073116111498100016271592157215801574
100415711233163324461311602862251000100010007069811603158113013144410001000100015821601111001263227822803267024682276100073116111507100015701580157715871587
100416101233163224561201578872251000100010006926411590160212813147210001000100016081613111001247228122553266024552273100073116111485100015771626158416161618
100416061234183424481221588882251000100010006891511575159212783147510001000100015731602111001247230522313271024462280100073116111504100015731572159415901634
100416101232173224531411575912251000100010007030411589159912853145210001000100015851590111001259228523003235024432262100073116111530100015711573162915731614

Test 2: throughput

Code:

  prfm pstl1strm, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5777

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)1e3a3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)67696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)acbbl1d tlb miss nonspec (c1)branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? int retires (ef)f5f6f7f8fd
20204157741183351843372427998241568597864820218102081000010100101141350767432934249127801579715880131437132562010410220100161020810016157281551120201100992594100101001002263322495324400243672273210000111131701600157171011110000101001576415732158571581915904
20204159201193371813342425698001593698992520208101841000010116100001327357335613249127551575215886131416132492010410216100081020410008158331561120201100992427100101001002258622485325260241442263610000111131701600157021010210000101001588515828158471595015725
20204158191193331783302441698651584598212520205102141000010100100001331057369353449127171566915923130963133122010010200100001020010000158271631120201100992481100101001002265422658326040241392270010000000131011611157821013510000101001575515799157481582815786
20204157581183311793372414698311565197872520208102081000010100100001322657412373349127351575315788131703131792010010200100001020010000157741571120201100992565100101001002254222650325691243232264210000000131011611156351035610000101001576515792158191580315840
20204157871193301813342438398061570798812520211102141000010100100001337857435604749126771568215738130033133392010010200100001020010000156471561120201100992621100101001002256522592325871241732263810000000131011711155871011410000101001581015795157371570015796
20204157631183301833342434998081574898722520199101721000010100100001320807390483449126631572315725131383133152010010200100001020010000157531571120201100992481100101001002251222623326060242112259610000000131011611157361010510000101001578815803157461572515807
20204158061193381813312421098021572997802520208101961000010100100001318837419693149126891570215850132443132852010010200100001020010000158251561120201100992509100101001002268922666324210243802258210000000131011611156681009310000101001579215803157661572915783
20204156731193351833312428798871577098402520241101961000010100100001329097425203449125941581415782130693134072010010200100001020010000156761561120201100992547100101001002255322619325650243192260310000000131011611156011009910000101001592115895157481570515851
20204157781193291803342430498091569898892520190102111000010100100001333907399273849126581574115791130723132502010010200100001020010000157061621120201100992628100101001002266322465325560243622255910000000131011711156941056210000101001577115820158631576115772
20204158421183381803322416497511583698312520214101991000010100100001341987404133549126761565615662132163132602010010200100001020010000158781551120201100992551100101001002264222617326130243792250210000000131011611157541010210000101001580315688157971577815816

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5698

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)1e3a3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)acbbl1d tlb miss nonspec (c1)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? int retires (ef)f5f6f7f8fd
20024156751173541823542464699161567098662520151101661000010010100001336137325070504912670157531566812901313125200101002010000100201000015844165112002110924501010010102299322785328450245012279810000127021621155131015010000100101572915821157811574615870
20024157921183451793422466098891560895782520124101571000010010100001313217359920434912549155931580313173313151200101002010000100201000015786166112002110923271010010102293722997325850243832279210000127011611155991015910000100101578815669157381589715711
20024157301183491833472451399881582798662520166101571000010010100001309927341240424912605156851571512867313378200101002010000100201000015835156112002110923831010010102303122898329280244942281810000127011621155951013510000100101580415578156671558515807
20024157931183441913522451699381579496612520121101241000010010100001337937313300434912655156721566012986313198200101002010000100201000015640155112002110924101010010102279822964327380246182297310000127011621155201007510000100101573415797156581580715839
20024156681183591773542450699011577596062520160101571000010010100001317377381870404912593157271573913033313146200101002010000100201000015678155112002110923171010010102275622971327480245352287810000127011611155881011410000100101567815559161541573215771
20024158071173471853522437199041559497672520109101421000010010100001330327349200384912613156711575713168313418200101002010000100201000015721155112002110923911010010102278622627329230245162288610000127011611155511013210000100101578115761156931567415812
20024156931173521843452465999081569496902520163101421000010010100001317697334100444912802156811572013045313198200101002010000100201000015648154112002110923451010010102278522648331070246962288710000127111511156571014110000100101569415597157011573115601
20024156431173531853432460398551556497482520169101691000010010100001320577356280344912712156291570213038313210200101002010000100201000015792147112002110923141010010102287222940329320244772276510000127011611156761015910000100101583115669157381568115851
20024157481183491883472451398611571297122520136101511000010010100001328607417350594912572157771576112979313066200101002010000100201000015609154112002110922791010010102271722764327360244322288910000127011611154801015310000100101573215709155691567615762
20024157261173581813532449598981565497002520112101241000010010100001311637359670404912771156511589612986313147200101002010000100201000015693156112002110922701010010102280122981328670247132299210000127011611156811014410000100101579015682156421578415727

Test 3: throughput

Code:

  prfm pstl1strm, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5536

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)1e3a3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)acbbl1d tlb miss nonspec (c1)branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? ldst retires (ed)? int retires (ef)f5f6f7f8fd
10204155291162881432852395213551549895912510100100100001001000550072879904912419154311556314080714227101042001000820010016154601225611102011009926851001001002221022241322652395922164100001117170240015440100001001556415527155891550215550
10204155361162901432892395413431551395682510100100100001001000150072594704912412154961551814033614215101002001000820010008154781220411102011009927291001001002217022266322502403422229100001117190160015403100001001543815635155651560715565
10204155701172881452902395913281550395402510100100100001001000050072847104912379156421556914105614272101002001000820010016154341233911102011009927821001001002219522238322152402122262100001117180160015345100001001556815499155551549415563
10204154891172881462852397213901543895402510100100100001001000050072780104912337154831552314097614193101002001001620010016154341233911102011009926681001001002224822204321292399422200100001117170160015371100001001543815498155651558115512
10204155261162881442902399413561562195422510100100100001001000050072981304912337154831552314095714272101002001000820010016154341225311102011009927281001001002224022204321662396622240100001117190160015390100001001554915558155201554615472
10204155201162871432862401013101553696042510100100100001001000050072686904912474154791564614118714224101002001001620010008154551221811102011009926891001001002223922260321802392622280100001117170160015359100001001561015629155401557515579
10204155831162921442892391113481544396942510100100100001001000050072135604912426154981549314094614219101042001000820010016154151221711102011009927251001001002222022227321992391122164100001117170160015421100001001543015549154741555015518
10204155241162901422902399613111554796172510100100100001001000050072853204912383155411558714056614232101012001001620010008156081229611102011009927141001001002227722213322362389822238100001117190160015453100001001555715536155321553815600
10204155231162901432882393312901539496432510100100100001001000050072570804912507155691557114021614263101002001001620010008154581220611102011009926391001001002221022240322552395322149100001117170160015403100001001563715503154701551115518
10204156091162881442882401813421551396492510100100100001001000050072357304912409155541539813972614214101002001001620010016154371226011102011009926961001001002218422275321772393722187100001117170160015430100001001558615533154891552615495

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5528

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)1e3a3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)acbbl1d tlb miss nonspec (c1)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? ldst retires (ed)? int retires (ef)f5f6f7f8fd
100241549111729314729023965133815545958025100101010000101000050725467049124841549815655141133143691001020100002010000154621546611100211092640101010222382234932227023952222841000064481611101546610000101549515557154871559315687
1002415633117291147296239561341154809632251001010100001010000507268390491259215555156331407631419810010201000020100001552215466111002110927421010102225122256322300239832230010000644101610171545510000101538715584154771548815516
1002415606117295145288240671315154909659251001010100001010000507227901491247515623155661413831431310010201000020100001554115709111002110927091010102223422261323810239652228410000644101610101544210000101550515609155321556715658
1002415644116294148293240741350155629648251001010100001010000507269391491251615473155461411731421710010201000020100001554215578111002110926531010102225322277322310240252234210000644101610101542210000101556915527155331552115505
1002415464116294145296240011320155159593251001010100001010000507290191491239815529155361411331426410010201000020100001545115476111002110927221010102229522233322800240312220510000644816881544110000101554815475155321560215624
10024156091172921482932400813431555195632510010101000010100005073125414912445155541557914147314261100102010000201000015453155231110021109271910101022245222873227102400322217100006448166111546310000101554615639156401561715628
100241557411729214629623899134415486953925100101010000101000050723327149124801553315473141063144141001020100002010000155351551011100211092753101010222352226532240023956222691000064411161051541810000101551115457155831552215387
1002415617117295147295240051366155639713251001010100001010000507291761491249915436155311413231425810010201000020100001550315525111002110926681010102220822316322260240262233310000644101610101546010000101556815502155711554915503
100241547411629614729324008135315520963225100101010000101000050728528149125971551215563141503141921001020100002010000154741545111100211092737101010222732219332222024045223361000064451611111537210000101557115500154531551415480
1002415587117290145293239911355155419551251001010100001010000507261691491250215513155341415531418710010201000020100001544315412111002110926381010102224022219322500239702227910000644101610101555910000101551215525155581550715674