Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PSTL1KEEP)

Test 1: uops

Code:

  prfm pstl1keep, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)181e3a3f4f51schedule uop (52)schedule ldst uop (55)dispatch ldst uop (58)simd uops in schedulers (5a)606d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)92l1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)acbbl1d tlb miss nonspec (c1)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? ldst retires (ed)f5f6f7f8fd
1004160112361634024801121560878251000100010006949611577159013133146810001000100015671580111001227226722993281024372271100073216221521100016061611160815951598
1004160212341835024551281594924251000100010006931911559163112823147910001000100015761570111001255228122923266024602288100073216221538100015991624159916211638
1004162312331735024461321574913251000100010006926311598159513193148210001000100015831586111001274230822823282024552276100073216221517100016011599156916041606
1004161112331735025021131618883251000100010007023611603159813033148310001000100015901576111001223226322573281024982282100073216221519100015721642161316201597
1004160912351833024611271575885251000100010006942311582160013253148410001000100016021559111001256229922733286024452286100073216221515100016111622160716191619
1004161812331534024631131595885251000100010006946311586161613133149410001000100015921565111001246227622543285024722288100073216221524100016121594157815941631
1004159512341634024651381603872251000100010006923811587161413073145910001000100016191580111001255231322933267024462282100073216221511100015961587162015961595
1004160612341534024681371581891251000100010006895611597158312873144010001000100015861601111001249226723103273024822293100073216221487100016441604163615951637
1004157512351634124601411621868251000100010006950711624160013273148010001000100016341567111001249227722863307024692285100073216221499100016131580159416111603
1004158012371835024641181583887251000100010006838811592160613183145310001000100015891603111001236229024063307124712299100073216221507100015691626160016021599

Test 2: throughput

Code:

  prfm pstl1keep, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5757

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)1e1f3a3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)a9acbbl1d tlb miss nonspec (c1)c2branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? int retires (ef)f5f6f7f8fd
20204157031173421830347245630997715624988725202021022910000101001000013459573983402749127271589915630130123131912010010200100001020010000157161561120201100992421100101001002274022703327510024678226561000000131021611156091011710000101001582115905159251599415843
2020415898118352184035124530196998615862990325202241019910000101001000013274973638804949126881568815870131163132802010010200100001020010000157801551120201100992479100101001002273222801327841024526227641000000131011611156371011710000101001586215690158631572515758
20204157391183441830336243860993615700976825202411020210000101001000013175773894004349127571574815660131213132972010010200100001020010000156221571120201100992377100101001002269622796328660024618228401000000131011611157381014110000101001575315732157591579615748
20204157701183461840347244190994215854977025202171021110000101001000013243374450903349125651581615802129983132472010010200100001020010000157631651120201100992361100101001002271122749328170024670227201000000131011611156261012010000101001577515866156651586215722
20204158461193451810342243390993715649981525201841019910000101001000013230373869813549126901580015685131103133032010010200100001020010000157361561120201100992490100101001002273822877328070024304227941000000131011711158001009310000101001575115820157051569115763
20204158581183441850345243550974915804987425202291021710000101001000013321773707913749127501588915794130433132962010010200100001020010000157111561120201100992467100101001002277622755328590024571227441000000131011611156841009310000101001571615790157691587415657
20204157531173551830346244530984915874970225202421017510000101001000013258973399103649127231575915828130443132312010010200100001020010000157451561120201100992537100101001002266322625327030024437226861000000131011611156261012610000101001575715733157401565315804
20204158421193401830346245560986215716980225201991020210000101001000013259473948813349128351575715724130903131062010010200100001020010000158371471120201100992564100101001002275322710327110024347226181000000131011711156311010810000101001567615848158891568915816
20204156361183471840346243530993615669978325201871022310000101001000013310773300603949125761562115754131033132072010010200100001020010000158941571120201100992458100101001002267122714325931024507227181000000131011611155561009010000101001574015876158171570915653
20204157461183441860338245920984615690960725202111024110000101001000013139274043402349125351559515791130353131932010010200100001020010000157691561120201100992400100101001002284122675327918024548226411000000131011611154741010810000101001575415757157571570515779

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5725

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)1e1f3a3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)5f6067696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)acbbl1d tlb miss nonspec (c1)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
200241561411735718035724760099671563197612520130101421000010010100001320807304980046491260715769157141291831319220010100201000010020100001563714411200211092319101001010228592285832978124499229461000012702161115634101410010000100101566315818157001580515664
200241565211835818835124687098441573397262520148101061000010010100001326287378940049491263415803157631300531317020010100201000010020100001562015711200211092396101001010230292302932844024698229751000012701161115575101470010000100101567215607158111562515680
200241567411935718335624576098631569397332520151100971000010010100001317767399150044491267915671157601292131316420010100201000010020100001564214811200211092450101001010230312305432980024723229241000012701161115473101230010000100101569815715158001559115557
200241576011835917734924546099191578197822520133101211000010010100001331657367500038491260615630157141300731312420010100201000010020100001568414511200211092375101001010229652297232967024499229311000012701161115504101230010000100101578515700157601561515653
200241574211735018035824674098721574096052520142101631000010010100001329387397060058491255615717157131294431322920010100201000010020100001568314311200211092358101001010229902286232859024676228471000012711161115564101260010000100101563315751157511563115629
200241575111835217935024567098651581197892520133101541000010010100001327597300900053491264815857156911301231321020010100201000010020100001575714111200211092418101001010229292289233079024584228491000012701161115567101230010000100101575415696158291570715715
200241593211835218435424535098551565798632520121101571000010010100001313587410700053491256315612156361307231311020010100201000010020100001569715011200211092462101001010229162294532879024752228611000012701151115629101560010000100101572115668157231574915751
200241569011735117935224702099321580098922520151101331000010010100001340517417900053491268015592157021304131311320010100201000010020100001568515411200211092341101001010231032281832986024606229241000012701161115590101440010000100101582315760157111561515506
200241559211935518335524539098511568297402520133101241000010010100001330267347560046491255215636157351303631314320010100201000010020100001559614311200211092383101001010231082292132755024690228021000012701162115595101140010000100101560115619156781566515639
200241568511835318536124592099581566296592520151101481000010010100001327237328300138491260515774155891290931313920010100201000010020100001571814721200211092562101001010227752290532923024431230831000012701161115523101470010000100101577915727156531572215542

Test 3: throughput

Code:

  prfm pstl1keep, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5487

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)1e3a3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)acbbl1d tlb miss nonspec (c1)branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? ldst retires (ed)? int retires (ef)f5f6f7f8fd
10204155041153211633152460412501548395252510100100100001001000050072234449124311546215465139707141661010920010016200100161547612296111020110099275810010010022666226643263312444522557100001117170160015457100001001538615485154671550015466
10204154141163261673182455912921544895602510100100100001001000050072519549124131547415431139426141241020820210000200100001545212203111020110099273010010010022666226953262602446422602100001117222242215350100001001541715404155181539315480
10204154731153161603202436012831547995762510100100100001001000050072308749124331548515458140256140651010020010000200100001549112226111020110099268310010010022644227023252502438522632100001117222242215374100001001551115495155191547615401
10204155171153181633142442812261547095242510100100100001001000050072408149123881551615430139956142151010020010000200100001543612308111020110099272010010010022711226663265202450122653100001117222242215408100001001545515454154361538215564
10204156071163201603212447413351550594892510100100100001001000050072846749123351548515517139456141941010020010000200100001545712254111020110099276910010010022650226143271602446122730100001117222242215426100001001549615394155231547615420
10204154811163211623172445112431541094892510100100100001001000050072558949124231544715387140006141391010020010000200100001548712280111020110099271110010010022635226293264002456922694100001117222242215376100001001545715454155181547015476
10204155211153191633222445812021544895972510100100100001001000050072370349123261551815495139996142061010020010000200100001545512172111020110099274110010010022611226933259502445022694100001117222242215318100001001553415490154851543015402
10204154911163231633212454212641544695702510100100100001001000050072819349123671547515466140226141611010020010000200100001541812267111020110099265910010010022716226923265802447022615100001117222242215398100001001548415447154191550415409
10204155041163191613192444712701554195602510100100100001001000050072574949124141532815573140396141021010020010000200100001545712251111020110099270710010010022580226803262402446622650100001117222242215362100001001545915497154631551615438
10204154641163211623202435012311555995352510100100100001001000050072234849123121547015417140096142091010020010000200100001555212183111020110099271910010010022693226413271902442822667100001117222242215388100001001554715464155051550115461

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5533

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)1e3a3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)acbbl1d tlb miss nonspec (c1)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? ldst retires (ed)? int retires (ef)f5f6f7f8fd
1002415513115292145290238891361155379642251001010100001010000507283001491252915574155761415331430610010201000020100001547415499111002110926731010102222922167322240239102224410000640316331536710000101556715532156581542815558
1002415498116296145291239471357154719641251001010100001010000507258681491247515605155151406431427710010201000020100001557115428111002110927131010102226822209322320239722226610000640316331548410000101554415616160251561015524
1002415542117290146291239231318155209519251001010100001010000507255281491244015557155711417231423710010201000020100001544915488111002110926591010102219522245322150239982222810000640316331545110000101558415491155211551015413
1002415600117288147290239691293155329630251001010100001010000507273381491253815583155331421431423010010201000020100001536915443111002110926911010102227022283322080239702216210000640316331541810000101563815621156141554915546
1002415481116292145292239981332154469545251001010100001010000507272981491238715521156131410931420310010201000020100001544415449111002110926981010102228422314321311240172223210000640316331540210000101552015585155881545015640
1002415441117287145294240541315157159600251001010100001010000507330220491267515476155911416531442210120201012120100001555015511111002110927011010102223922243323060239832218410000640316331540810000101554315510156471563115594
1002415540117294144291240571364155759528251001010100001010000507248781491248915544155931413031428010010201000020100001543415472111002110926731010102222522213322880240402220310000640316331548110000101547015555156011560315496
1002415505116290144292240381351156239551251001010100001010000507268361491242415515155441413731430210010201000020100001548515454111002110926461010102230122252322861239642215310000640316331542610000101555115561155391554815552
1002415499116288145284239791347155529590251001010100001010000507226951491237015611154991417331432910010201000020100001548415480111002110927061010102223622216322340238752218510000640316331536010000101550515539155801554115547
1002415608116292146290239251310154959542251001010100001010000507288431491236315435155841404831425910010201000020100001547715518111002110926171010102229922247322950239692227610000640316331546510000101555515474155601550615512