Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PLIL1KEEP)

Test 1: uops

Code:

  prfm plil1keep, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)1e3f4f51schedule uop (52)schedule ldst uop (55)dispatch ldst uop (58)simd uops in schedulers (5a)6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)92l1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)a9acbbl1d tlb miss nonspec (c1)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? ldst retires (ed)f5f6f7f8fd
100416181229153023881590928251000100010006936615891618130431473100010001000157015831110012522207223032370024002214100073116111504100016031589161515931619
100416271230143023791607876251000100010006937615981616130131472100010001000159915971110012052221223332080024072231100073116111497100015931618161915981603
100416001229143024041556881251000100010006995416091591130831459100010001000159315861110012612231223232320023762215100073116111539100015941599160815921613
100416041228153023941608914251000100010006938616121611129131450100010001000158715761110012122242222132490024002228100073116111495100016191603161716251635
100415641230153023651599926251000100010007036315971636130331446100010001000157815871110012482214222532180023882216100073116111507100016241620161416231620
100416261230153024011592863251000100010006933415781605130931479100010001000158915761110012422236223132270023752241100073116111533100016241563161516331591
100415871330153023801590920251000100010007081215761604130931486100010001000160915881110012312228222132150023862250100073116111494100016021592159216031589
100416261230153023951623884251000100010006983516131598129931447100010001000158315881110012392211221532210023712231100073116111500100015791568161516131588
100416201230153023841601892251000100010006971316161570128731465100010001000160515921110012302214225132171224052188100073116111514100016171614160116101596
100416191230153123601579888251000100010006985115841597130031450100010001000159515951110012332213220532040023812236100073116111513100016121602159916191572

Test 2: throughput

Code:

  prfm plil1keep, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5754

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)1e3a3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)67696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)acbbl1d tlb miss nonspec (c1)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? int retires (ef)f5f6f7f8fd
202041567811834818834424360115635983225202201021110000101001000013079373588829491269201566015665129363132202010010200100001020010000156971551120201100992353100101001002277922626326980243692287410000131331633155911010810000101001562015718156961569715790
202041569911834118334124525115712960625202171020810000101001000013191573708538491260301576115709131083131642010010200100001020010000156611541120201100992425100101001002284022812327510244002278310000131221632156191011110000101001580015735157721565415624
202041573911834318534524505115759973125202381022610000101001000013391473186137491259301579415702129373132802010010200100001020010000157241561120201100992354100101001002285622757327330244412270310000131231623155871015010000101001584315690157451568215732
202041574111834418934524404115696984425202391018410000101001000013144773666740491265901569415675129253131482010010200100001020010000156611641120201100992435100101001002265522775326481244562276510000131231633156091014410000101001565215848157381559115695
202041570411834418234524477115702994725201961020510000101001000013346473289941491250601568215781129033130442010010200100001020010000156431551120201100992522100101001002278122651327540244632266610000131221732155741010510000101001580115773157071564915737
202041570111834417833624607115769974425202051023510000101001000013150574410438491260401568015900130023133962010010200100001020010000156981541120201100992437100101001002280422670326630246222267210000131231723156981009610000101001576315726157281583815700
202041575511833818434524427115716965125201971019910000101001000013254473827724491265001572215763129673131612010010200100001020010000158571561120201100992406100101001002262322581327001245872281010000131221623155301009910000101001574815794156721580915752
2020415814118344181339244991156529888252020810196100001010010000132414731080424912667015656157931315131323620100102001000010200100001570415611202011009924731001010010022730228543284218242562275010000131231623155711010210000101001583915716156991567915780
202041572511734218533724427115699972625202261019910000101001000013247074004437491264701571715752130323131672010010200100001020010000157791561120201100992432100101001002267022907326360245252271410000131231633156201009310000101001563615646157281567315913
202041575811734318734324435115792959125202231022610000101001000013341273701431491266301580115766131163131612010010200100001020010000157431541120201100992555100101001002278422696327360244802281210000131231733156611010810000101001578315820158351582015772

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5760

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)1e3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)acbbl1d tlb miss nonspec (c1)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? int retires (ef)f5f6f7f8fd
2002415851117340178339244401583097342520148101511000010010100001339527348570424912702156811571213056313221200101002010000100201000015656143112002110925871010010102272222657327300244902266810000127081655156281013810000100101584015749157741573615788
2002415836118333178343242291579497922520163101721000010010100001322287361890504912514158451565813001313354200101002010000100201000015758142112002110925391010010102267522802325910243552283310000127051655157261012610000100101575115833157641572215743
2002415700118344180339242351581396952520118101781000010010100001327487378830344912632157781573713138313242200101002010000100201000015683146112002110924811010010102271122719326510243352260310000127061655155811013210000100101577815641158361572115744
2002415801118342178345243841567599262520154101331000010010100001333217367020454912559158131572913043313214200101002010000100201000015731158112002110925771010010102268022621325270244162287410000127251655157361012610000100101573915783157321587015804
2002415777118340182334244781568997802520160101631000010010100001335347391920514912682158071570412972313319200101002010000100201000015820156112002110923131010010102274722762326920244752260510000127051655155091008710000100101572315726156871582515771
2002415814118334183339244071575597462520124101151000010010100001334307334570474912622157271577913089313263200101002010000100201000015771155112002110924211010010102264522827327792243902259210000127151655156511009010000100101582515912159331567015739
2002415887118339181334244181568398112520130101271000010010100001333667365700394912743157471576413094313209200101002010000100201000015707143112002110926461010010102274122798327070246362258110000127151655156741015010000100101578315869158891569715869
2002415710118343181342243831586398192520181101151000010010100001330947386060434912714160021574113083313168200101002010000100201000015724157112002110924131010010102276122618327030244332262210000127051654157351014110000100101584115923157261569215767
2002415709118335178340244851578296982520133101301000010010100001349357413540514912632157761585213042313185200101002010000100201000015720158112002110923461010010102261422734327710245192267310000127361647155741012310000100101579215736157781574615825
2002415699118342176336244401595697912520136101421000010010100001346407423120474912753157611569113103313320200101002010000100201000015702149112002110925061010010102274922724326820244872260410000127051655156361013210000100101588415917157011581315658

Test 3: throughput

Code:

  prfm plil1keep, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5415

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)18191e1f3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acbbl1d tlb miss nonspec (c1)branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)ec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
1020415397115331171328302445101534894372510100100100001001000050071879804912285015407154161400071407310106200100162001000815421121851110201100992569100100100228442283932820010245302273810000111719016001531000100001001542715390155131541915437
1020415453115329170332002467401538294672510100100100001001000650072043504912385015415154621401661406710103200100162001001615383121971110201100992553100100100228992277533318010245772274710000111718016001535200100001001552115389154511533315415
1020415445115325172333002455801543994732510100100100001001000850071973104912334015465153941399161412310100200100082001000815390122101110201100992594100100100228262291532784010245712268910000111718016001526300100001001546215477153891537915346
1020415425114329170332002510801531194692510100100100001001000350072113104912251015385153701391171405010100200100082001001615433121841110201100992559100100100227672277332720000246292271710000111717016001528500100001001560115395153511537015438
1020415415115328167331002448501535493812510100100100001001000050072253304912363015383153641399661415210100200100242001000815350121431110201100992514100100100227992276432676000244682277610000111717016001537000100001001554315453153441533615407
1020415465115326172332002453801532394782510100100100001001000650072232104912297015329153811397271407810100200100162001000815372121301110201100992570100100100227232283632765000245932275210000111717016001535001100001001539815412153661542015446
1020415498115328170331002454001545894342510100100100001001000050072467804912390015356153371400461416810100200100082001000815385121871110201100992522100100100227902270832700200245892281910000111717016001531100100001001548315373154261538215423
1020415400115327171332002467701542994762510100100100001001000050071888704912384015421153381398171412010100200100162001000815382122391110201100992577100100100227672281332774000245592270610000111718016001529600100001001534515492154461539115384
1020415415116327171327002459601537594742510100100100001001000050072196804912414015398153921399471414210100200100162001000815398122101110201100992519100100100228052283332696000246062266210000111717016001532900100001001544815432154291542715319
1020415384116330177331002450201530394642510100100100001001000250071584204912343015446154151388571420110100200100082001002415292122191110201100992528100100100227762273233387010245462278810000111717016001530500100001001540015415153431545915384

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5574

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)1e3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)acbbl1d tlb miss nonspec (c1)c2cfd5map dispatch bubble (d6)ddfetch restart (de)e0? ldst retires (ed)? int retires (ef)f5f6f7f8fd
100241555511729014929523951155309719251001010100001010000507306004912521155251554714171314271100102010000201000015505154541110021109267010101022379222563230502397122273100000640216221537510000101554415595156181556715522
100241557811729514829424039155979664251001010100001010000507289634912468155191556214186314398100102010000201000015619155021110021109261010101022275223053228502398922271100000640216221544810000101569915632155921555515600
100241558311729814729524037155769630251001010100001010000507316594912463155571547614135314310100102010000201000015522155441110021109270410101022278222883226502408622245100000640216221537510000101559715586155611558815597
100241549611729614829424013156259625251001010100001010000507301844912461155851558314096314226100102010000201012115503155171110021109275010101022378222813224102403022363100000640216221543010000101555915591155531554715542
100241557911629614729323986156249711251001010100001010000507302214912530155871559314142314320100102010000201000015406154881110021109264410101022294223223225102396122270100000640216221543510000101557815513155531551515612
100241557411729314829923947155199653251001010100001010000507305604912501156051561214071314283100102010000201000015534156041110021109270510101022351223243228302400022302100000640216221547610000101554715587155511557215609
100241553011729914629824005155189581251001010100001010000507299524912484155171560214126314317100102010000201000015512154871110021109265110101022317222573225102401422273100000640216221545810000101557015602155171551815616
1002415566117296147298239981556796263610010101000010100005072871449124931560315599141453143261001020100002010000156261548411100211092710101010224952229532347312402522366100000640216221540810000101559115544155591555415504
100241563511729214829524005156769585251001010100001010000507248764912443155271546214057314374100102010000201000015579154271110021109265910101022362223343232002401722275100000640216221542610000101572915589155651553815516
100241547611629514729424003155249533251001010100001010000507252964912372155531550014120314363100102010000201000015563154741110021109265510101022366222523223702399422345100000640216221539910000101567015518155661555715541