Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PLDL2KEEP)

Test 1: uops

Code:

  prfm pldl2keep, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)1e3f4f51schedule uop (52)schedule ldst uop (55)dispatch ldst uop (58)simd uops in schedulers (5a)606d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)92l1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)acbbl1d tlb miss nonspec (c1)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? ldst retires (ed)f5f6f7f8fd
100415941231173424511574859251000100010006840711585157013023146010001000100015811582111001241226322833285024232240100073116111498100016431614158316121593
100415991233163224351602891251000100010006891411612159512903146110001000100016041569111001268225522743284024452264100073116111504100016081603161315961599
100416121233153224311591866251000100010006942901588160112903142210001000100016091548111001251228522363272024442240100073116111494100016001598158415971591
100416061132163224551597881251000100010006924911576160413033145710001000100015831589111001233221922533279024212291100073116111493100015981557158016031616
100416121233163324311556859251000100010006793501580159412933147410001000100015801578111001267226922523252024292280100073116111494100015941612159615961611
100415851232183224561576865251000100010007016001612160612863144310001000100015741587111001253226722363258024442261100073116111525100015891608161015971591
100416011231153224401589878251000100010006894511588161213003146610001000100015821590111001260226822583239024422266100073116111486100016141608158816041596
100415991232163124241564895251000100010006942611573159913003147310001000100015871610111001233226222713256024252256100073116111498100015671644161416001575
100416031232163224391541869251000100010007041911557155513043147110001000100015681690111001254226222823254024192265100073116111496100015841598159216171597
100416011132163224551580910251000100010006969201554161612953144710001000100016041588111001267225522613271024252264100073116111487100016131602159515881609

Test 2: throughput

Code:

  prfm pldl2keep, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5600

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)18191e1f3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)a9acbbl1d tlb miss nonspec (c1)c2cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? int retires (ef)f5f6f7f8fd
2020415645118369193367002471001567296002520196102231000010100100001319957293451364912547155031567613124313174201001020010000102001000015567156112020110099225910010100100228682299032970002475823005100000131011711155331012010000101001557315618155561554415605
2020415648116362197365002458701551096882520211101991000010100100001315517276391284912560154881557412861313092201001020010000102001000015640155112020110099216410010100100227792300032930002480323008100000131011611155481012010000101001553515679155631558815631
2020415681117360199360002474701551596952520211101901000010100100001305087326511394912488156381568312797313029201001020010000102001000015767157112020110099229410010100100229722282233138002471823123100000131011611154651009010000101001564415563157021571215740
2020415671115364195361002460201557496002520184101781000010100100001305377310011324912380156551560912913313077201001020010000102001000015647154112020110099209510010100100230542309533198002478122997100000131011711156081011110000101001556315657156691555615599
2020415585117368195364002480001560195142520181102141000010100100001316627297481314912481156611567712890313164201001020010000102001000015570161112020110099235910010100100231372331632895002487522793100000131011611155101013210000101001559515639155291560315797
2020415570117364198362002489701554396722520190101871000010100100001300497256441404912497156381552512877313054201001020010000102001000015600156112020110099230310010100100229572301433134002481623025100000131011611154391009610000101001562315637155061550315732
2020415557116362194360002477401555095302520211102081000010100100001306727266081364912559157101557112862313052201001020010000102001000015655161112020110099224010010100100231972301433141032468323096100000131011611154401010810000101001555015593156621550815653
2020415714116362196360002464301562397332520217102171000010100100001312847276810304912658157501553612882313070201001020010000102001000015635154112020110099208210010100100229362299733005002467523234100000131011611154501013810000101001559715610155881562415638
2020415669117366191365002465701556295522520181101961000010100100001306087310101444912481155781571012852313131201001020010000102001000015720162112020110099235410010100100230882309933117002469323026100000131011611154171012910000101001571015678157161567015524
2020415563116356193358002464501564996002520235102111000010100100001316237268270344912506155321559513079312926201001020010000102001000015637155112020110099217210010100100228902306032895002469123186100000131111611154431009310000101001548615720155401558115587

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5755

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)191e3a3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)acbbl1d tlb miss nonspec (c1)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? int retires (ef)f5f6f7f8fd
200241580311735018334402449411573797872520142101391000010010100001320907368921614912727157671559712955313239200101002010000100201000015834176112002110925091010010102265222807329470244602267210000127021611157791014710000100101560115741158381582815742
200241575111935118434502438301574597672520115101421000010010100001336197372871434912694157081578612997313273200101002010000100201000015748141112002110924181010010102280322770326290244922279510000127011611157451012310000100101573315725158331577715712
200241585011835018334602445001571698162520121101361000010010100001335837379120434912622157381561613105313239200101002010000100201000015806155112002110924151010010102259722793327680245452273810000127011611155721012610000100101575215779158121566715588
200241582611834518134502447701572898372520127101421000010010100001328547302791554912680158091582213065313244200101002010000100201000015729152112002110924531010010102287322677326960243382289010000127011611154791016510000100101569815672158111586415807
200241578511834618334402449701566498402520190101541000010010100001336957314851454912561157161564813123313163200101002010000100201000015738154112002110924171010010102282422631327930244272289110000127011611154761014710000100101584915915157251574115797
200241572311834819035102434801576998292520136101481000010010100001321587297010464912647156781570213100313115200101002010000100201000015729154112002110924241010010102281722910328050244382286510000127011611154951013210000100101570115701158411574615758
200241568011834819034602452401568198632520136101571000010010100001329567354470444912728157921571113003313273200101002010000100201000015656150112002110925261010010102283722807327800244862275710000127011611156251012910000100101578815733158251575615659
200241570411834518234702445401581798852520130101481000010010100001321117365191364912677157171578313095313097200101002010000100201000015655141112002110924771010010102294522624326380245322273310000127011621156051015010000100101571215706158581573115667
200241578011734519034602444201589997242520127101361000010010100001341927353751374912682157651568612992313307200101002010000100201000015778160112002110926121010010102282522811326060244262276610000127011611156781015010000100101577015753156561579215740
200241567911834218534702439201583596622520163101271000010010100001318457324231444912620158341571713003313308200101002010000100201000015768158112002110923471010010102279622555327090243802271510000127011621155671014110000100101579215799157841568715620

Test 3: throughput

Code:

  prfm pldl2keep, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5444

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)1e3a3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)acbbl1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? int retires (ef)f5f6f7f8fd
1020415459116331181351246380153909491251010010010000100100005007218871491233101545815366139976141971010420010016200100081540512260111020110099260510010010022706227323280700245992281810000011171701600153340100001001542715428154391554215547
1020415536115332174334244860154759523251010010010000100100055007198351491242301551415862139467141731010020010008200100241538012215111020110099263610010010022741227813270400245512271110000011171701620153570100001001545115526154931548815384
1020415433116332177338244840154679555251010010010000100100005007229601491237401545015451139746141341010020010016200100161549212203111020110099259610010010022982227743285900246072277510000011171701600153450100001001546315560154231555115451
1020415457115339172337245020154179443251010010010000100100005007213241491249601542515308139656140771010420010008200100161544712270111020110099265910010010022760227743269400244912281210000011171701600153360100001001543315493154721550115458
1020415437115333176336246520154529488251010010010000100100005007173531491237701539715501140226141711010020010008200100241547412324111020110099261410010010022694227273282000246582281810000011171901600154580100001001547415352154441551215483
1020415418115334180329245250155369560251010010010000100100035007174261491235701540815324139856140731010020010016200100081543012309111020110099256610010010022770228123277600246172257810000011171701600153120100001001543715566153881555115416
1020415426116334182336245680154469480251010010010000100100005007252321491237401544615454140306141341010020010008200100161534212206111020110099260010010010022716227113267700245762275210000011171701600152950100001001548015414154861537415463
1020415530116331178333245950154629406251010010010000100100005007238081491231701554015442140096141311010020010016200100241543712264111020110099257310010010022732227753289600246102273410000011171701600153120100001001541215483154541548915420
10204154261173371803362456801539694741131010010010000100100005007219051491237701544815389140157141071010920010016200100081534512277111020110099254910010010022733227663277800245442275710000011172222422153130100001001548815395154981548015411
1020415458115337177337245841154779496251010010010000100100005007184341491233401538915379139136141121010020010000200100001533512260111020110099261110010010022930227403269100245222276610000011172222422153440100001001544615384154261549715539

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5433

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)181e1f3f4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int alu (97)9fl1d tlb access (a0)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)a9acbbl1d tlb miss nonspec (c1)c2cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? ldst retires (ed)? int retires (ef)f5f6f7f8fd
1002415432116333169337024463015468948825100101010000101000050720190491239015474154091401831419410010201000020100001542015395111002110925471010102282922678326580024538228151000000640416551530510000101550415423155131541415469
1002415403116341182340024519015397954825100101010000101000050726294491237615382154701406331410710010201000020100001546715565111002110925281010102265022762327490024610227581000000640516551531510000101541615354154651542615468
1002415420116334172337024596015427949325100101010000101000050720082491229215390154741395931419710010201000020100001540515386111002110925931010102282922758328120024594228071000000640616651536310000101551515418154451541015581
1002415525115334180332024523015372946925100631010000101000050728024491235115471154341399531424110010201000020100001553815401111002110926231010102276322768326770024515227701000000640516551539510000101539515469154471547615492
1002415447115339177337024561015408945225100101010000101000050721837491235815467154481410531424410010201000020100001548015524111002110925901010102275822689327200024629226901000000640516551543810000101538915432154381543515403
1002415473116332176336024589015466953725100101010000101000050724541491245015426154711404031421910010201000020100001552315430111002110926151010102268822798327220024609228561000000640516551530710000101542315450154311547815473
1002415411115336174335024532015378947825100101010000101000050722142491222715497154121412831429510010201000020100001535315441111002110926591010102275622709327530024503227541000000640516541530110000101539615407154201547015426
10024155141163341733360245770154979520251001010100001010000507239724912271155291552514053314209100102010000201000015398154101110021109261610101022715227443271600246172280810000006401016551522910000101549215528154691547015414
1002415368115335174338024534015416941254100101010000101000050724726491243715402154021405931420610010201000020100001547015451111002110926751010102269722828327600024514227291000000640516541534510000101543115424155411546715446
1002415395115335177333024471015426957925100101010000101000050723326491251315383154531400931424310010201000020100001546615418111002110925941010102280222751328040324553226871000000640416461542310000101536315465154351545815481