Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PLDL3KEEP)

Test 1: uops

Code:

  prfm pldl3keep, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 92 | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1004161212030152923961620876251000100010007004911599161913053142710001000100015651585111001284223522453214023672215100073216221507100016281618161516311671
1004162112030153023661608904251000100010006946211601163413293149010001000100015991621111001266222622363228023962233100073216221516100015831596156916171589
1004161812031153024121600895251000100010007004511599165912973142510001000100015811596111001253221821983244023932229100073216221542100016271623162715931618
1004163112029152925091608926251000100010007022911603162013513144010001000100015831606111001233224622483215023952236100073216221516100016131620161415951595
1004163412029153024021608892251000100010007021611593159513103148410001000100015981595111001275224122403227023902243100073216221516100015751616163216201613
1004160612030153123951634887251000100010007100411594158713053143910001000100015831635111001238221122103208024072229100073216221499100015951609162515821640
1004162412030152923961585897251000100010007093511602163413003146010001000100016221588111001266223321933211023912238100073216221506100015941567165016241618
1004163412030153124051592930251000100010007065611588156813533147810001000100015951593111001259223822103225023912251100073216221494100015841611162315911632
1004163112030153024111578880251000100010006988911580167112973147110001000100015801589111001245222521973224023962228100073216221485100016151620162616241625
1004160812030153023981601896251000100010007000711605163013103146310001000100015941582111001259222822463222024142223100073216221497100016421631161216231624

Test 2: throughput

Code:

  prfm pldl3keep, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5627

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2020415514117371193368247311553095522520205101991000010100100001303877319691374912466157131565512847313061201001020010000102001000015601155112020110099221510010100100230132318932916246362305510000131011711154521010810000101001561515664156651558115608
2020415586117358200366246231565797062520190102051000010100100001301417300981444912627155711567512917313120201001020010000102001000015567155112020110099226010010100100229712275633225246942302510000131011711154611010210000101001563515669155691555615667
2020415653116358192364246841560196962520229102111000010100100001308767288151344912713156721559513047312988201001020010000102001000015597154112020110099217610010100100230472292733106247292299310000131011611154431012610000101001551915655155711573115704
2020415540117371196360248111550696362520211102261000010100100001311397277811424912427155171554412841313039201001020010000102001000015519155112020110099225710010100100230022283932728246922301210000131011611154861008110000101001566215754156781553715627
2020415685117366195369244811555696282520193102231000010100100001302587377391294912668156971554712847312967201001020010000102001000015617154112020110099228610010100100228352290632953245942329010000131011611155171011710000101001555015625156151565515602
2020415590117365193364247491552796922520220102141000010100101041299457315871384912547156331559212779313146201001020010000102001000015632156112020110099215110010100100231372290333025247132318910000131011611154541012910000101001573315632155621563015671
2020415677117358194368248311555496592520199102061000010100100001304327284001334912586157201561613035313059201001020010000102001000015685156112020110099221210010100100229622290933161245712306910000134411711154611011710000101001553615595156861570315567
2020415624117360197361247361554496432520208102171000010100100001312927315631344912411155781559312863313207201001020010000102001000015659155112020110099219310010100100230792305533063247822315210000131011611154781014410000101001565515481156611555815692
2020415686117362189363247931567996502520196102111000010100100001300387239431404912519156491550012878313096201001020010000102001000015553155112020110099223310010100100230422300832979245862290210000131111611154101014110000101001558415499156071564515584
2020415647118374189369247071571697322520199102171000010100100001308567341531284912474155041567612932313001201001020010000102001000015580156112020110099226010010100100230612294033018247932293610000131011611155861012010000101001543815657156311545615630

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5598

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | ac | bb | l1d tlb miss nonspec (c1) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002415617117379191376248021564897252520154101331000010010100001307127389690484912582155311560412788313068200101002010000100201000015591143112002110922191010010102314823078334112488523345100000127011611154021011410000100101550115533155051563715835
2002415527117372199368248871563095762520163101421000010010100001318177345311314912412157161566812886313055200101002010000100201000015619143112002110921681010010102325123035330162497423106100000127011611154771014410000100101561915635156321572415560
2002415775117375196372250651555697222520127101661000010010100001326657294131334912566156091552512877313156200101002010000100201000015572150112002110921261010010102329523055330282483923385100000127011621154781014410000100101558915568156031566815514
2002415603116372197371247321552796092520175101301000010010100001323267309760484912439156681564112931313027200101002010000100201000015439142112002110922171010010102324823213330262481523327100000127011611155241013210000100101551115565157191561015612
2002415500117372196380247411556796252520157101241000010010100001329307313220474912473155341584512879313232200101002010000100201000015589150112002110922581010010102307623180329902465223227100000127011611154961010810000100101544815558156221550215598
2002415423116372194369250371557995702520142101601000010010100001307037328831394912581155611557212959313094200101002010000100201000015707143112002110923131010010102306923218330872488623167100000127111611154061013810000100101558915639156071544715676
2002415551117368200374247701563396642520136101211000010010100001308547283331354912460156391556212928313063200101002010000100201000015504143112002110922481010010102312423143331482483923037100000127111611154371014110000100101563915660157061572615577
2002415650116375191371247601562594872520142101541000010010100001302677304330344912583155641557512911313058200101002010000100201000015616144112002110921761010010102310723234331232465722989100000127011611155121012610000100101563515525155921556115786
2002415579116372197372248341562195272520136101871000010010100001314137266360504912565155271557712845313054200101002010000100201000015608149112002110922031010010102304223200329692465523143100000127011611154341009610000100101555415642156861553115647
2002415728117379197373248841556896702520151101391000010010100001296847307670434912611154671556012897313289200101002010000100201000015553143112002110922021010010102287323188332672472723032100000127021612155651012610000100101566415684155931571815455

Test 3: throughput

Code:

  prfm pldl3keep, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5423

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d0 | d2 | map dispatch bubble (d6) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204154481173061563072431515431948325101001001000010010000500724753149123471545415387139206142031010020010008200100081529912203111020110099255210010010022437225323248502425322517100000111717001615366100001001543015514154081543515403
10204154741153061533052435115401947225101001001000010010000500722495149123421537515416140277140891010020010016200100081538312201111020110099246310010010022510224863243902426122582100000111718001615203100001001535315384153861538115404
10204153841153101563042425015392945725101001001000010010000500720573049123601542615337140127141231010020010008200100081539812161111020110099253010010010022443224203247502422522428100000111717001615335100001001543215407153821545715383
10204154401163101543032423415314951125101001001000010010000500722408049122961536115402140036141431010020010016200100161545712161111020110099249310010010022557224763246502428622531100000111717001615273100001001538215439153821548315368
10204153861153071533092427315367946125101001001000010010119500726611049123321546215550139687140531010020010016200100081539412203111020110099258510010010022547224803246802425622510100000111719001615280100001001542815390154411538315368
10204154671163121573082426515336952025101001001000010010001500723195049123221547615370140707141201010020010008200100081533012194111020110099257610010010022547224823246802424722535100000111717001615296100001001542815388154701538315454
10204154671153101543062437315407952425101001001000010010000500723287049123061536915476140127141231010020010008200100081542212166111020110099253010010010022582224773256702422522491100000111717001615242100001001542315407153611541815426
10204154201163081563042428815435946825101001001000010010000500722146049124211545915329139467141701010020010016200100161541112221111020110099256210010010022500225193245802425622499100000111719001615320100001001539115487154081546615482
10204154741163051523072431715420934225101001001000010010000500722494049123051538115349139817141251010720010008200100081533812135111020110099245110010010022414224673240902418822517100000111718001615291100001001540915411154051535815391
10204154141153081523042437515461951425101001001000010010003500718697049123131542715393139526141441010020010008200100081537812190111020110099256210010010022523225593252502423722543100000111737001615188100001001543815379154341543315414

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5505

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 19 | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100241543411629414629402392715483957525100101010000101000050722507049124311546515449140393142041001020100002010000154361544611100211092544101010221702221132136002386522201100000640216221537210000101550315471154571551515590
100241548311629114729202392415496951625100101010000101000050726140049123911547615504140423142381001020100002010000154181551111100211092596101010221892221132220002395522249100000640216221532710000101546015533155061546815481
100241549311629414829002391615407955325100101010000101000050722021049124731553215424140003142321001020100002010000153761545411100211092563101010221322217432199002387422192100000640216221537710000101554415454155261549815447
100241556411629014529402391415568957325100101010000101000050722225049124271544815469141333142001001020100002010000154601542811100211092493101010221682224232238002382322236100000640216221534610000101544815486154741544315420
100241541511629514729002393515510958325100101010000101000050727241149124081546415544140843141451001020100002010000154121539711100211092578101010222032221932219002389022218100000640216221536310000101541915441154881546615517
100241546411629414529202394315449955225100101010000101000050724147049124161541715426141683142641001020100002010000154991541311100211092576101010222112229132220002389922163100000640216221535810000101539815489155361552015526
100241553211629014829502389315513947925100101010000101000050726314049123901547015532140233142341001020100002010000154391538211100211092553101010222192221232132002393622093100000640216221534810000101542615404155841545315488
100241541011629014729402389215464952725100101010000101010461732996049123901546415489140763141831001022102262210121154471553421100211092596101010222302234732253202412522363100004640216221538410000101549115467156991550815531
100241573611529514729402391715479955925100101010000101000050725757049123831553715477141773141531001020100002010000154431552411100211092544101010221572218532188002387422267100000640216221537710000101545215552154461545915485
100241551811629214629102395215591955925100101010000101000050722858049123931555615520141053143351001020100002010000155181547711100211092589101010222132220732222002385022218100000640216221531210000101550315466154511552815465