Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PLDL3STRM)

Test 1: uops

Code:

  prfm pldl3strm, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 92 | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100416241231163124141553912251000100010007027611601163612953148910001000100016031597111001269225022433249024362270100073216111517100015861591163116231600
100416181232163124291562880251000100010007047811565158613473148310001000100016141599111001252223522753252024292269100073116111494100016141595158915961578
100416141232163024301615899251000100010007008411601160913123144410001000100016071612111001258225222553225024372275100073116111496100016201621158516131587
100415991232163024241591895251000100010007045911604163212903144310001000100016001569111001270226222513239024132271100073116111505100016191583156216011619
100415841230163224391577885251000100010006945111603161813453149210001000100016121594111001260225322573237024332252100073116111512100016221620160516231567
100416181232163224281605920251000100010006987911761157613193145410001000100015861593111001270227322553260024342239100073116111508100015911612160415811622
100416271232163124391609889251000100010007004211567159613283148110001000100016071629111001264225022643255024402249100073116111524100016331629162915801585
100416471231163224221605858251000100010006995711574160913493147710001000100015851590111001265225822443248024312238100073116111479100016081617161315961624
100416151231163224281586883251000100010007029111594158712953144410001000100016061605111001243222122703263024152238100073116111510100016091623163816391666
100416131232163224231582870251000100010006982511566163012953147510001000100016041595111001257226222733261024692220100073116111508100016241620160315851605

Test 2: throughput

Code:

  prfm pldl3strm, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5670

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | bb | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2020415625117036719203630024773156439721252020810211100001010010000130503727098414912592157191573612841313217201001020010000102001000015597156112020110099220710010100100230252298233018002477323174100000131021622155891008710000101001559415620155561570615683
2020415718118036319303640024685156439613252022310181100001010010000131183732376384912568156431567712930313106201001020010000102001000015663155112020110099249810010100100229552298132820002459022872100000131121722155861009610000101001556315657157421561615598
2020415796116037019003740024618156139837252021410241100001010010000129240727847424912588157291564612887313046201001020010000102001000015638154112020110099225010010100100228642289732760002455623118100000131021622154321010210000101001575915595155951565715567
2020415639115036719503680024699156229747252024110323100001010010000129578731979334912585157491577912864713219201001020010000102001000015732154112020110099232010010100100229042296932924022464722956100000131021622155401012310000101001571515689156771562115588
2020415706117037219103620024843156839628252017810199100001010010000129698737113414912583156231570412838313187201001020010000102001000015507155112020110099230710010100100232052319232870002486122993100000131021622154931012610000101001573715621156381589015531
2020415780118036819203620024673155929602252020210229100001010010000129467735273434912627156031567212914313211201001020010000102001000015620155112020110099238410010100100230592294433133002474122913100000131021622155431012010000101001555615711155431555415611
2020415669117036919503650024674155529720252039310220100001010010000130206729588434912618157551562812925313142201001020010000102001000015686162112020110099227010010100100229872274333059002464922835100000131022522155491011710000101001563415557158111557815703
2020415775117036519403640024717156369717252022910193100001010010000131490729822304912557156391566112962313204201001020010000102001000015801155112020110099244710010100100228352309632973002473722971100000131021622155271011710000101001566515599156791565015640
2020415669117036919103640024807157229695252023010232100001010010000130259731455394912497157191560212989313188201001020010000102001000015614156112020110099234910010100100228292314233086002491022978100000131021622156811012010000101001555115828156191560915661
2020415660116037018603670024707156819698252022910214100001010010000130566731411324912556156781577212942313105201001020010000102001000015439154112020110099230310010100100230732287132791002479022891100000131021622154691008710000101001556215664156641558915641

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5686

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 19 | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20024156841193581853560246231557399342520148102601000010010100001326597325810374912749156881575212996313083200101002010000100201000015602143112002110923051010010102287722983330583245332289410000127031633156711016810000100101566915692158671576315692
200241572511734918635502472615699976325203241014210000100101000013194874153004549126051569115617129623131692001010020100001002010000157571511120021109231710100101022823229323286628245422288410000127041633155151011710000100101560515596156151577715671
20024157191193581843600247401565797572520145101001000010010100001332547294660334912638157781566013117313108200101002010000100201000015571147112002110922731010010102292722929329360247922298210000127131633154811010810000100101569615648156691568415769
20024156571183591823540247491571497592520181101361000010010100001312717360150424912506155531559813058313101200101002010000100201000015748144112002110923191010010102290022953332580246552312710000127131633155461011110000100101569415670156481575815691
20024158521183611853580245071571995662520121101451000010010100001322407364530504912574157601565712911313297200101002010000100201000015557318112002110923581010010102290022753328390246772290610000127131633154591013510000100101570615783157771564815596
20024157041183611863510246051565397922520109101571000010010100001314317392270484912533155851570013050313190200101002010000100201000015637151112002110923861010010102316222850329420246772310310000127031633154711013510000100101563715705157411555615505
20024156711173601873530246241565398002520139101331000010010100001323057373131414912707156041564212919313166200101002010000100201000015703151112002110922321010010102299823036331630248102324610000127131633157191016210000100101569115667158051568115639
20024157801183571823520246741562697422520136101421000010010100001319887316521464912551157021575313095313086200101002010000100201000015615144112002110924431010010102301022793328530245992284210000127031633155601012310000100101563415634157091575415696
200241552511835418835302455415666967425201391014510000100101000013283473438614749125671559315638129583131882001010020100001002010000156431511120021109221010100101023018230793305326248462292710000127031633154921015610000100101575415703156231566415659
20024157711173531833560246741570597272520127101451000010010100001333097363751464912544156971577113076313225200101002010000100201000015597148112002110922991010010102293423036329400246122298410000127131633155491014110000100101567915641157371576915537

Test 3: throughput

Code:

  prfm pldl3strm, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5454

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102041553311633618033824543015385952625101001001000010010000500720074491237715390154671395161417010100200100162001002415443122061110201100992582100100100227602278232748042459922781100001117190160015396100001001548215499154151548215493
102041547311633717133424504015481942325101001001000010010000500725238491229015393154991398871413610211200100082001002415484122481110201100992632100100100227172269832770012449622741100001117222242215235100001001540615404154331538215501
1020415437116333170335245081153999496251010010010000100100005007246024912429154271541414040614064101002001000020010000153871218111102011009926281001001002284722851327650262453522890100001117222242215308100001001544115459154631539915468
1020415554116333175333246110154409502251010010010000100100005007241464912325154381534114028714212101002001000820010008154101221411102011009924741001001002281122796328070272461922757100001117170160015454100001001541115371154411544315413
102041544111633617533324610015418947125101001001000010010000500727995491238215471154671392061411210105200100242001000815456122571110201100992576100100100227182261132738002496922801100001117170160015225100001001551815490155181544615512
102041547211533617633924589015454951925101001001000010010000500724273491237615419154641402361412210100200100162001000815418121491110201100992611100100100227692275332733012454322727100001117170160015351100001001547015491154331532715403
102041548811634418233724682015521951125101001001000010010000500720431491233515486154021402671422710100200100162001000815444122681110201100992625100100100227642281932771002451722765100001117180160015334100001001540115498154151546815488
102041546211533517333624547015395947125101001001000010010000500721800491233715458154821398771423110102200100162001001615443122161110201100992583100100100227172272632652002445322802100001117180160015379100001001543115386154571551615436
1020415461115329166336246040154489558251010010010000100100005007225744912329155061547114079614144101082001001620010016153381224411102011009926331001001002289622670326700472461422852100001117170160015396100001001547315462155071541815446
102041549911533417833624538015425942325101001001000010010003500723944491241715313154201398261424910103200100162001000815399121931110201100992609100100100228182281632721002450122755100001117190160015304100001001541515468154941543115570

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5492

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002415536116294149292239341545895452510010101000010100005072442104912330155251547314023314208100102010000201000015462153621110021109263510101022196222363219802385022122100006402162215363010000101547915455154911543715468
1002415495115293147292238721538895762510010101000010100005072399304912484154841551514084314214100102010000201000015462153631110021109263410101022252221223219902400022184100006402162215350010000101545015443154111549815537
1002415505116289147292238931548294892510010101000010100005072347104912369153841540313979314225100102010000201000015455153921110021109262710101022249221573215102390922244100006402162215357010000101545215451154681553115471
1002415525116295148292237771547795412510010101000010100005072632204912375154671550513981314154100102010000201000015477154451110021109262010101022146221663216332391222157100006402162215304010000101549615512155431545015403
1002415531115293146291238801549895032510010101000010100005072623814912335155231545314111314197100102010000201000015447154781110021109254410101022226222083228422384422189100006402162215409010000101544115484154411549215525
1002415519115291146292239181546195252510010101000010100005072602004912413155001552114153314194100102010000201000015397154331110021109262210101022147222693219212395322243100006402162215396010000101550415513154771547115517
1002415479116292147296239061549094472510010101000010100005072453604912397154901546714096314167100102010000201000015424154281110021109249310101022172222133220102393022291100006402162215314010000101549515426155531550715416
1002415564116291145291238711556894702510010101000010100005072919004912442155231552614148314179100102010000201000015452154591110021109256910101022195221663219302390322177100006402162215448010000101553615526155201546815408
1002415452117296146293238691545295362510010101000010100005072748404912460154861541614135314221100102010000201000015425155411110021109255610101022179221743224802382822216100006402162215376010000101539015524155321550215497
1002415481116293145288238861546595932510010101000010100005072373714912427154191543614086314218100102010000201000015406153841110021109257510101022189221643221902416222188100006402162215323010000101539815505155151549215506