Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PSTL3KEEP)

Test 1: uops

Code:

  prfm pstl3keep, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 92 | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100416171232173424421600852251000100010006979111571158613163143210001000100015681574111001254226522623298024552305100073116111517100016151601161515761579
100416291232183424421553878251000100010006999911588157413033142910001000100015951570111001235228422423266024522260100073116111503100016191587159516101632
100415851233173424661554887251000100010007032411583159813073145610001000100015831594111001255228122983256024382270100073116111477100015751572161616071572
100415761232183424391585895251000100010007047211597160612793146510001000100015761577111001253226223003281024662266100073116111477100015921591157216141612
100415801232183224341576863251000100010006979111590158013153159710001000100015751569111001253227823013243024332272100073116111496100016161571162716081611
100416241232183524411584869251000100010006835311587162613173145010001000100015891593111001221226522513265024472265100073116111504100015681593156915741618
100415741136153424701584888251000100010006928411578154612873142310001000100015941566111001242226322833271024802269100073116111495100015661594156816211586
100416141233173427141570890251000100010006875011577157913003147210001000100015881601111001247226122723287024752268100073116111470100015691619164115951595
100415621132173524571581911251041100010006878111554156312813143110001000100015761581111001237225022993278024542279100073116111490100016121592159915721598
100415681234163324471599890251000100010006865311604159213043147110001000100015721602111001258227022883289024272286100073116111508100015971610159016001613

Test 2: throughput

Code:

  prfm pstl3keep, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5678

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20204158161203601893480024651159829686252022310211100001010010000132301734570140491293315524158381300031311120100102001000010200100001566015511202011009922791001010010023067227173280702454822910100000131012511157481009010000101001559215655157031563515839
20204158661163581923591125031156519704252019910202100001010010000131478733243143491271115664157681293131316520100102001000010200100001560515511202011009922681001010010023062228153284502476322993100000131011611156181010510000101001556315565156991568215603
20204156781173531913500024723156109764252019010208100001010010000130323738703137491260315585156931298131318820100102001000010200100001568216111202011009922431001010010022829228053275102452822744100000131011611156111012310000101001561915762156161567715753
20204156991163591873480024639156089600252021710250100001010010000132659728464140491250015758156531300731325220100102001000010200100001566416311202011009923511001010010022849229413278902444022893100000131011711156571012310000101001565915612156031571515626
20204156971173491873500024560155899705252022310232100001010010000131140739593135491276115662158261288831323720100103171000010200100001559515411202011009923491001010010022903229483284402464522947100000131011611155061011710000101001571815701156311565815610
20204156491173501913470024446155899705252020510214100001010010000132436729945135491271815548156021296031325020100102001000010200100001565315411202011009922801001010010023009227733291702456022780100000131011611155451012310000101001560815603157461562715766
20204157141183551913550024690156399769252017210187100001010010000132368732605133491264115721155881289231307420100102001000010200100001567515611202011009924131001010010023080228173295702436823105100000131011611154841010510000101001571415768156161567915742
20204157171173571973490024374156009692252021510220100001010010000133128734428138491258315699155931304431319720100102001000010200100001575315511202011009924111001010010022854227993292602453722633100000131011611156031012010000101001570115688155461577015659
20204157811183471943590024468156889787252019910202100001010010000131343738703131491263315629156201301731326120100102001000010200100001560115511202011009924051001010010022724228853284902463622765100000133611611155561010210000101001566415673156181568415551
20204156821183491893540024524158169667252021410193100001010010000132232733944137491270315700157691295631316720100102001000010200100001554816211202011009924371001010010022717228613287402465422969100000131011611155501014110000101001578215573155761563115739

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5479

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200241549911538419939000251670015523951825201271013910000100101000012964371704904149124131561615468128543129352001010020100001002010000155561461120021109208210100101023513234393347202527723522100000127011621154091012910000100101556115425155531539815569
200241549311538620338400252450015476944325201511014510000100101000013047672314003349123271541215429127623130182001010020100001002010000154161541120021109208010100101023401233403328602501623534100000127011611154541010510000100101552015573155821548415378
200241548111538019738900251250015415957525201631015710000100101000013005872728304849123861542915441127543128802001010020100001002010000154851461120021109207110100101023277232743330302512023185100000127111611154041013210000100101548915473155431535415469
200241555211637921338700250150015472947325201511013010000100101000012890772109903849122041533315452128013129962001010020100001002010000153291411120021109218910100101023438232773351402509923326100000127011611153381011710000100101538615497154591567115491
200241545411638120438100252330015458962625201541014210000100101000012886072520104949122941543515424127213130342001010020100001002010000154171511120021109201610100101023315233903334902530123340100002127011611153441014710000100101551015486155211544815490
200241544511638320838100250020015476942325201541015410000100101000012999272089203949125301555915493128813128512001010020100001002010000154751531120021109208810100101023449234743340702532523513100000127011611154771013510000100101552015409154431529315374
200241541211639220838600254070015465949825201631016010000100101000012936273206104149124721534815494128283130422001010020100001002010000154711401120021109206910100101023171232043326802499423285100000127011511155221012310000100101556715416154831548915513
200241547411738620538400249990015535952825201451014810000100101000012837572842005049122841546015506128853128682001010020100001002010000154351441120021109208410100101023413234813343102507823505100000127011611152951012910000100101541515596154981539215432
200241552411538119638100249260115381943425201601013310000100101000012831772244405549123081559315506128013129402001010020100001002010000153441441120021109213910100101023454236623328502526123428100000127011611153561012010000100101552115584155881541615392
200241534811538320238500249280015564950525201301016610000100101000013040872378503649124431544215548127943128832001010020100001014110000156521471120021109203810100101023216234273357302533223524100000127011511154301012310000100101542315427154831544215548

Test 3: throughput

Code:

  prfm pstl3keep, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5378

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1020415402115035318634924787015332933125101001001000010010000500719588049123841538115392139517140561010020010016200100161530312117111020110099249210010010022782228803289022467922875100000111718016015286100001001541115347154301550115436
102041536211603471843572473910815402934225101001001000010010002500726118049122591534615378138836141801010020010008200100081531112141111020110099248310010010022864228683298402454922938100000111719016015238100001001534715493154421536115396
1020415334115035218634824600015249938025101001001000010010000500724636049122641541615314139466141471010320010016200100081536312126111020110099246010010010022880228813291702470522874100000111717016015338100001001537315362153491530115411
1020415403114134518234924645015513947125101001001000010010000500720689049122761540415346139436140581010020010008200100081538612252111020110099253810010010022956229323297902468022854100000111719016015368100001001536815394154541544515406
10204154851160346191347246690154379359251010010010000100100005007243370491231115479154001391071405410100200100082001000815369121961110201100992506100100100229282309932865122477022798100000111718016015354100001001537315477154601535015405
1020415469116034718334824728015390934125101001001000010010000500719592049122361546115337138637141081010020010008200100161540412174111020110099254910010010022927228983293102475222904100000111717016015242100001001539115359153401542015464
1020415422115034518034624715015461941325101001001000010010002500717822149123671541915422138926141471010020010024200100161533812142111020110099251010010010023001229363289102463622888100000111718016015288100001001541315372153611532715454
1020415524115034419134424689015402932825101001001000010010001500719501049123041534415340139987140121010020010016200100161537512151111020110099243610010010022792229133300202463822871100000111717016015347100001001546015398153381543115481
1020415428116034718135024745015375946325101001001000010010010500721667049122571530115330139346140991010720010016200100161536312184111020110099251810010010022947229673296902468022935100000111717016015280100001001536615432153611549115385
1020415287115035218835024695015376943925101001001000010010000500722313049123121538015393139886140691010020010008200100161537712084111020110099244210010010022978229113291452474422817100000111718016015302100001001536315419154311537815343

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5568

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002415631116295148296239911560096172510010101000010100005072640004912469155571563614216314313100102010000201000015557155211110021109273910101022198223273231102401322332100000640216221544010000101560815493155851559715567
1002415601117296149296240381555396052510010101000010100005072728714912499155341552914148314366100102010000201000015475154851110021109271210101022318223293225702410022309100000640216221548410000101554915572155131548615522
1002415485117294147294239321561996922510010101006710100005072860014912527155331553414156314290100102010000201000015519155201110021109273110101022214222333232902403822290100000640217221550910000101551415625155801557115541
1002415583116297146295239581557597262510010101000010100005073168004912498156281562214217314226100102010000201000015602155381110021109274310101022274222853233442401022264100000640216221541410000101557415583155821552315579
10024155811162931472992400515608963025100101010000101000050730025149125411553415612141083142571001020100002010000155061555611100211092729101010222542219532239212401322252100000640216221544410000101555215609155771564615570
1002415577116297149296239971554496892510010101000010100005072716614912503154711567814136314378100102010000201000015555154871110021109273910101022277222793228302399022303100000640216221538110000101564015597156161552315544
1002415548116293147297238801562896192510010101000010100005073011004912418156501553814187314294100102010000201000015529155171110021109267610101022288222833227702407322327100000640216221539610000101547715600156271556915612
1002415503117294149295239381565895512510010101000010100005073092214912554155781557014196314317100102010000201000015577155691110021109264910101022237222953228942427922262100000640216221542210000101554115565155131554715577
1002415632116296147295240081549995462510010101000010100005072556404912458156051556414136314272100102010000201000015455154851110021109264010101022324222873228802401522254100000640216221539110000101554715574155371552815586
1002415479116293146295239671558996032510010101000010100005072629614912567155541557314159314209100102010000201000015536155861110021109271110101022288222733227202406722345100000640216221539910000101548615503155931552915502