Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PLDL1STRM)

Test 1: uops

Code:

  prfm pldl1strm, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 1e | 3a | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 92 | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100415761232163102453461608891251000100010006987911568159213173146510001000100016051581111001257224322433217024232262100073116111509100015971597161616281595
100415751230163002438181563904251000100010007036711611159913143142410001000100015731575111001259224222603246024382229100073116111498100016381575157716261587
100415951232173202417151602925251000100010007030801609159713173145210001000100015861597111001227225622623252024342262100073116111509100016001599163116301597
100415941330163202435441551875251000100010006910211616162813113145410001000100015811594111001254224122633262024252253100073116111549100015721581159916131736
10041606123216300243591604900541000100010006943211558157213203143810001000100015771617111001251226822563270024232254100092116111507100015731595162616371640
100415661230153302439191625880251000100010006881901585160613183148210001000100016201594111001262225322463250024192250100073116111508100016271627163216111600
100416301332163002454501615881251000100010006898911577157513233145810001000100015801602111001265223622473248024302256100073116111513100016301632162616281581
100416001231173202425511603896251000100010006792011568160713173143110001000100015951565111001287226222353243024292247100073116111500100016371598161316261631
100415801233153002395191609888251000100010006974501588162713123144710001000100015941605111001245227522943253024532250100073116111505100015831574162415811628
100416391230163212437541582867251000100010006831501572162912973148710001000100015621606111001251224422543268024242235100073116111495100016111558159815901630

Test 2: throughput

Code:

  prfm pldl1strm, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5868

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | bb | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2020415971118327178327002439701037615743997725202221019910000101081000013447274165304349126451576915789131156133762012510208100081021210004158061591120201100992414100101001002255622635326130002423622473100000111131901600157471012310000101001580015731158721573915895
2020415743120333175331002436101042215746991125202231021110000101041000513323273768603449127911576315862131466132792010410212100201020410012158411541120201100992559100101001002259522633326430002461522451100000111131801600155731009010000101001585415856157861585715671
2020415838120331181336002424201038715857985225202141019910000101001000013222774023904149128211572316062131163133232010010200100001020010000158981581120201100992650100101001002262522547325460002418222528100000000131011711157501013810000101001576115860157921587915788
202041580311932717833200243400724315725995125201901022010000101001000013365374417703849127201569915768131873132092010010200100001020010000158161561120201100992457100101001002264022642326010002417622587100000000131011611157471012910000101001590115975158751589815814
202041588411932517832700242760729315844981025202291019910000101001000013260874779603849127081580815759131273131472010010200100001020010000156691572120201100992522100101001002266522644326280002421622599100000000131011611155991010810000101001573815899157141578615916
202041594311934218132600243780724615775978325202231022610000101001000013369173851803549129101595315822131313133332010010200100001020010000159871572120201100992493100101001002263122662325410002419822548100000000131011611156871009910000101001580915773158421589215818
202041582212032918433100241620728915803975225202201019310000101001000013323774700804049127791582715851129733133342010010200100001020010000157961591120201100992564100101001002256422510324730002427022660100000000131011611156321009910000101001577415784158911592315776
202041591211833017533600242630728515744994725201931020810000101001000013491474310903649127111585115767130603133062010010200100001020010000158901541120201100992640100101001002265822570325500002428722443100000000131011611156561009610000101001593215854158861582815790
202041590511933017833600242990730515812988625201931021710000101001000013389573512503349128051593315916131773132942010010200100001020010000157171581120201100992511100101001002261022558325450002466122647100000000131111611156451012610000101001583915942157801578215939
202041593511932617833300242580730915809989325202321022910000101001000013536174260803549128161582315891130883132062010010200100001020010000157401571120201100992543100101001002256822559326910002426222543100000000131011611157341011110000101001590715718158581583215943

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5651

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | bb | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200241566911736119435824602105131554996572520145101691000010010100001298187324051514912581157111565412936313205200101002010000100201000015580154112002110922891010010102283622941329130002452423024100000127011611155891014710000100101562215529155961563115650
200241562511835919536324515105071568696152520130101481000010010100001310897344180354912624156481563513034313081200101002010000100201000015544154112002110922121010010102277522810328710002461322834100000127011611155531014710000100101558315720156211567215724
200241559111835718735924564106281556396402520166101541000010010100001311197303071504912560156891563913016313060200101002010000100201000015570157112002110923251010010102292222944329060002477922987100000127011611153951013810000100101575815622157041569115565
200241568511635618335724589105271561197892520163101271000010010100001313087319901514912588156791560212954313225200101002010000100201000015561154112002110922091010010102285022850329380002483222903100000127011611154811015910000100101574515653157141567515670
200241565211735718835724492105001559196962520145101751000010010100001310867337111404912643156101558912887313018200101002010000100201000015631154112002110922231010010102286222904329430002462522993100000127011611155381013810000100101570115625157141572715604
200241570711636218936224606105651568497192520166101361000010010100001307697295501354912640156961566112960313048200101002010000100201000015649154112002110922781010010102299923051328140002472923088100000127111611155701013510000100101564615789155481573015594
200241565112135918735724614105241560897442520145101181000010010100001307617292351504912620155671553412990313057200101002010000100201000015604156112002110923001010010102301323143328320002446623045100000127011613155231013810000100101585615696155721564715697
200241562511735819436224502105531558996652520148101451000010010100001322027309281524912579157441565212968313146200101002010000100201000015589154112002110923591010010102279622906330350002474822865100000127011611154451013210000100101569215484156551572215676
200241548411735819436124918105301570297942520136101181000010010100001307307301431474912499156861560112967313164200101002010000100201000015642155112002110922461010010102282722892329430002473122750100000127011611154771014410000100101561015694155811565515585
200241552611735719036224644106381552398082520139101271000010010100001310777393631424912613156231561112800313093200101002010000100201000015612156112002110923621010010102292122828330150002454022876100000127011611154961014710000100101552815687156311566515690

Test 3: throughput

Code:

  prfm pldl1strm, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5475

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102041552011629014628923879013715519949825101001001000010010000500724405049123760154791546714050714179101002001000820010016154311222611102011009926521001001002214322231322370023892221991000011171901600153840100001001554915504155601563415578
102041549511629014529623865013115477954425101001001000010010000500728831049123780154521552714143714251101032001001620010016154951218911102011009926791001001002210322139322120023904221661000011171801600154540100001001545615470154311551515422
102041546511628914628923904051715500960225101001001000010010000500724818149123420155281545714083614174101032001001620010008155021225611102011009925551001001002219222279321890023874222371000011171701600153460100001001553315458154851548015455
102041547111728814528723852011915471956925101001001000010010000500729277149124220154541550914022714137101002001001620010016154431225011102011009926551001001002223722212321990023924222791000011171701600154110100001001553115503155681547815534
102041546911629214629223901013215486953525101001001000010010000500722922149123530154711548013987714207101002001000820010016154911219811102011009926131001001002214322231321980023892221251000011171801600153960100001001547115605155321547215466
102041556011628614528823908051715442950425101001001000010010002500724233149123060154801549914110614162101002001002420010008154631215311102011009925491001001002227122235321650023934221771000011171801600153120100001001546815488155131549315470
102041549011729114628923919014415565958225101001001000010010001500722400149123810154101542814001714189101002001000820010008154171223811102011009926141001001002210322217321820023884221451000011171701600154090100001001577115587154931551515502
102041542011728914429023875012315478955225101001001000010010000500722481149124640155391551314083614138101002001001620010008154761227211102011009927171001001002216922227321760023958221891000011171901600154050100001001556315400155081550015485
102041546211629314229323910012815540957725101001001000010010006500723980149125090155091550713999714144101002001001620010008155131226611102011009925751001001002220722152322820023896222551000011171701600153630100001001543115533155561545615465
102041543111629214528923882013515520950725101001001000010010002500721626149124480154571545514063614271101022001001620010016154701226911102011009926501001001002225622243322410023960221931000011171701600153530100001001551815465155661546915494

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5481

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 19 | 1e | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100241548411628914429102395450415448957625100101010000101000050725752491247215491154221400231416410010201000020100001539415487111002110925601010102222822194321601238742213510000640316321534610000101551115487154981558115562
100241555211628914629002392751615477948825100101010000101000050724508491238815479155471408131423410010201000020100001543715407111002110925821010102217422162321430239272217610000640216321525810000101545615505156681545615552
100241545911628414528802391851715492955725100101010000101000050726973491240415473154271406831426610010201000020100001547015447111002110926311010102213722223322270238682222710000640216321539210000101558615509155221549115428
100241551011629314529402402449815486955925100101010000101000050726038491238215530154851402531425710010201000020100001543715438111002110926011010102224222143321600239682221310000640216221538810000101549115846154931553015540
100241552211529114729112383952115528954825100101010000101000050727694491242115471155231404031421610010201000020100001545815379111002110926471010102219222261321840239792216610000640216221532710000101547315536155321548115420
100241542511529214529402384451315477958025100101010000101000050726708491238915479154911410431415410010201000020100001543615393111002110925721010102214222225321930239512205810000640216321532710000101541815521155141546615524
100241552611628614428802397651915538956225100101010000101000050722403491243615461154951401731428610010201000020100001549915454111002110926461010102216722202321840239172216210000640216221537010000101553715436155501545415543
100241547011628914729002386413715460953625100101010000101000050723451491239715485154641404631428310010201000020100001538915434111002110925901010102226722178321920239392220410000640216321540010000101552415556155411549915529
100241542211628914629102397751715474954425100101010000101000050722488491241215449154371407831425210010201000020100001546215483111002110926141010102211422183321710238762218010000640216321535610000101553015509154731549615485
100241557711629114529402398012415444947225100101010000101000050724089491243615511154951400931427910010201000020100001548415421111002110925711010102216222203322180239312214710000640216221539710000101548515482155241555215484