Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PLIL1STRM)

Test 1: uops

Code:

  prfm plil1strm, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 92 | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100415651234163324421604898251000100010006986101580159112943144210001000100015791601111001232226222643265024432260100073116111502100016211595161015941604
100416191134183224441579884251000100010006985401608161113143144810001000100015831602111001242226322813304024612284100073116111492100015911602165616181596
100416031233173224411560887251000100010006959001572157813043143410001000100016081593111001256227923063299024612262100073116111506100016231570156915881595
100415751234173424421558905251000100010006840011575161112873143110001000100015951558111001221227622733255024552265100073116111491100015451579160015651615
100415711233173324471576869251000100010006944501570160612853145510001000100015601601111001250226523103250024592249100073116111506100015981602162415791624
100416231232173224611593877251000100010007007701597161513043148010001000100016211598111001250225822583292024482253100073116111510100015921592156616181621
100416211134163324891566878251000100010006941801565157713263141110001000100015661566111001247227922643274124672299100073116111483100016111585160915661565
100416271234173324401591852251000100010007043201565162912863143310001000100015751555111001236228122803279024422267100073116111491100016161616160115961580
100416021233183424411578880251000100010006893301575160013103148410001000100015691599111001240229122633282024432309100073116111490100015781574158015871576
100415781234173224881579860251000100010007022701595157012683145110001000100016051573111001232226222663270024712282100073116111477100016001618159016161566

Test 2: throughput

Code:

  prfm plil1strm, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5787

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 1e | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202041582411713431801344244861156859826252022010224100001010010000132151740200031491273115692157261311831313920100102001000010200100001573815911202011009924691001010010022849227213289300243852270810000131221623157141009610000101001575715828157451576615930
202041577911813501811342244241157129727252021410220100001010010000133359733484133491274415718158881297931318520100102001000010200100001572215411202011009925481001010010022822228093285800244682276110000131221633155781008710000101001578415889158581576715606
202041564211713401781340245341157759676252024410229100001010010000132797736417034491272415784157401306031329120100102001000010200100001578514911202011009925111001010010022945227503277500245672284410000131231633156911009610000101001572715687157581571415741
202041565411913421791337245861157749645252021110226100001010010000132261733753035491274215844157491301731328120100102001000010200100001575815611202011009924821001010010022664227173280700243452275310000131231632155751012010000101001570715764158051584615936
202041586811813391771343244091157199832252022610199100001010010000132793740408027491261815755157651301931315020100102001000010200100001575615011202011009924671001010010022892228263294210243412278410000131231723154951011110000101001580815729158041569115939
202041565111813421761337245891156339836252021410193100001010010000132860730634031491264115724156221307931320720100102001000010200100001575215311202011009925881001010010022938228963280100244832254810000131231632156431009910000101001580615848157181581415818
202041575311813511811346245031157669704252017910193100001010010000133771737951127491271715699157611306331324120100102001000010200100001575214811202011009924761001010010022761228533289420244702287710000131221632156001009910000101001578315800158011583015884
202041571711913431741341245811157529829252020510214100001010010000132781737957140491272515735158351302531309020100102001000010200100001568115511202011009924321001010010022845227843267700248062278710000131231633156511009310000101001572415865158461571315775
202041578211813401871338245301157179932252020510208100001010010000132655741968027491273415816157391302331328120100102001000010200100001563714311202011009924181001010010022843227583280300246052282410000131231632155911011410000101001576015640156531569815714
202041605711713501781343246841157989685252019910214100001010010000131422741294049491274515890157671297931324220100102001000010200100001568714311202011009923851001010010022765227943280800244842273910000131231723158281014710000101001567615875158371591515774

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5797

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0f | 1e | 1f | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002416017119337180342024240015933975325201631014210000100101000013284874485605249126691576415713130693132302001010020100001002010000156561432120021109246210100101022700226263276113242672268510000127011511155741012610000100101576415791159211579915715
2002415806119341177339024367015691975425201481015410000100101000013386473548014349125831570315691130483133102001010020100001002010000157061421120021109253410100101022745226663292810244272281310000127011611156571015010000100101576015932158901553515789
200241582212034218034202451710815723980425201631011810061100101000013483374816204149126951574415744131363131922001010020100001002010000157701441120021109235610100101022609227063273800244612276710000127011611156391013210000100101573715914156881562615726
2002415834119338184340024535015750978025201511013010000100101000013548473889204449128011586215817130953133982001010020100001002010000157491441120021109250410100101022874228263263300243872275310000127011611156091012910000100101576715697159201584215670
2002415753119339182335024375015822982725201601013910000100101000013313974301313949126691583415883131423132872001010020100001002010000156581421120021109244010100101022705225633262700245952265910000127011621156501012010000100101578215803159101581915728
2002415816118344180348024511015831981725201331012710000100101000013363873930304849126451572315724130413132802001010020100001002010000157601431120021109238410100101022833228803282110244392271810000127011611156111013210000100101575815849158301580715935
2002415776120346183332024426015843993025201811015410000100101000013510274495004849126731575715717130533131882001010020100001002010000158211431120021109263710100101022750229673279800246612263910000127011611156041012910000100101579015798158561580315792
2002415726117340180336024523015778977125201421016610000100101000013267573485004349127521570515734130753132462001010020100001002010000159241431120021109266210100101022851227093283800244002270310000127011611156061017110000100101579215766158811581215960
2002415871119340177344024539015826972625201451016610000100101000013566873815904049127991582215782130943131712001010020100001002010000155981451120021109240410100101022681225803270400245672271010000127021611156311014110000100101567815722155531581615763
2002415788120348183341024421015769971425201661015110000100101000013402573714904849127171569415759130443133592001010020100001002010000156521431120021109255010100101022638227913256600244352273210000127011611157561014110000100101588715697157251582215971

Test 3: throughput

Code:

  prfm plil1strm, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.3533

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | bb | l1d tlb miss nonspec (c1) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204135811013793783792546313516788725101001001000010010000500633502049104751353013553120887122841010020010024200100161348710667111020110099361001001002376223703337380025479237581000011171711613430100001001359313506135981353013483
10204135051023813783782554613485785225101001001000010010007500632531149104721349913508120667122521010020010016200100161355110694111020110099321001001002370223688336752225456237181000011171701613475100001001356613492134571351213533
10204135751023783783792549613515786725101461001000010010000500628335149104731355213456120826122461010020010008200100081350910682111020110099661001001002371423777336000025483237141000011171901613469100001001354213500135721350813484
10204135341023773763782564213447779525101001001000010010004500634395049104701351913532121046122511010020010016200100081354110720111020110099601001001002370523715337130025474238111000011171801613445100001001350413487134561353013525
10204135301013803773802558213528781325101001001000010010000500629139149104791352413487121646122641010420010008200100081353010679111020110099371001001002371123736337050025441237701000011171701613442100001001354513564135951348913515
10204135021013803793802551613538780925101001001000010010000500629360049104211351713531120876122541010720010016200100081352710707111020110099561001001002384723715336960025586237911000011171701613492100001001367613538135421359913528
10204135051013803763802546913520781425101001001000010010000500630876149104131350313534120916122451010020010016200100081354510744111020110099511001001002371923683337140025532236391000011171801613440100001001355013539134731358113531
10204135571013803793792544113453780725101001001000010010000500632360149104451357013548121677122081010020010016200100161354110683111020110099321001001002368623687337610025520236301000011171701613439100001001354913521134701356013537
10204135221013793793792554613509779425101001001000010010008500630627049103991352213549121486122141010020010024200100081357110666111020110099471001001002373323764337279025595236761000011171801613438100001001348513518135481352513620
10204135861013783783792556913486776925101001001000010010000500631330149104421350413533120966122451010020010016200100081354410706111020110099521001001002364723798337350025580238111000011171901613454100001001355213542135641352113563

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5488

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100241554611629414728723900015489952025100101010000101000050724933149124221551115412140303143391001020100002010000154311544011100211092606101010222602209732248023828221881000000640216221532310000101544615501155651540415524
100241553411629014729323958015518954925100101010000101000050725048149124351538815571140583142131001020100002010000155011543111100211092502101010222222223432138023906221881000000640216221537110000101548115504154261555315468
100241548411629014529223937015446955325100101010000101000050729346149124251543715504140593142441001020100002010000154121541111100211092525101010221112219432253123952221751000000640216221538410000101548115467154911549815440
100241538111529014729223925015491951025100101010000101000050727722149124321539815470140723142381001020100002010000154811543311100211092550101010222222219232239023920221651000000640216221550710000101549715398154281547915458
100241554511629114429123958015478958725100101010000101000050728128149123351551015556141223142131001020100002010000154191540211100211092571101010221742213632175023884222171000000640216221533610000101541415484154691548515479
100241553911729014529523893015366952125100101010000101000050724984149123931541615409140613142361001020100002010000154631548411100211092565101010221322217432170023864222091000000640216221534010000101552815454155261546215500
100241542211529014629023881015434956125100101010000101000050724324149124111547815467139463141671001020100002010000154561540811100211092618101010222192218232174023890221411000000640216221539410000101547315462154201547415453
100241548811629414729123957015455960625100101010000101000050723833149123951554515449140053142181001020100002010000154971546811100211092471101010221412223832181023904222311000000640216221533310000101543015475155151548215446
100241544511629414529123837015427954725100101010000101000050726419149124001545615507139823141611001020100002010000155101543711100211092476101010222012222732191023968221641000000640216221538410000101547315504155141556515505
100241545811629214629423862015527955525100101010000101000050726309149123911544015464140333142041001020100002010000154501540511100211092518101010222512219432157023862221911000000640216221537910000101549215479154541544815465