Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PLDL2STRM)

Test 1: uops

Code:

  prfm pldl2strm, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 1e | 1f | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 92 | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100416141234173302469016169112510001000100069305115941584130331482100010001000155815691110012482298229232660244922211000731161114750100016231609162416151597
100415751235183402438015858732510001000100070472115871577131631426100010001000157515961110012292297230432800246622781000731161114870100015721594161515691632
100416241232173402479015768802510001000100068999115631597131731467100010001000157315691110012352304228132942244022491000731161115101100016031577156215691621
100416161234173302432015968802510001000100069337115961571127831452100010001000157515441110012442282226032890244822791000731161114790100016171578159915831584
100415891233173402557015528562510001000100071745115961640128331436100010001000158815911110012402273226232820242222781000731161115020100016071616160215641599
100415781234163402444015768862510001000100069791115651617131231450100010001000158715581110012392255228732990245323101000731161115010100016141583160115721596
100415881234173302441016058692510001000100069862115931624127931460100010001000159515731110012512260227032580244222841000731161115080100016131590162515971596
100415661234163402458015998872510001000100069619116031575130931484100010001000159915481110012432262230032970244322771000731161115530100016151577161116111572
100416061233183402429016049012510001000100067982115921621128731421100010001000156916151110012312268228532840244322641000731161114950100015981568159315971628
1004162012321733024778815928822510001000100069284116031614126931484100010001000157115661110012212266228432630248922551000731161114860100016031573159716021611

Test 2: throughput

Code:

  prfm pldl2strm, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5815

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202041583811733318733624201158609858252022610196100001010010000133738744073046491271015694157341312931321720100102001000010200100001567914911202011009925921001010010022690227203286000244862273910000131011611157081009610000101001576815739158691566315804
202041585511933617733424443156349783252021410190100001010010000131373740046035491277415685157051306631318620100102001000010200100001583015711202011009924911001010010022654226263261300244192272810000131011611158041010210000101001583415898159341580715796
202041573411934317834724286157699868252022010226100001010010000133143740675032491269315742157861309931340620100102001000010200100001580114911202011009924041001010010022858226853253300244012267410000131011611156841011110000101001586915892157961572515787
202041585611833417934024384157899835252021110187100001010010000133516738364140491272315868159111317331329320100102001000010200100001573014511202011009924391001010010022691227673285400244652263610000131011611156541011710000101001577615774157171573815726
202041595111833717233924349158849864252023510223100001010010000134282739500137491276315916158911305131319120100102001000010200100001570915711202011009924531001010010022800226743262600244642271810000131014111155591026210000101001573815861158101585615798
202041572911833818133624720157829860252022610181100001010010000133292736483041491279015780157101306131325420100102001000010200100001572415011202011009925881001010010022737228393267100243882265610000131011711157941009910000101001585115936158311573115848
202041566611833817634624539156829860252022010223100001010010000132664739298036491268315819157821304231318520100102001000010200100001589215111202011009924411001010010022738228663257800242192270210000131011611157111009910000101001585415788156521574015722
202041586411833917733924573156519817252021710217100001010010000133577737558032491272615598157431307231325020100102001000010200100001565315011202011009925351001010010022665228733270700243382275910000131011611156171010510000101001587815803157681590315715
202041572311933418033824401158069894252020810196100001010010000134503735745044491268715774157631315031316020100102001000010200100001574515911202011009925461001010010022557225923273000243162275210000131011611156291010810000101001587315777158851571715861
202041579511933818233524379157639718252021410217100001010010000135083739294031491275115682158721306931318020100102001000010200100001579216011202011009923971001010010022678226263280900245142279210000131011611157771009610000101001574315737159591579415707

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5784

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 1e | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200241556811935018234602481911585096932520124101181000010010100001344227423704749127041577915857130433133612001010020100001002010000158381761120021109250710100101022787227113278802442922724100000127231643156001015010000100101572915748157601574615755
200241575211834618334402432211577597462520142101451000010010100001344077361054049126441572015788131063132442001010020100001002010000158941521120021109235910100101022697227683278302433422604100000127221523157731015010000100101581215811157251578215876
200241584511834518034702493511586198372520148101421000010010100001335987401814049127161584615817131653131742001010020100001002010000157151521120021109254010100101022671227843281602443522858100000127231623157031014410000100101564815760157841581115811
200241569111834918334302465211581197932520148101241000010010100001335827417824149126851582015747130843134532001010020100001002010000157231441120021109238610100101022635227213257902446922561100000127221623158431012310000100101578415839159231592715806
200241577911834217934602450511569898112520172101511000010010100001339897401855049127231577015690131783131702001010020100001002010000157071531120021109248210100101022916228373275102448722719100000127221623156511012910000100101585615859159741576815796
200241570711934317434702493811572497972520124101451000010010100001325767351033949126801572415931130813132602001010020100001002010000157441561120021109245410100101023136227593268802440422649100000127231632156141014710000100101578215770158141580815766
200241584011734618134502432911572099372520139101151000010010100001342057377124949126001579215935131153132502001010020100001002010000157441761120021109257110100101022891227143278102428722578100000127221623157581012910000100101567115649158111593815795
200241571811834918334802436411579699732520142101541000010010100001341767370843849127101566515638131083132782001010020100001002010000156831521120021109251910100101022722227303275202446922918100000127231623156701012310000100101568215740156731585015729
200241572611834218134702448111582296852520121101331000010010100001337027346155149128191575815831130663133142001010020100001002010000158421521120021109248110100101022657227093274402447922750100000127221633155791012010000100101584915770157851576315860
200241574011834518134502433811580898262520181101511000010010100001325517333695049126751584415695130263133572001010020100001002010000158261491120021109251710100101022755228273269502434223052100000127231543157291023810000100101593015832157951575115724

Test 3: throughput

Code:

  prfm pldl2strm, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5424

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102041545511530815330924345153689416251010010010000100100015007192211491237815399153401403171412210100200100162001001615404121991110201100992532100100100224382243032492024316224641000011171801615295100001001547715441153631539615504
102041534811530815330824693154339414251010010010000100100035007227641491232615375153781403261412210100200100082001000815374122181110201100992519100100100225872245132501024216224141000011171821615356100001001540615382153981547315475
102041545111530915530524256154489512251016710010020100100005007194931491227815372154341396071405110104200100242001000815416122141110201100992548100100100225402254832468024187223931000011171901615265100001001547515459154031528415439
102041543211530415330424277153809409251010010010000100100005007211001491222415468154911393471418910100200100162001000815429122121110201100992469100100100225302249632533024261225671000011171901615344100001001538415489154541538715474
102041543511531015530624267154149432251010010010000100100025007207631491240415400153971404661407510103200100082001000815360122621110201100992497100100100224482239932462024228224191000011171701615270100001001539615447154601547315456
102041544111530815130724268154809450251010010010000100100005007220091491238615415154631391761406610100200100082001000815312122431110201100992517100100100224602247732496024146224711000011171701615311100001001538715372154401544915455
102041547311530715430724224153599442251010010010000100100005007227160491244615395153161400371406210100200100082001000815378122431110201100992585100100100224842241932516024247223901000011171701615396100001001539415440154031547515457
102041547611630815430624217154379477251010010010000100100075007200270491227315480153551395761417710107200100082001000815365121661110201100992496100100100225732249432461024163224891000011171801615274100001001551415408153981539115428
102041545111530715430824161154679549251010010010000100100045007227041491236115463153651395761417110100200100082001000815457121471110201100992550100100100225262251532506024204224821000011171701615380100001001539715418154061542815483
102041545911630715430824273154119431251010010010000100100005007223381491242215422154691395661417210106200100162001001615357122201110201100992455100100100224982249232446024294224871000011171801615333100001001540715449154141553315431

1000 unrolls and 10 iterations

Result (median cycles for code): 1.4032

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100241404710542942142727248139708287251001010100001010000506555500491091814006140111262531278710010201000020100001408114037111002110949101010254552551535520273412547810000640716221396410000101410014087139831401814032
100241401810542642542827283139968240251001010100001010000506555020491094314048140311260731275710010201000020100001400614048111002110965101010255152562235496273352551810000640216221396810000101408114035139751399414060
100241402610542542242427224140108311251001010100001010000506555500491093714042140111260331276510010201000020100001404914042111002110939101010255952549135492272472550310000640216221394410000101404814029140081399314040
100241400810542742042927315140828223251001010100001010000506555020491096514016140351263531274910010201000020100001411714020111002110948101010254712549235559272482548110000640216221395010000101405413998140481404313983
100241403910442842142827354140088250251001010100001010000506569720491094414022140091256031277310010201000020100001406214045111002110966101010254682538635522273322547910000640216221396910000101404314036140181400414027
100241403210542642242627248140198246251001010100001010000506557740491093314070140701264331275710010201000020100001403314070111002110967101010254162555935526272312545910000640216221395310000101401214036139981399114061
100241405910542642142827363140378234251001010100001010000506551780491100014018140441264231279610010201000020100001399814042111002110927101010255312544535552271642548810000640216221396710000101405114019140561403614040
100241404310542742642627356139868212251001010100001010000506538210491097514043140051258431279910010201000020100001400314041111002110948101010255002545835473273012554610000640216221392710000101404114043140111401914035
100241406010542742242727317140208226251001010100001010000506583190491092714030139951265631273910010201000020100001410814026111002110927101010255582551535561273562547810000640216221399710000101403614084140381401714028
100241403110542842442927248139578240251001010100001010000506558170491099914008140181265531276510010201000020100001399614032111002110975101010255462553635565271912541610000640216221394910000101406113993140511399913998