Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PLIL3STRM)

Test 1: uops

Code:

  prfm plil3strm, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 92 | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10041469114335452703143677025100010001000635201468149111703133810001000100014421456111001424962560356127712553100073116111386100014451467146614801447
10041487114336442711144576725100010001000630911469145811753133810001000100014541452111001525202517358527662522100073116111374100014871472148014761460
100414731145384427471429747251000100010006346414611452118031312100010001000144414531110011026072535356027422537100073116111391100014601468144814841468
100414511145354327541434777251000100010006320314561474117731302100010001000145714731110012225472587354326972561100073116111381100014721487147114691472
100414551145344427491434787251000100010006410614571468117931311100010001000146714601110011225362526356827082521100073116111367100014761478147714881456
10041483114436442758144378125100010001000625561474145711623132210001000100014661468111001025202561358527402529100073116111382100014701469148614881458
10041442114434442753146177325100010001000634761447147711833131510001000100014711484111001525332564359527552515100073116111376100014601465146114521472
10041472114537452721147177425100010001000632771478147211793132710001000100014721454111001025532541358627532565100073116111392100014561476145314551483
10041465114435442734147376625100010001000632981464146911643135910001000100014421461111001425712564353927342585100073116111378100014561459146214521456
10041458114537452756143177725100010001000634871473145111863134610001000100014701468111001625432595356527212556100073116111375100014881454145614651468

Test 2: throughput

Code:

  prfm plil3strm, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5727

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2020415697119350176352245051573297872520175102141000010104100011333827439341314912621015619158371301661314720100102081001210208100041581414411202011009923911001010010022904228303275602439722803100000111131801600156981009710000101001589815865157491571815727
2020415667118354186349245271561598492520187102241000010108100011322347384351344912740015769156601294231325120100102001000010200100001568214011202011009923331001010010022672228963310002440523180100000000131011711156431008710000101001582215706156911572415770
2020415749118350182351244781576799742520208101751000010100100001340317423260344912703015784158581303631315720100102001000010200100001566913811202011009923321001010010022871227393288302442922805100000000131011611155961011110000101001567315807156201558215777
2020415589117349186352243741568698082520220101811000010100100001319747379361334912744015749157271309531302820100102001000010200100001562314711202011009924961001010010022859228443285802449322828100000000131011711156081010510000101001595515829158951579515678
2020415752119353185356244811575497492520229101721000010100100001327097402261314912624015763157101307731309620100102001000010200100001560514811202011009923551001010010022921229673288702448223126100000000131011611156631010810000101001568615907157421566715843
2020415681118352184354243951569796822520226102051000010100100001323537372261384912774015716158031294431320020100102001000010200100001563713811202011009923891001010010022783229583283802469222873100000000131011611155711011110000101001588415807156681573015803
2020415600117351191354245081570195752520226102111000010100100001328277365331354912563015588156251308831312320100102001000010200100001560713911202011009924501001010010022895228493277302458322753100000000131011711157091012910000101001559715783157931578415714
2020415730118352186352246211573998092520229102111000010100100001330527319051274912613015708158311305831311020100102001000010200100001573014711202011009924201001010010022900226973287202445322892100000000131011611157031013210000101001571915813157281564815778
2020415637117350187348244891569397362520221101961000010100100001328027388041324912557015811157991298731315120100102001000010200100001566814511202011009924401001010010022892227313279302465722674100000000131011611155511009010000101001567615661157151558415728
2020415734118348189351245241583898222520232101841000010100100001317547370071394912662015752158011290231308520100102001000010200100001575115011202011009924491001010010022823227343290102450022783100000000131011611155311015610000101001576115661157361576515775

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5601

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20024156011163671983702481615594966425201241016910000100101000013090672977803949126321555715701129703130362001010020100001002010000156031451120021109205410100101023344231793309402481322954100000127011611152851013810000100101570015730155521566315699
20024156511173691893682505015682974225201301014210000100101000013155372590805349125601554815656129193131292001010020100001002010000156071431120021109230710100101022957232063322002485723060100000127011611155281013810000100101551115662155111605815609
20024155841153621963752470315605970525201391014210000100101000013193273788204649125381549015687128723130012001010020100001002010000155361531120021109221810100101023181230513311102481223130100000127011611155141014410000100101558715644156441559015586
20024155011173721823722496615630963725201601012410000100101000013170472659015049124671557415638127973132732001010020100001002010000156751431120021109229710100101023095231293330922490123302100000127011611154411015010000100101576215513155201563515623
20024156421173681933662493115609946525201331012710000100101000013108873436204449124411552615596130013129342001010020100001002010000156951551120021109219910100101023167233113301902482423070100000127011611153641013210000100101555916229156241537215650
20024155411163641813702468815585975625201361010610000100101000013007473012114049125831555215518128963130242001010020100001002010000154791501120021109217110100101023069233493305802479922812100000127011611154641013210000100101566415593155831563015541
20024156261183701913752490415602962525201661017510000100101000013168072588604249125361544315646129403130802001010020100001002010000154971511120021109226510100101023349233783315602469523112100000127011611155671011110000100101552115583154511553315661
20024155991153731913692493015634962625201511015710000100101000013084472911003149125271562915571128693130472001010020100001002010000154631541120021109223310100101023215231273321902493223178100000127011611154601012910000100101567715761155591558615540
20024157581163711873642482615702964125201031016010000100101000013662373047014249124891574815472129113130982001010020100001002010000156411411120021109217910100101023279233123312912458523174100000127011611155971011710000100101554315614156721565815535
20024155331173761893782487415584969525201751013310000100101000013028372963104049125821560615506128713130242001010020100001002010000155801521120021109217010100101023098230223339902487923186100000127111611154431014410000100101561915630156751556215641

Test 3: throughput

Code:

  prfm plil3strm, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5560

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 1e | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102041564611612961501294238870154299656251010010010000100100005007273061491252115639154281417361427210109200100082001001615491121871110201100992660100100100222982228632304002399822299100001117171161515374100001001560915581155441554215532
102041552911612981481298239470156029650251010010010000100100005007283740491255415565155221414461426710107200100082001000815601156721110201100992692100100100222872229132296002405622292100001117234244415386100001001558515649155271560015618
102041548111712991481294239942154279593251010010010000100100005007230821491245715571156311416061420010100200100002001000015544122711110201100992718100100100222992223832306002399522284100001117234244415506100001001545615536155771562715574
102041558611612921461293239672155799610251010010010000100100005007277161491246315555155821411861425710100200100002001000015579123301110201100992632100100100224112219532331002393722303100001117234244415436100001001554715551155691553015626
102041556511612961471299239652155719584251010010010000100100005007287591491251515540156181414061419610100200100002001000015493122261110201100992697100100100223102227232300012401022328100001117234244415437100001001547815485155281560515613
102041556011712961471295240031154919710251010010010000100100005007265840491247515520155951418861423710100200100002001000015530122971110201100992673100100100223592230032349042396622277100001117244245415403100001001558415491155891617615590
102041555711612951490294240172155799711251010010010000100100005007297440491253415552155801423361429510100200100002001000015489123161110201100992642100100100222832220432264002407022285100001117234244415499100001001559015580155061557215495
102041550211612941481295240581155669604251010010010000100100005007308770491249015588155081412061429510100200100002001000015529121801110201100992726100100100223272226232307002397622249100001117274244815402100001001567415593155901553615487
102041556211612961471295239811156679569251010010010000100100005007267020491249915576155651414061427410100200100002001000015529122561110201100992730100100100223252224932244002397222270100001117234244415417100001001559315633156291558515581
102041559611602961480296239951155469664251010010010000100100005007314980491255115541155631404761426410100200100002001000015549122531110201100992709100100100222802231232289062398222312100001117224244415497100001001561815660157371561015559

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5458

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002415539116334175333244691543695222510010101000010100005072440314912351153711544414080314231100102010000201000015380154091110021109253910101022875228323282700245512271710000640216221535110000101546915502154261550115389
1002415440116337172335245661545496462510010101000010100005072402014912390155011544914034314211100102010000201000015490154991110021109261110101022849226863276600244962273510000640216221538010000101547615484155081549515408
1002415362115335167334245491547994752510010101000010100005072626614912335154771543413960314160100102010000201000015366154551110021109256110101022756227593276000245222277810000640216221536910000101549115533155801547515401
1002415474115334179344245821545994372510010101000010100005072515404912410154041544714087314194100102010000201000015376154511110021109254610101022742227283280300245802282210000640216221535810000101547315458154821549915382
1002415514117341177342244991542995072510010101000010100005072272514912394154311544814035314228100102010000201000015440154671110021109263810101022798227713269900246052279810000640216221543110000101546715496154751540615454
1002415474115329172339245301544395042510010101000010100005072254904912400153921544014057314143100102010000201000015462155251110021109258410101022771227683279000245152269410000640216221535510000101544215462154911547215471
1002415475116332173333244951545494662510010101000010100005072636004912366154351554714021314087100102010000201000015471153971110021109261610101022675228393266700246032270310000640216221529610000101542515444155431549915421
1002415375116337179340245721537394862510010101000010100005072526704912445154001546914011314177100102010000201000015447155171110021109257210101022816227593284010257652270110000640216221536010000101541115415154411532615431
1002415449117335177335245411537495032510010101000010100005072534704912386154131547414020314149100102010000201000015457154611110021109261810101022767226883271010246452279010000640216221526210000101552615476153811542115424
1002415411116341167335245101539595272510010101000010100005072342204912402154631552113902314164100102010000201000015398155201110021109255110101022768227843276700246362277110000640216221532610000101548815435154471536915477