Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PLDL1KEEP)

Test 1: uops

Code:

  prfm pldl1keep, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3a | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 92 | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10041608123517362454151608924251000100010006952111602160213043143310001000100016011592111001239226522633294024452276100073116111491100016171606159816081620
10041627123417332466531600865251000100010007053011604158113423145910001000100015881611111001255228723153292024302263100073116111496100016111625160716181628
10041615123519342439101570901251000100010006909701583160112793147310001000100015961619111001253226423083291024602283100073116111491100016241597158116151641
10041616123217352475111643893251000100010007046511585159513013148110001000100016141571111001239228822793274024642257100073116111510100016171591159215931639
1004162712341833248481595866251000100010006991811576160413333159810001000100016021583111001281228122953283024602308100073116111505100016061598160915991710
10041627123617352468101596868251000100010007068411593161813033146210001000100015921588111001256226322873284024532276100073116111524100016161611157616091599
10041592123419332455521577909251000100010007433611588156613153146810001000100015971610111001265225023083310024452251100073116111501100015991612159016191626
1004159812351834248071565894251000100010006883111597161913103149910001000100015941601111001256229123103299024452294100073116111491100016171606159816091620
10041627123417342486551608891251000100010006895611596161813243145310001000100015911583111001247225722663294024512273100073116111535100016151622159915821620
1004164112321736243471582870251000100010006980311606163112823147910001000100015761569111001255229522723276024672294100073116111498100015991620158415931589

Test 2: throughput

Code:

  prfm pldl1keep, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5525

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 1e | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20204156151173811983820251077533154919661252024410211100001010010000129723732094137491243815459155191274631294220343102001000010200100001551915211202011009921971001010010023068233133333000250112335810000131021622155271012010000101001551615536155021541615571
202041570511638519838102522610560155199644252020810217100001010010000129128726458131491246015539154821285131304120100102001000010200100001555015111202011009922061001010010023255233133324500249492343210000131021722154481010210000101001550115509156011561715614
20204155051163892023790249117605155389594252019310223100001010010000130054729289142491248815536154871284431283320100102001000010200100001553115411202011009920711001010010023330233033319000249692322310000131021622154261012610000101001544815518156261552615530
20204155251153821933770248167669153419692252019610211100001010010000131567728441134491243115412155021282931308320100102001000010200100001541915411202011009921491001010010023302232813322400250692309310000131021622154081011110000101001550215587155741563815485
20204156711163831913830253077746155709609252022010211100001010010000130557725052137491240115454155041276231306920100102001000010200100001545514811202011009922021001010010023416233683320300251522320010000131021722153811010810000101001547415487154441539215580
20204154911173831953760249487696155219512252017510217100001010010000130176721805126491295815647155421282131284120100102001000010200100001545815111202011009921631001010010023315233633334700250452319110000131021622153851010210000101001541615616155271552115540
20204155441183782033790249457632154969594252022010181100001010010000130932730009138491237215478155781277831300920100102001000010200100001559215211202011009921591001010010023479233183322100249452326910000131021622153541009910000101001557315475154461542915609
20204154951163811933790252007676155909499252022310181100001010010000129308730742137491243815553155301277731294520100102001000010200100001548514611202011009921841001010010023223232503312400249272314710000131021622155251013210000101001546515622155271552615611
20204155451163741983790249677756154909546252020510238100001010010000130889730679132491255015527155371289731304020100102001000010200100001554213811202011009921481001010010023232232983345300250612311910000131021722152251012010000101001565715511154941554015620
20204155071163831973810248697580155209638252021710214100001010010000130586727977136491243515585154951275431306620100102001000010200100001552715211202011009921101001010010023272234353330100251292315910000131021622153281012310000101001559315532155891542715538

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5621

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002415473117036019703600024945010260155219614252015710136100001001010000132175724342047491250815665156271282331310520010100201000010020100001544615711200211092327101001010230562294633069002499422921100000127011611154651012010000100101569215694156611563015513
2002415624117036419403620024640010442155289483252013910142100001001010000132196727660044491250016191157111287531307020010100201000010020101131559114621200211092158101001010229312299033126002464223096100000127011611154531015010000100101551115648155941556615611
2002415683117036619203621024849010433155319588252011810121100001001010000131256725594049491247915614156111272231305720010100201000010020100001562415811200211092346101001010231572298933104002470223077100000127011611154001013510000100101555715675155721569115555
2002415584117036519303590024657010329156949577252012710139100001001010000130359729068043491239615649155341287831316220010100201000010020100001559915511200211092094101001010228412289333287002464422934100000127011611154111014710000100101553315733156181552015653
200241554311803632010371002474207261154809703252014210133100001001010000130833724702045491254315624155281293131313020010100201000010020100001555615811200211092337101001010229932319132971002476823066100000127011611155701013510000100101556915675155311555615531
2002415632117036219003600024900010371156259634252014810133100001001010000130553728716042491245915554156091313831318420010100201000010020100001558815611200211092171101001010231362289833244002473923008100000127011611155251014710000100101559915473158681556615542
2002415482116036219903700024835010243156169642252011810166100001001010000131304731478041491246715532155421290731306720010100201000010020100001563414611200211092254101001010231092311233062002474723254100000127011611154831014110000100101562715613156601554615690
2002415578116036619603640024828010305156139571252016610124100001001010000132029731766052491245015574156821296031293320010100201000010020100001556116511200211092247101001010230522310533006002494023074100000127011611154401014410000100101551915592155921557115669
2002415542116036719503670024710010342156279529252012710142100001001010000132259722217050491251515565155871296631315920010100201000010020100001554314611200211092349101001010230502322033121002488122856100000127011611154891011710000100101554215505156181564915633
2002415527117036119203680024715010266156349655252013010151100001001010000131077742026041491256115501154901283731324120010100201000010020100001560832911200211092337101001010230032304732917002487823013100000127011611155251011710000100101568015537156841558915771

Test 3: throughput

Code:

  prfm pldl1keep, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5423

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1020415436116330171328246055051536894792510100100100001001000050071513749126141542415472140416140941010020010000200100001529412201111020110099255510010010022755226813290802462522828100001117222242215248100001001540615390153741542015378
1020415379115333166328244844861541995192510100100100001001000050072771749122281532315453139356141091010020010000200100001538312192111020110099261210010010022865228743274502464322848100001117222242215405100001001541315437154231535215414
1020415505116330170333245505081540195282510100100100001001000050072477149122811543315501138916141581010020210119200100001545512170111020110099260110010010022820228193282812468422800100001117222242215318100001001539715362153831546915436
1020415360116328172337245344991543395072510100100100001001000050072005749123591544115518139216140561010020010000200100001539512249111020110099262110010010022731227063281802463022783100001117222242215257100001001548015450153631529015404
1020415395116334166329246295001537394712510100100100001001000050072552649123391542815454139816141791010020010000200100001537412250111020110099253510010010022832228623278102453322761100001117222242215342100001001537915451155041545815392
1020415414115329173328245261181543296022510100100100001001000050072423649124301542815413139736141101010020010000200100001541112155111020110099260110010010022760228183279402469422712100001117222242215259100001001533815455154671553215469
1020415382115331176326245174901545294822510100100100001001000050072401149124091538915376140336140781010020010000200100001534912224111020110099268210010010022840228103284712458522793100001117222242215319100001001537615415154291539015424
1020415518115325173327245964961547094902510100100100001001000050072151449123521537915446141016142141010020010000200100001538012182111020110099262510010010022739227173280612469022917100001117222242215328100001001551115323154861546815425
1020415475115327173327245514891537395752510100100100001001000050072030049122281532315453139356141231010020010000200100001538312192111020110099261210010010022756228323265312464122873100001117222242215292100001001538115439154671537115379
1020415367116329168325246204951545696352510100100100001001000050072124349123091541615435139296141791010020010000200100001535112206111020110099264610010010022855227183280502454322801100001117222242215331100001001542815407153601540215453

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5471

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 19 | 1e | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024155141200363183356024766125154809411251001010100001010000507251560491238315448153741400431418710010201000020100001539515438111002110924401010102307922969330090248562292610000640416551536410000101549615376154561542715450
10024154941150368195364024733112154279505251001010100001010000507269810491238115322154771407631417610010201000020100001539015471111002110924401010102299123014329560247882301310000640516551538110000101555015423154311541315487
10024154191160365189366024833514154249438251001010100001010000507245591491233215422154671406731423810010201000020100001542815349111002110923771010102286622932330040248582293110000640616651528910000101538315517154641544115457
10024155831190367185364024730483154489452251001010100001010000507236740491237815481154421396231424110010201000020100001542115403111002110923441010102307022901329370247722295110000640516651538710000101535815493155001550615445
10024154661160365194372024809534155049517251001010100001010000507277351491239915404154571402231419110010201000020100001536615448111002110923831010102300422898329110248502290710000640616461534310000101547115468155171547415506
10024153751150368192367024744114154059495251001010100001010000507242300491237515426155191401931424810010201000020100001540815441111002110924941010102303022973329790246872297910000640516551545810000101544115519154621546415473
10024154481150367197371024739119154679509251001010100001010000507216670491234215422154981406531418710010201000020100001539515438111002110924401010102299723087330181247442294210000640516661537410000101540915512154141548115429
100241547311603691923670247400154199518251001010100001010000507216441491234415457154281401431419110010201000020100001548315439111002110923621010102293322993329980247872305910000640516651539810000101546415462154851551515713
10024154251160363193374024790516154019494251001010100001010000507229391491232315489154891405231424510010201000020100001543915436111002110924531010102292622886329801248392303410000640516551535610000101542715469154401550515419
10024154731160371199362024759510154389490251001010100001010000507241141491235915501155011404431420310010201000020100001541315399111002110923811010102293122941329391247662298910000640616651526510000101540115501154901546515517