Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PLIL2KEEP)

Test 1: uops

Code:

  prfm plil2keep, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 61 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 92 | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1004161712301530237016038742510001000100069643515911571131431480100010001000158515821110012662240220332250238922111000735116111494100015871622159516151619
1004159212291529238815859152510001000100070156015911605131831461100010001000157215991110012552204221732340238921981000735116111528100016201606161716161609
1004160012291530240216009012510001000100069710015921601127531447100010001000159315981110012652217220832180240421971000735116111495100016171614162216101633
1004158912301530240215998882510001000100069287016001563130331476100010001000158115921110012422210222432070239322251000735116111512100015891627161316201629
1004161912301530239515908752510001000100068885015921589130031435100010001000160215851110012902199223032020237622231000735116111480100015951603158616241622
1004157912301430238616219212510001000100069670016041614129731449100010001000159016001110012602202220432190238722111000735116111506100016191625161915791608
1004160812301529239416119072510001000100069738015911598134131483100010001000159415641110012542205218731980236522191000735116111519100016191621159716151599
1004156812301530238715689082510001000100069570015971616130931459100010001000158615991110012452220223932390238822491000735116111511100016061572159916071591
1004162513301428239816009042510001000100070244015971623130131445100010001000159515831110012352228223732180239622231000735116111493100016271624159816371586
1004158112301528239016298932510001000100071014015921636134431478100010001000159415871110012432230221832160237322071000730116111490100016851651156816161614

Test 2: throughput

Code:

  prfm plil2keep, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5514

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202041550011637921237924913154509517252021710202100001010010000129766728094038491237715452155451276431303320100102001000010200100001553215511202011009920671001010010023405231963307302497423338100000131011611153261010810000101001541915499155571552715494
202041555111738620538424999154879580252021410205100001010010000127937721569035491245915513154591285831296620100102001000010200100001551115511202011009918861001010010023347230863343302491023112100000131011611152911009910000101001549215462154171554915643
202041558411638520638724930156019388252020510211100001010010000130285724019037491234615654156071270431290320100102001000010200100001542515611202011009919591001010010023250233063312102484523298100000131011611155581009310000101001552015582155111549615413
202041548511638721339024841155579472252020510226100001010010000128302722794046491237715679154331269571303020327102001000010200100001545015611202011009921621001010010023474235393326902623123516100000131111611153711011110000101001553415682156131574215658
202041565511737920938625016153859639252023510223100001010010000128016726921026491241715442153891281331295420100102001000010200100001558515411202011009921041001010010023407233823332712496723417100002131011611154141012910000101001538315510155081543415526
202041558411538521038925137155579437252019910196100001010010000128000727248033491236415472152781275631289620100102001000010200100001537415411202011009920081001010010023389231663353302491723436100000131011611153701010510000101001544615549154141551915435
202041548811637921338124842154819415252020510238100001010010000128102728242037491235815525155391281731291620100102001000010200100001538615611202011009919591001010010023277231743351702495923288100000131011611153611009910000101001555815392155431538415680
202041561311639020938124871154869545252018710199100001010010000130396726772135491249815561154091271031292920100102001000010200100001548515611202011009920341001010010023077235653329302508023107100000131011611153571012010000101001548515547154011552615538
202041554111637920638625193155859542252019910214100001010010000129475728439135491242115501155531298631299420100102001000010200100001559415611202011009920431001010010023410231243315102507723172100000131011611154191011110000101001549015494154301554815506
202041551211538019937825275155359598252019010214100001010010000130518725080131491234715451154871289831304320100102001000010200100001545115511202011009920701001010010023557231113326902489523324100000131011611153021010210000101001546015528155411539015673

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5704

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0f | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002415713117352186355024695156539797252015410151100001001010000132113728981344912599154601565913134313168200101002010000100201011615852146112002110923351010010102287322847329330244662274210000127121522156221011010000100101576115699156991572815754
2002415701117357180354024584156769686252013610136100001001010000132952732799414912620156621565312935313255200101002010000100201000015729153112002110924121010010102294822990327480246322287810000127021622154231015010000100101580215621157111565915815
2002415695118361188356024646157459680252014210139100001001010000131891739968464912545156681560713002313099200101002010000100201000015686147112002110923811010010102289522887327970247532286810000127021623155861011710000100101575215685156991574515802
2002415688117354190359024492156029758252014810136100001001010000131815733114444912547157351583112976313290200101002010000100201000015669151112002110923471010010102283122870327680245902298510000127021622156871016510000100101568915697157151563015716
2002415705118355182354024735156119712252013010163100001001010000132299737444354912634157801569313017313212200101002010000100201000015617153112002110925351010010102296622688328200246692286110000127021622157071013810000100101570015631154831576915781
2002415561118355185356024556156569715252013910121100001001010000131159737630404912624156581573913007313186200101002010000100201000016054158112002110923531010010102290923053329240246942286310000127021622156371012010000100101582115606157741576215595
2002415671118358190353024559156389802252018110154100001001010000134583735037414912655158081563312943313298200101002010000100201000015613176112002110924081010010102285823141329350246902295910000127021622155561013510000100101574915744157041584615844
2002415717117357188353024580156159786252013310121100001001010000132524735649484912545157651575512988313274200101002010000100201000015655153112002110923951010010102285622878329650244862277110000127021622155011013810000100101561015727158151571215702
2002415555117352187353024399157109707252013610145100001001010000131684733832444912603156831565912931313145200101002010000100201000015643147112002110924481010010102290122838328440248692293510000127021622155181012310000100101563215649156671563115942
2002415704119361184356024614155869716252011510112100001001010000133577734017444912564157851569313046313113200101002010000100201000015766160112002110924051010010102293322845327700245992289710000127021623156081017410000100101567315841156001576715674

Test 3: throughput

Code:

  prfm plil2keep, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5506

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | bb | l1d tlb miss nonspec (c1) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | map dispatch bubble (d6) | dd | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1020415548116311156309242921551094952510100100100001001000650072797549124751554815437140716142621010020010000200100081544412229111020110099264310010010022552225803257500024374225731000011171716015341100001001538615427155211546715480
1020415576120311155312242851538795102510100100100001001000050072198549123721552415507139966142051010020010016200100081550312244111020110099272710010010022437225453260200024269225121000011171716015426100001001550915683155631541215532
1020415441115310157306243361545895432510100100100001001000450072082849125141550115418141057143801010020010016200100241538212154111020110099271410010010022509225053253700024334225251000011171716015359100001001556015508154871546615622
1020415508116308157309243391552495612510100100100001001000050072288349124361549015486139786141601010020010016200100081543712224111020110099266710010010022513225553244100024324225051000011171816015358100001001546215504154881552115451
1020415462115311159309242931557196242510100100100001001000050072858549124171543315486140357142681010320010016200100161546312185111020110099271810010010022490225403255700024271225641000011171716015437100001001555815476154811539515461
1020415433117312155315242681547995612510100100100001001000050072270449123271554515498140467142221010020010008200100081546712292111020110099259510010010022541225563255500024292225311000011171916015334100001001544715489155431548815477
1020415493116315153311242441546796082510100100100001001000050072429149124821544915521140137141561010020010359200100161542912155111020110099271110010010022555225003248300024288224701000011171716015365100001001546615544155221547015595
1020415579116307157313243341547994822510100100100001001000450072766849124411537815496139846141321010020010008200100081539012173111020110099262010010010022553225923256500024226226021000011171716015369100001001546315474154771539415454
1020415533116311157311242251554396272510100100100001001000050072304049124211544015439141057142831010120010008200100081550912200111020110099262510010010022578225103250100024282226221000011171916015417100001001547715481154621549615430
1020415564115315159308242901545695612510100100100001001000050072429249123531545615389140396141691010120010008200100161550212278111020110099263910010010022510225433254700024309225321000011171916015552100001001549015517155391551915510

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5459

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024154791163341733340245261544494632510010101000010100005072324814912396154471546314083314263100102010000201000015445155251110021109262210101022773227613276000245712267710000640316221541510000101549715474154411548615447
10024154411153361673340245871542994252510010101000010100005072102414912427153951551414031314164100102010000201000015550154461110021109259410101022755227653268800245312281310000640216221534210000101549015487155201548015463
10024154021153361713370246511551994902510010101000010100005072142514912315154201548413890314220100102010000201000015469153821110021109259110101022897227173268700245822279710000640216221538310000101543215533154561543515424
10024155381163361763360246441535894202510010101000010100005072063114912420154761548214037314208100102010000201000015391155631110021109261310101022743227453279400245262271810000640216221532710000101548015469154611542515446
10024154401153331683330244581551995332510010101000010100005072637714912361154711538914072314168100102010000201000015323154451110021109259710101022728228053276300245932288210000640216221531510000101549915472155141545715404
10024155191163371703290245281540995062510010101000010100005072028114912340155011534714057314282100102010000201000015404154001110021109257110101022788227653274500245342275610000640216221523810000101541715382155501542715421
10024154271153361693340245271541495362510010101000010100005072242214912384153851547813996314177100102010000201000015467155141110021109260710101022699227733274400246032276510000640216221560810000101548415459154231541715471
10024154641163371743380246071541194452510010101000010100005072361414912334153841550113997314160100102010000201000015452154871110021109259410101022732227423273500244192271510000640216221536010000101546615452154251549815398
10024154321153351753400245481546094942510010101000010100005072690814912405154101546814059314105100102010000201000015442154481110021109253810101022799227953271200245712273110000640216221537710000101539415433153721549415434
10024154821163371743320245881542394392510010101000010100005072528414912397154861555014062314163100102010000201000015423155151110021109260710101022718227423284000245792272610000640216221535310000101544815539154581550515434