Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PSTL2KEEP)

Test 1: uops

Code:

  prfm pstl2keep, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 92 | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100416181235183224531612906251000100010006807711546157713193145710001000100015661556111001258226922553268024472282100073116111494100015791616162415951600
100415791233183324541574893251000100010006929411571157512783147710001000100015531579111001246229822743288024312237100073116111505100015811618154715941566
100416071233173324651608881251000100010006922011591158612763144010001000100015781599111001259225822533263024622283100073116111495100016241574160216051670
100415911234183524581599875251000100010007038111599158012843147210001000100015551548111001263225522923285024662272100073116111513100016291618159215721617
100415731234163224741616870251000100010006937711592159613103144610001000100016101587111001219225322643266024472258100073116111507100016001572160215951599
100415931133193424801588879251000100010006908611575161013063146410001000100016401571111001251226522753276124412283100073116111490100015741576162415771593
100415821233193424411647874251000100010006811311582158113043142210001000100015811568111001238226822793245024492271100073116111502100016021578159715721594
100415991232183424401556850251000100010006858411601156712833145410001000100015751596111001236227422553271024412252100073116111490100015731616158416191610
100416181132163424471601876251000100010006898711571161713203146910001000100016031592111001250225922593272024572240100073116111477100015871602159416111628
100415901333163324471602882251000100010006888411564159712763146310001000100015611601111001242231022653246024392248100073116111501100016321629160116021574

Test 2: throughput

Code:

  prfm pstl2keep, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5581

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202041566611636919237302475515585956025202411022010000101041000012856372717214249124741545415682128077131162011610204100041021610016155581561120201100992180100101001002324623156330430248292301610000111131701600156491008510000101001562615593156551549915521
202041557311637319137502487615488956025202261019310000101041000013060972723614049124511551115678129366129752010810208100081020410020156311541120201100992107100101001002335022896331900246672301310000111131701600155271009310000101001563315582156531550915619
202041559211637619637002481915488967425201931021410000101001000013040672799514449123841546415621128233129602010010200100001020010000155841551120201100992259100101001002303523195329970250912322510000000131111611153591012310000101001546115614156391566215708
202041543911636919837202468315495964825202171016310000101001000013066873245613349123491556615713128843130032010010200100001020010000156621551120201100992124100101001002314123016331460249722316710000000131011611154351012610000101001560315659156201557115691
202041562511637519437302488815524963925202141019910000101001000012868972661913549125721561015507127613130132010010200100001020010000155691541120201100992236100101001002302523163331840247472307310000000131011611153531010210000101001556615618156261557115479
202041565811737519837102485215466973125201811020510000101001000013088972499414049123501559115573129183129982010010200100001020010000155051551120201100992158100101001002314623228332360248232341910000000131011711155291008710000101001557115590156021555215540
202041557611737519337102480615631963825202351020210000101001000013178172613013349124361562715609127473130412010010200100001020010000156061561120201100992126100101001002313423095331750249182301310000000131011611154081012610000101001555215608156351552215641
202041558311737519537002470815571951625202471020810000101001000013067173600013249124191546515537128473131002010010200100001020010000154971561120201100992178100101001002310023004331970249222300510000000131011611154601008410000101001565215545155451553215541
202041564711737519638202476315540949325202171021710000101001000013027172940112649125751553315524127883129872010010200100001020010000155001551120201100992230100101001002324623171331670248192321510000000131011611156321013210000101001545915668156021543115558
202041556711737219137302469915435963925202231020810000101001000012809972749713149124391552315592127867130462010010200100001020010000156231541120201100992153100101001002319023053331400245732309610000000131011611153821009010000101001556015581155321555115661

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5513

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20024155141163872003872498515598957325201301013610000100101000012974972007336491237115433155021286331296620010100201000010020100001558414811200211092055101001010236002338733506251042329910000127111611154531015010000100101552615542154741548515560
20024155911173912003852517615518952225201241013910000100101000012937372414744491230715504155051278831307120010100201000010020100001546414911200211092206101001010232912323433122251082321510000127011621153531011710000100101548315488154971546115611
20024155021153922033872502115575962225201571016010000100101000012971073086340491247915474155531288231293220010100201000010020100001551315211200211092165101001010233872342233271250762311810000127011611155031015310000100101542315534155591548415465
20024155621153882033852525015347953925201751014810000100101000012927973353342491239115452155561294131286820010100201000010020100001549215411200211092057101001010235552308133218250152331710000127011611155071014110000100101548915544153851550515614
20024155911153871953822502715521944625201121013010000100101000013046772531336491246215533156501291931303920010100201000010020100001550214611200211092171101001010232232339333200252112351310000127011641154001014710000100101566715462155781541515586
20024154891153912083872515415407948825201481012110000100101000012963072639541491242915484154591285231296320010100201000010020100001564917511200211092082101001010230722319733628251492320810000127011611154011012610000100101551515538156481545215577
20024155481173892023892495715557949525201541013310000100101000013147872647646491233415460156231282831290620010100201000010020100001553117611200211092096101001010234362356733469248682329710000127011621153521012910000100101551815490154631546115418
20024154571153972033872510415459959525201631017210000100101000012890372446942491242115490153731278031297120010100201000010020100001553614611200211092095101001010232042328733667250592338710000127011611154981013810000100101555915489155141550615607
20024155041163891973972500715618958225201271014810000100101000013008473168955491241115579154861283631303220010100201000010020100001536517811200211092243101001010232442328433422249412326910000127011611153661014110000100101534815556155201547215469
20024153991163852003852492615528953625201181013610000100101000013005272416048491248515558155871284331301120010100201000010020100001555117511200211092078101001010232412343833240251672314310000127011611154091013510000100101562215532154401537015433

Test 3: throughput

Code:

  prfm pstl2keep, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5415

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204153641163071513062424600155039539251010010010000100100055007227041491230015566153651430271409410220200101272001000015400121761110201100992589100100100224262243832426024206224921000001117180160015415100001001543015407154221536215425
10204153651153071523072423500154179489251014910010000100100005117327711491255015360154731394771428010100200100002001000015417122041110201100992528100100100224612247532566024322224661000021117222242215370100001001534615333153641544515450
10204154141153081543092423101154179431251010010010000100100005007197311491229615450153951388761433910100200100002001000015446121251110201100992571100100100224592247332494024288224831000001117642242215353100001001542815369153841547715398
1020415405116302157309243561761153949436251010010010000100100005007247600491227015339153511398961413210100200100002001000015342122711110201100992491100100100225072249632471024262224471000001117222242215339100001001544515343153621550515436
10204154141153051533102426301154699552251010010010000100100005007236791491273415396154511398061409510100200100002001000015381122441110201100992508100100100225522248732524024329225461000001117222243215309100001001543515382154721540115448
10204153931163071533062426000153709449251010010010000100100005007255010491222415390153641395261416010100200100002001000015409122181110201100992530100100100224052242032473024293224981000001117223242215413100001001545615398154871543315413
10204153771163081543082430801154189488251010010010000100100005007241470491233615400153261396261403310100200100002001000015422121171110201100992527100100100225132250432498024264224671000001117222242215358100001001542315463154361537415461
10204154801153101523082423901154439488251010010010000100100005007198390491228415452153631398061411910100200100002001000015419122231110201100992507100100100224802248432534024239224651000001117222242215337100001001547415404153861538615349
10204154381183081533022425301153909457251010010010000100100005007195190491239115423157091407661409110100200100002001000015343121761110201100992544100100100224202252932519024236225831000001117222242215370100001001546715327154341547515423
10204153841153041563052425901153589483251010010010000100100005007229611491233615350153761406361406510100200100002001000015391122901110201100992574100100100224872243832470124278225841000001117222242215247100001001542315355154561539715386

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5399

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024154461163231753272458515334947625100101010000101000050722873049122910153771539813979314087100102010000201000015387153841110021109260410101022789227813266002446122690100000640216221535410000101535015430154081537215473
10024153401153351633302459415405948825100101010000101000050718615049123430153231546113968314075100102010000201000015416153381110021109253310101022851228073290902460822736100000640216221525010000101543015572154401538515440
10024154481153231683332451015374942625100101010000101000050720543049123780153781543313930314273100102010000201000015458154781110021109253610101022773227783277202450722726100000640216221545410000101542715389154641546415367
10024153631153311703292459815395947025100101010000101000050727278149122680153921546613963314200100102010000201000015386154201110021109258910101022804227673266602456622803100000640216221527010000101539415447154161538515397
10024154091153301673332456615453938525100101010000101000050719274149123120153731534413933314173100102010000201000015411153671110021109255710101022814226823275302453422700100000640216221531510000101533015444153501538615438
10024154801153251713322461615347946925100101010000101000050724730049122583154581546813873314184100102010000201000015360154291110021109246010101022838227303274102456922787100000640216221532910000101544015344153591528515444
10024153541153301683292462615306953825100101010000101000050722257049122490153681540513979314080100102010000201000015403153601110021109255610101022786226893275702460622751100000640216221534210000101556415351153921537815417
10024154151163321653272450415451953725100101010000101000050723213049123020154051542614046314132100102010000201000015402154651110021109258710101022700227623280402466222742100000640216221528410000101534415359154431541215414
10024154431153301703342460615348946625100101010000101000050718971149123850153431541113982314113100102010000201000015402154191110021109249010101022817227553279012460022702100000640216221531010000101539315454155161546815400
10024153501153261713332454815440942625100101010000101000050720387149123350153961536513972314080100102010000201000015392154431110021109255610101022773227753270702448322701100000640216221538710000101540215366154561544315418