Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PLIL3KEEP)

Test 1: uops

Code:

  prfm plil3keep, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 92 | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100416271233163224491574890251000100010006963211563157913033147710001000100015771601111001259226822703290024762276100073116111489100016221624161115881588
100415871235183424611580850251000100010006861711574161413173150210001000100016051585111001260224822603270024122267100073116111499100016091604156516121581
100416261233183624651598898251000100010006923011599161213073140110001000100015721576111001257229022913284024512283100073116111489100015931623161316141597
100416051234183524181592928251000100010006890311594164313263141210001000100015771545111001238230822893277024372257100073116111500100016071607161916051627
100416231235153324721619903251000100010006917711593158212773143310001000100015951558111001241227022503276024472281100073116111498100016081629157615971595
100416191235173624391558856251000100010007010411597160812963148710001000100016111617111001249229322863275024772250100073116111499100015851597161615911605
100416201234163524541621881251000100010006935311585161913053145710001000100016001558111001261228922963281024682264100073116111456100015891594163515831590
100415541236173424471581859251000100010006868211605161612783145310001000100016061591111001241227922503270024412256100073116111517100016131588158615861616
100415931233183324881583915251000100010006997211600162112843149310001000100015951596111001235228122863260024602282100073116111472100016051620158615881604
100415861134173424731589877251000100010006881911554161612853143710001000100015991597111001234227622723279024522247100073116111522100016221613159915811584

Test 2: throughput

Code:

  prfm plil3keep, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5512

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2020415406117381198385250720154779528252022610214100001010010000129006722173148491244815571154621285231295820100102001000010200100001554714911202011009920561001010010023161232673326602487823380100000131021622154541013210000101001566015658154591559415453
2020415475117380202384250480157309731462019910324100001010010000129466725117131491261015604156081283831323720345103231000010326101121553015211202011009920931001010010023451233543345802515223344100000131021622153931011110000101001541915528155291549315552
2020415484115384196385251550155189645252016310205100001010010000130441726014137491238615345154831269531300620100102001000010200100001544314911202011009920191001010010023182234713333802510323246100000131021622154341013210000101001543915444154511555715634
2020415580117384203389250500154149551252019010202100001010010000130103731144142491238515493154361284431309420100102001000010200100001545315411202011009921151001010010023125232563332802493723546100000131121622154191012010000101001549215511153861534815554
2020415509117385201379249960153769584252020810205100001010010000127978718158130491248515551154841298031285020100102001000010200100001543715111202011009921241001010010023367233733322602504823353100000131021722154371009610000101001552015564155731557015503
2020415470116384200385249980155669599252021710208100001010010000129947728969139491255315597155141276031305420100102001000010200100001541714211202011009921261001010010023436233033351102524123124100000131021621154481012310000101001558215547156071543615466
2020415412118382204386248260155289519252019310190100001010010000129948732058137491252515414155781278431291020100102001000010200100001550814711202011009920581001010010023335234013324702505923236100000131021622153961010810000101001543715561154081557015468
2020415531116385205388250480154289604252017510208100001010010000129029726832140491239815295154701276331297120100102001000010200100001549815311202011009920341001010010023389233013324002498323394100000131021622153771010510000101001539715521154621552715488
2020415651116387198387249840154649412252021110226100001010010000131135724540131491252715441155321290231309020100102001000010200100001546714911202011009920381001010010022964232423338102499023565100000131021622153401010810000101001540415551154611536915431
2020415433116389200385250380154969493252020210226100001010010000130919723493137491250715495155191271731298520100102001000010200100001544615411202011009920871001010010023237231483301902492123389100000131021622153261013210000101001560315436156241537015501

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5553

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 19 | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20024154201173821923800249171546196622520154101271000010010100001310397220780444912473156131560612859313022200101002010000100201000015630150112002110920631010010102318823181332190248482308510000127111611154921011410000100101551915608156591557415544
20024156491173881943770249131559297652520115101151000010010100001317587305990424912352155531548212889313082200101002010000100201000015563151112002110920981010010102312323285331950250832318910000127011611154021012910000100101569915625156321565415656
20024154531163771973820249121551794682520139101421000010010100001300467291380504912465155601552712936313072200101002010000100201000015434150112002110922091010010102320222992330570250122336510000127011611154061013210000100101562715464153681569015578
20024155771173771923780249811547396052520130101361000010010100001309347240120504912555155311551612908313033200101002010000100201000015579151212002110920641010010102332023338330580251532317810000127011611155521012010000100101558215608156001554015565
20024156791173812023800250071552296312520160101571000010010100001312997254110424912503155521564312824312986200101002010000100201000015609149112002110920951010010102309623016332920247352311110000127011611154101011710000100101564015669154791558915561
20024155721153731943840249881558396952520118101541000010010100001303547287640424912433154501559912882313102200101002010000100201000015517150112002110921991010010102318023040333790247432334210000127011611153331009310000100101559015540156201549415520
20024156001163742013790249781548595252520127101511000010010100001308397317460454912499154191548212944313037200101002010000100201000015627152112002110920591010010102315723245332220248322324210000127011611152621012310000100101549415628155281544815581
20024155531163781923780247081553396622520139101661000010010100001302807221591394912425155261566312828313058200101002010000100201000015475148112002110920371010010102314923173332930250642294110000127011611153901011110000100101560815603157261551915744
20024156011173781923790246801557795962520157101451000010010100001310247240051434912700155151561312917312985200101002010000100201000015438147112002110921161010010102323123160331870248132304910000127011611154101016510000100101540715481155481555015587
20024156821173821983790249231544993962520130101601000010010100001300817263031354912536155481551112796313119200101002010000100201000015406152112002110920151010010102301323142332030248642320510000127011611154721017110000100101564815554157171554015658

Test 3: throughput

Code:

  prfm plil3keep, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5486

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102041546811629314629400238530154949583251010010010000100100095007253884912393155201555114079191469810100200100082001001615455122241110201100992640100100100222242222832140223810222161000011171701600154290100001001554015466154161545615490
10204154721162931502900023876015362952925101001001000010010000500723395491243015468155451404061428510100200100162001001615455122591110201100992561100100100222572217932156023867222311000011171901600153480100001001550115501155561554415428
10204154511162941472900023934015497955625101001001000010010000500728274491247615484154681406571425310100200100082001000815481122131110201100992537100100100221822220132148023910222731000011171901600153200100001001554815539154531540515501
10204154781152931462900023961015445947225101001001000010010000500727174491247115385155751408361419310102200100082001000815443121911110201100992589100100100221652220132194023890221571000011171901600153550100001001550715522154421544515432
10204153771162901462920023842015515958625101001001000010010000500724572491241715526155461404861421310106200100162001001615362122521110201100992611100100100222302214332223023875222991000011171801600153030100001001545015553154651544115466
10204155081162901462920023862015416955525101001001000010010006500723115491241715420155461405571413010102200100082001002415500122571110201100992582100100100221902215932228023875222121000011171701600154010100001001550515498154881553315469
10204154651162931442910023921015528960825101001001000010010000500727595491237415540154371402461416610107200100082001001615488122611110201100992588100100100221602218932209023878221721000011171701600153970100001001540015475155041546115473
10204154861162931462930023918015458960025101001001000010010000500724116491241515522154901401061421910100200100162001001615423157781110201100992576100100100221752225832262023944222751000011171701600153730100001001552515568155491548115408
10204155331162941452900023860015486953125101001001000010010005500721725491250215469155011403071423010100200100082001000815467122291110201100992626100100100221862226332268023894222091000011171701600153630100001001548615630157321548215517
10204155211162911482920023842015439972925101001001000010010010500727274491246315519154771402861425010100200100162001000815431122341110201100992573100100100222312214532192023916221941000011171801600154440100001001554015530155461548215473

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5486

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 19 | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100241551611731415831502431715494954225100101010000101000050723643049124591551015447139753141761001020100002010000155001547511100211092735101010225462242432536242452246110000640316331533710000101547015488155321548415505
100241552911530915831102421715489954325100101010000101000050723876049123151552615505140603141901001020100002010000154421545111100211092659101010226072254932442243592252910000640316331535610000101550315465154211543615398
100241544511531015731312430515497959725100101010000101000050722370049123471549915452141033142531001020100002010000154891536311100211092724101010224932250332483242782254310000640316331533310000101551015442154961544915539
100241546711630915631102434015482941625100101010000101000050724218149123101545315437140413141091001020100002010000154921556611100211092626101010225252252632514242952251210000640316331537210000101556515550153891542515509
100241548311631215631402429115415956425100101010000101000050728619049123541552415461141223141331001020100002010000154441546811100211092618101010225262256732624243132245410000640316331534510000101542115541154081545615441
100241546911630915931202433815444958525100101010000101000050725590049123581544915513140563142851001020100002010000154141546911100211092663101010224472258732495243162252710000640316331532910000101537115580155591551615476
100241583211730816031102431715471953625100101010000101000050725915149123831547215493141193141981001020100002010000154811549011100211092698101010225122255332538242932246110000640316331544010000101551815483154991560115471
100241551611631315630922425215500959425100101010000101000050722201149123871546415485140203141161001020100002010000154181534011100211092707101010224702259732538242832257510000640316431540410000101544515488154551550715414
100241542811531015631002427915541956625100101010000101000050725826149124411551415438140993142861001020100002010000154621542911100211092724101010225692266532550243212257510000640316331536910000101546115542155551550515527
100241543711631215931902430015426957725100101010000101000050721908049123601552015510140993142321001020100002010000154461546111100211092668101010225742247632529242952254510000640316331532710000101548815520155271548415465