Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PLIL2STRM)

Test 1: uops

Code:

  prfm plil2strm, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 92 | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10041625123315322429161588525100010001000701531573161813063147110001000100015741607111001265223922643246024312241100073116111510100016061610162016391641
10041619123216302440159990025100010001000709121626161213023144610001000100015911597111001262226722603249024172268100073116111550100015961614157816101580
10041618123117312411160689325100010001000690081593159913493146210001000100015851594111001263225522493239024302268100073116111507100015911623160216391562
10041615123216322578153989925100010001000699221634161412853145910001000100015771588111001248222722333242024522245100073116111501100015851614162116261577
10041600123215322431161790525100010001000698771571158413063149010001000100016041575111001256226222393240024592249100073116111477100015871619159816211616
10041614123217322415159290725100010001000701121595161513033143110001000100015731578111001252225422883276024282272100073116111495100016361629163516291638
10041639123215332444157585825100010001000694541607162513113147110001000100015731589111001241226922563247024412249100073116111477100016041587159516041610
10041620123017322427156689125100010001000687971556160313213143410001000100016071586111001242226122323244024172278100073116111508100016351620157016161578
10041630123216302434159787425100010001000681371610156413103146110001000100016001559111001237226022543231024072244100073116111480100016201639162916201603
10041619123016322445161587525100010001000705141567161813273145910001000100015971591111001267224322283251024222245100073116111521100016221605164016161602

Test 2: throughput

Code:

  prfm plil2strm, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5663

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202041566311736319536800248781569996542520226102351000010100100001307647274843449125311561315575128103130992010010200100001020010000155031481120201100992249100101001002305722989328030245222298610000131011611155741011110000101001568715730156211574115603
202041561811736919436800247651566596302520202102081000010100100001309137354903549124961561215705128543131322010010200100001020010000155701561120201100992322100101001002310022946330530245572274010000131011611155091012310000101001558915709156391565915635
202041582511936119736900249871574096652520202101961000010100100001318977352172849126611556715552128643129572010010200100001020010000156251501120201100992402100101001002308222971331510245932310210000131011611154211007510000101001566115694156501567115633
202041568011736320036300245421557497882520220101901000010100100001307837322063749124361571815588129733131562010010200100001020010000155901551120201100992261100101001002319522948331300247812304610000131011611156141010810000101001560315630156561565615523
202041565011736519036800247171569396332520238102081000010100100001319917291583049125881550615722128723130532010010200100001020010000155801481120201100992337100101001002285523142329020247342317210000131011611154861011110000101001573115721156011580015606
202041557411737019436900247681560297142520211102111000010100100001318817312734349125051582815688128533130742010010200100001020010000155591411120201100992217100101001002296822998329370248582291410000131011711156361013510000101001561715580156531577915739
202041564911736619335700245771567797762520208101991000010100100001309877285183549126731566015734128673131612010010200100001020010000156791581120201100992400100101001002314322906330270245402294610000131011611154541011110000101001555715623156851551815755
202041569811736419136000246041559196262520232102141000010100100001307807370793849123791567115829129533130262010010200100001020010000155551541120201100992293100101001002312822954330040246332291310000131011611155691012910000101001564715624155911568015677
202041574611737019336000250281570696722520208102141000010100100001312607339903049126581553115659128303132032010010200100001020010000156251551120201100992330100101001002285722893330700247382306710000131011611154751012610000101001574415765156421568515753
202041568011636319636500246211556897432520187102251000010100100001318017327973449123931556415625128963130042010010200100001020010000156041581120201100992421100101001002344722752329750247862326010000131011611155121009010000101001567715647155951571915581

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5595

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20024155771173611973642467415483958725201511013010000100101000013020772855004049125251554115510130693129952001010020100001002010000156441501120021109219310100101022986228583302000245452302810000127041611154591011710000100101560715484155901557315591
20024155901173621913632472815602971525201181016610000100101000013327572561004149126231565915675128983131052001010020100001002010000156721481120021109224710100101023185229673301200245592324210000127011611154441015910000100101567415610155411565415700
20024158031173611913652467315632974225201601015710000100101000013158272731004449124701559715522130193130452001010020100001002010000155081491120021109220910100101022876230963295800245062289710000127011511153481010810000100101547115597155791560615655
20024155891173591923602483815587961325201751014210000100101000013279173756104549125041559315742131383130492001010020100001002010000157201511120021109236210100101022763228093341500246772307310000127011611155571013810000100101577615492157531551515599
20024156301173561873662467115667967725201391015710000100101000013273173499703449124781564015606128633130482001010020100001002010000156481501120021109217110100101023070229193320400245532302610000127011611153911014410000100101555715716156431568815607
20024154821163671973592474315460977525201421015710000100101000013276773570904549125131562315471130523130192001010020100001002010000155541501120021109217010100101022894230033300800247642307910000127011611154271016510000100101555215651156521583515563
20024156661173641923572492015710964925201091012710000100101000013194473644604349125101556415620129623129742001010020100001002010000155491491120021109231010100101022920229503301200247602295610000127011632154421011410000100101550715620156371555615636
20024156071163621933572502715595967125201481014210000100101000013345773402304949124191564115635128923130582001010020100001002010000154901511120021109230810100101022911230883306600247712295810000127011611155431011710000100101565115682157781557115532
20024155881173621913662511015523951625201721015410000100101000013214173088803949125541554915702128533130162001010020100001002010000154501511120021109221110100101023041229993298300246982311710000127011511156071013210000100101568515725155041565415649
20024156671163571903582500415570968125201571014210000100101000013207873009215149124371569715604130393129812001010020100001002010000155701521120021109226210100101022905230443294300248372298710000127011622154711012910000100101559915586156771567615538

Test 3: throughput

Code:

  prfm plil2strm, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.5440

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1020415499116353188359247580154529541251010010010000100100005007206124912362154141544813936614266101002001000020010000154171221911102011009925491001001002303422908330050247712293210000011172222422153340100001001548315399154801539715448
1020415378115348186355247331153729466431010010010000100100005007187644912377153741541513868614172101002001000020010000154411229411102011009924741001001002289422934329480246372293210000011172222422153880100001001538015407154051540615399
1020415413116366188351247161154119393251010010010000100100005007196614912390154011548213959614163101002001000020010000154151221411102011009925071001001002297122978329030247732298810000011172222422153190100001001533915463154841533915421
1020415385115352182342247951155059427251010010010000100100005007245724912367154531544914066614172101002001000020010000154011221911102011009924471001001002292623003330520247372292610000011172222422152620100001001538215417154051540615380
1020415413116350187350247671154119393251010010010000100100005007196614912324154661537813990614078101002001000020010000153731219111102011009924411001001002290822987330490247542290810000011172222422153790100001001540015414153991545015462
1020415440116359184351248211154589419251010010010000100100005007252544912458154321546113902614208101002001000020010000154391220811102011009924911001001002301022889328350247912301010000011172222422153460100001001557115437153561542715484
10204154621153511903522471811545095192510100100100001001000050072408249123661541415405139154414057101002001000020010000154201217211102011009924251001001002294522785329960247782302610000011172222422152971100001001542315466154761542915413
1020415442115343185351247091154529541251010010010000100100005007199214912336153281552513984614178101002001000020010000153691216611102011009924431001001002298922917329010248372298910000011172222422153680100001001548715478154911538215468
1020415354115352192352248121153919472251010010010000100100005007230894912406154081555013934614068101002001000020010000153561219311102011009924151001001002294022994329320247862290210000011172222422153190100001001533915463154841533915421
1020415385115352182342247951155059427251010010010000100100005007245724912389154501535513957614109101002001000020010000153811215611102011009924981001001002290122988329770248522300410000011172322422153330100001001550915483154361544515411

1000 unrolls and 10 iterations

Result (median cycles for code): 1.5561

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | bb | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100241551211629914729624001155349641251001010100001010000507267180491253215627154651409731433610010201000020100001551815563111002110926981010102232222271323281240382233410000640516331539510000101556815564155711553015564
100241564611729514829224001156079525251001010100001010000507278291491246215503155461408931432310010201000020100001556415542111002110926821010102228022271322840239322227510000640516551550110000101553615571155301551115624
100241557911729814829223987154529631251001010100001010000507284970491249515548155531420331438810010201000020100001553015449111002110926671010102231922292323050240072223810000640516551536010000101558015480155821562815533
100241556811730114729424039155109605251001010100001010000507278970491244115731156301416731433310010201000020100001545815544111002110926621010102219722306323190239292240410000640516541540010000101560915475155121563615598
100241557211629514629423999155609642251001010100001010000507286790491255915592156531417531432110010201000020100001545915527111002110926241010102229922283323680240282227110000640516551540610000101552915554155671547115595
100241564311729614729623972156119615251001010100001010000507266710491251615506155211407531439010010201000020100001550915508111002110926551010102226522247323420240252226110000640516451536110000101560915579155571559215555
100241558011729414629323998155629743251001010100001010000507259590491250315550155851415531431310010201000020100001555515464111002110926991010102224222312322720240222228710000640516541541610000101558815502156291556915539
100241560211629514929524000154479533251001010100001010000507249861491256215568157111411831436110010201000020100001552015495111002110927171010102225622264323190239192224210000640516451543610000101560715612156071563415568
100241555211629614929524024155989591251001010100001010000507305830491249815598156131418931426010010201000020100001549415544111002110926561010102234122352322810240242231210000640516451546710000101551315509154461552415526
100241565011629814929824003156029635251001010100001010000507311310491243615577155721407431428010010201000020100001553515529111002110926681010102226722221322290239872226910000640516541544110000101544815579155861543215554