Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STXP (64-bit)

Test 1: uops

Code:

  stxp w0, x1, x2, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 1.000

Issues: 2.963

Integer unit issues: 0.000

Load/store unit issues: 2.962

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 3a | 3f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 5f | 60 | 61 | 64 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
71005454103400690062000211207445379306122954295729521479012191000494232745394454024848108412949294988624540045387100117100110001000294822943229401132936100001000002217460434933206581584522230444478821441244529220388202531848010004540045400454094541445405
71004453893401661168100210204945370306502964295629581481552191000494232345373453814845108512956295088714539245392100117100110001000294012943229530132945100011000122219960215144209461284577232244468791381324528220384202171846310004538145392453954539145392
71004453923390630064000300203345390306232960296829451476651191000494231145389454094839108472947295089074540445401100117100110001000295832936229480162957100011000122218660785151208971194579231244558781401344532120390202101846210004540845387453794539545421
71004453993401571056100210209845383305852959296829501472632191000494231445395453964830108422954294988534541545409100117100110001000295902954029420002941100001000002216660375134208731214521231744538861251244528620370202251847510004539545390453934537745381
7100445402340063006100000020624537930562295829612955148350201000494231145390453824840108432968295588684537945392100117100110001000293902962029340002954100001000002219660195110209961194588230544448821381364524820374201991846410004541145392453964541845385
7100445375340069006800000020344537230526295629592966147570219100049423224540945403483810844295029518865453994543210011710011000100029410294702960104852945100001000002219560335134209041204548229944428771371354531820378202021845410004538545381455804539345446
71004453873400670056021000208445374305632950296529541480542191000494240645410454064832108512953296488504546245385100217100110001000294802955029440002949100001000002219660455146209031184501232544438841391164530220381202301847710004538945393454014539145392
71004453873400610062000000206045364305812962296129631481512191000494231345395453954860108582960295788954538245387100117100110001000295602952029390002949100001000002221660435140210151244615231744488771461404531020385201841847010004538845403453854538245413
71004454173401641159100210205545384305812943294129441475512191000494230745405453944832108282967295088984556145384100117100110001000296532950229500232958100011000162220760325118208571264430231444408811381314530620382202321845410004538645386453874537945383
710044539034005900750002400207345368305792959296229651476512191000494231345394453834849108562971295788804539445390100117100110001000294522948029450162954100011000112220759895137208761194506230644438811381274527820376202161845110004538945392453944539745390

Test 2: throughput

Code:

  stxp w0, x1, x2, [x6]
  add x6, x6, 16

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 38.9264

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 29 | 3a | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202143892242917200300002191672228102344121124389121215946928055852429893925960078881669750612745918117190251100000493860620389264389140834765920418118114863027944957786837945152336043389137441000112020110099100100001010010000100232159153912416010188323029677225141122025823217410000714601402094840711097865783890832914321000010100389143389141389266389265389264
2020438914129152020000022275722171023281101243892242181469279558522298939259591788816697498127453131171769411000004938606603892643891408353859212181190148630879449077867779447923360433892634410001120201100991001000010100100001002321937376841575019042302836002500542026423213710000717721402094810741097561743890802914391000010100389142389263389266389261389144
2020438913929152002000021828722201023441101243891301930926928055854029893425958978877969749912745677117210861100000493860580389266389263835405907918130514862807944947786207945082335881389267441000112020110099100100001010010000100232144193628716280188423028468025111182017823217010000715921002094660751097665793890962914421000010100389143389261389262389140389142
202043891392915303000002232695220710235212312438912520292694045585242989522595977888196975461274528911718206110000049383106038914438914283482592021813041486278794498778620794486233582438926644100011202011009910010000101001000010023219315402021588018762302767402520502026123218810000713721202094830751097851833892062914371000010100389269389281389262389144389145
20204389266291520020000220137221110224813112438912221034692795585402989342595947887866975001274514811718508110000049386189038933038926483536592041811921486311794514778614794489233582438926744100011202011009910010000101001000010023220225395911581018852302888202534542026323218310000716281402094810691097965783890842914511000010100389265389265389265389141389264
20204389268291530000200222637221710233613212438924920994692805585242989432595927887906975121274556611717767110000049386194038926438914883597590881811771486316794512778681794494233584838913944100011202011009910010000101001000010023219229293961612019092302818102499542024323216210000715761002094800701097265783890882914411000010100389186389270389151389264389276
20204389150291630000000216487221410235214036438912621693692795585462989412595917887826975041274545111717553110000049386188038926738914283476592061811861486281794607778621794509233601938914144100011202011009910010000101001000010023218815361021616018772302857802519542026223217210000713641032094851721097065803890772914431000010100389145389267389265389149389141
2020438930929162002000022277722243023441361203892482125369279558542298939259592788817697504127454021171772111000014493860600389264389141834735920318118714863007944947786817945312336112389267441000112020110099100100001010010000100232194154077015630188223029572424971082021323216910000712801402094820701097564773890792914441000010100389271389272389273389142389145
20204389141291530300000219937218910232813112438912521623692795585302989442595937888176974961274531411717670110000049386062038914138928683533590781811811486321794491778680794493233584238926744100011202011009910010000101001000010023220015364561636019362302878902529482028523216810000714001002094820701097865783892072914391000010100389150389260389266389140389141
2020438914529153000000022096722061019761572443891272186369404558538298937259592788808697497127452481171823511000014493861800389266389143835355920618118614862877944907786737944792335848389270441000112020110099100100001010010000100232204173967615870191223029583225161022022223215310000717361002094810691097059783892062914391000010100389140389266389262389268389142

1000 unrolls and 10 iterations

Result (median cycles for code): 38.9950

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 20 | 22 | 29 | 3a | 3c | 3e | 3f | 40 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20034390002292144400225030219312280710132389934209107009955020429056325966376026069756912689929117650671100000493868660389955389956708225989518885814578277608287591817608232277552389955170215100011200211091010000100101000010232247403553215830186823036373825206420266232183100001178010200998361097033353899032897981000010010389958389959389959389963389951
20024389951292150000219620221012192800124389950206807009955022829055925967176027669757712690056117651291100000493868720389957389938708235988818885314578207608127591717608552277546389951389945100011200211091010000100101000010232254243810616340185623035576625425420225232243100000155203201001351097833343899002898031000010010389945389956389956389955389948
20024389955292033300222010223312256700124389933210507009055022429055325966176024869757112689486117648050100000493868733389953389941708175990918884514578217608247591887608342277564389950389954100011200211091010000100101000010232289243967716060185523036484025205220228232196100007146008200999351097433353898812898021000010010389942389943389939389953389941
200243899532921404002168202233123206601243899442042070100550216290563259663760252697576126898671176523101000004938687103899543899457083659892188851145782176084075919976084822775823899423899421000112002110910100001001010000102322452435622162001886230362720252718220270232260100000174403201001351097733353898922898031000010010389959389946389953389953389939
20024389954292033000220360221412328740124389924199577009955020629055025966176027269764012690181117649470100000493868720389939389940708285989218885714578227608477591777608222277510389956389954100011200211091010000100101000010232278363077215990189823037472425205620215232232100000165208201005361097033353899012898021000010010389938389972389941389955389954
200243899552921300002198702231123361010244389937212807008855021229055125966376026469757312689742117654270100000493868810389942389946708255990318885714578607608227591817608312277609389952389939100011200211091010000100101000010232240363727515520187723036573025285620235232208100000168404201002351097733773899182897971000010010389937389958389957389944389947
200243899542921441002162002231123447901243899252072070088550206290547259662760268697576126900611176509501000014493868740389947390097708125990318886214578417608207591877609352277543389941389952100011200211091010000100101000010232251273839216040190523036678624945420231232241100000160003201076361097033353898812897991000010010389943389941389942389959389945
20024389942292130000219460230712360510124389937205107008855020629054825966576025869756812689594117650611100000493868730389951389955708765990318885814578517608527591817608222277537389938389947100011200211091010000100101000010232267243397615890187923037272025065420213232219100000157600201079351097633353898812897961000010010389946389939389981389960389956
20024389953292140000221140220112224740124389928198107009955020629055125966176027069757112689542117649000100000493868700389943389950708295989918886114578307608367591777608242277507389943389956100011200211091010000100101000010232229243674315930190823034975025045020233232203100007167203200999351097833353898942897961000010010389947389944389954389956389960
200243899952921403002193202222123208301243899321960070119550247290560259670760256697577126898781176527401000014493868580389944389960708275988818885114578377608227591997608282277510389953389942100011200211091010000100101000010232245363987816210188923035579424885620215232222100000167608200998351097733353898832897991000010010389954389952389954389939390026

Test 3: throughput

Code:

  stxp w0, x1, x2, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 38.0115

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10205380117284710013120010141018138010227027152927110697518574106979157365438410461868010000098377032038010738010713043759953248989926435191009384181910028152543801163801121000111020110099100100001001000010045888115458868014588820220012458867100000100000010172639113311081130130380051119710000100380121380119380121380119380119
102043801182847111100199981400182380097070276529272106975185751069791571554384104617590100000493770303380121380110130429599512489749264121910093841819100281525438011838011810001110201100991001000010010000100458881144588670045888200200124588671000014100001010172636113211070130130380051119710000100380158380119380119380119380119
102043801182847101100199981410323801070702765292701069751857710697915717543841046214101000004937703803801183801181304365995124899892641419102938426191002815254380112380110100011102011009910010000100100001004588660458866004588660019998458866100000100000010173672113111156130130380051119710000100380111380113380111380111380111
10204380110284700000019998000183380103070276529271106975185771069791571754384104624200100000493770310380112380110130429599532490039264121910093842619100281527838012538014410001110201100991001000010010000100458882154588670045888200200124588671000014100001110172636113011068131131380052119710000100380111380113380111380111380111
10204380110284700000019998000183380103470278529270106975185771069791571754384104621410100000493770380380118380133130429599572489809264161910093842619100281525438011838011910001110201100991001000010010000100458882164588670145888102200124588681000014100001210172639113111071130130380051119710000100380119380119380119380120380119
1020438011828471101002003414001823800950702785292721069751857510697915715543841046175401000004937703003801103801131304295996424897892641219101938434191002815254380110380110100011102011009910010000100100001004588660458866004588660019998458866100000100000010172638113211070131131380051119710000100380119380119380119380119380119
102043801182847111100199981400183380097070276529272106975185751069791571554384104617540100000493770300380110380110130453599612489879264201910093841819100281525438011838012010001110201100991001000010010000100458882154588680145888200200124588681000014100001310172637113111081131131380051119710000100380119380121380119380121380119
102043801182848101100206171410181380092070271529272106975185751069791573654384104618630100000493770270380109380107130437599552489799264331910093844319102281577038011538011510003110201100991001000010010000100458882154588680145888212019998458866100000100000010172638113011072130130380048119710000100380116380118380116380116380116
1020438011528471110001999815101803800922570271529272106975185751069791573654384104618630100000493770270380107380108130437599572489799264331910093844319102281535338011538011510001110201100991001000010010000100458885154588710245888202200124588681000014100001010172636112911072130130380048119710000100380108380108380108380108380110
102043801072847000000206330001813801000702725292701069751857410697915738543841046225501000004937703503801153801151304445994724898792643319100938443191002815329380108380107100011102011009910010000100100001004588660458866004588660019998458866100000100000010172637112911072130130380048119710000100380108380108380108380108380108

1000 unrolls and 10 iterations

Result (median cycles for code): 38.0011

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 64 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive fail (b4) | l1d cache miss st nonspec (c0) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10025380019284600000019998010037999607026851973210675186651067915805542410472510110000493772260380012380013138348599402493309170241910954081191028616763800353802761000111002110910100001010000104589604589560345895622199984589561000010000001696460611108041413799441171000010380054380269380012380012380012
10024380261284700000019998010237999607026851973210675186651067915805542410470394110000493769310380011380011138348599412493299168781910953892191028616643800113800111000111002110910100001010000104589564589880045895650199984589561000010000001696460401120940403799441171000010380012380012380012380012380012
100243800122846001910419998000037999607026751973210675186651067915805542410470394110000493771870380011380286138351599522493319170061912953892191028616763802713800111000111002110910100001010000104589554589340045896600200114589561000010000001696720421107842413799441171000010380014380012380012380012380012
100243800112847000000206910200379996070267519742106751866510679158055424104703941100004937693133800123800111383485993924932791687219109538921910286167638001138001110001110021109101000010100001045895545895600458958700200024589561000010000001696460411107241413799441171000010380015380012380012380012380012
10024380011284601000020064010037999607040351973110675186651067915805542410470487110000493769310380022380018138351599392493409168721910953892191028616763800113800111000111002110910100001010000104589564589550245899720200014589561000010000001696460411108040403799441171000010380012380012380014380012380014
100263800282847001300419998010037999607026851973210675186651067915805542410470394110000493769310380037380017138348599392493279168721910953892191028616853800113800111000111002110910100001010000104589564589560045895600200054590131000010000001696460401107941413799441171000010380012380012380012380012380012
10024380035284600000019998000037999607026751973210675186771067915804542410470394110000493769310380033380015138348599472493279168721910953892191028616763800113800111000111002110910100001010000104589824589560045895800199984589561000010000001696460421106742423799441171000010380012380012380012380012380012
10024380012284600000019998001037999607026751973210675186651067915805542410470394110000493769310380039380020138348599392493299168721910953895191028616763802823800111000111002110910100001010000104589884589560045895600199984589561000010000001696460421108042423799441171000010380012380281380014380035380012
10024380048284600000020010000037999807026751973210675186641067915805542410470336110000493769310380026380019138348599392493289168761910953892191028616763800113800351000111002110910100001010000104589574589560045895620200124589561000010000001696460421107842423799441171000010380012380012380012380012380012
10024380011284600000019998001038002007026751975610675186551067915805542410470394110000493769310380027380024138348599392493409168721910953892191028621953800113800111000111002110910100001010000104589564589560045895810200064589561000010000021696460401116441403799441171000010380013380012380012380012380013