Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CAS (64-bit)

Test 1: uops

Code:

  cas x0, x1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 4.001

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 3.006

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 5f | 60 | 61 | 63 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | cf | d0 | d1 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
74007342322551261811001046100053203393401221323003300630092291011001493082533760341426263009100230091002601234036272221710011000100003006421003200200430069991121491610690079913527570237693578444016465532971190171670917766300010013414334005341873413534172
74005340982560172100301021000054863390800220993006300930062285120014930906337833424214263006100330061001601234021272121710011000100003003021001200600130079991101502410613179813540757238383535443718475732957190931650917578300010013420834105341253414634159
74005340742550181900001001000053313393500220373003300630062291610014931066337583412282530091003300310026018341192722217100110001000030064310032006001300699911014900105080799035901048237993599443910525833016192861641317821300010013406334110340363415934142
7400534054256017220000100101005245339421022037300930033009229073001493102633774340628253009100230061003601234048268421710011000100003003401002200400130069991101513210579079873601558239173846444513545833355189361641517670300010013432334329342323435034239
7400534227265026190000101300005250339050022047300330033009229444001493100333772341377263009100330091003601234053269921710011000100003003421002200600130069991101478410727080083516552237863639443814605732948190381632517507300010013415134053341223406434052
7400534088255020220000100301005347339480022026300630093006229548001493109733713340807263003100230061003601233989270021710011000100003006001002200400130069991121491410760080563549749237993533443714545433027191151651117597300010013403633999341043409033987
7400534016256020190041103401005288339030022046300930093003229492001493096633772340066253006100230031002601234019268921710011000100003009001001200400130069991021498310731079493506651237873501443812495333000189231659017601300010013408734052341143402634108
7400534049255017190000100101005318339780022217300930063003229064001493095933769340327253006100330061001601234084270521710011000100003006421002200600130069991121488110568079153534944238053643444110464332990192191656817784300010013407134086340353400934126
7400534138255012100000100311005363340101022027300630063009228974001493105633806340737253003100330061002601234002266821710011000100003009401002200400130069991101493710596079703555115323788354044413505332996191121682617559300010013400434221342113411733956
7400534072254011201000100301005370338320022068300930063003229463001493103033734340577263006100330091001601234016272221710011000100013006601003200410130069991121495310771079513600750238043525443613454733007191511641817780300010013414333984340923400134098

Test 2: throughput

Code:

  cas x0, x1, [x6]
  add x6, x6, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0067

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 24 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
502143007622533300005746080210079211914030052790252152709972254010210155300001010030000521501402651024926987300673040819881032005040100204243000020200600003006774112020110099010010000101001000001003111292421123123110562041123686770440128331298999823443927948094200013091171130064100016630000201003006830068300683006830068
502043006722544040005996379410074411812430052761221902269798254010210102300001010030000518391402578014926987300643006719884032004940100202003000020200600003006772112020110099010010000101001000001003086914249112501106020293253187942132331299999823143417477698000013101171130064100006630000201003006830068300683006830068
502043006722540040005805181210066413510030052770261892019743254010110104300001010030000518411402617004926987300673006719882032005040100202003000020200600003006772112020110099010010000101001000001003090810263121601095620300289860042130831294999823542638117284300013101161130064100016630000201003006830079307433007930068
502043006722540000045847279810069620140300527852421023399252540101101013000010100300005183314027380049269873006430067198840320049401002020030000202006000030067741120201100990100100001010010000010030922102221177011006202872721865361236313359998218449675672100400013101171130064100026630000201003006830068300683006830068
502043006722544440005890381310067291120300527242624024397962540103101033000010100300005184214027150049269873006730067198820320049401002020030000202006000030067721120201100990100100001010010000010030877132281120010981203002351034134031332999822841718007570400013101171130064100016630000201003006830068300683006830068
5020430067226331000058172790100776311483005279030201231965925401011010130000101003000051822140262201492698730064300671987803200504010020200300002020060000300677411202011009901001000010100100000100308717233115601098520295270874048116431281999823341416897670400013091171130064100006630000201003006830068302233006830068
50204300672254000000591068181007201139230052760282402189532254010110101300001010030000518121402668004926987300643006719884032004940100202003000020200600003006774112020110099010010000101001000001003091372461074011009203252638700301287313319998230397180080120000013101161130064100006630000201003006930068300683006830069
502043006722540040005898279210071237220300528082424625698762540101101023000010100300005183714027160049269873006730067198820320049401002020030000202006000030067741120201100990100100001010010000010030894152321143011024202902581877321348313039998226420669585100200013101171130065100026630000201003006830068300683006830068
50204300672264001100579447801007044414430052787272252349966254010010100300001010030000518121402711014926987300673006719884032004940100202003000020200600003006774112020110099010010000101001000001003086313230109401101420294246188348127931294999822337608437789000013101171130064100026630000201003006930068300683006830069
50204300672264044000591537851006881361363005280025212232100022540102101003000010100300005187214026131049269873006730067198840320049401002020030000202006000030067741120201100990100100001010010000010030858122511170011005203092411853301313312739998230415270279131400013101171130064100016630000201003006830068300683006830068

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0076

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 24 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
500343007622522000000587211815200712341003006179823209242958325400111001130000100103000051398140321201492699603007630076198910320058400102002030000200206000030076751120021109010100001001010000010309121522210360109172030424090603013263130199982273761176460191120012701161130073100019930000200103007730077300773007730077
500243007622530000000588312773100728301403006175827268221987625400131001030000100103000051404140323401492699603007630076198920320058400102002030000200206000030076771120021109010100001001010000010308902521310361109892028929200361141312969998227344127316419010012701171130074100009930000200103008830077300773007730077
500243007622531000000611520808100728271323006177929195225998125400101001030000100103000051326140316102492699603007330076198930320059400102002030000200206000030076751120021109010100001001010000010309011920710960109742028124700321216312279998226329127204919040012702171130073100019930000200103007730077300773007830077
5002430076225210100005862247871007523214030061772242052159931254001010011300001001030000517471403333004926996030073300761989103200584001020020300002002060000300767511200221090101000010010100000103093815223111012311054203162578960381206312669998222373126596619020012701171130073100009930000200103007730077300773007730077
500243007622533020000589114787100752126136300617762119822796202540011100103000010010300005140114032070049269960300763007619893032005840010200203000020020600003007677112002110901010000100101000001030854162241028110938202952558580341229311909998223337136396118020012701171130073100009930000200103007730077300773007730077
500243007622632000000595715822100800401563006277324232241997325400121001030000100103000051421140306501492699603007330076198920320058400102002030000200206000030076761120021109010100001001010000010309401623410830109912029328720281160312849998221378137446917820012701171130073100009930000200103007830077300773007730078
5002430076225220200005834178001007041251483006179232209285954725400101001230000100103000051357140319100492699603007630076198910320058400102002030000200206000030076781120021109010100001001010000310309011722611200109172029026370361185312319998219337127704818010112702171230073100009930000200103007730077300773007730077
50024300762252100000058481582310074437144300617881721022699092540012100123000010010300005144514032900349269960300733007619893032005840010200203000020020600003007676112002110901010000100101000001030948182161089010943203082819240421143312179998223366126956416320012701161130073100039930000200103007730077300783024930077
50024300762252000000058161579710076026236300617853020719598552540010100123000010118300005138414031170049269960300763007619893032005840010200203000020020600003007679112002110901010000100101000001030927191811033011022203082449080341220312399998230359146826916040012701161130073100019930000200103007730077300773007730077
50024300762253300000060201579510071242144300627602020520899942540012100123000010010300005141614032730049269960300763007619893032005840010200203000020020600003007675112002110901010000100101000001030928162311031010930202762768860341265313199998222341126776019040012702171130073100049930000200103007730077300773007730077

Test 3: throughput

Code:

  cas x0, x1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0110

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 63 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | st memory order violation nonspec (c4) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d0 | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e2 | e7 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40728301202250000001293121013009508714383435367501873663518958078964838715010492702730113301163125456082225775632297218208996123020875123522301182552110201100991001000010010000010035588183430122047721290021004541102999917081002001346112662005564932300961787090030000106143009930102301143012230114
407183012222500000012737200330095200153332813676720736541202580179718398240104927039301013012028944411823957544309669172107361554209621239723010124321102011009910010000100100000100349910004200172124900969541272999917114212001368112666905136681715301101764166030000106113010830111301023009930109
40716301112260000001254620013009518615043255364602053632819757520940836459010492703930128301292800456383185755130937413208776165320860121794301012342110201100991001000010010000010035186034156202972129500962141177999917068212001375112667607364533301201796006030000106063011630102301023010230115
4071430123225000000131712000300921961441336036615182363241855772810138299650119827051301063012627984487823557238295270982038760732206781228143011323421102011009910010000100100000100351680281432055521438101031541181999917188212001451124666401166571919301071899296030000106123011430111301113009930111
407153010122500000012994210130095077153934333650917937253189577499768411720104927024301293011728904618829257520305769282104061155209631233603011329821102011009910010000100100000100352921734285203342134401102094118999991762614123113221126654025126711920301041768299030000106103012030109301193011930119
4071430119226011100132991400230101912101575341936565204362392095757910148302450114927030301043010130274597831457474326669212105661620208251227183010129021102011009910010000100100001100355470001020385212820010010413289999172330120014002136713101367433301101830000030000106083011130100301113011230114
407163011022500000012966200030090208146533923695820736351208572111027829300010492704630110301122861457180125787227957134209756138920898123060301012402110201100991001000010010000010035185034161220228212630010274411089999171872120013391126680011566833301071862090030000106113010230102301143009930102
4070730110225000000124890101230086007148332593647916536811185575399738378940104927030300953011031224463797957152321770452079761068209561228083011324221102011009910010000100100000100350480340620150212590010079425329999172380120013561126601013565854300951812096030000106133010830111301023011630111
40720301102250000001282620073009711010152834403681620736946199581871036836443010492701830107300982928446482915734330317254208576132620904123318301132392110201100991001000010010000110035384034148201612136600103194091399991701920200133211267090121268935301151819090030000106103009930111301003009930111
4071330101226000000126702000300921681408357337007196365142025826610068349720104927018301113011629284659817556941339175462065161029210011236483010129221102011009910010000100100000100351240341515197652125800101084109899991726921200136621166040163650315301081827066030000106143011430120301113010230102

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0125

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 63 | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | st memory order violation nonspec (c4) | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e2 | e7 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
405433013022510100002142913001301240171695735703944119394452060921100792920010492704703013330123180949596316610672225882170464665215751297983013630721100211091010000101000011037851194040827940219260119460499979999113590141231107663014319600141230120188901313030000105263013130137301243012630139
4054130123226111000021537141003010930010023571396272039397216121610079419301049270530301253010918104942635461215192547217206461421633129762301312572110021109101000010100000103785019410727995219350019200497929999113587131230106662933814607151330121187101313030000105273013130129301383012930125
4054230133226110000021647130003011940169513566395772139371226130610579325211049270490301083013018084894626661137210530216966470121571129642307302532110021109101000010100000103782119414116278972191153219355499559999113705120230111162914015605121530128192701313030000105283012630108301173013630135
4054430127226111000021528120003008941717103536323938922394662260988105792307110492704003011730106182649216288609312025292162064602215881296063012031321100211091010000101000011037892204201227865219330019368499939999113591141231109363063913631111330120193011313030000105283012430134301383012530124
40539301342251110000215911410030112515169523537394782239405226088810079937611049270440301303012718704860625461135216620216306468621602129570301232612110021109101000010100000103795220424102796921911011914749646999911356812023010856272401265913143012118902130030000105293013130124301243012430114
4054230110226111000021379140013009791616100535853938821395342060960105791002110492704303012530123182249366416610871965692157064755217111294443012524221100211091010000101000001037975190421427885218160219138496559999113594131232108462943815592121530120190211313030000105293012830124301243012530128
40542301252251010000215341400030109617181047371539457213937320609851057919031104927049030124301251783486364246105520554721609646682170312942030127259211002110910100001010000110379192142431428015218380319250497759999113515141231108362793411594121330119188711313030000105283012630126301243012430135
40542301232251010000214801410130112900964355239637213937421610591057942731104927044030107301301784492762036118920753521648646892156212943830123307211002110910100001010000010378012142437278802203301192734996199991135741312311077632538146131514301051917200030000105283012730124301343012430134
4054330123226110110021437140003011041809703632395142139326216089010079449111049270520301303012517854902640161399215558216906476721580129936301252542110021109101000010100000103783321400527917219270019323499699999113576121231110263084017597131330119192821313030000105273012830126301243012830109
405453012822610000002143712100301016015100535943939122394432161085115797078110492704803012530125183749446269610632135652162564590216091295703012526021100211091010000101000001037868204241142781821915011930449752999911369114123111316285361460416153012018923013030000105283013630124301243013030130