Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CASP (32-bit)

Test 1: uops

Code:

  casp w0, w1, w2, w3, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 6.002

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 3.006

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f1e1f2223243a3f464951schedule uop (52)schedule ldst uop (55)dispatch ldst uop (58)simd uops in schedulers (5a)5f606163696a6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst int load (95)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2c9cfd0d1d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
760143418825603101270011038210055643388600278663006300630091598231614930999337013394382630062006401220047014340691342217100110001000030144021007200601830149991620232015841105773795135561074236083576443720646233045190441652717479300030023406434149340073409334082
7600634086254127112511013222100532533884012803230093009300916035410149311033380034187725300620044008200470213398112962171001100010000301054010082008021330099991421031015127106972798237631163236783547443719605832975188611609517461300030023417234076340913403834044
7600634098254127113210013042100525233955002789530093006300915988100614931063337483412882530062004400820067014340611339217100110001000030114001011200601103014999142123101506910633279623609116423680360444338566333044189361637517507300030023405434065340343404934116
76006341912551241129100134821005430339641027673300630063006159931000149309273366634137891300920044012200470143398413362171001100010001301144210072008021430149991621231015378109452799435931162236933785443617596533348188791642017374300030023411134396340303411233709
760063374025503400250001132010053533392711279273006300330061599970014930961337933415172530062004400820027021340101325217100110001000030030421004200600113009999150120001501910875379733601961236583563443722546033006188371637117460300030023402334196340993404134173
76006340302560260023091132601005339339261127832300930093006159911100149310043372834176724300920064012200470143394713522171001100010000300604210092004001130059991501200015029106705813635531458236023653444016626233010192141656917128300030023413634074341013412934042
76006340912650310024000132101005328338831027956300930063006159799101493099533797341046253006200640042002701434065131721710011000100003006060100820040011301499913012006015052106382806136111063236083619444317546333286191271640917588300030023405934038340843406534096
7600634009254025002800013160100530933851112790230063006300315991816149309733379434043826300620044012200670213400913352171001100010000300900310052006001130139991601200015122107653799536531162236763705444611615933083192281644217515300030023408134228342943412933997
7600634123255022002301113170100534333923002786230063006300616035400149309953370634204725300620044012200470213418813042171001100010000300604010042006001130069991501200014992107452793035601667236803623444526666433014185791607417603300030023420734112339873408934014
76006340852540200026011129901005280339110127960300930093009160521000149311063378634149826300920024004200670213398713212171001100010000300606310082004001030149991501200015014107151796235581257235723635444012625932972189031657117267300030023404634210340803408933973

Test 2: throughput

Code:

  casp w0, w1, w2, w3, [x6]
  add x6, x6, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 14.0250

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0e0f18191e1f2022293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafatomic or exclusive succ (b3)atomic or exclusive fail (b4)b6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2c3cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
702191402491050110000024601079317126314014021877247685120976254010010100300001010030000505526669197049137164140217140240129406313014640100302004000030200700001402755011202011009957851100100001010010000010030927218967083445301742023125038613425302513249998232099402726220213101171113972501000010103000040100140249140222140235140211140244
7020414026710512000000211140788175283152140222801476711204792540100101003000010100300005055266670380491371351402451402251294273130125401003020040000302007000014025550112020110099575511001000010100100000100309031920971582257306792025025110120342169650761999825209549246461401413101171113969101000013133000040100140280140292140182140255140241
702041402611050333010020157979817048214414022979407875120410254010010100300001010030000505506671095049137166140253140280129354313014440100302004022830200700001402465011202011009958056100100001010010000010030854161917248372233311202402553862302212051443999824215019262621601313101171113969501000013133000040100140238140268140256140211140216
70204140274105033000002306807501680761041402467716888111976625401001010030000101003000050551666941414913715614025214024912939331301414010030200400003020070000140231501120201100995792410010000101001000001003088031847288354533412202382453891342025851078999824190980280621601413101171113972501000010103000040100140230140254140209140228140256
702041402651051200000023978078316887211214019076358287120574254010010100300001010030000505506668697049137163140213140219129328313013940100302004000030367700001402395011202011009957036100100001010010000010030892219868183401322062022825038853420599511379998231750102846215153013101171113971501000010103000040100140232140250140210140257140238
70205140259105130000002212307851696721401402337367758412057525401001010030000101003000050550666909004913713014023614021412938731301654010030200400003020070000140234501120201100995777310010000101001000001003088531956968300432110202312593875441950551536999823205130301631731713100171113972601000010103000040100140211140218140241140235140237
70204140235105030000002215107501680771321402167866827212038525401001010030000101003000050552666926804913711314027814017312934131301074010030200400003020070000140240501120201100995765510010000101001000001003092551826898445032804202402213871102229875203499982418599031962150713101171113975201000010103000040100140258140249140254140221140233
702041402621050330000022966080516887814414021677368072120509254010010100300001010030000505526668298049137117140264140242129389313012240100302004000030200700001402905011202011009956976100100001010010000010030870220269584112302312024022598972622021533989998232129602856217101013101171113973801000010103000040100140242140218140234140245140268
7020414024810504000000222200811168871140140208754486811200752540100101003000010100301535055266682270491371351402411402421293643130104401003020040000302007000014028150112020110099575241001000010100100000100308764202664832002986920217257386532215525060299982419760029362170713101171113968801000010103000040100140234140243140257140249140284
70204140251108730000002276908181744681401402327646768111988725401001010030000101003000050552667025504913715514059414021812931331301244010030200400003020070000140260501120201100995615810010000101001000001003089531896528350531691202412562186746214945085899982418324028962187513101171113970301000010103000040100140254140254140277140265140216

1000 unrolls and 10 iterations

Result (median cycles for code): 14.0249

retire uop (01)cycle (02)030e0f1e2022293a3e3f404346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)67696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafatomic or exclusive succ (b3)atomic or exclusive fail (b4)b6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)c2c3branch cond mispred nonspec (c5)branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
7003914040910500022105821177670801402118211410210211977225400101001030000100103000050102667124104913716501402361402721294633130295400103002040000300207000014034950112002110956591101000010010100000103089601807028414831970202352723871362188549900999824221040305620190012705174413972201000010103000040010140260140274140221140282140257
700241402591050002158877517287212814020877813948912071825400101001030000100103000050101666815304913713201402201402451294233130199400103002040000300207000014022350112002110957248101000010010100000103091001956778352737691202422763955822597054881999824197020304621550012705177613971501000010103000040010140247140233140247140245140232
70024140252105100273208071704831161402317872095120119904254001010010300001001030000501006669695049137169014024814022712943131302224001030020400003002070000140255501120021109573101010000100101000001030942018874383984349852023327338973821884537499998241954403006221100012708174613967501000010103000040010140233140232140227140233140270
70024140268105100227508161784731561402258351310412012066325400101001030000100103000050102667018004913717701402291402301294573130214400103002040000300207000014026750112002110957048101000010010100000103090601936728307428855202382773921602288251118999824217630298620140012706176613970301000010103000040010140271140283140269140280140260
700241402181051002203080717768711614026183321103117120338254001010010300001001030000501026669265049137154014027414026212945031302504001030020400003002070000140242501120021109569891010000100101000001030882019872983162339182024429815927362064452017999823236320314623110012705176613972301000010103000040010140279140268140261140232140280
70024140234105000219638151768561521402178171810810012074925400101001030000100103000050102666842204913718101405451402751294343130178400103002040000300207000014020950112002110957080101000010010100001103093402027118378232781202432749895362168756908999824238260306620130012706174413973701000010103000040010140216140246140260140275140325
70024140222105000219188201704741081402368071810611711983125400101001030105100103000050102666830404913713301402261402361294433130222406313002040000300207000014027650112002110957220101000010010100000103091001806898328429682202042543490340204915256899982425999029562950012706177613973601000010103000040010140238140245140256140242140263
700241402561051002607484817927310014023080226971101206082540010100103000010010300005010266689060491371860140245140241129443313023440010300204000030020700001402525011200211095715810100001001010000010309660202698832973197620200285394942229045110999982419249031662020012705175613973801000010103000040010140242140262140236140281140239
700241402381050002233687617205615614023784815941041195982540010100103000010010300005010266685690491371550140228140254129431313017040010300204000030020700001402485011200211095526810100001001010000010309500193670835153041920235277392134218175077599982420434031162620012706176513970621000010103000040010140279140242140228140212140165
70024140270105100335298581768839614021782616115112120657254001010064300001001030000501016668260049137190014021514027512942331302224001030020400003002070000140234501120021109577181010000100101000001030928018171483555339482023527521879782077850457999823223070291625150012706175513974601000010103000040010140270140277140249140255140288

Test 3: throughput

Code:

  casp w0, w1, w2, w3, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 17.0084

retire uop (01)cycle (02)030e0f1e1f22233a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6063696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd1d5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
6021117009812590045331200017007841010139421323018710030099100301175008111999114916501701700991680991573221216003630216002026240156202787028716809911521102011009910010000100100000100300960412111270845338200620453216539499991353222101117292960711800169565301303000030102170096168103170086168084170101
602061700851259014532100001700794111013941932301991003009910030117500801686911491670040170084168098157276111580243021600202784029220278702731680841152110201100991001000010010000010030099002111423645321200620453186541599991353092121117342964922822167567501003000030102170096168095170100168088170082
6020617009512590045342200117006940013856633301971003009710030111500811259211491670140168086170088159270111600233021100202744015620274702591701001052110201100991001000010010000010030085035011423245358200560453146542399991353262121117342960322822167564501303000030102168089170096168102170100168098
60206168097131900453442001170086408139422323018710030099100301115008016740014916704001681011700841592751116001630216002027840148202747027317010010521102011009910010000100100000100300850021114231453282006204533165393999913532421211173429613228221675493010103000030102170094168081170096168100170094
602061701001259004533420011680794881394263230197100300871003011150080166851149164988017009816808715927211160028302170020258401162027470203170084115211020110099100100001001000001003009804121114227453272006204530465379999913530921211173429603228221695420013103000030102168093170089168084170086168078
602061680921274004531721011680840111113943633301871003009710030117500801664101491650170170098168086157284111580173021700202784014820278702591680991152110201100991001000010010000010030098041211130404534020056045336653919999135290012111734296472282216756250003000030102170096168100170098168094170086
602061701001259004531500011700794781394303330197100300971003011750080167120149165026017009416809615726711158020301870020278401482036670259168097120211020110099100100001001000001003009604121114239453082006204533065373999913531521211173429770228221695564013103000030102168102170089168078170096168096
6020616809112740045308200117008548813759633301971003009710030087500811283511491650150170099168094159269111600233021600202784012020258702591700951052110201100991001000010010000010030085002111422745309200620454866540199991353132001117342961422822167563010133000030102168085170096168097170100170089
602061700941259004534220011700780001394253330199100300871003011150080167741149167015016808317009515927811160022302170020274401482027470259170088115211020110099100100001001000001003009904121114524453282006204528965412999913531221011173429675228221695455010113000030102168088170096168094170095168090
602061680971274104531020011680874071375763330197100300971003011150080167741149167015016809817008515927711160022301870020274401482027470203170076115211020110099100100001001000011003009900011418945336200620453056540799991353062001117342960622822167563000103000030102168089170095168097170079168097

1000 unrolls and 10 iterations

Result (median cycles for code): 16.8104

retire uop (01)cycle (02)03090e0f1e1f2223243f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6063696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss st (a2)l1d cache miss ld (a3)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)branch cond mispred nonspec (c5)branch mispred nonspec (cb)cfd1d5map dispatch bubble (d6)ddfetch restart (de)e0e7eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
600341700881259101453272000168075481013944232301071030100103011650811199711491670191700771680921572928160035301270020098401162009870273170085115211002110910100001010000110300854121112610453412006204533965406999913529000200652296013193316955930103000030012168097170095168078168069168098
6002616809012741104531420001700834117137676333010710300871030117508016758114916501417008416809915729881580393012600200984015620078702731680791152110021109101000010100000103009841211149414532620062045312653969999135319202006522974931933169547313103000030012170094168081170101168095168100
600261680971274000453302000170069470137819323010910301021030117508112834014916700416809517009915928781600313009710200984015620098702731700931052110021109101000010100000103009635211130854533520062045319654209999135316200006522959031933169544410103000030012168094170095168091168144168069
60026168102127400045315200016808740101379027930109103009710301145081128440149167008168091170093159285171580383012700200984015620098702731680951152110021109101000010100000103009941211148394558120116050442654969999135320212006522977231933167564413103000030012170104168099170096168100168103
600261680771274000453442000168087410013942933301081030097103011750801667801491650171700821680871572998160035301210020098401562007870203170094115211002110910100001010000010300983521114230453462006204528965426999913530700000652297663193316956730103000030012170100168085170094168088168103
60026168099127400045320210016808049813762933300991030099103008750811262811491670111680921700941592908160034300970020078401162009470203170078115211002110910100001010000010300844121112368453162006204529465373999913529300200652296542193316955450103000030012168103170101168091168109168085
600261680991274000453032000170075410013763733301111030101103023150811204201491670141700881680931572988158038301270020098401562007870273168099115211002110910100001010000010300994121113147453082005604529665385999913532800000652296463193316756001003000030012170116170087168098170095168113
6002617008512590004531400001680840081394313230097103009910301165080153830149165019170098168111157298815803330097002007840116200987027317008011521100211091010000101000001030085350113205453572006204533865515999913530021200652296613193316756740103000030012170100168094170079168091170100
60026170094125900045299010017007040813944233301071030099103011650801708611491619961700981680981573018158036301270020078401562009870273168087105211002110910100001010000010300853521114227453322006204530465535999913529800000652296273193316755200103000030012168068170094168085170092170079
60026170100125900045317210017007840013760632301121030097103011150811262501491670141680861700781592848160038301210120094401482009470203168088105211002110910100001010000110300963521113165453292006204530265416999913531121200652296724193216755531303000030012170089168087170086168088170094