Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CASP (64-bit)

Test 1: uops

Code:

  casp x0, x1, x2, x3, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 6.002

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 3.003

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 5f | 60 | 61 | 63 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d1 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
760113297225004002100010050105974327730027010300630033003159465001492971132610328606243003200240082004701432880129721710011000100013003002100420020053006999120120015969118903840739651071226133799444214606432439166311404715587300030023296032974330663335932993
7600633061246027023001100501059023266910267243006300630061599340014929691326583277072530062004400820047014330101326217100110001000130030421004200400543005999130120016156117141834639511059224993945444420676432498169081453715248300030023300533171331213294632991
760063284724603002400010050105941327421026732300630063006159814001492973432692328027253006200440082004701432763135621710011000100003003040100520029053009999120120016060119071841739831674224673864444611616432418165761482215704300030023286333002329113291932850
76006329732471281290001232200605032764112671130063003300315946800149297453273332785142530032004400420027007328841289217100110001000030105421017200902123009999152123016100117431844040241267224423996444415626332603168591408715249300030023292332813328553297533004
760063286624803203200010050006016329340026837300630063006159800101492984132789327657253006200640082004701432729132921710011000100013006042100420040073005999130120016044121111829339661265224133915444518626632516171871419515288300030023292632848329713302433293
7600632943245130127100100921059873273311267213003300330031592660014929869326983289662430032002400420027007328081350217100110001000030095421009200601103011999152123216058116111839639751173222943919445126697032452167841419915761300030023292032944326773282833137
76006329742460230220001007010577833091012663030033003300315925000149297773286232829624300320024008200270073294013422171001100010000300304210032002007300599913012001611111864283523954966223793925443217626632792170211448815586300030023302333005333073295632944
76006327482471281201001011200575732832002676430033003300315945000149299793270833104728300620024004200270143290813302171001100010000300304210042002007300799914012001638511873283963974965224773917444320585932431173631479815263300030023271732858328253291432906
760063290024802502400010050005988327300126727300330033006159455001492989032659329946273006200440042002701432927131521710011000100013003042100420020073006999130120016270119681840740021258224124001444621606732396164431426915100300030023306232994328963281633117
7600632872247024021001101001058793303011267303003300330031595150014929957326913284772730032002400420027014329961338317100110001000030096421008200602123010999142123115902117371847039711065226193848444617636632383166481439715793300030023289333104328123280432993

Test 2: throughput

Code:

  casp x0, x1, x2, x3, [x6]
  add x6, x6, 16

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 14.1004

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 20 | 22 | 24 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7023414083510563020026931924732010404161214106523292313912184125401001010030000101003000050552669210704913801514095514107013012631309594010030200400003020070000140502531120201100995758210010000101001000001003249816498212784351293792042815391475995410177075267899982017139996621341313101171114053211000013133000040100141051141017141025140955141105
70204141001105620200270011023434010649740141073233222333121468254010010100300001010030000505526707591049134730140754141050130144313086340100302004000030200700001409705111202011009957759100100001010010000010032491155582131841873087020456152714791004372225665184299982022128917862141413101171114050501000013133000040100140775141005140808140939140974
702041406911055322002734160723432088086641404612300133311205722540100101003000010100300005055266956600491376201409741410401300883130995401003020040000302007000014119951112020110099589171001000010100100000100324971554521148462529425204541541247610043102263247468999820153659133621411413101171114048201000013133000040100141028140995140970140762141004
7020414101210543200023990923012010326788141017230643243121063254010010100300001015630000505506712749049137772141033141028130123313021940100302004000030200700001405735111202011009957585100100001010010000010032493163222092840583185420361148914942503522285950287999820209699111621301413101171114009001000013133000040100140791141025141037140971140830
70204141099105620002279159238120124879761407862305041301216052540261101543000010100300005055067045100491378791410821410211302173130918401003037240000302007000014107352112020110099576891001000010100100000100324951950322748410227408204541477146624841418272503129998201617999262143213101171114040511000013133000040100141040141016141031140994141096
702041409641056300022401892346209046904141012231602325121531254010010100300001010030000505516693338049137901140782140727130151313093440100302004000030200700001410945111202011009957342100100001010010000010032488185432083843842787620519153724981005310199664682399982020421910862132313101171114049821000013133000040100141069141035140463141030141047
702041410461053333002197892400202482824140937228213536120720254010010100300001010030000505516708296049138002140994140787130153313094940100302004000030200700001409885111202011009957947100100001010010000010032500175612297834582789120519151314781000634221105298699982015691914762130313101171114021701000013133000040100140509141045140923141060141059
7020414101310563300026117923572011926712140505229323428121145254010010100300001010030000505526704843049138010140973140555130220313067740100302004000030200700001409955111202011009958663100100001010010000010032507165622081834582625420475152314851000364183955077299982013378916762140013101171114057301000013133000040100141040141105140879140983140546
7020414101910572000224568923172013047756140785228124020122026254010010100300001010030000505526708110049138002140996141054130111313063040100302004000030200700001410465111202011009957484100100001010010000010032502205602141841832684320514150314889965262325146858999821123371010562140413101171113979711000013133000040100140995141044140760140781140724
70204140447105733002247059232320108814812140950228112536121315254025910100300001010030000505506709087049137950141019141068129787313094240100302004000030200700001410145111202011009958323100100001010010000010032488175252088841142863120361154514881004424204734746799982116438912562130413101171114043801000013133000040100141000141072141011140980141004

1000 unrolls and 10 iterations

Result (median cycles for code): 14.0886

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 20 | 22 | 24 | 29 | 2b | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9e | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | c3 | cf | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70054141113105500020037126022971014800622814090422553403411983225406461022830000100103000050103670093114913778201409041408771300723130911400103002040000300207000014087750112002110955860101000010010100000103249412491216784257401062038015402248597622668635379998201683201176205127021701114031311000010103000040010140812140769140798140791140822
70024140826105410000033486022781012400821614087022643304112013925400101001030000100103000050101670099014913778501408841409201301463130903400103002040000300207000014054150112002110957266101000010010100000103249710490218084217402812043515102247576632777493679998202687801636202127011702114030211000010103000040010140847140807140809140785140797
70024140840105510000023207023051012480101761408462255333321206012540010100103000010010300005010266997161491377940140792140871130111313089140010300204000030020700001408525011200211095729610100001001010000010324758437221583908267852038215012249997621692613449998201585601066307127011701114040301000010103000040010140884140942140871140871140887
7002414088910552000002636302284101504091001408352299428311220962540010100103000010010300005010366966491491377940140810140862130127313078740010300204000030020700001408125011200211095466810100001001010000010324838435218284427386432038316122247397632569596629998201367301266208127011702114035011000010103000040010140890140881140898140899140854
700241409081056201000368590230110148801019614081122903393612155825400101001030000100103000050101669436514913771301407971407911300613130806400103002040000300207000014079050112002110955487101000010010100000103247594432182842523916020383151922499976203925636299982018416014963115127011701114031001000010103000040010140808140811140824140813140855
700241408371055100000383470233010148801012014084623024323911882425400101001030000100103000050102669788714913773401408251409271300993130773400103002040000300207000014081350112002110957423101000010010100000103248110487217284370378622042915554249176620251588919998211778101606320127021701114032001000010103000040010140863140829140894140858140856
70024140900105520200026313023011015120921614083722933374512007925400101001030000100103000050102670036414913780731408641407691300183130799400103002040000300207000014080750112002110953980101000010010100000103250310492217284281398552043515414249176620885593589998201653401376210127011701114035301000010103000040010140883140883140885140911140869
70024140883105521200025959022921015120757614098922644385011973225400101001030000100103000050101670256004913780601408671408751300983130828400103002040000300207000014092350112002110955014101000010010100000103248113493217284251382902043915152248376621400489419998201376601216320127011701114010111000010103000040010140897140914140862140902140880
70024140860105621000026447123131015040822814089022573463112098625400101001030000100103000050101669825614913779001408741408821300743130926400103002040000300207000014087850112002110954909101000010010100000103247312487215684878292052043015163248776630939580699998201622301206313127011701114034401000010103000040010140780140833140823140798140801
700241408151055100000249270231110139208216140808230442936121754254001010010300001001030000501016701663149137817014088914086713006831308394001030190400003002070000140899501120021109583961010000100101000001032494249121348413027508204331548224917661915147103999820170040786200127011701114037501000010103000040010140883140867140881140857140935

Test 3: throughput

Code:

  casp x0, x1, x2, x3, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 16.8118

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 63 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
602111701041259110100453521310016808241191394453230197100300991003011750081128181149165022170084168100157321121600333018754492028240164203707028716809711521102011009910010000100100000100301181942401143024521720073014517765269999913516413123111172929629018001677490013133000030102170107168088170120168110170108
602061701051259100000453371301116808371613137596323019910030093100301155008113751114916703716809717010515927811160030302170020282401402028070287170100120211020110099100100001001000001003008503521112372451832006200452056526199991351782020011173429768228421679752213133000030102170101168112170107168108170104
602061703101262122000451041301117008551101394413330199100300871003011150080169761149165004170094168098157283111580253021736702027440156202587027316809816421102011009910010000100100000100303192442391142574519720192335690265738999913533413123011173429668228221695631013133000030102168105170107168109170100168114
602061681001274111000452001100216808570013760832302991003010210030118500811294011491670181680931700991592781116003330216002026840168202667026617011713821102011009910010000100100000100301171842401131174535720071014534965421999913511011023011173429594228221675702013133000030102168110170115168092170107168109
602061680911274111000451071101216808671415139453323020010030099100301175008017388114916501417010416810715729111158046302171211932028240136202767027316810712531102011009910010000100100000100301171942391126044511420073004509565123999913505013103111173429750228221675621013133000030102170108168095170093168102170110
602061701061259101000451941300117008771414138105333019810030096100301175008040866114916702516810017009315928911160033305804048202664015620278702731701061122110201100991001000010010000010030117184201125874520820073004507065024999913515113123011173429773228221695631013133000030102168107170106168108170101168105
6020616810812741110004503513001170081714151394563330203100300931003011850080173651198167025168096170098159280111600403021846542028240136202827028717009512521102011009910010000100100000100301171942411144874516520073004518165292999913501513123211173429687228221695631013133000030102168108170103168102170107168092
60206168108127411000045100130011680769140137600323020010030099100301155008113028114916701616809017008915928011160032302179997202784015620266702731681031122110201100991001000010010000010030117200401145534502920073024505065116999913504713023111173429698220221695591013133000030102168100170101168103170108168101
60206168089127411000045156130011681043001376113230194100300991003011450081137510149167024168105170117159285111600333021847020268401642026670266168100125211020110099100100001001000001003011920041114636453452007300453276541599991353181312311117342983322822167575001303000030102168111170108168110170106168105
6020616811812741100004533813002170102215161394233330198100301031003011850080173750149165019170104168098157283111580303020210202784013220282702871681075821102011009910010000100100000100302181842411131374537620073004535865417999913534113123011173429671228221675722013133000030102168977170095168109170107168092

1000 unrolls and 10 iterations

Result (median cycles for code): 17.0084

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 63 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60031168074127410100453202001700783801394263330097103009610301085080167440149164997017009216809515729681600363011857200784014420078702521681031112110021109101000010100001103008603301126274534720056004534465398999913528520000065229685419221675604003000030012170095168081170095168094170088
6002617009412590000045287200168719378139422333010410300961030108508016203014916501501700861680921572978158028300971920088401162008870238170088111311002110910100001010000010300950001128874533220056004530565408999913532601000065229740319221695500963000030012170095168097170085168090170086
600261700911259000004534220017007808813942833301061030096103010850801665901491619880170093168084159282816003630097482009240144200927023817008410521100211091010000101000001030095033161144604530120062004531765385999913529501000065229700319221695534963000030012168087170086168085170085168094
600261680771274000004531021016807227713759833301061030094103010850811170601491650030170092168092157299815803330112272007840136200787023816809310521100211091010000101000001030095033161118154535020062004533065389999913528421000065229714319221695430993000030012170098168090170094168087170094
60026170094125900000453502101700693781376173330097103008710301085080162870149167005016808817007815928281600293009747200924012020078702031700971052110021109101000010100000103008500161142794533720062004529365418999913529421200165229700219221678970063000030012170094168087170088168094170085
60026168080125900000452922001700703081394253330097103009610301085081125101149167014017009217008715928781600293009717200964014420092702381680961112110021109101000010100000103009300161146154535520062004531065402999913531921200065229705219221695525603000030012170088170085170079170085170095
60026170084127400000453192001700754001394243330097103008710300875081120470149167005017009317008415929081600353011828200784011620088702381680701132110021109101000010100000103009302701123264534820062004532465405999913531721000065229723219221695520963000030012168091168090168078168094168094
60026168093125900000453312001700793881394233330104103009410301025081121660149167014016904316808615729781580313009726200804012020088702381680891112110021109101000010100000103008502701144564532020062004532665384999913529201000065229684219221675600003000030012168100168087168085168081168090
6002616809612590000045334010168074380137587333010610300961030108508111994114916700701700841700941592888160038301182102008840144200927023816808911121100211091010000101000001030085027161146134533620062004531265397999913529721200065229691319221675634963000030012168096168094168096168097168090
60026168068125900110453062001700780771394233330104103009410300875080167341149165007016807616809315729581580313011835200924011620092702381700931112110021109101000010100001103009500161144584533520062204532165423999913531121200065229713219221695372963000030012170095170091170095170085170079