Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CASPA (64-bit)

Test 1: uops

Code:

  caspa x0, x1, x2, x3, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 6.002

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 3.006

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 5f | 63 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d1 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
76011333772471011100100921057003288800271933009300630061605751492966632794331417263009200440122006702133217131721710011000100003013463100920061012301499913012001590411544283384022286223793944444620707232403179701408516262300030023283632766331923288533233
760063281124690000010050105842331731026684300630063006159987149301373311133113725300920044008200470143280512982171001100010000300604210052006007300999913012001624211739284004018166223293999443523787132600166291436115053300030023286733243328953293232953
760063289625061100010982005728331810126814300630063006159696149300353304933200725300320044008200470143283812962171001100010000300604210052004007300999913002001632611894181534032466224253851443120666132635170221465515517300030023277532834328563315232877
760063325524591110010092005803327561126709300630063006159997149297563309833243825300620044012200470143282513042171001100010000300604210052004005301099913010001564011711181943794369224683719444020626232392172951499115061300030023275233264332073278532905
760063318324661110010092005848326861127058300630063006159982149297153271333493725300620044008200470143299512882171001100010000300604210072004009300999913012001565412037285034010168228283632443416706932606178871445116411300030023273132821326543273733381
7600632961245101110010092005888327271026618300630063006159952149296833306732884725300620044008200470143325913072171001100010000300604210052004007301099913010001548811581183874056468224033748443717596332479165581406014880300030023316733212328063291233283
760063290924971110010092005608328541127112300630063006159727149297683264333124727300620044008200470213290612892171001100010000300604210042002009300999913002001623712067183933961469225363968443425707332382167771442215305300030023278532921329243305032848
7600633169247111100010112105595328571126544300630063006159753149301443312332851725300620044008200470143281012952171001100010000300604210052004007301199913010001605911839284974108367227444023444030726132466163181460715450300030023293232764331803292733227
760063265124691110010112005929332390126695300630063006159655149302113305733315725300620044008200470143276612993171001100010000300604210052004008301099913010001642411773285484113165227863945444316636132410166821433416206300030023287532682331513271932762
760063319924710111011148198005928326521126881300630063006159917149297183274833155725300620044008200470143289813012171001100010000300604210052004006301099914012001638211890183403979264228423944443816707032445166881435915577300030023295233278327243269633349

Test 2: throughput

Code:

  caspa x0, x1, x2, x3, [x6]
  add x6, x6, 16

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 14.0688

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 24 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7023414062310551010000020541602213101712928014062122563263169040401001010030000101003000050612192537049137508140639140620313062640100302004000030200700001407693561120201100991001000010100100000100324642772421202228220591150502453256149794284399982102933517328330300131011711140414100006603000040100140685140556140577140722140689
702041406551053330310002060030224810192073161405082199028276883240100101003000010100300005061219276704913769014064814064331305684010030200400003020070000140699351112020110099100100001010010000010032466277842055222592054915508341936184148754279699982103073215927230500131011711140524100006613000040100140711140673140757140752140698
702041407491055212200002043227232810168810236140648219612720688944010010100300001010030000506121927780491374961407311406661213063340100302004000030200700001406573601120201100991001000010100100000100324653297119772224320619151182616247501492342772999821027728160291130000131011711140534100006603000040100140796140676140569140681140774
702041405751053303200002051637221910169672921406042208617416862640100101003000010100300005061219284304913755814067314052831305944010030200400003020070000140782360112020110099100100001010010000010032462147222051221932055515128212116242149494276099982102976317733600100131011711140556100006603000040100140699140668140654140697140748
70204140665105230300000203795721561016961048014065721950291768770401001010030000101003000050612192706049137512140599140776313052740100302004000030200700001406913561120201100991001000010100100000100324911669621722222320536150682514322561495042708999821026540206305172000131011711140553100006603000040100140796140631140653140635140709
7020414052210541000000020769452189101704524414057822035312568902401001010030000101003000050612192762049137622140569140634313046740100302004000030200700001406353511120201100991001000010100100000100324481957620542225120541151283219203221494842811999821023133115282147100131011711140576100009903000040100140559140684140752140663140502
7020414062510553302000020566392230101688132681406582215326326878940100101003000010100300005061219236604913750714065214066531305374010030200400003020070000140736356112020110099100100001010010000010032473167802045222492053815431732354210150404279099982103153715536004000131011711140540100006603000040100140585140750140616140542140685
702041406161054330400002046716230510168011961406462213033276882340100101003000010100300005061219280604913752214073314069031305314010030200400003020070000140568351112020110099100100001010010000010032466237912117222382062215125241776234148944273199982103142615222300600131011711140412100006603000040100140664140698140720140691140829
702041406561053330000002045339222910169682441405402197026196870740100101003000010100300005061219257504913751314068514069631304484010030200400003020070000140692351112020110099100100001010010000010032453106212135222152055515412731792238149254272899982102973814318900200131011711140494100009603000040100140658140661140530140601140698
70204140717105310000000206874022361016966264140657218522627688004010010100300001010030000506121922990491374611408171409373130530401003020040000302007000014066736011202011009910010000101001000001003247057482103222862055414418301608256148434274999982102763817329762000131011711140538100006603000040100140667140525140672140691140696

1000 unrolls and 10 iterations

Result (median cycles for code): 14.1105

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9e | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70054141322105711000119827262181116961533214097721980544769234400101001030000100103000050072192455049137906141087141129313113540010300644000030020700001410973551120021109101000010010100000010324398746202422120204641493199841626014990426739998201026920181406000001270117121408681000010103000040010141024140972141037141077141059
7002414103910561100002013719216411656296140934218513447696324001010010300001001030000500721918911491380311411521411943130981400103002040000300207000014109835511200211091010000100101000010103242708091944221222053114962011194025414880426659998201018016197326000001270117111408791000010103000040010141025140948140928141138141152
7002414106210580000002047311221611848320014097021220426569570400101001030000100103000050072192094149138146141223141252313114340010300204000030020700001409823551120021109101000010010100000010324310889195722206205171477561188028214752426309998201021623179277000001270117111408351000010103000040010141058141028141117141076141007
70024141202105600000020141232230116161248141065218404918695824001010010300001001030000500721923031491381571411791413083130923400103002040000300207000014092235511200211091010000100101000000103240887572113220912059314612051188425014762426359998201021929163338000001270117111410021000010103000040010141160141112140988140809140977
700241413061057000000199891923081191274361410202103149476928540010100103000010010300005007219237214913798914103414105231311514001030020400003002070000141204355112002110910100001001010000001032432970320392208120531145781933667214845427079998201021419221425000001318117211411391000010103000040010141188140994141107141060141131
70024141135105700000019962182206116961223614084621290484569271400101001030000100103000050072192419149137838140994140965313095040010300204005930020700001410313551120021109101000010010100000010324508807203222181205131493205418801181480342651999820102632415538200141001270117111409291000010103000040010141164141279141167141069141118
7002414110710552022002004515220011616930414099821542455369057400101001030000100103000050072191995149138123140937140951313100240010300204000030020700001409183551120021109101000010010100000010323749101219432211120532151554032826014819426699998201019320148323000001270117111408991000010103000040010141268141277141016140787141034
7002414107710550000002010026220811704620014099222350443069787400101001030000100103000050072192317149137882140936141132313107840010300204000030020700001410643551120021109101000010010100000010324318927200222197205241487916120825814800426329998201025429198344000001270117111409691001410103000040010141046141057141013141041140941
70024141137105711100020203222235117521836014099622035406069412400101001030000100243000050072192159149137962141178141005313106640010300204000030020700001410593551120021109101000010010100000010324480782194622133205321512739149625214880426869998201014916126232010001270117111411361000010103000040010141087141447141084141109140994
70024141211105600000020121172169113849240140917217604252694524001010010300001001030000500721921271491380291411451411503131036400103002040000300207000014101235511200211091010000100101000000103246816793204222142205421516546161618814908426759998201024030106304000001270117111409361000010103000040010141245141219141332141252140970

Test 3: throughput

Code:

  caspa x0, x1, x2, x3, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 17.0070

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 63 | 69 | 6a | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6021116806912741010002434501001700583783330142100300421003004250022188351149166999016806917007011158039301420020228400562022870098168072530211020110099100100001001000001003004226152436620028002402344392999911434901211172877520180016792610603000030102170071168070170065168064170074
6020617007012590000002402700001700550663330142100300421003004250021931960149165005016806517006411160032301420020228400562022870098170073530211020110099100100001001000001003004226132435720028002435344394999911434811011172777310182216790500003000030102170074168076170071168064170063
602061700741259000000243600102170049366333013910030039100300395002192909014916699001700691680631115802630139502022640052202267009116806953621102011009910010000100100000100300390152435520026002435544379999911434901211173377382282216989410603000030102168067170074168073170074168076
6020616807212730000002434710011700493603330139100300391003003950022191541149164997016807117007011160027301391020226400522022670091170070530211020110099100100001001000001003003928172435620026002402144382999911434201211173377412282216990009603000030102168076170074168070170074170074
602061700701259000000243580101168060306333013910030039100300395002192886114916499201680711700671116001830139002022640052202267009117006453321102011009910010000100100000100300392802436220026002435644436999911435001011173377512282216790509603000030102168078170065168067170065168078
602061680751273000100243590101168060066333013910030039100300395002218966114916699001701581680691115802930139102022640052202267009116807253621102011009910010000100100000100300392602435520026002401544390999911434911011173377402282216790009603000030102170074168073170065168076170065
6020617007312590000002435901011700490063330139100300391003003950021931521149164989016807117007311160027301394020226400522022670091170073533211020110099100100001001000001003003928152435620027002434544381999911434900011173377472282216790506603000030102170074168067170065168073170065
6020617007312590011002402600011700580663330139100300841003003950022189271149164989016807117007311160018301391020226400522022670091170073530211020110099100100001001000001003003926152436020026002401744386999911434601011173377442282216990306603000030102170065168073170070168077170074
602061700701259000000243560102168057277333013910030039100300395002218824114916699901700861680701115802930139112022640052202267009116807253021102011009910010000100100000100300392602436320026002435244385999911435300011173377502282216790509603000030102168076170065168073170065168079
6020616807512740001002434500011680573553330139100300391003003950022188211149165176016826317015911160018301396120226400522022670091170070530211020110099100100001001000001003003901424362200265902435644388999911434801211173377462282216990306603000030102170071168076170070168076170072

1000 unrolls and 10 iterations

Result (median cycles for code): 16.8132

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 63 | 69 | 6a | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
600311680721274001100240281101168054366333004910300391030039502192927114916699001680681700708158041300491020046400522004670091168072530211002110910100001010000010300390281324359200270024019443889999114348102006517749219221679040903000030012170074168075170065168073170071
6002617007012590000002402410001680573883330049103003910300395022189471149164989017007216806981580393004910200744005220046700911680695362110021109101000010100000103003900024360200270024020443919999114351112006517742219221679071663000030012168073170065168070170071168073
60026169894125900000024023010016805908633300491030039103003950221893911491669880168071170064816003730049402004640052200467009117007053521100211091010000101000001030039028024363200260024020443899999114345112006517739219221679010663000030012170071168073170071168070170074
600261700701259000000243560001168057366333004910300411030039502218823114916699001696241680788158036300496120046400522004670091168067536211002110910100001010000010300390261424363200270024019443879999114345110006517752219221679100663000030012170071168070170071168073170065
60026170073125900000024022100017005536733300491030039103003950219292501491669840168068170070816003730049102004640052200467019616807253621100211091010000101000011030039028024353200270024344443809999114380112006517734319221683220663000030012170065168500170074168061170477
600261700641259100001243511000168054370333004910300391030039502218907014916499301700701680698158036300491120046400522004670091168069536211002110910100001010000010300391281524361200260024377443879999114342110006517745219221698960663000030012170074168070170071168073170065
60026170070125900000024023100117005536633300491030040103003950221881511491669900168062170070816004030049002007440052200467009117007052421100211091010000101000001030039001624368200260024026443899999114348100006517746219221699020663000030012168063170072168067170065170071
600261700701259000000240261001170055360333004910300391030039502193144114916699301680711700708160037300491020046400522004670091170070524211002110910100001010000010300390261324363200260024024444379999114346112006517778219221699021063000030012168073170071168064170071168073
600261680721273000000243630001168057306333004910300391030039502219066114916699001680711700708160037300491020046400522004670091170064527211002110910100001010000010300390261424361200260024351443899999114349112006517735219221679041963000030012168073170071168073170065168073
600261700701259100000243631100168053306333004910300391030039502192967114916699001680681700708160037300491020046400522004670091170073524211002110910100001010000010300390261524363200270024353443819999114337012006517749219221679061663000030012170074168067170071168070170074