Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CASPA (32-bit)

Test 1: uops

Code:

  caspa w0, w1, w2, w3, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 6.002

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 3.006

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 5f | 60 | 61 | 63 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d1 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
760113418725811261125100001019210540334071278943006300630061592901614931114339123416372530032004400420047007339841321217100110001000130115421006200702103009999142103215776107412812836701186236843699444721595733280189101637717559300030023421434313342133414834289
7600634175256012811221100010092105473340382805330063003300915982000149311303384934167725300620044008200470143412613522171001100010000301044210072006018300999914210311503210731180203621964238143642444622556433088188151624617364300030023410734159341843417634239
7600633960255012811261000010112105354339362808730063006300615991000149311053392934180725300620044008200270143414513412171001100010000301034010072006018301099913212301520010895180543779961236933612444619636033023190341640117420300030023413534066341093417434132
76006340772560129112700000100620054173398027992300630063006160494001493108133782341127253006200440082004701434050136421710011000100003012542100720060073008999142123315321109411797236061457237013714444022605833024185481611517598300030023419034323340083408934141
760063403125701261123100001007210535733937279743003300630031598900014931132339483425872530062004400820047007341771333217100110001000030125001007200602123010999162123114950111712797236461152236633627444017535433355184681626017331300030023413234173342403413734158
76006341862550122113010000100920054363404328007300330063006159840001493109233851340777253006200440082004701433991132221710011000100003010500100720060211301499913200321503010579180153598953236223869443821575933065188811641717651300030023401734125342783418534172
760063418925601231122100001008210554934041280173006300630061598820014931087337963407972530092004400420047014340631322217100110001000130104401006200601133006999132123214963106333799436481057237143615443722575933099188301623817603300030023414534127341583411034138
76006339892550121112410000100921054603406028108300630063006159280001493109233894341897253006200440082002702134140134121710011000100003011540100820081183014999132003114965106632805835311259234853696443820645733021188641631917390300030023413134192342093418534161
76006341672550127012200000101820055493403327935300930063006159820061493106533991341677253006200640082004701434027132221710011000100003011542100520060293014999132123115512109151800037431560236893680443816556032999186111591417379300030023418534239342003432134118
76006341192570123012510000100720052663402827998300630063006159300001493103033912343237253006200440082004701433975133521710011000100003009342100720060293009999152123115060106002796635431251236103570444123656133041189831617717481300030023411634059340703420034114

Test 2: throughput

Code:

  caspa w0, w1, w2, w3, [x6]
  add x6, x6, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 14.0170

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 24 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7021914026510500000000012040127811003520140140179764897100683954010010100300001010030000506121924340491370970140155140158313008940100302004000030200700001401343551120201100991001000010100100000100308600594618209302029319584704011060412699998211038827240484007013102171114008510000101003000040100140135140154140153140192140170
702041401811050000000001217822765100600229214015072159092684684010010100300001010030000506121924240491370720140139140187313002640100302004000030200700001402043551120201100991001000010100100000100308800557566209802029920488703611107412859998211038737213382002013101171114006510000101003000040100140205140173140158140165140154
7020414014010500000000011995985510056012081401577661094876846940100101003000010100300005061219231404913706001401591402123130056401003020040000302007000014018035511202011009910010000101001000001003088005505692094820300230861011011105411879998211036737219444005013101171114003010000101003000040100140163140162140170140173140144
702041401551050000000001185716799100424517614019675369487684564010010100300001010030000506121923850491371090140180140154313003540100302004000030200700001401243552120201100991001000010100100000100308700611577210042029522584403211024413029998211039030237432008013101171114003710000101003000040100140164140135140180140154140137
702041401411050000010001178315818100616528814017877887692685194010010100300001010030000506121924181491370720140152140136313002840100302004000030200700001401643551120201100991001000010100100000100308363649573209362027920386508011126412619998211035336224458302013101171114001510000101003000040100140172140165140173140140140172
7020414018310501010000012004247751005125120140156757107885685094010010100300001010030000506121923360491370780140155140151313003040100302004000030200700001401673551120201100991001000010100100000100308374597548209612029823388106410976412399998211039238247392318013101171114002110000101003000040100140155140135140149140168140164
70204140181105010100000119552776210068811921401557370787768713401001010030000101003000050612192024049137140014014414014231300664010030200400003020070000140161355112020110099100100001010010000010030872351850520873202952178170761093341227999821103223921044832110013101171114010410000101003000040100140163140135140163140162140194
7020414016710500000000011888177801005680961401867720949668698401001010030000101003000050612192001049137104014018114017231300924010030200400003020070000140198355112020110099100100001010010000010030877053253020909202632048690421103141172999821103483422246801513013101171114003910000101003000040100140171140147140204140178140170
702041401361049000000001179924805100744092140132769276806871040100101003000010100300005061219200214913706201401401401503130028401003020040000302007000014014035521202011009910010000101001000001003084505835182088720309223838030110474126499982110337322164400011013101171114002910000101003000040100140203140146140197140188140155
7020414013010500000000012126247771005120252140137783286926856540100101143000010114300005061219197704913710101401371401683130076401003020040000302007000014018935511202011009910010000101001000001003079035535272088320293203865074110524121899982110325432323900615013101171114006810000101003000040100140130140153140202140163140216

1000 unrolls and 10 iterations

Result (median cycles for code): 14.0152

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 24 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70039140194105000000000118535277410664212414016077236663689464001010010300001001030000500721915171491370760140157140141313012440010300204000030020700001401833511120021109101000010010100000103090405705592092720346224909040109974125099982110275502444073040012702172414004310000663000040010140134140181140143140148140206
700241401231049000000011203944764107363114414014975646872689404001010024300001001030000500721914541491370880140154140172313011640010300204000030020700001401313511120021109101000010010100001103089305845752103620327241885046110744119899982110319432464520310012702172214001010000663000040010140181140129140156140155140161
7002414015210501000000011874527901060821441401477903706468996400101001030000100103000050072191566149137069014015014014512130136400103002040000300207000014014035111200211091010000100101000001030862563055520941203462118180341102941249999821103035826951530130012702172214005010000663000040010140210140175140133140169140140
70024140167105000000001118664880910720512014010178345966689474001010010300001001030000500721915631491370740140161140148313012040010300204000030020700001401493511120021109101000010010100000103087305675792093720335243873026109814121999982110282452554460020012702172214005710000663000040010140134140151140170140116140114
70024140107104900000000119457379810720268140112772335466921640010100103000010010300005007219141814913708631401591401473130116400103002040000300207000014016235111200211091010000100101000001030983060055621055203432458990421112041223999821102877222143330150012702172214001910000663000040010140135140158140136140130140131
7002414013010500000000011856747821075239214011578736167688664001010010300001001030000500721915471491370730140125140143313014140010300204000030020700001401473511120021109101000010010100000103087205535592097120340226911036110494125199982110263482734910350012702172214002110000663000040010140153140125140153140138140159
70024140153105000000000117864979110616212014014178667174689934001010010300001001030000500721915120491370780140133140129313012340010300204000030020700001401483511120021109101000010010100000103091805925592094820359262895032110324127199982110311552564360020012702172214006710000663000040010140149140147140159140157140150
70024140150105000000000118245379010712210814015178436174689564001010010300001001030000500721915971491370450140136140142313013140010300204000030020700001401803512120021109101000010010100001103086405945442097220352223886038110024116199982110293482224550350012702172214001210000663000040010140129140170140159140116140158
700241401431049000000001187748786106962144140164757771736905040010100103004410010300005007219151314913706501401301401473130084400103002040000300207000014012735111200211091010000100101000001030892051252220962203182288620401111441207999821103014325441506130012702173214002910000663000040010140142140143140162140157140168
70024140185104900000000119526283510632210014010475447668690334001010010300001001030000500721915770491371040140170140173313012740010300204000030020700001401343511120021109101000010010100000103085805495332097320345234905036111484125599982110266482534210690012702172214001010000663000040010140167140166140137140149140127

Test 3: throughput

Code:

  caspa w0, w1, w2, w3, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 17.0070

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 63 | 69 | 6a | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60211170070125900000000243480100168069366333014210030042100300425002218957014916698401680711700641116003630142102022840056202287009817007352421102011009910010000100100000100300421728152436520028002435444393999911435011200111727773401800169924000603000030102168073168064168390170087168070
602071700701259000000002434600021680503703330139100300391003003950021929160149164995017006316807111158017301392120226400522022670091168072536211020210099100100001001000001003003900142435520026002400644379999911435001200111733774422822169894009003000030102170065168067168096170068168076
60206168072127400000000243580101168057366333013910030039100300395002192979014916498901680681700641116002430139102022640052202267009117007310142110201100991001000010010000010030042026152435520027002434544389999911434110200111733774622822169903009603000030102168067168079168058170074168064
602061680661274000000002402600011680573063330139100300391003003950021928290149164995017006316807511158023301391020226400522022670091168075530211020110099100100001001000001003003902802436020026002435744388999911434911200111733774622822167899000603000030102168067170095170070168073170065
6020617007012590000000024359001116805707733301391003003910030039500219283601491649950170072168072111580293013910202264005220226700911680665362110201100991001000010010000010030039026142436220026102402344383999911435001200111737774722822168849009603000030102168080170066170063168076170071
602061700731259000000002402400011700490663330139100300391003003950022188150149166990016808617006411160018301391020226400522022670091170070530211020110099100100001001000001003003900142435620026002401244389999911434800200111733774122822167905000603000030102170065168078168077168662170074
6020617006412580000000024360101116806007733301391003003910030039500219296501491669840168074170073111600363014200202284005620228700981700735302110201100991001000010010000010030042001524356200266402401844385999911434001200111727774101800169927000603000030102168064170071170055168067170074
602061700731259000000002402600101680783703330142100300421003004250022189570149164992017006316806911158032301421020228400562022870098168063536211020110099100100001001000001003004200152436320028002401344386999911435400200111727774501800167929009603000030102170065170067170078168068170071
6020617007312590000000024026101017005836033301421003004210030042500219290301491649830168379170070111600273014210202284005620228700981700735352110201100991001000010010000010030042026132436520028002435844383999911434700200111738773801800169918000603000030102170071170071170074168069170074
6020617006412590000110024360001016805700033301421003004210030042500219284901491649920170069168072121580323014210202284005620228700981680725362110201100991001000010010000010030042028132436220028002435244391999911434601200111727774701800167931000003000030102170065168073168086170068168070

1000 unrolls and 10 iterations

Result (median cycles for code): 16.8075

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 63 | 69 | 6a | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60031168069127512110024359010017004930733300491030039103008450219301511491669840168074170070815804230049612004640052200467009116807253621100211091010000101000001030039001424365200260024352443869999114349012000065177332192216990219603000030012168074170071168077170074170074
600261700641259000000243630100168057076333004910300411030039502218815014916699301680651700738160040300490120046400522004670091170064528211002110910100001010000110300390281524361200260024018443829999114349112000065177352192216989609603000030012168075170070168070170074168075
600261680751274001000240251101170058300333004910300391030039502218938114916699301680711700738160040300491020046400522007670091170070530211002110910100001010000110300390281424363200260024019443929999114348012000065177442192216989609003000030012170076168076170065168073170071
60026170064125900000024344010117005806033300491030039103003950221890511491669840168068170064816003130049502004640052200467009117006452421100211091010000101000001030039028024363200260024351443829999114349102000065177422192216989600903000030012168076170074168064170071168076
6002616807212730000002434610011680573673330049103003910300395021928501149166993016807117007381600403004950200464005220046700911700735302110021109101000010100001103003900024355200260024343443899999114352012000065177412202216791210603000030012168064170065168067170074168073
600261680721274000000240250000170058267333004910300391030039502218956014916699001680661700648160040300491020046400522004670091170070530211002110910100001010000110300390281524363200260024012443899999114345010000065177402192216790409903000030012168073170074168067170065168070
600261680751273000110243580000168057306333004910300391030039502192965114916499201700631680728158036300491020046400522004670091168075536211002110910100001010000110300390281324353200260024018443889999114349012000065177392192216990200603000030012170097168072170065168077170065
60026170073125900000024347000017004906733300491030039103003950221893811491669930168068170064815804430094112004640052200467009117007352921100211091010000101000001030039028024356200260024021443909999114350012001065177442192216990200003000030012170074168073170071168073170065
600261700701259000000243571000170058366333004910300391030039502192947114916698401680711700738160032300941020046400522004670091168069530211002110910100001010000110300390261724363200260024346443889999114342010000065177412192216990509003000030012170071168073170074168076170071
600261700701259000010240271100170049206333004910300391030039502218815114916699331700721680608158035300491020046400522004670091170073533211002110910100001010000110300390281324355200260024032443889999114348012000065177492192216990509903000030012170082168076170065168073170074