Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CASPAL (64-bit)

Test 1: uops

Code:

  caspal x0, x1, x2, x3, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 6.002

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 3.006

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f18191e1f223a3f51schedule uop (52)schedule ldst uop (55)dispatch ldst uop (58)simd uops in schedulers (5a)5f606163696a6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst int load (95)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2cfd0d1d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)dfe0eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
760113412425512712611100100821543033879279183006300630061602500014931080336613410972630062004400820067014339531319217100110001000030114021007200632153011999152123115043106752796535379742365135604436145956332969191141646217655300030023405634001340603407334050
76006341372561181231000010062053163402028027300930063006160090061493115733592340407243009200440082004700733949134821710011000100003010542100920060283010999142123214908107132799735819492363236144444174952533037189611676117520300030023409334082340443409034104
760063414825512101910040102720537733971281483006300330061599200014930999336743413182530062004400820047014338491355217100110001000030104421010200601123009999162123214975106941796235838532359735394446155859433075189771662717562300030023412434047340353410834080
76006340852561211151000010052053763402327915300630063006159910001493110733631340587253006200440082004701433935131521710011000100003010402100820060293009999142023114943108462795035616542350235864441135049333075188701644217235300030023409434076341953411634186
76006340312551231181000010082153983392427968300930063006159945001493109333596340197273006200440042002701433878133921710011000100003009302100920060173006999172123014987106702801836008502364235974441145054433045190971650717726300030023408034110340523402233985
760063408625512002210000101221533733932278963006300630061598900014931076336343410272430062004400820047014339321329217100110001000030115001007200610830069991421231150001064228017356410432374236114441165446432975190181661217502300030023409534047341233411634124
760063400525511901800000105920540833932278953006300630031599040014931038337083416372530062004400820047014340271303217100110001000030115401007200601730099991521231152071076937928356510502357436054440144547333004185971662117487300030023406234040340093410134000
760063413725611801400030100520536233928279313006300330091600370614931173337043400572530062004400420027014337921351217100110001000030103421008200801123011999152123015167107862800935569512362836134443184651333000191841657317483300030023418434117341713414534128
76006340802561201221000010082053083395427921300330063009159948001493094933655341587253006200240082004703534066135121710011000100003009442101020060083010999152103015120106332803335339462355035914448165651333098188691639617442300030023403934052341393425534178
76006339852561241210100010052053083401827884300630063006160012001493099333719340427253006200240082004701433899135421710011000100003010342100920060283010999162023015247106972796835717492364835854442164645333165185321651917443300030023412634025340293405234042

Test 2: throughput

Code:

  caspal x0, x1, x2, x3, [x6]
  add x6, x6, 16

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 18.0457

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0e0f18191e1f20222324293a3e3f4043464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2c3branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
7023418048813521011000204253722681001680049618042722602101093054010010100300001010030000506121916761491774031805181806173170295401003020040000302007000018049943611202011009910010000101001000001003244606452146223312052114832424019014882427409998210247341061410020001310117111802621000010103000040100180564180483180537180462180478
7020418051313520000000203463522271001488217618045722781301093454010010100300001010030000506121920341491773831805331805093170417401003020040000302007000018052143611202011009910010000101001000001003243606402077223242045914962438024414831427169998210247331271940000001310117111803441000010103000040100180459180475180499180477180521
7020418045413530000000204092322191001488144818040522509381092804010010100300001010030000506121916371491775091804971805733170407401003020040000302007000018044243611202011009910010000101001000001003247907202027222722052115042406017414938427349998210258441151920000001310117111803301000010103000040100180521180407180586180495180499
7020418047713520000000200762622501001704122018032421831101093604010010100300001010030000506121917491491773331805531804563170387401003020040000302007000018049043611202011009910010000101001000001003245806732109222852056614992419026814797427419998210255251111730000001310117111803731000010103000040100180521180528180489180461180477
702041804131352000000020271292258100147213201805192198400109323401001010030000101003000050612191617149177360180515180376317033840100302004000030200700001805824361120201100991001000010100100000100324310645209222230204561466199002401490542765999821025427611720010001310117111802981000010103000040100180492180445180449180355180402
702041805381352000000020391412197100127224841805082215102109338401001010030000101003000050612191591149177390180468180462317031540100302004000030200700001804424361120201100991001000010100100000100324290795204022297204211504245016162301486442745999821028332981560030001310117111803031000010103000040100180588180463180546180526180496
7020418045813510000000204553822051001944222418036922233001094064010010100300001010030000506121916281491774101804901804463170387401603020040000302007000018048343611202011009910010000101001000001003242205412097222672046015152446194621214936427389998210248341581970340001310117111801131000010103000040100180374180502180463180521180509
702041804281351000000020302322269100146424921804912224810109430401001010030000101003000050612191788049177348180608180476317031240100302004000030200700001804294361120201100991001000010100100000100324230741202622270204871489492025614891427419998210265261161310000001310117111802171000010103000040100180430180464180472180428180410
702041804691352000000020435312217100146412641804552163201010933740100101003000010100300005061219156904917736518046718046331704004010030200400003020070000180446436112020110099100100001010010000010032455063320232227920525152624484566414755427369998210282361121670000001310117111802671000010103000040100180445180484180617180473180453
702041805631352000100020241372214100147212721804212235100109368401001010030000101003000050612191655149177371180502180443317036540100302004000030200700001806194361120201100991001000010100100000100324430623204822214205241512246005081482242754999821026734771420010001310117111802411000010103000040100180431180484180469180401180470

1000 unrolls and 10 iterations

Result (median cycles for code): 18.0465

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0e0f1e1f2022293a3e3f4043464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6b6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafatomic or exclusive succ (b3)atomic or exclusive fail (b4)b6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2c3branch cond mispred nonspec (c5)branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
700541804301351200102022818227511208342018031921852415109220400101001030000100103000050072191696149177368018047318046731704724001030020400003002070000180379432112002110910100001001010000110324352256821732214520493158224890266147634259499982010199131842303860012701171118031110000663000040010180464180400180443180489180522
7002418043013514040020444212235112165260180503221525710931040010100103004410010300005007219184304917742601805141804073170516400103035640000303987000018069943211200211091010000100101000011032433196382027221212050914752456025214751425049998201013412882513000012701171118022710000663000040010180514180465180448180441180448
700241804881351300002020914222711696626818048921934861092384001010010300001001030000500721917890491773920180492180461317044940010300204000030064700001803834321120021109101000010010100000103246624599203022134205791508244997626014887426479998201013914962213010012701171118017310000663000040010180399180499180397180422180434
7002418045913513030020400122360117204260180463219837710925240010100103000010010300005007219165704917742701803891804523170472400103002040000300207000018054043221200211091010000100101000001032460246542037221372051514851502216518148154263599982010144161222250330012701171118021110000663000040010181197180467180389180531180503
700241804591352303002052122223011280291441804872220259109162400101001030000100103000050072191847149177429018046118040931704524001030020400003002070000180426432112002110910100001001010000010324801453520892219420511149814830350148524263099982010192211042103620012701171118025010000663000040010180461180488180518180463180428
700241805161352330002025717222011456428818038622972781091964001010010300001001030000500721917661491774370180531180516317039040010300204000030020700001804594321120021109101000010010100000103245227650210222190204481519148146428614722426269998201011823742780000112701172118032210000663000040010180519180522180366180432180461
70024180482135130000203131823071129643921804882288198109195400101001030000100103000050072191661149177425018053918051231705024001030020400003002070000180476432112002110910100001001010000110324602468821122214120515149102439272147434263999982010180221482180000012701171118040110000663000040010180506180409180388180485180514
70024180441135130000202932722671186465001805062208197109245400101001030000100103000050072191588049177281018048218044131704994001030020400003002070000180327432112002110910100001001010000010324722458921252220420413149524550266147564261399982010192231362070340012701171118035210000663000040010180425180466180466180467180601
700241804971351333002081314222711384283361804932282056109265400101001030000100103000050072191825149177467318049018046831704374001030020400003002070000180437432112002110910100001001010000010324672768120962218520489151325029685814960426419998201012016851990000012701171118034110000663000040010180489180434180447180559180386
7002418041113523000020374122226111445260180393220446510918840010100103000010010300005007219177514917744401803921804343170395400103002040000300207000018037243211200211091010000100101000001032464197152084222002045315232445025214723426029998201019617883053000012701172118032110000663000040010180446180582180495180456180486

Test 3: throughput

Code:

  caspal x0, x1, x2, x3, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 18.0089

retire uop (01)cycle (02)03mmu table walk instruction (07)09l2 tlb miss data (0b)0e0f18191e1f22233a3f43464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)63696a6d6emap rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd1d5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
60211180753134801000002436101011807383063330142100300421003004250022977661491776751800811807481217004430142102022840056202287009818073757021102011009910010000100100001100300420281324364200280014356443939999114349012111727775001800180598009603000030102180079180746180071180749180079
60206180081134900010002436400001807400703330142100300421003004250022891021491770061807551800701117004130142102022840056202287009818007056621102011009910010000100100000100300420261424364200280014357443939999114344010111727774801800179933009603000030102180753180087180738180076180748
60206180753134900000002435000001800633063330142100300421003004250022890661491769981807441800781117004430142102022840056202287009818074556521102011009910010000100100000100300420281424364200280014356443879999114348002111727774101800179913000003000030102180071180738180090180757180759
60206180756134900000002436400001807383763330142100300421003004250022891031491770091807561800891117004130142212022840056202287009818007857121102011009910010000100100000100300420281524365200280014356443939999114343102111727774601800180590006003000030102180090180746180079180742180090
60206180089135300000002436100011807413003330142100300421003004250022889591491769901800781807451117070930184102022840056202287009818075258121102011009910010000100100001100300420281524363200280014357443939999114350010111727774601800179932009603000030102180749180071180748180582180756
602061807551349000000024360000118072230633301421003004210030042500228896014917699818075618008911170052301421020228400562022870098180089583211020110099100100001001000001003004200024365200280014356443869999114346012111727774601800180585109603000030102180087180746180079180754180071
6020618008613540000000243490001180301377333014210030042100300425002297663149177672180081180747111707053018421202284005620256700981807555812110201100991001000010010000010030042001424460200281014357443899999114368012111727774601800180583009603000030102180754180082180756180071180749
60206180752134900000002435500001807290663330142100300421003004250022891031491769981807561800891217004030142102022840056202287009818007956721102011009910010000100100000100301262281424362200282022010444909999114345000111727773701800180590006603000030102180743180076180753180082180746
6020618074413490001000243481001180066067333014210030084100300425002297773149177661180078180745111707113018621202284005620228700981800705832110201100991001000010010000010030042028024357200280014351443869999114345000111728773901800180594000603000030102180090180738180071180747180079
602061800891354000110024360100118006030633301421003004210030042500229776114917700918075518008911170033301421020228400562022870098180070571211020110099100100001001000001003004200024364200541014369443899999114347002111727774601810179918006603000030102180082180754180240180756180754

1000 unrolls and 10 iterations

Result (median cycles for code): 18.0090

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f1e1f22243a3f43464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6063696a6d6emap rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)branch cond mispred nonspec (c5)branch mispred nonspec (cb)cfd1d5map dispatch bubble (d6)ddfetch restart (de)e0e7eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
600311807431349111011243961210118074679633300491030039103003950228897901491770071807501800958170729300941120046400522004670091180076573211002110910100001010000010300571927024379200380114388443919999114362121230065177552192318058000903000030012180085180753180096180762180077
60026180076135410000124374111011807320453330049103003910300395022974921149174241180089180755817004530049322004640052200467009118073757021100211091010000101000001030081026142435920026001435544380999911434911001065177432192217990500603000030012180343180757180087180745180082
60026180081135400000024348010118073706633300491030039103003950228894121491770091807551800818170042300490020046400522004670091180089567211002110910100001010000010300810002436120026001435344382999911434301200065177442192217990000603000030012180746180079180756180090180738
6002618074413490000002435810011807262073330049103003910300395022890561149177009180744180070817005630049112004640052200467009118008957221100211091010000101000011030039028152436220026001435144387999911434900200165177382192218056200903000030012180130180745180082180748180077
600261800751353000000243581000180732077333004910300391030039502288973114917700318075518008181700373004910200464005220046700911800705722110021109101000010100000103003902802435820027001435044396999911434411200065177462192218056600903000030012180757180090180756180079180090
6002618008913540000002436500001807403603330049103003910300395022975830149177676180081180748817071230049102004640052200467009118075556521100211091010000101000001030039028132436420026001435444396999911435010000065177462192217990509003000030012180745180870180071180745180087
60026180081135400000024361000018073030633300491030039103003950228908301491769981807441800708170048300491020046400522004670091180081582211002110910100001010000010300390002435620026011435644386999911435210200065177462192218057100903000030012180071180745180082180754180071
6002618007013540000002435000001807373063330049103003910300395022975590149177662180075180737817071530049102004640052200467009118074858121100211091010000101000001030039028132436420026001435644395999911435001230065177602192217990609933000030012180096180763180096180756180096
6002618009513531000002437712001180747710933300491030039103003950229786911491776811800951807538170054300492220046400522004670091180087580211002110910100001010000010300581828024377200370014389444039999114364121231065177492192218057909923000030012180088180763180088180753180096
6002618009513541010002437612101180069088333004910300391030039502289274114917701518011518076281707173004913200464005220046700911807515902110021109101000010100001103005717242524377200380014386443999999114358121030065177512192217990309053000030012180096180763180097180759180088