Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CASPAL (32-bit)

Test 1: uops

Code:

  caspal w0, w1, w2, w3, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 6.002

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 3.006

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03l1i tlb fill (04)l1d tlb fill (05)mmu table walk instruction (07)l2 tlb miss instruction (0a)0e0f1e1f22243a3f464951schedule uop (52)schedule ldst uop (55)dispatch ldst uop (58)simd uops in schedulers (5a)5f606163696a6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst int load (95)inst ldst (9b)9dl1d tlb access (a0)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)cfd0d1d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
7601134220256110060010090105262340030028007300930063003160628061493098633679340667253003200440042004701434032133021710011000100003006401005200606300299915121515710673279703616185236803575443717716833020189951611117619300030023402034152340573417334126
760063416125550000110090105295338980027851300630063006160726001493092433631340717263003200440082004701434030132121710011000100003006031005200205300999913021488910642180453578163236673562444413726832991191901668617803300030023408234134340623416834071
760063415925680020010050105378340360027962300630063006159592001493106433674340748243006200640082004701434007134921710011000100003006401006200426301199915121502811120279723592270236173637444527747133005189371660117669300030023406734208340733408334132
760063401725660110010090005329340460027981300630063009159375001493096333720341697253003200440082000700733872129321710011000100003006021007200401301399914021505910597279833606173236443601444120686733012187381668017468300030023420634142340873419434022
760063415025570100010050005312339230028096300330063006159885001493099033618341677263006200240082004701433899134121710011000100003003421006200405300999913101498510623280223590167236873881443916666632946190251660317540300030023416634210340473403234116
760063414125660110010060005370339780028096300330063006159366001493104333679340717253003200540082004701434038136021710011000100003003421003200414300999914121485010564279633581067235773640444617717233004185661687717645300030023408634152341383404034157
760063412425540100010060105288338870027998300930033006159925001493099833641341487253003200640082004701433920132521710011000100003006401005200204300699913001492510638280123607072237013529444420747133022191591666517861300030023412934156341533412234144
760063413225650000010080105255339630027961300630033006159805001493102933706342307263006200440042004701433936135921710011000100003006401006200406301599913021495310670380083592166235593537444517556632972192151673017667300030023419634114340493411134081
760063410025550101110081005340339910027991300330063003160725001493107433682341487253006200240122004701434067134321710011000100013006021008200205300799915101498911285179983782167236453557443515706532939193091667717579300030023411634194341353409834203
760063409425470101110080005215339920028019300630063006159408001493101233710341407253003200240082004701433983132721710011000100003006401004200206300999913121500010658279793567160236233615444114676232998190981652817524300030023420734201341263414534096

Test 2: throughput

Code:

  caspal w0, w1, w2, w3, [x6]
  add x6, x6, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 18.0144

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f18191e1f20222324293a3e3f4043464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6061696a6b6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafatomic or exclusive succ (b3)atomic or exclusive fail (b4)b6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2c3branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd0d2d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
7021918019013494000011000118653476610072848148180140754143634108995401001010030000101003000050612191574004917705501801511801343170027401003020040000302007000018016044111202011009910010000101001000011003084523440545208162026524217866864109654109999982110224251783271912400013100021722179979100009913000040100180156180127180147180158180143
70204180173134920000100001205627768100696813218012274715313510910240100101003000010100300005061219155500491770760180151180143317002840100302004000030200700001801424411120201100991001000010100100000100308691648853020835202502163335048210967410929998211021827207370170600013100021722179957100009903000040100180138180125180128180138180140
702041801351349300000000011973257631006564610418012375011343610898840100101003000010100300005061219144800491770430180144180130317004640100302004000030200700001801438551120201100991001000010100100000100309412048956520861202702011815284210960411089998211021527193371178500013100021722179957100009903000040100180132180147180137180138180137
702041801441350300101000012187287551006804492180128739193537109110401001010030000101003000050612191539009817706401801441801323170044402143020040000302007000018012344111202011009910010000101001000001003085922473545208442025720417068476109974105099982110227301833491901600013100021722179979100009903000040100180130180171180127180116180147
702041801351349300000000011875257451006808923618012675112333210897440100101003000010100300005061219159100981770690180135180146317003640100302004000030200700001801294411120201100991001000010100100000100308992544458020870202722542066887211090411429998211031834199346200400013100021722179955100009913000040100180166180154180129180159180152
702041801381349400001000011928327501006967513618012574114373510903240100101003000010100300005061219157200491770670180135180156317004440100302004000030200700001801648551120201100991001000010100100000100308762246957720840202562471657004211059411519998211021723184363206910013100021722179956100009913000040100180134180156180145180143180169
702041801841350301003110011885277461006726112018012377293036108987401001010030000101003000050612191561004917705801801511801413170022401003020040000302007000018013744111202011009910010000101001000001003088928471571209102026123117885286109544110599982110233281883682001200013100021722179951100009903000040100180162180153180131180162180169
7020418016313494041000000120783179210070466112180130747113536109042401001010030000101003000050612191525074917704901801401801403170022401003020040000302007000018014444111202011009910010000101001000001003088623455585208982027022338907010994411109998211024928185373175900013100021722179957100009903000040100180135180120180120180202180132
702041801381349300302000011828297571007045410418010676412323210900040100101003000010100300005061219145400491770820180122180164317003140100302004000030200700001801564411120201100991001000010100100000100308702349856620894202532211606804410935411169998211025232175358233900013100021722179952100009903000040100180145180172180178180144180133
702041801711350403000000012026357651006727180180137746123428109041401001010030000101003000050612191477004917707501801341801223170036401003020040000302007000018014344111202011009910010000101001000001003087822470557208482027421419869284109574114299982110226321763322312600013100021722180035100009923000040100180146180211180136180114180176

1000 unrolls and 10 iterations

Result (median cycles for code): 18.0152

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f1e1f2022293a3e3f4043464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafatomic or exclusive succ (b3)atomic or exclusive fail (b4)b6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2c3branch cond mispred nonspec (c5)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
70039180160134920000012008167991720461921801137731334371094194001010010300001001030000500721909900491770321801211801563170109400103002040000300207000018014543211200211091010000100101000001030878104495572080020293257358724361092340983999821101224217433126750127011711179952100009903000040010180149180138180144180127180146
7002418015313494001001192635815155264144180109744932351093654001010010300001001030000500721910700491770481801611801383170113400103002040000300207000018011944111200211091010000100101000001030912174505202073120268226881336461095841028999821101362318136116350127031722179952100009913000040010180161180130180122180151180172
7002418015613492002001193438797171233160180124742841341093344001010010300001001030000500721909660491770491801811801423170111400103002040000300207000018014443211200211091010000100101000001030854124485492081420242264710168381093341057999821101362418136796190127011722179933100006603000040010180116180136180159180103180116
70024180138134931000012016187781752411321801277669353510928440010100103000010010300005007219096404917706218015118010231701074001030020400003002070000180141432112002110910100001001010000010308948448579207542026922690935250109784109299982110175201783240650127011723179944100006603000040010180136180106180120180150180142
7002418011813494120001203831809173659218012277727910964640010100103000010010300005007219077104917706218015618015931700864001030020400593002070000180150847112002110910100001001010000010309119407510207752024726518069242109654096299982110077271573149630127011731179976100006603000040010180160180185180191180155180140
70024180140135020020011959248551680608818014680917510970440010100103000010010300005007219073104917403918015618016931701364001030020400003002070000180152432112002110910100001001010000110309116360499207922023627452235230109744098899982010093311533226080127011712179927100006603000040010180141180121180121180162180140
7002418015213493000001175828797172032132180125780116141095754001010010300001001030000500721907960491770571801661801403170116400103002040000300647000018012543211200211091010000100101000001030897174124792079820248264532728681092841001999821100902717230630100127011723179979100006603000040010180160180167180183180137180167
700241801341349200000119572680716966712818014581411517109580400101001030000100103000050072190800049177046180111180163317009740010300204000030020700001801404321120021109101000010010100000103085084295182077920240241533352401088940980999820100722915828366160127011722179978100006603000040010180202180164180156180129180131
7002418017713504002001191732831172844144180135797177109661400101001030000100103000050072190651049177099180152180189317010640010300204000030020700001801604321120021109101000010010100000103095210422502208202024527119073266109684098599982110087221803093050127011712179960100006603000040010180134180164180158180171180184
7002418014713492000011203336828173632100180331787177109673400101001030000100103000050072190648049177082180171180175317014440010300204000030020700001802044321120021109101000010010100000103089664055012078320233261542352281096041129999821101082917333360180127011721179991100006603000040010180202180143180145180175180144

Test 3: throughput

Code:

  caspal w0, w1, w2, w3, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 18.0750

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0e0f18191e1f22233a3f43464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6063696a6b6d6emap rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd1d5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
6021118076113491110000243831210118006971003330142100300421003004250022892610149177682018009518076211170705301392320228400562022870098180751579211020110099100100001001000001003006017242424374200390114383443919999114364121030111733775642844180578000913000030102180085180763180085180762180792
602061807621349110000024366121031807368803330139100300391003003950022890350149177015018075218009511170716301391120226400522022670091180761590211020110099100100001001000001003005718282324368200370114389443899999114355120230111733775832843180577009913000030102180085180752180096180762180077
602061800841354110000024371121021800806983330139100300391003003950022891970149177681018009518076211170716301421420228400562022870098180750579211020110099100100001001000001003006117272224381200400114384443929999114357111232111727775611811179938009943000030102180752180085180763180085180752
602061807511348101000024364121021807474993330142100300421003004250022892100149177004018075118008411170061301422420228400562022870098180095592211020110099100100001001000001003006118242324377200400014381443999999114358121232111727775811811180594009923000030102180085180751180096180763180085
602061800841354111000024385121021800690783330142100300421003004250022978830149177015018076218009511170719301422420228400562022870098180750582211020110099100100001001000001003006018242524368200400014384444039999114359121230111727776211811179919009923000030102180762180096180752180085180762
60206180762134911100002437212100180080799333014210030042100300425002297742114917767331807611800841217072430142222022840056202287009818075359021102011009910010000100100000100300611702424379200400014387444029999114356111230111728774911821179931000943000030102181073180527181200180519180766
6020618164513551220099255066301001812657983843064810230042100301685112309809014917743801810531809693817093030648232036840336203127009818008758121102011009910010000100100000100301051827024375200390014376443889999114365121231111727775511821179938009923000030102180751180085180763180096180763
6020618076113491101000243721210118077531093330142100300421003004250022978500149177663018008418075212170706301421420228400562022870098180084582211020110099100100001001000001003006118242124376200650214390444019999114357111231111727774911811179927009943000030102180763180085180753180096180763
602061807621349111000024372111011800610803330142100300421003004250022891300149177670018008418075011170047301422420228400562022870098180084592211020110099100100001001000001003006119282324366200410014382443979999114368121231111728775311811179938009923000030102180755180096180762180087180085
60206180084135411000002437712002180072488333014210030042100300425002289059014917700401807611800841117005830142242022840056202287009818008457421102011009910010000100100001100300601902424376200400214379443959999114355121230111727775411811179927009923000030102180096180754180085180752180096

1000 unrolls and 10 iterations

Result (median cycles for code): 18.0736

retire uop (01)cycle (02)03090e0f18191e22243a3f43464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6063696a6d6emap rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)branch mispred nonspec (cb)cdcfd1d5map dispatch bubble (d6)ddfetch restart (de)e0eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
60031180756134911000243551001800640063330049103003910300395022976131149177666180081180738817006030049002004640052200467009118075658521100211091010000101000011030039281724364200260143534438399991143491230065177536192218056301043000030012180094180760180083180750180094
600261800931354000002438500018073546633300491030039103003950228913511491776671807571800798170049300491020046400522004670091180760576211002110910100001010000010300392817243632002611435644389999911434412000651774721922179909131013000030012180738180083180760180091180740
60026180750134900000249750001800664603330049103003910300395022890961149177001180095180757817071330049102004640052200467009118008757621100211091010000101000001030039002435720026014354443839999114352100006517748219221798890043000030012180761180083180749180080180750
60026180750134900000243580001800784063330049103003910300395022976701149177656180748180093817004630049102004640052200467009118008257521100211091010000101000001030039280243572002611435244447999911434612000651774921922180577101013000030012180083180738180083180751180083
600261800841354000002435610018007546633300491030039103003950229779411491776681807611800908170049300491020046400522004670091180760585211002110910100001010000110300392817243632002601435244382999911435102000651774921922179889131013000030012180080180737180083180758180083
60026180082135400000243511001807454063330049103003910300395022889921149177002180093180798817071630049002004640052200467009118075757621100211091010000101000001030039281724363200260143554438399991143531200066777502192217991201343000030012180750180071180760180094180704
600261800941353000002436000018073400033300491030039103003950228913611491770101800931807478170714300491020046400522004670091180093586211002110910100001010000010300392817243632002601435444382999911435002000651774021922180574131003000030012180754180083180746180082180751
60026180094135400000243620001800674603330049103003910300395022889931149177002180090180756817071430049102004640052200467019618076158521100211091010000101000001030039281724361200260143484438799991143440200065177422192217990001313000030012180738180094180748180082180761
600261807491348000002436600018073646633300491030039103003950228899211491769901807481800828170048300491020046400522004670196180754574211002110910100001010000010300392817243622002601435244389999911434812000651774221922179898131013000030012180760180080180738180091180749
60026180760134900000243501001800754663330049103003910300395022977601149177658180085180748817071430049102004640052200467009118075058821100211091010000101000001030039017243652002601435144383999911434312000651774941922180578131303000030012180750180083180760180082180738