Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CASPL (32-bit)

Test 1: uops

Code:

  caspl w0, w1, w2, w3, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 6.002

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 3.006

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 5f | 63 | 69 | 6a | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | cf | d0 | d1 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7601134332257263000100810546434108002804630063006300616001714931246033677342607253006200440042004700734038132521710011000100013006421007200204301099915121521310867281193612677237983634444817485133078189381665617535300030023428534234342393423334219
76006342182571923001246005443339960028079300330033006159921014931189033689342697253003200440082004701433931132021710011000100003006601008200406300999914101495810561279743616753237523641444615595333116192311652517807300030023421634336341973422534295
76006341982561724001026005279340990027943300630063006159413149311620337473422082430032004400820047014340821309217100110001000030064210052002043010999130215237108171800136051057237603611444514525732815190181686817555300030023424334201342403423734240
760063421225720280010270054843400500281323003300330031594013149311720336643428472530062006400820047007340351334217100110001000030034010072004043007999130014947106702797635921365237823625445216556332743190101638817829300030023423334308341973423934308
7600634215256242500104310549534105002811730063006300615980114931106033813341787263006200440082004701434029133621710011000100003003401007200205301599916121516210857280153586952237803545444215575333010187841660417228300030023424734201341613416134250
7600634336257252700103900538034028002809230063006300616000014931027033827342307253003200440122004701433917132421710011000100003006021008200404301099915121501010832379563589650238013543444014524933087193131654617474300030023410734179342993418534171
76006341792562524001036105400339940027992300930063006159381149311110336623418582430062004401220047014340811310217100110001000030034010042004073012999121215306108662799936311056238473583444313565233096190581669117484300030023411234255341573418434292
76006343402572519001033005243340920028086300630033006159312149311080337563424172630032004400820067007340561319217100110001000030064010042002083011999161214994107612803735701251237893654443914545233116192241662017719300030023431434310342383427034218
76006342662572222001038105424340710028062300630093006159871149311480337343423772530062002400820047007341051326217100110001000030030010032002083010999150215070109241802535851147236643572444112495333170189331656617691300030023423134179343333423934220
76006341722562025001361105310341010028004300330033003159372149312090337093416062630062004400820047007340911327217100110001000030064210052004063006999131215288107602794836151562238013668444012495233133190951657617587300030023426534258342123421734119

Test 2: throughput

Code:

  caspl w0, w1, w2, w3, [x6]
  add x6, x6, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 18.0213

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70219180282135044000004199810839107121515218016879215757516044725401001010030000103243000050552858134604917711801802471802641691273170059401003020040000302007039918021450112020110099974391001000010100100000100309339204653123399263532024626638673218577463269998221572102116244800131011711179468010000663000040100180224180197180196180222180164
702041801961350300010002061208581063288104180197810168280160582254010010100300001010030000505508583103049177105018020618021216911331720754010030200400003020070000180172501120201100999834310010000101001000001003087932056271231412493120256262269053422396500149998221655302466210500138211711179446010000663000040100180222180178180180180178180224
70204180185135030000040311080841107368144180171795107074161362254010010100300001010030000505508583224049177102018018218251417105331701364010030370400003020070000180191502120201100999819210010000101001000001003134241916321230502665120254282389734183624989899982319458023062416800131011711179455010000663000040100180197180214180226182507180169
702041802041349440000102124008051074476152180172801128085161173254010010100300001010030000505528580063049177112018019518025516908431701104010030200400003020070000180191501120201100999810910010000101001000001003091641926111234922553520249278108953418902484939998232098602336240100133411711179491010000663000040100180200180236180174180223180215
7020418021213494000000024974352806107601413218015478612708016068825401001010030000101003000050552857798404917712201801831802311691033170101401003020040000302007000018019050112020110099889541001000010100100000100309004195658123467255022050426418918421063455569998221911402166244800131011711179517010000663000040100180216180195180245180227180241
702041801951350400000002325708201069613144182450859875841608892540100101003000010100300005055185788600491771190180244180273169112317008440100302004088030200700001802225011202011009995916100100001010010000010030874420964512333736758202602641947176174674754899982220003022862411801131041731179453010000993000040100180202180206180192180283180213
702041802371349410001001928510845107761015218021476787774161016254010010100300001010030000505518582667149177137018021518026216908431701114010030200400003020070000180218501120201100999764110010000101001000001003088331926401235832923820239252138993416437459499998221783002486233200131011711179467010000993000040100180236180219180224180228180199
7020418020913504003000028550083510704901521801457801478881601012540100101003042010100300005055285809691491771040180209180231171087551701024010030200400003020070000180207511120201100999754010010000101001000001003095817181659123080282132025924393003622984485649998212054610215551625500136211731179434010217663000040100180174180239182498180213180217
70204180182135030000000206761180710760914418019481077273161035254010010100304201010030000505508581249149177113018023018026816908831700924010030200400003020070000180232501120201100999814810010000101001000011003095131926571232263354220258288986134157135010499982320502021562201100131011711179520010000993000040100180218180223180225180210180202
702041802131350410001402252208611073614116180158817137069160990254074010100300001010030000505508579749149177106018017218025216914931720934010030200400003020070385180204505120201100998994910010000101001000001003094617181629122602313172024627617879341528746776999822207101120755151800131011711181669010000663000040100180188180213180192180209180197

1000 unrolls and 10 iterations

Result (median cycles for code): 18.0233

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0f | 1e | 1f | 20 | 22 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7003918031713501010015718082517206514018025882761019160745254001010010300001001030000501038580191104917712518024018022316916731702444021730020400003002070000180221501120021109981271010000100101000001030954151716891255252352220219281965050131934471499982147759024662142012702172117947201000010103000040010180207180235180229180210180206
7002418022713501010015896085427126115218019680541317160818254001010010300001001030000501028581629104917716018021018022716921931701844001030020400003002070000180245501120021109981831010000100101000001030970116963012191424733202333072044156534613399982313470023862102012701171117951801000010103000040010180217180246180238180226180185
70024180271135010000155540844171272100180206795813141607232540010100103000010010300005010385818481049177140180252180223169201317016840010300204000030020700001801725011200211099869810100001001010000010309192173661122667255372021931399905813257435299998231299302246211412012701171117948501000010103000040010180230180256180211180181180214
7002418021413501010016473082117128111218021580571517161716254001010010300001022830000501028582534104917710718024118026816919731702074001030020400003002070000180228501120021109984521010000100101000001030952217963012268823626202092755044140344409199982312850025662136112701171117948301000010103000040010180212180244180239180270180219
700241801941349100101522408181712731121802317807351616127325400101001030000100103000050103858251010491771591802161802151692443170189400103002040000300207000018026250112002110998852101000010010100000103089511666521221092526520220280393956137614423199982313438025162149012701171117951101000010103000040010180236180215180206180190180255
70024180217134911001160010844270486116180213793711916118225400101001030105100103000050102858406810491771211802391802371692193170236400103002040000300207000018024451112002110998782101000010010100000103095614167647123416244782021427624933861411644495999822151619216621402012701172117949101000013133000040010180253180218180243180251180237
70024180251135020000161651084717689015618023380432271621632540010100103000010010300005010185811961098177194180228180231169225317018140010300204000030020700001802355111200211099855010100001001010000010309531518465312359524638202342770058173244400399982415432102636214146012701171117951101000013133000040010180262180243180247180221180221
7002418023013502000018620983017205116183093814454451613302540010100103000010010300005009985819681049177120180228180236169221317020540010300204000030020700001802335011200211099794710100001001010000010309041619464512369224901202473159930401574444423999823275829240621463012701171117949201000013133000040010180206180217180198180233180238
7002418019113502002016723981817447414418023881382020161337254001010010304201001030000501028581420104917714118022518021416921731701994001030020400003002070000180248501120021109984581010000100101000001030899118364512311423941202392741012044143404466399982314124025562112012701171117947501000010103000040010180225180227180239180239180217
70024180200135010111143280803173680112180250809591216008325400101006430000100103000050101858086710491771901802141802411692343170719400103002040000300207000018025251112002110998642101000010010100000103090911816681219992357820207287102508613353565699998221312810232621509012701171117951601000010103000040010180212180247180232180219180243

Test 3: throughput

Code:

  caspl w0, w1, w2, w3, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 18.0737

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 63 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
602111807651349000000453440100180747307150068333019710030095100301115008623056014917768618008118076516993811170016302110020266401562028270231180092112211020110099100100001001000011003009603315124592453552006203532165400999913531621211172829182018001801800903000030102180763180076180756180100180745
602061807621349000000453282000180744388149390333019710030097100301115008623020114917766418007818073616991811170706302110020278401562026670273180756112211020110099100100001001000011003009602716125415453452005803531865415999913532201011172829159018001795094903000030102180076180743180092180759180071
60206180075135400000045337200018008438714940833301911003009110030111500859005701491770191800931807571699361217068230193002027840148202667025918075711221102011009910010000100100000100300950330125650453532006203533965415999913531520011172829195018001801984063000030102180763180097180759180085180100
60206180084135400000045336200018072727815006733301991003009710030116500859132701491770111807651800911692751117067630211002027840156202667027318075710721102011009910010000100100000100300960015125618453642006213534765406999913532021211172829198018001795374963000030102180759180097180763180076180759
6020618076613480000004535720001800913871494083330197100300951023009350086234080149177685180070180762169917111707033021100202784015620278702311807691072110201100991001000010010000010030096000125645453492006203534865405999913532621011172829135018001795314063000030102180071180759180100180766180100
602061800781353000000453362000180746300149377333019710030097100301115008591502014917700718074218007816925212170038302110020274401322026670231180091112211020110099100100001001000001003009603316125420453642006203531465411999913532120011172929166018001795373663000030102180092180764180071180766180079
602061800911353000000453292100180060278150066333019710030097100301115008591488014917701918076518009116927611170031301930020266401562027470273180070107211020110099100100001001000001003009603316125608453502005803533965398999913530600211172929174018001801945963000030102180763180092180757180079180758
602061807581349000000453452100180084307150043333019710030095100301055008591491014917699818075818007016926711170035301930020278401322027870273180099107211020110099100100001001000001003009603316125456453522006203533165412999913531121211172929149018001802020963000030102180767180071180745180100180746
60206180765134800000045327210018075137815005333301971003009710030111500859106501491770181807621800911692741117001530193002027840132202667027318009911221102011009910010000100100001100300890270125838453522005803533165409999913532421211172829103018001795294963000030102180100180738180071180758180079
60206180099135400100046037210018074330814938633301991003009710030105500862305801491776651800991807651699361217070430205002027840156202667027318076910721102011009910010000100100000100300950330125607453422005803533165410999913533121011172929157018001795405063000030102180100180737180076180767180079

1000 unrolls and 10 iterations

Result (median cycles for code): 18.0766

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 63 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | atomic or exclusive succ (b3) | atomic or exclusive fail (b4) | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6003118011813541110004537913101180069513131494103330115103009910301145085920120149177040180772180106169255817071130127212220096401522009870259180097592110021109101000010100000103011617031125064454762007102354286546099991354401302310652293173193318018509903000030012180779180119180774180100180543
600261800841354111000454201110018075761311149411323010910300981030111508623882014917768818011218078216992281707153012227342009240144201007028018078712221100211091010000101000011030115183431124821454552007101353666542499991354541310310652292133193117952720903000030012180106180774180107180643180789
60026180097135411100045443111001807578130149401323010910300981030114508624538014917769618010618075316991781707283011234372009240164201007028018078312221100211091010000101000001030116203430124957454352007301355036556999991354481302310652291663193317952209003000030012180077180766180107180774180116
600261802991349100000454511300018008901414150068333010810300981030117508592180014917702518078518011816925281700483012736382010040160200967027318011013821100211091010000101000001030117173431125212454992007300355106551099991354031312310652291463193117952239903000030012180774180107180772180106180774
6002618077713511010004548313101180157512015006133301081030098103010850859140801491777051801061807721699158170712301272130200944014820094702591807725921100211091010000101000001030113193428125046454702007301354276548499991353871312310652292943193317952219903000030012180106180783180119180773180105
600261801201354101100454641300118009360015006633301071030099103010850859141201491776831807741801061692578170046301262433200944013620088702661801016021100211091010000101000011030114183431125020455202007100354106546999991353711312311652292374193317957429903000030012180788180100180755180111180774
60026180772134911110045491130001807700110150072333010610300991030102508592542014917769718010918077216992481707263024429312010040160200967026618078613021100211091010000101000001030115183429124844454572007100354686550299991354311102310652293363193117952119003000030012180822180127180786180107180773
60026180772134910000045454111001807487001500553330107103009910301175085915570149177026180764180106169264141700453011823222009440152200887027318011312121100211091010000101000001030113183431125387455592007310355606561599991355021302300652292483193317952209903000030012180123180763180106180772180106
60026180101135410100045354111011807576131415007332301091030100103011150859190801491770381807421801051692508170042301212332200944014820100702801800936021100211091010000101000001030116183432124838453952007301353776547299991354021310300652291993193317950900903000030012180772180107180767180114180785
60026180785134910110045428111001800917131415006433301071030096103011650859140701491770261807511801201692338170038301266212009640152200947025918008412221100211091010000101000001030113193432124941454092007311354376547199991355451312310652293573193317955519903000030012180787180106180773180106180772