Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CASA (32-bit)

Test 1: uops

Code:

  casa w0, w1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 4.001

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 3.006

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03mmu table walk instruction (07)09l2 tlb miss instruction (0a)0e0f191e2223243a3f464951schedule uop (52)schedule ldst uop (55)dispatch ldst uop (58)simd uops in schedulers (5a)5f63696a6b6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst int load (95)inst ldst (9b)9dl1d tlb access (a0)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)c2cfd0d1d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
740073315324917114100100910060533264001210283003300630062304201492952503261632665824300610023006100260123266325872171001100010000300640100620041430119991502016336124042851539941055226204052444513583932645161231482814840300010013260832917326773274833156
740053263524417014001100700060183268400206643006300630062302951492963603262832520725300910033006100260183253326152171001100010000300642100720040730109991502015696114105819741181139227904081443915534132551168491502114988300010013295032730326753309633140
74005326592491201300010041005715325880120861300630093006229348149294490325203262872630061002300610026012330632619217100110001000030090210102004143012999171201658611411384793947642227793971444211483932505172781467115965300010013262132625326763265033167
740053272924490900010070005687329851020471300630063006229785149295730325923266872530091002300910036018326682607217100110001000030064210072004063011999131201659212009284744044742224123792443920554632576171741398015116300010013263832753325573274032593
74005326182481501400010041006083326451020673300630033006230168149294590330113273082630061002300610016012327042599217100110001000030064210052004053011999151221634712281284794036838226314088444414495232421164821423514996300010013329533333330823306332885
7400532670248130160001008000595332571012051630063006300323057314929652032981328127263006100230061002601232633262921710011000100003015421010200416301599914000162401135818460387284422545374144389514632385171491421215016300010013268132656326223283532976
7400532753245801111010070006046331450020604300630033006229630149296090325863273082530091002300610026012328902600217100110001000030064210072004063011999130001567811425284984097742226724082443414444232389159041409514696300010013272232698327563267132564
74005326232451501400010080006006326001020653300630063006229794149295750325113273782530061002300610036012326262597217100110001000030030010042004073015999131001589211607283933981843223494022443814524332554163041401115229300010013265533101330793300533132
7400532650244901400010080006013328590120560300630063006229103149302240326563269362630061002300310026012327492626217100110001000030064210052004063009999161201635011330384334043939223164057444614494432428163591411214962300010013269433189325493273232715
74005330872451211300010120006004330990020544300630063006228633149295750324443267372530061002300310026012327552621217100110001000030060210042004043009999141201659212091284644113441224723997443518584432313165671395214767300010013275433167326933308432986

Test 2: throughput (address in x6 advances by 4 bytes after each CASA)

Code:

  casa w0, w1, [x6]
  add x6, x6, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0277

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0f18191e1f202224293a3e3f4043464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6067696a6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafatomic or exclusive succ (b3)atomic or exclusive fail (b4)b6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2c3cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
50210303362271101000373331235108411123028418023628328401001010030000101003000050513255399104927238302493023032030140100202003000020200600003028319411202011009910010000101001000001003022593723361038120077230132242283037199982316125213438911131021711302801000066030000201003029030266302783028830273
5020430300227101100037244821110067132302792322181102284010010100300001010030000505132555321049272273028830304320251401002020030000202006000030314185112020110099100100001010010000010030217841931710365200722728156422303039599982113222241450811131011711303111000066030000201003027230258303073032130272
502043024922711100103775281991016837172302611952198823240100101003000010100300005051325537910492724830305303353202704010020200300002020060000302371851120201100991001000010100100000100302319505316102832009027231132362593034799982413720230425812131011711302791000067130000201003026230294303123025930289
50204303112271010000385442190108532363033024619897828401001010030000101003000050513255954104927205302873030132028240100202003000020200600003028018611202011009910010000101001000001003023010512311103792007920271361832030503999825158231954879051310117113026810000106030000201003033230298302603028130257
50204302672261100000379641164104048108302571852491852840100101003000010100300005051325596010492720630248302733202304010020200300002020060000302981941120201100991001000010100100000100302181045131310326200872234186142773044099982516617242495914131011711302831000066030000201003027430308303693030030281
5020430340227101000037603527010051168302651891910382294010010100300001010030000505132558381049272343023830317320266401002020030000202006000030244194112020110099100100001010010000010030232844134510327200862739200222833043399982316827258558800131011711302601000066030000201003031630214303033030030283
50204302792271111000387837246101604716302892542012286284010210100300001010030000505132551320049271693029030280320259401002020030000202006000030257194112020110099100100001010010000010030254940633410332200872763170222213047999982612830275424801131011711302991000066030000201003031430264303013024930294
502043025122711110003758292641032468830242221219983284010010100300001010030000505132561201049271953030830322320238401002020030000202006000030272194112020110099100100001010010000010030243948229410330200701993192122803043599982316417278484910131011711302931000066030000201003028130310303133028330307
502043024322710100003732172331016843196302291751590822840101101003000010100300005051325549710492720130295303133202794010020200300002020060000303051851120201100991001000010100100000100302531042029010274200722177128682703035299982413024220525901131011711302781000066030000201003027030219303223026130316
5020430247226111000037042916010405318830281184179096334010010100300001010030000505132555261049272053027730273320253401002020030000202006000030300185112020110099100100001010010000010030298853132210320200652329148182823048499982315721253466924131011711302731000066130000201003032030306303233030330292

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0293

retire uop (01)cycle (02)030f181e1f2022293a3e3f4043464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)5f6067696a6d6emap rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9aaabacafatomic or exclusive succ (b3)atomic or exclusive fail (b4)b6bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2c3cfd0l1i cache miss demand (d3)d5map dispatch bubble (d6)daddfetch restart (de)e0? int output thing (e9)eaeb? ldst retires (ed)? int retires (ef)f5f6f7f8fd
500303035522700368210023718341643019819956590284001010010300001001030000500632557920004927200303053032032026540010200203000020020600003032319811200211091010000100101000011030211506412102922008817391843236130435999825114452684460041270001170113030510000101030000200103029930349302803034630215
5002430283227003649862031128391843028817868263284001010010300001001030000500632549520004927212302873029632024440010200203000020020600003034118911200211091010000100101000001030173511393103372009021351761835530412999825148482884760011270001170113027110000101030000200103027730302302963027430283
50024302722270037168124210521763045821857277284001010010300001001030000500632555450004927198302973029332029440010200203000020020600003027018911200211091010000100101000001030206504328103122011728692409631230393999824124412304140011270001170413029710000101030000200103023030318302883031630261
5002430215227003760852071104319230356216975692840010100103000010010300005006325587400049271923032230271320282400102002030000200206000030275189112002110910100001001010000010301874523131028220080102018438988303149998251182626048400121270001160113027910000101030000200103032330260302703028330292
50024303042270036607823910391763030120577166284001010010300001001030000500652561940004927253302953032132027840010200203000020020600003030418911200211091010000100101000001030225493353103342009818402202032230391999824148562234060311270001160113029910000101030000200103031230295303023028430274
50024302932270037809421810431043026517186583284001010010300001001030000500632555700104927259302533033432023440010200203000020020600003028918911200211091010000100101000001030211549316103292009519192042229530476999824152362314440031270001160113032210000101030000200103031330278302823028830332
50024302772270037509223110231443023216867254284001010010300001001030000500632554030004927193302823029032023340010200203000020020600003030018911200211091010000100101000001030202580367103122011022181402030530373999825141392135540011270001160113035310000101030000200103033830411303073029530292
500243027322700377080208116401883029617787763284001010010300001001030000500632552840014927225302723028532030340010200203000020020600003027618911200211091010000100101000001030246468335103242011420381962426430449999824141332014980011270001170123070910000101030000200103026930313302833027030316
50024302662270037887621711282888302621676776928400101001030000100103000050063255374000492724130262302963202464001020020300002002060000303091891120021109101000010010100000103022949433510343201062157250026330401999826118382475740351270002160213028910000101030000200103031330306302743026430294
500243026022700368150207116249230290201774792840010100103000010010300005006325487200049271833026630303320267400102002030000200206000030286200112002110910100001001010000010302275173601030320099231221983628630408999824131572694600061270001170113027610000101030000200103031530296303033028730276

Test 3: throughput (same address each time; x6 is not modified by the code shown)

Code:

  casa w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 19.0081

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0e0f18191e1f22233a3f43464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6063696a6b6d6emap rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd1d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
4021019141214230000000243580000190066455343014210030042100300425002598910014918699801913991900651318137730142101021430042102146008419142513782110201100991001000010010000010030042028024356200281014370443929999114350012001117277738017001912450131023000010101190082191416190066191398190066
4020519008114340000000243600000190066455343014210030042100300425002616666114918831901900651914151318006430142101021430042102146008419006513612110201100991001000010010000010030042028182436620028101434844383999911435101200111733774222722189881001023000010101191415190066191400190082191415
4020519141514240000000243650001190050060343013910030039100300395002598851114918699801913991900651318003630139001021330039102136007819006513702110201100991001000010010000010030039028172436420026001434844389999911435700200111733773522722189884013053000010101191399190084191451190082191414
4020519140114230000100243600001191410455343013910030039100300395002598850114918396501914171900811318002030139001021330504104136033019033413612110201100991001000010010000010030039028182436620026001434844395999911434301200111733774222722189884071003000010101191416190082191411190082191400
40205191410142400000002436000011913824653430139100300391003003950025986651149187001019141719006513180020301391010213300391021360078190081136921102011009910010000100100000100300390002436220026001435644390999911435100000111733774822702189884001053000010101190084190082190066190079190082
40205190065142400000002436100011913970653430139100300391003003950026168821149188334019141019139713181368301391010213300391022860078191399137121102011009910010000100100000100300390281724363200261014360443819999114348010001117337769227031912000101053000010101191215190102190066190079191411
4020519139814340000000255490101190063050433013910030039100300395002616884114918833001914101914131318136530139101021330039102136007819008113612110201100991001000010010000010030039028172435520026001435244390999911434700000111733774522722189881013023000010101190066190082190082190082190082
4020519008114240000000243610001191400455343013910030039100300395002616646114918699801900651900811318003630139101021330039102136007819141013692110201100991001000010010000010030039028172436320026001435744384999911434301000111733774722722189868013053000010101191417191417191400191398190066
40205190078142400000002436001011900504053430139100300391003003950025988491149187001019008119007813180036301391010213300391021360078191414135621102011009910010000100100000100300390281724389200260014347443899999114349012001117337747263221912160131023000010101191416191414191416191416190082
4020519008114240001100243600101190055450343013910030039100300395002616885114918875901914141914141318161930139001021330039102136007819007826442110201100991001000010010000010030039028172435520026001434644381999911434201200111733774122722189868001323000010101191415191398191416191412191411

1000 unrolls and 10 iterations

Result (median cycles for code): 19.1397

retire uop (01)cycle (02)03mmu table walk data (08)0e0f191e1f22233a3f43464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)6063696a6b6d6emap rewind (75)map stall (76)dispatch uop (78)797bmap int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss st (a2)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)acafatomic or exclusive succ (b3)atomic or exclusive fail (b4)bbbcl1d cache miss ld nonspec (bf)l1d cache miss st nonspec (c0)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd1d5map dispatch bubble (d6)ddfetch restart (de)e0e7eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
4002819140814230000243601001191393270343004910300391030039502616827114918699701914111900651018137730049101003330039100336007819140613572110021109101000010100001103003928142437020026014354443819999114348102000065177462182218987906603000010011191403190078191419190078191412
40025191411142401002436001001900622063430049103003910300395026168101149188329019007719139710181377300491010033300391003360078191406134821100211091010000101000001030039002436120054014353443899999114342012000065177432182219121409913000010011190078191410190078191412190078
400251900771434000124368100119139620634300491030039103008150261682011491883190190077191411101813673004910100333003910033600781914111357211002110910100001010000010300390132436120026414353443919999114342010000065177392182218988209013000010011190078191400190066191398190069
4002519007714340000243590000191384066343004910300391030039502598835114918699401913991900651018004230049101003330039100336007819006513652110021109101000010100000103003928152436420026014356443899999114345010000065177412182219121409913000010011190075191407190075191409190078
4002519007714340000243650100190050060343004910300391030039502616821114918832901914061900771018004530049001003330039100336007819007713672110021109101000010100001103003927152436220026014348443909999114348010000065177372182218987009913000010011191410190078191400190066191398
4002519141014240000243490000191396077343004910300391030039502598837114918699701913991900652018137630049001003330039100476007819142113572110021109101000010100000103003928162436420026014355443849999114352012000065177462182218988200913000010011191412190066191409190078191409
4002519141114230000243451100191394055343004910300391030039502598843114918699701914091900771018003330049211003330039100336007819139713572110021109101000010100000103003928142436020026014367443889999114345112000065177452182219120909903000010011191407190078191412190078191410
4002519006514330000243570010190050200343004910300391030039502616646114918699401913991900741018004530049101003330039100336007819007413562110021109101000010100000103003927132436320027014347443889999114345112000065177422182218988216603000010011191399190075191408190066190167
40025190074143300002437188010191396206343009110300391030039502598672114918698501914111901501018004530049211003330039100336007819007713652110021109101000010100001103003927132436720026114374443889999114349010000065177492182218987000903000010011191399190078191743190075191399
4002519140214240000243770010190310266803038510301231030039502598665114918699401913971900661018137430049001003330039100336007819141113482110021109101000010100001103003928152435520026014358443909999114355012000065177492182319121409013000010011191412190078191398190753191412