Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STADDH

Test 1: uops

Code:

  staddh w0, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 3.000

Issues: 3.003

Integer unit issues: 1.003

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6a | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | ac | af | bb | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | cf | d0 | d1 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7300634181257811001004105269339542207530031003200010002000109561686100493094903389134116310300010002000200040003404326441171001100010000200022100210002200222215012107030795035941872423935614449146162330511004191041667617513200010003407134211341583413834110
7300434202255400001004105344339102207530031003200010002000109541686100493119603385234042310300010002000200040003406826932171001100010000200022100310002200222215124107320796935601612427335604447135865330791003190391676917620200010003412134203341753404834196
7300434178255600001004005333340032213630031003200010002000109591686610493099203388634043310300010002000200040003400926761171001100010000200022100210002200222215080104950796835754682414136504453126362330121003190471686417893200010003422634208340603421434099
7300434116256500001005005366340202207530031003200010002000109591686500493113303388434165310300010002000200040003406626921171001100010000200022100210002200222215072106810796135662572423437084452156773329971003191021664317589200010003421234147341383418234170
7300434177256400001003105358338952210330031003200010002000109591686530493105603385234026310300010002000200040003403927101171001100010000200022100210002200222215038105870797335830642417535594450166261330011003192281665817558200010003412634057341063420234051
7300434144256900001004005348340382210630031003200010002000109611686110493102303389534137310300010002000200040003414926741171001100010000200022100210005200222214932105870801436544602424736254451136372330231003190911661517858200010003414934117341213415434159
7300434100256600001004105522339872207130031003200010002000109581687230493109003387134204310300010002000200040003402327271171001100010000200022100210002200222215035106651799635621682416735844461206067330441003191331666417481200010003410134201341423412734117
7300434230255400001004105443339042208330031003200010002000109581687120493103303396134063310300010002000200040003400326791171001100010000200022100210002200222215097105380798635560642415936014449127161330121003189001659817557200010003408434167341233423334225
7300434098256500001008105332340032204230031003200010002000109481686210493112903385334051310300010002000200040003401227211171001100010000200022100210002200222215015105501797035950752417936134443156964330731003191571674317646200010003415734295341563399334219
7300434163256600001004105312339772211930031003200010002000109591686400493112903395334128310300010002000200040003405427321171001100010000200022100210002200222214931105570795335932702428236084448146462330171003188671672217804200010003412334217341363424934066

Test 2: throughput

Code:

  staddh w0, [x6]
  add x6, x6, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0075

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 20 | 22 | 23 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 67 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40206300712250310034693201023030050376526131291525404742048220000201002000091275214129242374926985030049300652481932501040100202002000030200400003007561112020110099100100001010010000010020025397701013110012113112138201751995471513013101161130072204209620000201003007630066300763007630076
40204300752251110034136201026403003724692027129362540544204952000020100200009130631413015275492699503007230075248203250334010020200200003020040000300656111202011009910010000101001000001002002941265810119100061248013620121169528953013101161130072204519620000201003006630076300763005330076
402043007522513100348252800220300603063352712917254046920473200002010020000912913141289323849269950300723006524810325022401002020020000302004000030075611120201100991001000010100100000100200233103561011010005322918117200962108639923013101161130062204019620000201003006630066300663006630066
4020430075240110003456921101536300501681372212936254050720562200002010020000912816141291624949269850300653006524819325033401002020020000302004000030075611120201100991001000010100100000100200332162881013310016314712117201331896511393013101171130062204246620000201003007630076300663007630066
4020430075225111003462832101136300372461152312940254047620445200002010020000912637141298723349269950300723006524820325023401002020020000302004000030075611120201100991001000010100100000100200245154521011910007422632922010521015481163213101161130062204136620000201003006630076300763006630076
40204300752251000034435311090300602562312212887254052920479200002010020000913071141299226949269850300753006524821325023401002020020000302004000030065611120201100991001000010100100001100200303100321014510006211918107201093803421713113101171130062203849620000201003005330066300663007630076
402043007022511100343741910210300603554201612942254046720530200002010020000912734141292322049269950300623006524828325022401002020020000302004000030065611120201100991001000010100100000100200194196661010010009222808520134093537683213101171130072204459920000201003007630066300703006630066
40204300652251010034947250020030037248115211293525404742047720000201002000091273514118482914926985030075300752481032503240100202002000030200400003007561112020110099100100001010010000010020031312948101791001032366119201532824501543013101161130062203949620000201003007030066300663006630076
402043007522611000346462810110300602559819128992540517204882000020100200009127741412943255492698503006530075248083250334010020200200003020040000300756111202011009910010000101001000001002002811294891010410008512428105201330806401433113101161130072204176620000201003006630053300663007630076
402043007522511300343142610803003711611210129502540453204692000020100200009128081412933255492698503006530065247973250234010020200200003020040000300756111202011009910010000101001000001002002341506910106100110128013520109093545683113101161130072203889620000201003006630066300663007630076

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0075

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 20 | 22 | 23 | 29 | 3a | 3e | 3f | 40 | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | aa | ab | ac | af | b6 | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40026300802250000000033961541001003006034217201293625403352032020000200102000091182414129670248492699530075300752483503250554001020020200003002040000300756411200211090101000010010100000102001802495801004610001112626532005622344213001270216553007220324131020000200103005330070300703007630076
4002430052225000000003512102610017123006023220141289925403712035720000200102000091142914125630220492698930072300752483803250554001020020200003002040000300756411200211090101000010010100000102003001195901006210001303024502007812833814001270516553007220362101020000200103007030076300763007630076
400243007522500000000338422110017830060841821129282540332203842000020010200009114781412980027349269953007230075248410325055400102002020000300204000030052641120021109010100001001010000010200230119610100621000701443645200701322559301270516243006620397101020000200103007030070300763007030076
4002430075225000000003402142100140301952011215129225340348209242000020010200009120791412980122549269953007230069248380325050400102002020303300204000030069641120021109010100001001010000010200201220563172100481000421283022200671272458901270516223006620314131020000200103007030070300763007030076
40024300752250000000034013311002103006029213121288225402952029820000200102000091219914129581229492698930072300752483903250554001020020200003002040000300756411200211090101000010010100000102002601366001005110002113022472004913418622801270516553007220288101020000200103007030076300703005330076
40024300752250000000034045141001503006030114141288625403422029620000200102000091164814126660239492699530069300692481903250554001020020200003002040000300526411200211090101000010010100000102002812250650100481000421282231200572342469401270516563007220340131020000200103007030053300703007630076
4002430075225000000003384426100150300601761416129092540322202972000020010200009118721412942124349269953006630075248320325055400102002020000300204000030075641120021109010100001001010000110201260110540100531000441316222005722823612501270525363007220321131020000200103007630070300533007030070
40024300752250000100033841251001603005417115161305625403302061520000200102027691143014126930243492698930066300692483303250554001020020200003002040000300696411200211090101000010010100000102002801446901005810014113424482005513213914901270316223007220317101020000200103007030070300703007630070
4002430075225000000003375420100924300541242531129202540361203762000020010200009116841412663124449269723006630075248370325032400102002020000300204000030069641120021109010100001001010000010200270126890100671000541231837201061210351250127051635300722031213020000200103007630076300763007030070
400243007522500000000339232000015030054225109128842540290203222000020010200009112971412670124049269953006930069248350325048400102002020000300204000030075641120021109010100001001010000010200250935401005010001411924362008513304614801270616223007220340131020000200103007030076300533007630076

Test 3: throughput

Code:

  staddh w0, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.2218

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3020611012381010000193051510298011711127803325394431814821063101002000039596664644831022849992931045581038049972271012923010010341125102032000520206400091043984111110201100990100100001001000001002002727315019300100141087072818191641422322111117165101600105098119087131320000101001040141020481011949934499790
302049979975411100206301510110422691718841862541022202202021010100200004677281489659602724998862100989101377982086101329301005976681020320005202064000910314749511102011009901001000010010000010020022233445197551001602995329973910814253625011171640016029550341188521313200001010099024994149932399671101378
302041018347691100021009160031013448141883619254034219968205871010020000470681250050070107549103090104039102951965406998463010077265910200200002020040000995934811110201100990100100001001000001002002424314620192100160011098307281068315033191111722412232210312826206511313200001010010058110100410249510280899052
30205994757471100019351160051012990131685596254185122113203021010020000431440547842100147491014321053041047499776369951230100114111221020020000202004000010556046611102011009901001000010010000110020023263047192491001502994730734826715200202111722482242310204582144513132000010100103493103382101614101387103921
3020410226977210100191921410110480181818835222540152206552114310100200004752143498193912064996859101229100573968566997963010010121283102032000520206400099999453111102011009901001000010010000010020027262946200831001803100163111310077150322411117162601623102420121900913132000010100102934102598102100100824102794
3020410225777111100199071410298997414128261225407942000120587101002000042211764820757016449944449661596379929556954753010012301153102032000520206400099772348111102011009901001000010010000010020025243402025410016029925300199194142502321117165001600103074102062313132000010100100215100181101932100677104167
30204981937231010018887151019893601616851282539313202492039110100200004474982488951801044998659100938101307946336945633010017751665102032000520206400099682948911102011009901001000010010000010020023242951201021001701108163203710475152832231111717250160010413820196611302000010100104322104388105322104019103912
30204103864775101001992214013105911017147884325401841936120826101002000040819884759745141049988641030251043491008937997813010095999710203200052020640009104487459111020110099010010000100100000100200262636022450100150210800317401042715263524311171730016001053031119795130200001010099706100111987799884597023
3020498432744110002028815011101326416080698253943618200205261010020000418950347514300391499931810038899594974637979663010089598910648212032030640009104348376111020110099010010000100100000100200232304923752101897298303106510275152636230111717490160010220114262161302000010100102732103022101641100975100174
3020410075474411100199441600194430514138488225410332059120169101002000045322205009834111149956151002481014969930179919630100717836102032000520206400091043194601110201100990100100001001000001002002726001950510014001008229610908415028211111717470160010281381910913132000010100101516101766101788102322102230

1000 unrolls and 10 iterations

Result (median cycles for code): 11.1715

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 67 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | 7b | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bb | bc | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | branch mispred nonspec (cb) | cd | cf | d1 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3002611174283900110024034010011114391137917632544788248812014910010200005519060534097311019491088331115671115351072063108180300103023100202000020020400001115666151110021109286551010000101000001020000003222050100010012428316111532912831000640292162211108412548914102000010010111754111575111584111744111737
30024111608836000000222361000111179110891675254449725411200131001020000548301953290861103449108538111502111432107312310791630010221210020200002002040000111847401111002110926356101000010100000102000003533229951000010133273358412156100000640322162211131222618014102000010010111636111389111527111427111712
300241116638360000002418810001111857001092142254522525898201521001020000548594453131891928491089731117101119451076293108051300102025100202000020020400001116706051110021109290021010000101000001020000035342301610001001107032220159061330000640202162211127602656514102000010010111729111750111810111863111531
30024111875837000000263081000111156107892204254858527038200061001020000546716553251141106949108751111657111870107276310783830010282710020200002002040000111534621111002210928703101000010100001102000003136234851000110134583367612702031000064027216221116171245770102000010010111740111658111534111347111415
3002411180283600000022029100001119111809147525432372320320008100102000054930035317658196449108672111407111632107402310814930010222210020200002002040000111788619111002110925463101000010100000102000003136280341000010138263414015976003500064029215221113100228531402000010010111673111512111771111550111765
300241117658350000002460810001111807170917512545548253972000610010200005509969532861311077491084451118771118541075433107865300101318100202000020020400001113933671110021109255571010000101000001020000034342243010001001340231596130980343500064035216221113232259680102000010010111908111536111667111599111656
300241115068370000002471411000111559208917702544121235032000310010200005445902531955201144491085021114441114801071053107988300101215100202000020020400001118605551110021109249311010000101000011020000035023512100010014450340341448613031000640262162311117822899714102000010010111754111719111790111899111620
30024111735836000000250691000111175201299171925452022619020012100102000055015585321063010854910872611153911147010736031077943001028211002020000200204000011159162311100211092825210100001010000010200000002119910001101267431739133420323400064021216221115850254361402000010010112054112043111846111708111708
300241118988380000002326611000111460088916512544931239352000310010200005487466532406701029491086521115881117381072393108020300101418100202000020020400001116593511110021109248751010000101000011020000035352584810000001276534906135791340000640242162211159902196610142000010010111557111595111529111578111593
3002411168183400000024949100011118092809199025452882434220003100102000054836765342060090449108717112001111943107282310790830010061002020000200204000011189137411100211092551810100001010000110200000350231501000000140083536713721032350006400215221112224291171402000010010111953111950111434111506111776