Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST4 (multiple, 8H)

Test 1: uops

Code:

  st4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 12.000

Issues: 12.000

Integer unit issues: 0.000

Load/store unit issues: 4.000

SIMD/FP unit issues: 8.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 50 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
7200729370237114011510021410004694293360101510612000800040008000400051910716115002472929240294613101200040008000800020000293362934311610011000100040055153400621540006041013106948768933138104019211325838169464128642162581292012850400080002959129472294922938729331
7200429463236118012310036641000460329141130151601200080004000800040005190271633300246342925629383310120004000800080002000029363293961161001100010004006501400401440004042013260947969543149745193863381381210373928713162531281013121400080002934029445293382941729339
72004293662371190115100459000046952930632015040120008000400080004000519017162190024692292812933231012000400080008000200002934129349116100110001000400558140040244002415410132179570691830851142195763184381311394428802161521274913073400080002931629328295612935629350
7200429348236123111710021900004637293270301512312000800040008000400051900716325002470829235294153101200040008000800020000293642923711610011000100040054014004016400240420132829338694731051048193763152381911454128674159361308913011400080002931229290294152933229310
720042948123612111170002705000046862924333015158120008000400080004000519037161680724658291602928031012000400080008000200002936629258116100110001000400551514006014400068400132789531686531301038192823307381712435128572164771295113044400080002935229153291972932229357
720042941623611701141002979100146542927903015070120008000400080004000519007160690024698291382930531012000400080008000200002931029276116100110001000400551514004017400041541013255949968973109451193003225381614504428795162251281212883400080002940429418293202935329414
720042944623611511191002644100146752929022015051120008000400080004000518957158460024700291812929450182120844000800080002000029432293581161001100010004005481400401640006843013044938368863128539193853298382521514728797161421309413106400080002946429396293892936229410
72004294262371120119200171400014684293122001512112000800040008000400051890716075002471329287294233101200040008000800020000294012939311610011000100040055150400501640004154101315791836982310153819325334738108454728699162151297512804400080002934829274294802951429303
720042942023612311181004149000046382926423015083120008002400080004000519017160820024619292902952131012000400080008000200002935329422116100110001000400651514004016400041641013177933469023098743194643335382115504928801161201268613109400080002940029247294612931029405
720042922923617101310049540000471229341000150911200080004000800040005188971590800246692925229302310120004000800080002000029344293581161001100010004005615140050154000404101314494876919311794419394333738126484628685161881316013100400080002941529405293482938229387

Test 2: throughput

Count: 8

Code:

  st4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  st4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  st4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  st4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  st4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  st4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  st4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  st4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.4976

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3f | 46 | 49 | 4c | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
960209202311162000110045079400182635212382161641389223422861107684910474999732132010064000032487950093919471081322710203158020120020106339521337980960100200320000640000200640000160000020100519853411802011009910010080000800001003200000025481032000200232000223400510902161715131985560320000640000100200185199423201385200591199353
960204198919148900000000000876862014820160151716736351053779100722129320060100640000320000500922082610709959002008200199743199668381523368609601002003200006400002006400001600000201198199319118020110099100100800008000010032000004224932032000200032000224200510900131712131976040320000640000100197816196912201639200469199417
960204202143154500000003000841642017711616013921741951104202110072607132000010064000032000050093434391075112510200077019865819999339006341185960100200320000640000200640000160000020092619925611802011009910010080000800001003200000422505103200021018232000224400510900101713132005910320000640000100198975199260199314200610200932
96020420157815431000000000084676197876000155416591251044518100727401320060100640000320000500931911710695063002001370200481200650411143404289601002003200006400002006400001600600201734197562118020110099100100800008000010032000000260180320002002320002000051100061713131990300320000640000100201425202081199118201823199145
960204200987154000000012300087903199877161601500170722510475531007237413200001006402383200005009265653108174051020091801979752007524129834071596010020032000064000020064051216000001983792012411180201100991001008000080000100320000002616203200000063200020000510900181713142011960320000640000100198284201886200851198483201133
9602041976891555000000120000845582005870160165216958251054110100729707320000100640000320000500935863010666685001992320200955199478395603394289601002003200006400002006400001600000200049200567118020110099100100800008000010032018004226101032000010232000224200510900121713122002360320000640000100201090200796198226198379200934
960204200082156700000013230008495120025616160109816141251048653100722970320000100640000320000500914087410680543002010710202088199273399543407169604462003200006400002006400001600000204375209016311802011009910010080000800001003200000422635403200020023200022000510900151713141998310320000640000100198678202789199226200860201432
960204198868156100000003000845392004061600168516277251049922100723770320000100640000320000500939673310844931001993243201375200034419713392289601002003200006400002006400001600000200580202822118020110099100100800008000010032000004227032032000222032000204200510904151714122016720320000640000100202331199939200579199032199629
96020519921515530000011743000884931997461616014561679725104349810071834932006010064000032000050093044541074910200200626019984120142540214339601960100200320000640000200640000160000019650919845211802011009910010080000800001003200000422591603200000003200022020510903132613132005550320000640000100203007200632201642200070200999
960204201962155200003539644000083412203960161601231167081341049875100730581320240100640952320324500924998510885828102000250204720207206396434539900961830200320360640720200640720160180019893719927551802011009910010080000800001003202402422454503200000003200022420051090351714131989580320000640000100198377201214200443199703201454

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.4999

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 37 | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
960027201000155800000021085027199701016116316296251042346107257043200001064000032000050935032810795698119953102004551985943929534019096001020320000640000206400001600000199961200751118002110910108000080000103200141436261230320016001632000203450191917192019813132000064000010200936199500202368200816200236
9600252028251610580000080870921983241616153916012911104176610730756320000106400003201085092488451074223302015350202783202087375491339046960010203200006400002064000016000002012311991761180021109101080000800001032000003424538032000220832000223450192317211620115932000064000010197497201200201281203303200567
9600242010961556000000208798619906816161295163182510437781073086032006010640000320000509443129108446520201805319908120079841886338527960010203201206414402067310016833252205222168501371800211091010800008000010320014144426314032001630433200022050191917131919915232000064000010200064200263200130201236198294
9600241992301507000002131924862002080013751643425105036810729036320000106400003200005091875341079313211954790197290200890394743402719600102032000064000020640000160000019759019774611800211091010800008000010320000042263770320000128683200002050551817181819965632000064000010202638197480201909201972199910
96002419912415000000003087292199004160134116794251042611107287843200001064000032000050925537510837437020012001992161980373805433930196001020320000640000206400001600000199717201344118002110910108000080000103200000422592103200002023200020050191717101920196632000064000010200638198300199507200351201547
9600241999941547000003330884351994060161380172592510421571072230832000010640000320000509275978107266000200593019828820018738326339469960010203200006402402064000016000002009451990891180021109101080000800001032000004224119032000200032000204250191917181720069332000064000010198170202279198523200609200120
960024200823155400000030929952023261616133816611631038288107259483200001064000032000050912617811019668020126302011661990164037833862096001020320000640000206400001600000199989203065301800211091010800008000010320000002516403200020023200022050191417171720021932000064000010201628200524199884200538198398
960024198986156800000120192057200446016164715913251052230107241693200001064000032010850923456810795969019881402000982015174077412406489600102032000064000020640000160000020232820032611800211091010800008000010320000002472603200600023200020050332017171820265032000064000010198635202036200049201091198687
96002420115715430000016231840651993811616135817461531050509107171863200001064000032000050920652810894757020191002014922027223865134047996001020320000640000206400001600000199100201404118002110910108000080000103200000422465403200000023200000050191717161920265732000064000010200436201234201164196790201065
9600241989661534000003007604019905716161409153432510456831072325932000010640000320108509269843106965570200456020044219877741173341703960010203200006400002064024016000001996632007741180021109101080000800001032000004226917032006200532000204250311817121719931732000064000010201755198552199028200357200134