Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST4 (multiple, 4H)

Test 1: uops

Code:

  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 6.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 4.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 09 | l2 tlb miss instruction (0a) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | l1d cache miss st nonspec (c0) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6600728806223260181000100047972872000154916000400020004000200025596358071423735288222891931060002000400040001000028642287581161001100010002000520000002000013398987469243310114419265316238047505128241150441240712929200040002855028599284872876028539
6600428719223210220001000045592903320159156000400020004000200025599358012323695289652916931060002000400040001000029152291831161001100010002000020000002000012992916669193084844200773252379814464728591161351344913998200040002913729144293892917529232
660042939422724019000100014618292150015947600040002000400020002563435807182368028912292443106000200040004000100002927829054116100110001000200042000000200041299292766835304064819963315238048484728589164011347214017200040002918329321293562928929378
660042932522711023000110004603291800015912600040012000400020002560935812923762290282931531060002004400040001000029174292031161001100010002000420000002000413086925069073116839201813242380914504428402163661369013846200040002933229663307963065730613
660042920723720122000010104660292680016294600040002000400020002560135804823885293792945931060002000400040001000029416294381161001100010002000620000018200061323695886895318765220428331638069454928840162041335314135200040002948729560294932965629556
66004294622372301710059800104628293812016132600040002000400020002561235802023819290652941631060002000400040001000029337293061161001100010002000420000002000613131935068833135114920143324538139454528683159621336213765200040002934729384293232930729470
660042932322722021000000104672292910016110600040002000400020002568935799023877292922947532860002000400040001000029280293341161001100010002000420000002000013081947968873100950201653252380711393828659162031324114199200040002943629347294232949829464
6600429306228190180001000046492928200161956000400020004000200025609358063237912920829436310600020004000400010010293362927311610011000100020006200000020006129859540689231401248200823328380513524828670161191329414177200040002939429427295042936029473
66004292482271601800010010471229270021613660004000200040002000256133584452378929227294013286000200040004000100002934529350116100110001000200002000003952000613088932769183139115320166334738096464128676160971347914247200040002934929449294192944029354
66004292602281601800910010465029339001606760014000200040002002256293580142383129171294763106000200040004000100002933729416216100110001000200002000000200001318192596918317364920127330538107535228717160831318813861200040002945329311294822934929332

Test 2: throughput

Count: 8

Code:

  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.2641

retire uop (01) | cycle (02) | 03 | 09 | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
48020799906790000030004074710178916162269650825518545100361501160000100320000160000500469168255535081009201018191019292138912218044801002001600003200002003200008000001011611007361180201100991001008000080000100160000042110010160002002160062242051091171110109116000032000010010160610066399771102339101092
4802041023767820000300040213994941616201060212552235010036155916000010032000016000050047261635722810100083100176101518203283218174801002001600003200002003200008000001012071012381180201100991001008000080000100160000042108830160002005160002244051091171110184316000032000010010217810245710053399909100896
4802041001607920000400041624100203161623976855255199831003623901600001003200001600005004614993566921510066110197210083121181230222024807882001600003200002003200008000001014921021281180201100991001008000080000100160060001049101600001001600002420511011711101454160000320000100102605100539100897100029100830
4802041061647950000300040330102422016228465912551878410036554116000010032000016000050047332255689211996161022871014782108332087248044420016000032000020032000080000010166610209911802011009910010080000800001001600000451121801600000001600024420511011711100642160000320000100102323100667101528101285100304
4802041018167840000300043834101362161621397003255222081003598861600001003200001600005004694486565248310067610231910099221248322863480100200160000320000200320000800000101548100653118020110099100100800008000010016000004210849016000210516000224205109126111004611600003200001001009829910110056010166099614
4802041014397870000300039000100741161622675761255229121003628231600001003200001600005004653578568391099941101458101082217233213704801002001600003200002003200008000001017109816921802011009910010080000800001001600000421045401600020021600022420510911711102210160000320000100100640102059101923100993101971
48020410173278700009100041990979991616193566242552028510036185216000010032000016010850047075935734853100838101615100062218683206704801002001600003200002003200008000001009761012881180201100991001008000080000100160000042989101600020021600022420510911711101472160000320000100101293101232101187101016101157
480204100874783000030004268010061616162180669325522432100362196160000100320000160000500465785255612931013741010039942920439122166848079020016024032048020032072080060010274410189131802011009910010080000800001001600622421091321601820019071601222442513513511101152160000320000100101689102189102543100589101237
4802041017837790171823761587000423561011111601761724380522019100361365160120100320472160324500462089456463951022991003631019402037919211234807902001603703204802003204808012001018971014303180201100991001008000080000100160000042104020160002002160002242051101171110115416000032000010099985102029100781102397101303
4802041012557710002440004250099974161623255386255216511003593361600001003200001600005004636715558512610107910115710070121197320867480100200160000320000200320000800000100462998841180201100991001008000080000100160000042104030160002002160002242051101171110215616000032000010010084810074710053010236299854

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.2627

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
480027100730781000000153004028901012211616225859242552137510360503160000103200001600005047302575635751110148510129410016417628321169480010201600003200002032000080000099845100695118002110901010800008000010160000049978401600620051600021644140050191317131310179216000032000010100309102506101791100321102101
480024100443776100000014104188811020131616248571272552319310364605160000103200001600005046345105813248110204010045010248921159321411480010201600003200002032000080000010171910191311800211090101080000800001016001515431148001600143016160002144414005019617101310034616000032000010100488995659952310104499810
4800249973578210001018141042311110101716162215712651523386103636681600001032000016000050467735955795640998101014331019882191432177948001020160000320256203200008000001017911019281180021109010108000080000101600151444111070160016011716000216441400501913176131017411600003200001010145810187010153699529102123
4800241012367761010000170040568110216316162116611825520675103639031600001032000016000050470720456112230101365104256999122163432138048001020160000320000203200008000001020271012371180021109010108000080000101600141544115001160016001616000216441410501961713131016011600003200001010031499872100800102140101272
480024997897821021023961021041176199848016221968038052129510361934160120103204721602165046861955653490010239810278310077021483222073148035420160240320240203202408012001012271005092180021109010108000080000101600741443117793160136032036160122164414005033142614810089116000032000010100332100269101465100830100715
48002410148977712003139617003994111035911616214866852552315210364269160000103200001600005046769465702265010081310183410017619229321919480010201600003200002032000080000010152310058311800211090101080000800001016001414010803116001400141600021601410501911171591019361600003200001010007910135410140510164699450
4800241020577891011000140042662110081300215270082551689010363194160000103200001600005047669095891398010086510129510074521157321951480010201600003200002032000080000010209610117311800211090101080000800001016001616441142301600160016160002142140050191317131310156816000032000010101547102678100174101200102728
4800241014317871111000170042487110069715162056635525522066103627901600001032000016000050467621055822650101538101090100491211823188934800102016000032000020320000800000991361006031180021109210108000080000101600141546101301160016001916000216441400501914171361021741600003200001010189699794100921102027100416
48002410117579111110001400391081101657161621635970255191961036387816000010320000160000504677557555781401009291019361010311957532129648001020160000320000203200008000001027961018401180021109010108000080000101600151544957501600140016160002164414005019617111310190216000032000010101438101614100254100988100217
48002499649785110000015004238711013151616160265932552311510362924160000103200001600005047427415567109010122710087410107821134321424480010201600003200002032000080000010081210064111800211090101080000800001016001515441108111600161016160002164414105019617131110114916000032000010100874101222101301100201101763