Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST4 (multiple, post-index, 4H)

Test 1: uops

Code:

  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 7.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 4.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6600729380228120100001000466529236001595670001000400020001000400020005000256143581001023861290472933931070002000400050001000029260291621161001100010002000402000002000400129349271685431231441989932933813945442859310001601913380135472000400010002936629266292492930929190
6600429251227040400000100479129273001603370001000400020001000400020005000256293580505237352906129203310700020004000500010000292792929311610011000100020004020000020004001300294796950309804819993317338181742492859510001621713320135742000400010002929829239293702932829344
66004292262270202000121100468029227021601570001000400020001000400020005000256133578908238122904529338310700020004000500010000292732923911610011000100020004020000020004001298394317027311304120154317638111444372857710001606013254134062000400010002933429403293972929329264
6600429212227030200000000464229223021609070001000400020001000400020005000256093580605238002902129408310700020004000500010000292792931211610011000100020004020000020004001316094236857313414220037311038121244432860810001648713357134932000400010002935829345292762928829260
660042925022701000000110047692918700160327000100040002000100040002000500025608358030523835289912941531070002000400050001000029282293431161001100010002000402000002000400132469282688931271522009331693816945392860910001626013414134012000400010002935429306293522935629227
6600429196227030100000100464729197001603970001000400020001000400020005000256113580505238292910029429310700020004000500010000292312924111610011000100020004020000320004001302993496903315104420165317538151241442849910001627413378134332000400010002934229397292902930329310
6600429399227020500000100472229154001597870001000400020001000400020005000256193581106238622903929331310700020004000500010000292852930411610011000100020004020000020004001310692456883312414420052314938161146472853110001612513289137152000400010002937929295291942936029316
660042927922703010000010044292923700159177000100040002000100040002000500025611358100823783291112914431070002000400050001000029356293261161001100010002000402000032000401721295093216960307603620161317238091145422850110001620513206135272000400010002928429362293352932229427
660042926222804040000010046592928000159027000100040002000100040002000500025625358060723842289912925731070002000400050001000029140292361161001100010002000002000002000400130439337690831450432010330793818543372856110001622813093135622000400010002937229216293162930029223
6600429199227040100000000471429318001597170001000400020001000400020005000256073581104238262906429269310700020004000500010000293482937511610011000100020004020000020004001303591606906312213520151321038111544442849410001626913458136272000400010002921729263293602937529265

Test 2: throughput

Count: 8

Code:

  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.2703

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
48020710195362100000020042330010225516161943682825603359801003569011600008010032000016000047537546647445655149100715101224992622069032159656010020016000032000020040000080000010062210094411802011009910010080000800001001600131338111680160014200141600021438121511011611101953800000016000032000080100100762102135101357102221101404
480204993177810000002004215301019371616206466582560140480100359055160000801003200001600004754804680834572284610141910023110117421989322006560100200160000320000200400000800000101951100789118020110099100100800008000010016001312361055001600160024160002143812051101171110126680001001600003200008010010113410268810158210064499799
48020410145678700000630043255299991161622817121256018558010036259316000080100320000160000475335471719055941691019241025371014032125832118256010020016000032000020040000080000010069210180111802011009910010080000800001001600000010323016000200216000004000510911611100945800000016000032000080100102037101537101686102064100503
4802051017407890000003004438701017470019586335256009968010035947816000080100320000160000475827466166955582251019031001361024822116132018956010020016000032000020040000080000099724102950118020110099100100800008000010016000004010893016000210216006224000510912511102230800000016000032000080100101804100140101531101403101364
480204101743789000000300442350101676016226467152560031180100362608160000801003200001600004757894735114557658910101310079510124022585321643560100200160000320000200400000800000102175101884218020110099100100800008000010016000004010903016000200216000224000510911611100524800000016000032000080100102403101229101914101614102223
4802041017517870000003004427401011671616225968342559924280100363040160000801003200001600004755434738983581324510055210263910208721850322006560100200160000320000200400300800000100858102245118020110099100100800008000010016000004010231016006200016006224000510911611101550800000016000032000080100102270101461101403101173100410
48020410000779600000330143052010155716162217663925601152801003593501600608010032000016000047593147141705604755102310100575101576212033214855601002001600003200002004000008000001012931004531180201100991001008000080000100160000001063501600020021600020000511011711101113800002016000032000080100102503102061101032101892101961
4802041003387890000012300402680101696161621086790256019538010036218016000080100320000160000475574457944655623861014461038871018082194032074556052320016000032000020040000080000010188310202411802011009910010080000800001001600000401167901600020021600022400051091611110129380000001600003200008010010103410250110251299680100980
48020410193981700000030042236010172016162123684225604476801003624631600008010032000016000047581847092015916486101155100997101794213031520275560100200160000320000200400000800000100714101541118020110099100100800008000010016000004010690016000200216000224000510912511101183800600016000032000080100101470101909101154104945100587
48020410192778400000070144138010241016021226885256023868010036401016000080100320000160000475835467014956690651007851016741004782178932139556010020016000032000020040000080000010184310137811802011009910010080000800001001600000401056501600020021600022400051221171110038380000001600003200008010010271210127099026102670100949

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.2671

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4800271016527880000006010040587100706161619746477256045168001035899616000080069320472160216475264467095656455380001012460101170102705213143202785600102016000032024020400000800000100084101201118002110910108000080000101600000010418316000210016000223205019317331012948000001600003200008001099733102046100258101645102189
480024102220793000100180100411781022661616216864872559836780010363070160000800103200001600004755984696842572547700010127101008051016972230031913356001020160000320000204000008000001022921002851180021109101080000800001016000003298400160002108160002232050193163310109480000016000032000080010101510101315101121102367100228
4800241017347910000001221004075499777016214259962560134580010362383160000800773200001600004752024656906556126800010199601013551000432180732129356001020160000320000204000008006001012781014511180021109101080000800001016000003210723016000200816000223205019317331014058000001600003200008001010164499658101406102092101498
4800241006797880000000210042422101617016221353762559995380010364908160000800103200001600004752314707518570596200010187101014551023192138932041856001020160000320000204000008000001019361018151180021109101080000800001016000003210019016000210216000223205019317339776180000016000032000080010102088100345100268101267102212
48002410174079200000005100404699989116162623661425601860800103591191600008001032000016000047538147081645714091000100169010108110144021819318382560010201600003200002040000080000010150810064911800211091010800008000010160000009984016000200216000223205019316331013088005901600003200008001010257510108210101598854101276
4800241021067880000000210041911101217161623757443256009448001036217516000080010320000160108475277471913755992090009943801001671016532181032240256001020160000320000204000008000001004599971711800211091010800008000010160000032104570160000100160000234050193163310090580060016000032000080010100974100333102101101684101056
480024100981791000110132961004179010070516162263714585598447801873606241601208006932094416010847551246979085609551000100189010226510016622890262115956121920160240320480204006008006001004591013124180021109101080000800001016018203212555016114000212516006223205046444331015398005901600003200008001010065910207810238510161499632
4800241019587760001000510037793101290161623945868256021718001036255516000080010320000160000475231474639756185180001009550102193101985197593209805600102016000032000020400000800000100978102647118002110910108000080000101600000010942016000200816000203205020317331013968000001600003200008001010063310184610167710267999983
480024102420786000000021004332110054316162056621925602052800103610031600008001032000016000047590045848705687077100100807010230710236121449319873560010201600003200002040000080000010050610105711800211091010800008000010160000032116330160002002160002232050203163310010880000016000032000080010101517100046101099100815100669
48002499885791000000021004278210192516161862722525599075800103612271600008001032000016000047507546916855838654010101134010094010261422855320517560010201600003200002040000080000010216910066611800211091010800008000010160000036103910160002108160002232050193173310192780000016000032000080010101365100484100997100943102151