Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST3 (multiple, post-index, 2S)

Test 1: uops

Code:

  st3 { v0.2s, v1.2s, v2.2s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 1e | 1f | 22 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
64007293902203110110204621290930018168500010002000200010002000200050002161016000821769289812929931050002000200050006000292532921511610011000100020042602002002200026231269690366824304104720267307638091251432844010001647013331146062000200010002924229326293232925529289
64004293162190130100304517290480018132500010002000200010002000200050002161616000321768288542923531050002000200050006000292472921911610011000100020032622002012200026221283190606862303804420251303138061143462850210001658813516146122000200010002926029253292522928729206
64004293032200111316314573290522218169500010002000200010002000200050002161916000121759289482919131050002000200050056006292052933511610011000100020032602002015200026201298191616883302004420319304538121050432843410001645513376146302000200010002919829293292252929529265
6400429230227011111031454529023011811250001000200020001000200020005000216161600042178828971291413105000200020005000600029141292541161001100010002004382200301220003622128148932684330581502026330653811951452846510001640913512147272000200010002923629333293012929929308
64004292172190101100304535290910118177500010002000200010002000200050002162816000121769289972936631050002000200050006000292862917211610011000100020022402003103200136221295891246881307104820317305938141149452850310001651913294147282000200010002928429321292612923429256
6400429269219011021030458129032001819050001000200020001000200020005000216201600012177528898293913105000200020005000600029211292841161001100010002003261200303620013421128889320682031900462023430313813746472846710001653613320147242000200010002927129285293032927929313
64004293082190101100404526291592018157500010002000200010002000200050002161416000121804289902933631050002000200050006000291712917211610011000100020022422002012200026201280991586825307314120311305138091048422854110001637013279146312000200010002926029341292252926329229
6400429273219012100041464529130211817550001000200020001000200020005000216161600052182428993291983105000200020005000600029209291681161001100010002002461200301220012421127749078686330520442033030443817446462850410001647013289146162000200010002928829319293042927329211
6400429379220011110615314565290491018148500010002000200010002000200050002161516000321752289542926831050002000200050006000292312923011610011000100020033612003002200024211281891906853302914120198304438121243432843010001654713150142422000200010002928829247292412921929203
640042927721901111054631453429123111820650001000200020001000200020005000216201600012173128953292733105000200020005000600029231291941161001100010002003342200210220002421128549194685930431462025130903809644442853710001653913319146212000200010002925929258292462932429240

Test 2: throughput

Count: 8

Code:

  st3 { v0.2s, v1.2s, v2.2s }, [x6], x8
  st3 { v0.2s, v1.2s, v2.2s }, [x6], x8
  st3 { v0.2s, v1.2s, v2.2s }, [x6], x8
  st3 { v0.2s, v1.2s, v2.2s }, [x6], x8
  st3 { v0.2s, v1.2s, v2.2s }, [x6], x8
  st3 { v0.2s, v1.2s, v2.2s }, [x6], x8
  st3 { v0.2s, v1.2s, v2.2s }, [x6], x8
  st3 { v0.2s, v1.2s, v2.2s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 37 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32020780053620000000302718800301600254036708015916719416000080100160000160000480499207956112956748002380045800450335400100200160000160000200400000480000800448021311802011009910010080000800001001600000400016000000216006324000510901171180042800000160000160000801008004680045800458004680047
32020480046621000110305494803641616907240237380218164092160000801591601181602164812032172330129516580180802138021257744168440038520016012016012020040030048036080214805482180201100991001008000080000100160060040125016012000276716012204000512201352282665800590160000160000801008038080218803788021380215
32020480377623100121329104815803651616181954031938016116437016006080218160236160108481203219830512958498002380045800440328400100200160000160000200400000480000800448004511802011009910010080000800001001600000400016000200016000224000510901171180041800000160000160000801008005580046800458005380047
320204800456200000003023888003716160254038868010016392716000080100160000160000480499207875113030608002380044800450326400100200160000160000200400000480000800548004511802011009910010080000800001001600000400016000200216000224000510901171180042800000160000160000801008004580045800458004680046
320204800456210000003051498002916160254060368010016358316000080100160000160000480499207924412946878002380045800440326400100200160000160000200400000480000800448004411802011009910010080000800001001600000400016000200516000224000510901171180048800000160000160000801008004580046800468004680045
320204800456201000003041688002916160254039158010016371916000080100160000160000480499207939312937458002480044800450327400100200160000160000200400000480000800448004511802011009910010080000800001001600000400016000220216000224000510901171180041800000160000160000801008004680045800478004580046
320204800456200000003139798003016160254046858010016432916000080100160000160000480499223640612935558002480045800450326400100200160000160000200400000480000800458004511802011009910010080000800001001600000400016000200216000224000510901171180042800000160000160000801008004680046800458004680046
320204800456200000003059308003016160254066698010016334216000080100160000160000480499215641712995468002380046800450336400100200160000160000200400000480000800528004511802011009910010080000800001001600000400016000200216000224000510901171180041800000160000160000801008004680046800468004680045
320204800456210000012005444800311600254042488010016383416000080100160000160000480499207891812996868002580045800440336400100200160000160000200400000480000800448004511802011009910010080000800001001600000400016000200216000204000510901171180042800000160000160000801008126080046800498004580046
3202048004562100000030625180107161602540470080100162882160000801001600001600004804992079252129394780023800448004303109400100200160000160000200400000480000800448004411802011009910010080000800001001600000400016000210516000224000510901171180042800000160000160000801008004780045800468004680045

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320027800576201110100126190045843800341616025406513800101670481600008001016000016000048004921620741298611008002480045800440336400010201600001600002040000048000080224800451180021109010108000080000101600000400016000200216000224005021717101080042800000160000160000800108004580046800468004580045
320024800446200000000630039031800301616025405823800101646511600008001016000016000048004920790221298254008002280044800450327400010201600001600002040000048000080044800541180021109010108000080000101600000400016000200216000224005021111710880041800000160000160000800108004680046800468004580046
32002480043621000010003005080180030160025405062800691647591600688001016000016000048004921582561293909008002480046800450327400010201600001600002040000048000080045802151180021109010108000080000101600000400016000000216000024005021101711980042800000160000160000800108004680046800478004680046
32002480046621000000003003941180030016025405064800101655121600008001016000016000048004923145991303565008002480046800450327400010201600001600002040000048000080045800441180021109010108000080000101600000400016000200016000224005021917101080383800000160000160000800108004680046800468004680046
320024800446200000000091006478180031016025403986800101661021600008001016000016000048004922335841302563008002380045800450327400010201601201600002040000048000080045800451180021109010108000080000101600000420016000200216000224005021101781080041800000160000160000800108021180046800468004680045
32002480045620000000003004440180199160025406261800101637821600008001016011816000048004920784101294846008002580043825422386326400010201600001600002040000048000080044800451180021109010108000080000101600000400016000200216000224005021717101080042800000160000160000800108004980046800468004680045
320024800446210000000030037051800301616025406233800101652871600008001016000016000048004919197791298809008002380045800450336400010201600001600002040000048000080046800461180021109010108000080000101600000400016000200927160002040050211017111180042800000160000160000800108004580046801838004680045
3200248004562000000001530042091800301616025402555800691671941600008001016000016000048004921562681296880008002480045800440330400010201600001600002040000048000080213800451180021109010108000080000101600000400016000200516000224005021111710880041800000160000160000800108004580045800458004680046
32002480046620000000003004431180029161602540393780010164556160000800101600001600004800492095389130653300800248004580045032840001020160000160000204000004800008004480044118002110901010800008000010160000040001600020021600022400502181710880041800590160000160000800108004780046800458004580055
320024802126200000000000040441800301616025407049800101651791600008001016000016000048004920784221294480008002480045800450326400010201600001600002040000048000080044800451180021109010108000080000101600000400016000000216000224005021101771180042800000160000160000800108004680045800478004580045