Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STNP (signed offset, Q)

Test 1: uops

Code:

  stnp q0, q1, [x6, #0x10]
  nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 7 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 1e | 1f | 3a | 3d | 3f | 46 | 49 | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a1 | a2 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e5 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
90051165800000341150262625200020002000100000114012221167323200020004000116511651180011000100020000200002000000200002000005198168811622000200011661166116611661166
90041165800000341150272625200020002000100000114012011165323200020004000116511651180011000100020000200002000000200002000005218168711622000200011661166116611661166
90041165900000341150262625200020002000100000114012031165323200020004000116511651180011000100020000200002000000200002000005196168811622000200011661166116611661166
90041165800000341150262625200020002000100000114012011165323200020004000116511651180011000100020000200002000000200002000005217168811622000200011661166116611661166
90041165800000341150262725200020002000100000114012011165323200020004000116511651180011000100020000200002000000200002000005218168711622000200011661166116611661166
90041165800000341150262625200020002000100000114012011165323200020004000116511651180011000100020000200002000000200002000005217168811622000200011661166116611661166
90041165900000341150262725200020002000100000114012041165323200020004000116511651180011000100020000200002000000200002000005217168811622000200011661166116611661166
90041165800000341150262625200020002000100000114011651165323200020004000116511651180011000100020000200002000000200002000005216167711622000200011661166116611661166
90041165800000341150272625200020002000100000114012021165323200020004000116511651180011000100020000200002000000200002000005198168811622000200011661166116611661166
90041165900000341150262625200020002000100000114012011165323200020004000116511651180011000100020000200002000000200002000015217167611622000200011661166116611661166

Test 2: throughput

Count: 8

Code:

  stnp q0, q1, [x6, #0x10]
  stnp q0, q1, [x6, #0x10]
  stnp q0, q1, [x6, #0x10]
  stnp q0, q1, [x6, #0x10]
  stnp q0, q1, [x6, #0x10]
  stnp q0, q1, [x6, #0x10]
  stnp q0, q1, [x6, #0x10]
  stnp q0, q1, [x6, #0x10]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0369

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3d | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | dcache store miss (c0) | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e5 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160205829486220000002938293423442344261825160100100160000100160000500381898408292382948829526286136290716010020016000020032000082949829481180201100991001008000080000100160000016000014540160000000160000160000005110116118294516000001600001008295082949829508294982950
1602048294962100001802938293423442344261825160100100160000100160000500381898408292382949829496286436290716010020016000020032000082949829481180201100991001008000080000100160000016000014540160000003160000160000005110116118294616000001600001008294982950830108294982950
1602048294962100000029382933234423442908251601001001600001001600005003818984083079838858294962864116386816040620016000020032000082949829481180201100991001008000080000100160000016000017440160000103160000160000005110116118294616000001600001008294982950829498295082949
1602048294862200001202938293423442344261825160100100160000100160000500382111008292382949829496439636290716010020016000020032000082949829481180201100991001008000080000100160000016000014540160000003160000160000005110116118294516000001600001008295082949829508294982950
16020482949621000056402938293323442344290825160100100160000100160000500381898418292382949829576286236290616010020016000020032000082948829491180201100991001008000080000100160000016000017440160000000160000160000005110116118294516000001600001008295082949829508294982950
1602048294962100002102938293323442344290825160100100160000100160000500381898418292382948829546286136290616010020016000020032000082948829491180201100991001008000080000100160000016000017440160000000160000160000005110116118294516000001600001008295082949829508294982950
160204829496210000002938293423442344261825160100100160000100160000500381893608292382949829556286236290716010020016000020032000082949829481180201100991001008000080000100160000016000014540160000000160000160000005110116118294616000001600001008294982950829498295082949
160204829486220000002938293323442344290825160100100160000100160000500381898418292382957829496286236290716010020016000020032000082948829491180201100991001008000080000100160000016000014540160000000160000160000005110116118294516000001600001008294982950829498295082949
160204829486210000002938293423442344261825160100100160000100160000500381893618292482948829526323736290616010020016000020032000082948829491180201100991001008000080000100160000016000017440160000000160000160000005110116118294616000001600001008294982950829498295082949
160204829486210000002938293423442344261825160100100160000100160000500381893618292482949829536286236290716010020016000020032000082949829481180201100991001008000080000100160000016000014540160000000160000160000005110116118294616000001600001008294982950829498295082949

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0375

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 1f | 3d | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a2 | a4 | a7 | a8 | ac | af | dcache store miss (c0) | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e5 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002582998621000002978298323762376266325160010101600001016000050382138418297382998829996293436297816001020160000203200008299982998118002110910108000080000101600001600001774160000001600001600000050203163382996160000160000108299983000829998300083000
16002482999621000002978298423762376266325160010101600001016000050382133618297482999829986293336297816001020160000203200008299882999118002110910108000080000101600001600001479160000001600001600000050203163382996160000160000108299983000829998300082999
1600248299862100000297829842376237626632516001010160000101600005038213361829748299982998629333629791600102016000020320000829988299911800211091010800008000010160000160000177416000001951600001600000050203163382995160000160000108300082999830008302383000
16002482999622000002978298423762376266325160010101600001016000050382133618297482999829986293336297816001020160000203200008299882999118002110910108000080000101600001600001774160000001600001600000050203163382996160000160000108300082999830008299983000
16002482999622001002978298323762376295825160010101600001016000050382138418297382998829996293436297916001020160000203200008299982998118002110910108000080000101600001600001479160000001600001600000050203163282996160000160000108299983000829998300082999
16002482998622000002978298323762376295825160010101600001016000050382138418297482999829986293336297816001020160000203200008299882999118002110910108000080000101600091600081774160000001600001600000050203162382995160003160000108300082999830008299983000
16002482999622000002978298423762376266325160010101600001016000050382133618297382998829996293436297916001020160000203200008305882999118002110910108000080000101600001600001479160000001600001600000050203163282995160000160000108300082999830008299982999
160024829986390001830394113153224622511452017518160361111601141016186350405071818297382998829996293436297916001020160072203200008299882999118002110910108000080000101600001600001774160000001600001600000050203163382996160000160000108299983000829998300082999
16002482998622000002978298323762376295825160010101600001016000050382138408297382998829996293436297916001020160000203200008299982998118002110910108000080000101600001600001774160000001600001600000050203162382995160000160000108300082999830008299983000
16002482999621100002978298423762376266325160010101600001016000050382133608297382998829996293436297916001020160000203200008299982998118002110910108000080000101600001600001479160000001600001600000050202163382995160000160000108300082999830008299983000