Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm: Overview | Base Instructions | SIMD and FP Instructions

STNP (Q)

Test 1: uops

Code:

  stnp q0, q1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 7 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 1e | 1f | 3d | 3f | 46 | 49 | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a1 | a2 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e5 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
90051165911101441150242425200020002000103651140116511653232000200040001165116511800110001000201515200012014011420001420001405154165411622014200011661166116611661166
90041165811101441150252425200020002000104061140116511653232000200040001165116511800110001000201415200012014011420001420001415135165411622014200011661166116611661166
90041165910001441150242625200020002000103681140116511653232000200040001165116511800110001000201416200002014011420001420001425144164311622014200011661166116611661166
90041165811001441150242425200020002000104551140116511653232000200040001165116511800110001000201415200012014011420001420001405145474511622014200011661166116611661166
90041165811001441150262825200020002000104061140116511653232000200040001165116511800110001000201414200022014011420001420001405136163411622014200011661166116611661166
90041165911191441150242425200020002000103591140116511653232000200040001165116511800110001000201416200022014011420001420001415133163411622014200011661166116611661166
90041165810031441150252425200020002000103591140116511653232000200040001165116511800110001000201616200012014011420001420001415144164311622014200011661166116611661166
900411659110121441150242425200020002000103591140116511653232000200040001165116511800110001000201514200002014011420001420001405133163411622014200011661166116611661166
90041165911001441150242425200020002000104061140116511653232000200040001165116511800110001000201414200022014001520001420001415156164311622014200011661166116611661166
900411658101121441150252425200020002000103591140116511653232000200040001165116511800110001000201614200002014001420001420001405156164511622014200011661166116611661166

Test 2: throughput

Count: 8

Code:

  stnp q0, q1, [x6]
  stnp q0, q1, [x6]
  stnp q0, q1, [x6]
  stnp q0, q1, [x6]
  stnp q0, q1, [x6]
  stnp q0, q1, [x6]
  stnp q0, q1, [x6]
  stnp q0, q1, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0376

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3d | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e5 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160205830166221101000142978299823762376296925160100100160017100160006500382208608298883013830136293076296516010620016001620032003283013830131180201100991001008000080000100160014016000017740160000000160000141600001401115117116008300716001401600001008301183016830208301683017
16020483016622100100015297829982376237629672516010010016000010016000650038220861829878301083013629327629651601062001600162003200328301183013118020110099100100800008000010016001416160000177801600140014160000141600001401115117016008301016001401600001008301883017830148301483011
16020483010622111100014297829982376237626702516010010016000010016000050038469261829888301383013629263629711601002001600002003200008301383015118020110099100100800008000010016001416160000177811600140014160000141600001400005110116118301016001401600001008301483014830208301683014
16020483013622110000014297830002376237626692516010010016000010016000050038220601829908301383011629233629681601002001600002003200008301583011118020110099100100800008000010016001415160000177601600140114160000141600001400005110116118301016001401600001008301783014830168301283014
16020483013622111100014297829982376237626692516010010016000010016000050038220601829918301383010629243629681601002001600002003200008301583011118020110099100100800008000010016001416160016148011600140014160000141600001430005110116118300716001401600001008301483014830168301483014
16020483013622100100014297829952376237629662516010010016000010016000050038220601829868301383010629253629691601002001600002003200008301583012118020110099100100800008000010016001416160000177601600140014160000141600001410005110116118301316001401600001008302283018830148301183014
1602048301362211120103663068299823692376300425160109100160000100160000500382206018298883013830136292611629711601002001600002003200008301383013118020110099100100800008000010016001417160000177801600170014160000141600001400005110116118301016001401600001008301683014830208301883014
16020483013622101100014297829982376237626692516010010016000010016000050038220601829888301383016629233629691601002001600002003200008301383010118020110099100100800008000010016001416160000177801600140114160000141600001430005110116118301016002801600001008397683014849108301983014
1602048401462211010001429782984237623763074304160100100160000100160000500382138418297382998829996291236295716010020016000020032000082999829981180201100991001008000080000100160014141600001776116001401141600000160000000005110116118299616000001600001008301483012830168301683012
1602048301162210010001429782984237623762958251601001001600001001600005003821384082974829998299862911362956160100200160000200320000829988299911802011009910010080000800001001600140160008177401600000001600000160000000005110116118299616000001600001008301483003830028299983000

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0375

retire (01) | cycle (02) | 03 | 08 | 18 | 1e | 3d | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | c2 | branch mispredict (cb) | cf | d0 | d2 | d5 | d6 | dd | inst fetch restart (de) | e0 | e5 | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600258299862200029782984237623762663251600101016000010160000503821384182973082998829996293436297816001020160000203200008299882999118002110910108000080000101600001600001774016000000016000001600000050200011611829951600000160000108299983000829998300082999
1600248299862100029782983236823762663251600101016000010160000503821336182974082999829986293336297816001020160000203200008299882999118002110910108000080000101600001600001479016000000016000001600000050200211611829951600000160000108300082999830008299982999
1600248299862200029782984237623762958251600101016000010160000503821384182973082998829996293436297916001020160000203200008299982998118002110910108000080000101600001600001774016000000016000001600000050200011611829961600000160000108299983000829998300082999
1600248299862200029782984237623762663251600101016000010160000503821336182974082999829986293336297816001020160000203200008299882999118002110910108000080000101600001600001774016000000016000001600000050200011611829951600000160000108300082999830008299983000
1600248299962200029782984237623762663251600101016000010160000503821336182974082999829986293336297816001020160000203200008299982998118002110910108000080000101600001600001774016000000016000001600000050200011611829961600000160000108299983000829998300082999
1600248299862200029782984237623762663251600101016000010160000503821336082974082999829986293336297816001020160000203200008299882999118002110910108000080000101600001600001479016000000016000001600000050200011611829961600000160000108300082999830008299983000
1600248299962200029782984237623762663251600101016000010160000503821336182973082999829986293336297816001020160000203200008299882999118002110910108000080000101600001600001774016000000016000001600000050200011611829961600000160000108299983000829998300082999
1600248299862100029782984237623762663251600101016000010160000503821336182974082999829986293336297816001020160000203200008299882999118002110910108000080000101600001600001774016000000016000001600000050200011611829961600000160000108299983000829998300082999
1600248299862200029782983237623762663251600101016000010160000503821336082974082999829986293336297816001020160000203200008299882999118002110910108000080000101600001600001774016000000016000001600000050200011611829961600000160000108299983000829998300082999
1600248299862200029782983237623762958251600101016000010160000503821384082973082998829996293436297916001020160000203200008299982998118002110910108000080000101600001600001479016000000016000001600000050200011611829961600000160000108299983000829998300082999