Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STNP (signed offset, S)

Test 1: uops

Code:

  stnp s0, s1, [x6, #0x10]
  nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 7 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)0305080b1e1f3a3d3f464951inst issue (52)~issue fp/simd (54)~issue ld/st (55)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing ld/st (5a)huge thing fp/simd (5b)696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op ld/st (7d)~map op fp/simd (7e)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst fp/simd store (99)inst ldst (9b)a0a1a2a6a7a8a9acafbcdcache store miss (c0)dtlb miss (c1)c2cfd0d5d6ddinst fetch restart (de)e0e5ld/st retires (ed)? fp/simd (ee)f5f6f7f8fd
90061166900000041151131325200010001000100010001087580001145116611663242000100010002000200011661166118001100010001000010000100000010000100000513021622116310071000100011671167116711671167
90041166911107041151131325200010001000100010001087580001145116611663242000100010002000200011661166118001100010001008810001100701710007100070513021622116310071000100011671167116711671167
90041166811007041151131325200010001000100010001087580001145116611663242000100010002000200011661166118001100010001000010000100000010000100000513021622116310071000100011671167116711671167
90041166910007041151131325200010001000100010001087580001145116611663242000100010002000200011661166118001100010001000010000100000010000100000513021622116310071000100011671167116711671167
90041166911107041151131325200010001000100010001087580001145116611663242000100010002000200011661166118001100010001000010000100000010000100000513021622116310001000100011671167116711671167
90041166900000041151131325200010001000100010001087580001145116611663242000100010002000200011661166118001100010001000010000100000010000100000513021622116310001000100011671167116711671167
90041166900000041151131325200010001000100010001087580001145116611663242000100010002000200011661166118001100010001000010000100000010000100000513021622116310001000100011671167116711671167
90041166900000041151131325200010001000100010001087580001145116611663242000100010002000200011661166118001100010001007810000100701710007100072513021622116310001000100011671167116711671167
90041166900000041151131325200010001000100010001087580001145116611663242000100010002000200011661166118001100010001007810000100702710007100071513021622116310001000100011671167116711671167
90041166900000041151131325200010001000100010001087580001145116611663242000100010002000200011661166118001100010001000010000100000010000100000513021622116310071000100011671167116711671167

Test 2: throughput

Count: 8

Code:

  stnp s0, s1, [x6, #0x10]
  stnp s0, s1, [x6, #0x10]
  stnp s0, s1, [x6, #0x10]
  stnp s0, s1, [x6, #0x10]
  stnp s0, s1, [x6, #0x10]
  stnp s0, s1, [x6, #0x10]
  stnp s0, s1, [x6, #0x10]
  stnp s0, s1, [x6, #0x10]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5189

retire (01)cycle (02)0305080b191e1f373d3f46494f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd store (99)inst ldst (9b)9fa0a1a2a4a6a7a8a9acafbcdcache store miss (c0)dtlb miss (c1)c2cfd5d6ddinst fetch restart (de)e0e5ld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1602064152131110116541454114841508118411841325251607291008062980000100800008000050019106086488041414954152341523214343214871601002008000080000200160000160000415204152011802011009910010080000800001008001514800008820800140114800001480000140511011611415208001480000800001004152441524415214152441524
16020441523310100001439391484150811841184147325162850100830038000010080000800005001910464649883141498415204152021436321483160100200800008000020016000016000041523415231180201100991001008000080000100800000800007350800000008000008000000511011611415098000080000800001004151341513415134151341513
1602044151231100000035431484149711841184146625162926100828268000010080000800005001910080648489141488415134151321426321487160100200800008000020016000016000041512415121180201100991001008000080000100800000800008820800000008000008000000511011611415108000080000800001004151441514415144151441514
16020441513310000000282614841497118411841466251629261008282680000100800008000050019100806484891414884151341513214262321487160100200800008000020016000016000041512415121180201100991001008000080000100800000800007350800000008000008000000511011611415098000080000800001004151341513415134151341513
1602044151231000000027731484149811841184132025163629100807048000010080000800005001910128648331141488415134151321426321479160100200800008000020016000016000041512415121180201100991001008000080000100800000800008821800000008000008000000511011611415108000080000800001004151341513415134151341513
1602044151231100000027731484149711841184146625162843100828278000010080000800005001910080650653041487415124151221425321495160100200800008000020016000016000041512415121180201100991001008000080000100800000800008820800000008000008000000511011611415108000080000800001004151441514415144151441514
160204415133110000007051484149711841184146625163486100833868000010080000800005001910080650175141487415124151221425321495160100200800008000020016000016000041513415131180201100991001008000080000100800000800007350800003008000008000000511011611415098000080000800001004151341513415134151341513
1602044151231100000027731484149811841184132025163629100835298000010080000800005001910128648530141488415134151221425321482160100200800008000020016000016000041512415121180201100991001008000080000100800000800008820800001008000008000000511011611415098000080000800001004151341513415134151341513
1602044151231100000027651484149811841184132025162873100827738000010080000800005001910128650046141488415134151321426321483160100200800008000020016000016000041513415131180201100991001008000080000100800000800008820800000008000008000000511011611415108000080000800001004151441514415144151441514
16020441513311000000338214841497118411841325251634041008330280000100800008000050019105366506911414984152241522214363214851601002008000080000200160000160000415264152611802011009910010080000800001008001615800008822800140014800001480000141511011611415198001480000800001004152441521415244152441522

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5189

retire (01)cycle (02)030a18191e373d3f46494f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)6061696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd store (99)inst ldst (9b)9fa0a2a4a7a8acafdcache store miss (c0)c2cfd0d2icache miss (d3)d5d6ddinst fetch restart (de)e0e5ld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160026415233110000272814841497118411841466251622321082740800001080000800005019100806466660041488415134151221448321496160010208000080000201600001600004151241513118002110910108000080000108000080000882800000080000800000502050012169641510800008000080000104151341514415134151441513
16002441512311000030721484149811841184132025161118108149080000108000080000501910080650644004148741512415132144932149316001020800008000020160000160000415134151211800211091010800008000010800008000088280000708000080000050205309169741510800008000080000104151441513415144151341514
16002441513311000021131484149711841184146625163244108323480000108000080000501910128646340004148741512415132144932149216001020800008000020160000160000415124151311800211091010800008000010800008000088280000008000080000050205309169741509800008000080000104151441513415144151341514
160024415133110000323414841498118411841320251621231082113800001080000800005019100806497030041488415134151221448321493160010208000080000201600001600004151341513118002110910108000080000108000080000882800000080000800000502000012167941509800008000080000104151341514415134151441513
16002441512311000026901484149711841184132025162118108211380000108000080000501910080648381004148841513415122144832150316001020800008000020160000160000415134151211800211091010800008000010800008000073580000008000080000050205409167841509800008000080000104151441513415144151341514
160024415133110000211314841497118411841466251632441083234800001080000800005019101286463400041487415124151321449321503160010208000080000201600001600004151341512118002110910108000080000108000080000735800000080000800000502000010167941510800008000080000104151341513415144151341514
1600244151331100003234148414981184118413202516308710821088000010800008000050191598464618210414874151341512214483215001600102080000800002016000016000041513415121180021109101080000800001080000800007358000000800008000005020540111613941509800008000080000104151441513415144151341514
16002441513311000021131484149711841184146625163244108323480000108000080000501910128646340104148741512415132144932149316001020800008000020160000160000415134151211800211091010800008000010800008000073580000008000080000050200001016101341510800008000080000104151341514415134151441513
1600244151231100003234148414981184118413202516212310821138000010800008000050191008064970310414884151341512214483214921600102080000800002016000016000041512415131180021109101080000800001080000800008828000000800008000005020000916101141510800008000080000104151341514415134151441513
1600244151231100002108148414971184118414662516324410832348000010800008000050191012864807600414884151341512214483214931600102080000800002016000016000041513415121180021109101080000800001080000800007358000000800008000005020500916101041509800008000080000104151441513415144151341514