Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STNP (signed offset, D)

Test 1: uops

Code:

  stnp d0, d1, [x6, #0x10]
  nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 7 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 1e | 1f | 3d | 3f | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a1 | a2 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e5 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
900611668000004115115162520001000100010001000108758000114511661166324200010001000200020001166116611800110001000100001000010000001000010000051211611116310071000100011671167116711671167
900411669111004115115162520001000100010001000108758000114511661166324200010001000200020001166116611800110001000100001000010000001000010000051211611116310071000100011671167116711671167
900411668101904115115162520001000100010001000108758000114511661166324200010001000200020001166116611800110001000100971000010070071000710007151221611116310001000100011671167116711671167
900411669000074115116162520001000100010001000108788000114511661166324200010001000200020001166116611800110001000100001000010000001000010000051211611116310071000100011671167116711671167
900411669111004115115162520001000100010001000108758000114511661166324200010001000200020001166116611800110001000100001000010000001000010000051211611116310001000100011671167116711671167
900411668000004115115162520001000100010001000108758000114511661166324200010001000200020001166116611800110001000100001000010000001000010000051211611116310001000100011671167116711671167
900411669000904115115162520001000100010001000108758000114511661166324200010001000200020001166116611800110001000100001000010000001000010000051221611116310001000100011671167116711671167
9004116690001204115115162520001000100010001000108758000114511661166324200010001000200020001166116611800110001000100981000210070171000710007051211611116310001000100011671167116711671167
900411668000374115116162520001000100010001000108788000114511661166324200010001000200020001166116611800110001000100001000010000001000010000051211611116310001000100011671167116711671167
900411669000004115115162520001000100010001000108758000114511661166324200010001000200020001166116611800110001000100881000210075071000710007051211611116310001000100011671167116711671167

Test 2: throughput

Count: 8

Code:

  stnp d0, d1, [x6, #0x10]
  stnp d0, d1, [x6, #0x10]
  stnp d0, d1, [x6, #0x10]
  stnp d0, d1, [x6, #0x10]
  stnp d0, d1, [x6, #0x10]
  stnp d0, d1, [x6, #0x10]
  stnp d0, d1, [x6, #0x10]
  stnp d0, d1, [x6, #0x10]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5189

retire (01) | cycle (02) | 03 | 09 | 0b | 18 | 19 | 1e | 1f | 37 | 3a | 3d | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 61 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | dcache store miss (c0) | c2 | cf | d0 | d2 | d5 | d6 | dd | inst fetch restart (de) | e0 | e5 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602064151231100001080268301484149711841184146625162340100822428000010080000800005001910080646733044148804151341512214253234721601002008000080000200160000160000415124151311802011009910010080000800001008000008000073508000000080000800000511000116114151080000080000800001004151341514415134151441513
160204415123100000630344001484149711841184146625162342100822408000010080000800005001910128647564104148704151341512214253214701601002008000080000200160000160000415124151311802011009910010080000800001008000008000073508000000080000800000511000116114151080000080000800001004151341514415134151441513
160204415123110000960343901484149711841184146625162342100821978000010080000800005001910080648389144148804151341512214253214701601002008000080000200160000160000415134151211802011009910010080000800001008000008000073508000000080000800000511044116114151080000080000800001004151341514415134151441513
1602044151231100001050344101484149711841184146625162342100822408000010080000800005001910128650338144148704151241513214263214701601002008000080000200160000160000415124151311802011009910010080000800001008000008000088208000000080000800000511044116114150980000080000800001004151441513415144151341514
160204415133110000960265701484149711681184146625162342100822428000010080000800005001910128650336004148804151341512214253214701601002008000080000200160000160000415124151311802011009910010080000800001008000008000088208000000080000800000511000116114150980000080000800001004151441513415144151341514
1602044151331000009902242014841498118411841320251627581008226680000100800008000050019100806483891441488041513415122142529214711601002008000080000200160000160000415134151211802011009910010080000800001008000008000073508000000080000800000511040116114151080000080000800001004151341514415134151441513
16020441512311000000343901484149711841184146625163276100827938000010080000800005001910128650338004149004151341512214253214701601002008000080000200160000160000415124151311802011009910010080000800001008000008000088208000000080060800000511040116114151080000080000800001004151441513415144151341514
160204415133110000420224001484149811841184132025163539100834398000010080000800005001910080646733004148804151341512214263214701601002008000080000200160000160000415124151311802011009910010080000800001008000008000088208000000080000800000511000116114150980000080000800001004151441513415144151341514
1602044151331100001170224001484149811841184132025163539100834408000010080000800005001910080646727144148804151341512214253214711601002008000080000200160000160000415134151211802011009910010080000800001008000008000073508000000080000800000511040116114150980000080000800001004151441513415144151341514
1602044151331100001020343901484149811841184132025163539100834398000010080000800005001910080646733004148804151241513214263214711601002008000080000200160000160000415134151211802011009910010080000800001008000008000073508006000080000800000511040116114151080000080000800001004151441513415144151341514

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5189

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 1e | 1f | 37 | 3d | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | cf | d2 | d5 | d6 | d9 | dd | inst fetch restart (de) | e0 | e5 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002641523311111014134614841508118411841325251606241080537800001080000800005019106086471740414954152341523214613215001600102080000800002016000016000041523415231180021109101080000800001080015148000088208001401148000014800001425020061605541510800008000080000104151341514415134151441513
1600244151231100010503851484149811841184132025160482108047280000108000080000501910080641155041487415124151321449321493160010208014480000201600001600004151341512118002110910108000080000108000008000073508000000080000080000005020071604641509800008000080000104151441513415144151341514
160024415133110007205971484149711841184146625160395108038680000108000080000501910128641416041487415124151321449321493160010208000080000201600001600004151341512118002110910108000080000108000008000073508000000080000080000005020041606641510800008000080000104151341514415134151441513
16002441512311000960505148414981184118413202516048210804728000010800008000050191008064115514148741512415132144932149216001020800008000020160000160000415124151311800211091010800008000010800000800008820800000008000014800001415020051604641522800148000080000104152441521415244152441524
160024415233111011021413511484150811841184146625160480108050580000108000080000501910128641792041487415124151321449321492160010208000080000201600001600004151241513118002110910108000080000108000008000073508000000080000080000005020051605641509800008000080000104151441513415144151341514
160024415133110002426459714841497118411841466251605151080504800001080000800005019101286417910416584151241513214493214931600102080000800002016000016053641513415121180021109101080000800001080000080000735080000101510680000080000005020061606641520800148000080000104152341524415244152141523
1600244152531111155214324914841507118411841473251605301080489800001080000800005019106086491500414974152341523214563215001600102080000800002016000016000041523415231180021109101080000800001080015158000088208001413148000014800001415020161606541520800148000080000104152441524415264152341524
160024415203111003121442201484150811841184147225160528108304680000108000080000501910512645230041498415204152021459321493160010208000080000201600001600004151341512118002110910108000080000108000008000073508000000080000080000005020051605441510800008000080000104151341514415134151441513
16002441512310001141028281484149811841184132025160607108059780000108000080000501910080641515041488415124151521449321492160010208000080000201600001600004151241513118002110910108000080000108000008000088208000000080000080000005020051605541509800008000080000104151441513415144151341514
160024415133110001110282814841498118411841320251606071080597800001080000800005019100806415150414874151241513214493214931600102080000800002016000016000041513415121180021109101080000800001080000080000735080060002780000080000005020051604541509800008000080000104151441513415144151341514