Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STNP (D)

Test 1: uops

Code:

  stnp d0, d1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 7 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 19 | 1e | 1f | 3d | 3f | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 61 | 69 | 6d | 6e | 74 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a1 | a2 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | cf | d0 | d2 | d5 | d6 | dd | inst fetch restart (de) | e0 | e5 | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
900611661011100741151151625200010001000100010001087580001011451385116611338920761000103920002076152711662180011000100010071310380100701710007100070513002162211631007091000100011671167116711671167
9004140211120199541151161625200010001000100010001087880160011451166116603242000100010002000200011661166118001100010001008810000100700710007100070513002162211631007051000100011671167116711671167
900411661011000741151161625200010001000100010001087580001011451166116603242000100010002000200011661166118001100010001008910002100700710007100071513002162211631007091000100011671167116711671167
9004116611101007411511616252000100010001000100010875800000114511661166032420001000100020002000116611661180011000100010078100011007011610007100070513002162211631007061000100011671167116711671167
900411661111100741151151625200010001000100010001087580000011451166116603242000100010002000200011661166118001100010001000010000100002010000100000513002162211631000051000100011671167116711671167
900411661000006041151151625200010001000100010001087580000011451166116603242000100010382000200011661166118001100010001000010000100000010000100000513002162211631000051000100011671167116711671167
900411661100000041151151625200010001000100010001087580000011451166116603242000100010002000200011661166118001100010001000010000100000310000100000513002162211631000051000100011671167116711671167
900411661100000041151151625200010001000100010001087580000011451166116603242000100010002000200011661166118001100010001000010000100000010000100000513002162211631000051000100011671167116711671167
900411661100003041151151625200010001000100010001087580001011451166116603242000100010002000200011661166118001100010001000010000100000010000100000513002162211631000051000100011671167116711671167
900411661100000041151151625200010001000100010001087580000011451166116603242000100010002000200011661166118001100010001000010000100000010000100000513002162211631000061000100011671167116711671167

Test 2: throughput

Count: 8

Code:

  stnp d0, d1, [x6]
  stnp d0, d1, [x6]
  stnp d0, d1, [x6]
  stnp d0, d1, [x6]
  stnp d0, d1, [x6]
  stnp d0, d1, [x6]
  stnp d0, d1, [x6]
  stnp d0, d1, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5189

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 37 | 3d | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | dcache store miss (c0) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e5 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602064151231100000630550148414971184118414662516309610082791800001008000080000500191008064837904148741513415122142532147116010020080000800002001600001600004151341512118020110099100100800008000010080000080000882080000000800008000005110116114150980000080000800001004151341514415134151441513
1602044151231000000002791148414981156116213202516298310082970800001008000080000500191008064837904149441514415132142632147116010020080000800002001600001600004151341512118020110099100100800008000010080000080000735080000000800008000005110116114150980000080000800001004151341514415134151441513
160204415123110004030880148414981184118413202516336110084030800001008000080000500191008064837904148841512415132142632147016010020080000800002001600001600004151341512118020110099100100800008000010080000080000735080000000800008000005110116114150980000080000800001004151441513415144151341514
1602044151331100000002996148414981184118413202516307010082883800001008000080000500191008064900404148841512415132142632147116010020080000800002001600001600004151341512118020110099100100800008000010080000080000735080000000800008000005110116114150980000080000800001004151441513415144151341514
1602044151331100000003529148414981184118413202516307010082883800001008000080000500191008064900414148741513415122142532147016010020080000800002001600001600004151341512118020110099100100800008000010080000080000735080000003800008000005110116114151080000080000800001004151341514415134151441514
1602044151331100000003446148414971184118414662516309610082791800001008000080000500191012864828914148741513415122142532147016010020080000800002001600001600004151241513118020110099100100800008000010080000080000882080000000800008000005110116114151080000080000800001004151441513415144151341514
1602044151331100000002970148414981184118413202516307010082770800001008000080000500191008064837914148841512415132142632147116010020080000800002001600001600004151341512118020110099100100800008000010080000080000735080000000800008000005110116114151080000080000800001004151341514415134151441513
160204415123110000000880148414981184118413202516080410082883800001008000080000500191008064829514148841512415132142632147116010020080000800002001600001600004151241513118020110099100100800008000010080000080000735080000000800008000005110116114151080000080000800001004151341514415134151441513
1602044151231100000002791148414981184118413202516298410082218800001008000080000500191008064318304148741513415122142532147016010020080000800002001600001600004151241513118020110099100100800008000010080000080060735080000000800008000005110116114151080000080000800001004151341514415134151441513
1602044151231100000002226148414981184118413202516346310082759800001008000080000500191008064860404148841512415172142632147116010020080000800002001600001600004151341512118020110099100100800008000010080000080000882080000000800008000005110116114150980000080000800001004151441513415144151341514

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5189

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0a | 0b | 18 | 19 | 1e | 1f | 37 | 3d | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e5 | ea | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160026415223101100100014285014841510118411841473251621431080478800001080000800005019105366480040414980415204152121459321501160010208000080000201600001600004152541523118002110910108000080000108000008000073508000000080000080000005020111622415098000008000080000104151441513415144151341514
160024415133110000000003108148414981184118413202516268210826738000010800008000050191008064932404149504152341523214573215061600102080000800002016000016000041523415231180021109101080000800001080000080000735080000000800001480000141502061626415208001408000080000104152341524415244152141523
160024415253111000000014337014841508118411841325251610741081810800001080000800005019105606482621414980415234152221459321503160010208000080000201600001600004152341523118002110910108000080000108001414800008822800140014800001480000141502031636415208001408000080000104152441524415264152341524
160024415203111200100014270514841508118411841473251633091082832800001080000800005019106086414501414950415234152321457321506160010208000080000201600001600004152341523118002110910108000080000108001514800008820800140014800001480000141502021622415208001408000080000104152241524415244152141522
16002441523311110010001455514841511118411841325251627651082828800001080000800005019106086487281414980415234152321457321501160010208000080000201600001600004152541523118002110910108000080000108001415800007361800140014800001480000141502021626415238001408000080000104152441524415214152441524
16002441521311100000001426571484149711841184146625162970108296080000108000080000501910128647971141487041512415132144932149316001020800008000020160000160000415134151211800211091010800008000010800000800007350800000008000008000000502021626415098000008000080000104151441637415144151341514
16002441513311000000001429501484150811841184132025162716108234980000108000080000501910080649324141487041512415122144832149216001020800008000020160000160000415124151311800211091010800008000010800000800008820800000008000008000000502031626415108000008000080000104151341514415134151441513
1600244151231100000000031081484149811841184132025162683108267280000108000080000501910080641092041488041513415122144832149216001020800008000020160000160000415124151311800211091010800008000010800000800008820800000008000008000000502021662415098000008000080000104151441513415144151341514
1600244151331400000000030881484149711841184146625163118108310880000108000080000501910080648655041488041513415122144832149216001020800008000020160000160000415124151311800211091010800008000010800000800008820800000008000008000000502021662415108000008000080000104151441513415144151341514
1600244151331100000000027071484149711841184146625163118108310780000108000080000501910128648119041488041513415122144832149216001020800008000020160000160000415124151311800211091010800008000010800000800008820800000008000008000000502021662415108000008000080000104151341514415134151441513