Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STNP (S)

Test 1: uops

Code:

  stnp s0, s1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 7 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 07 | 08 | 0a | 0b | 18 | 19 | 1e | 1f | 3a | 3d | 3f | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | 74 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a1 | a2 | a6 | a7 | a8 | a9 | ac | af | dcache store miss (c0) | c2 | cf | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | e5 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
90061465130000121320051151111531320001003107610761038120158000113191390116611324200010001038200020001189137411800110001000100001000010381961039100045150114798116310381000100011671167151211902040
90041522150100006000411511313252076107610761000100011987830411145162611900332920961038103920462000116615352180011000100010000103901000000103810002547093289116310381000100011921866146215061466
900411969000010132176041484139595200010001000100010001087580001129614261166013369215210381000207620001166140321800110001000100001076010000022910001000051509161110122910381000100011671167116711901167
9004150512001000000411511313252076100010001039100011975860811145153011660325207610001000200020001429116611800110001000100001000210001061038100005150923119116310001000100011671167116711671192
90041445900001098814115114112520001000100010381000108758000111451166116603261200010001000207820001539116611800110001000104001000210380422010381000454801038119127310001000100011901167116711671440
90041166130100001710041151131325200010001000100010001087580001114511661166032420001000100020002000116611661180011000100010000100001000100100010000515081688116310001000100011671167116711671167
90041166900001000041151131125200010001000100010001087583041114511661166032420001000100020002000116611661180011000100010382100001000000103810000515082589125110001000100011671167155818801595
90041938900000013200411511313252078100010391000106510875861611170116611890324200010381039200020001166148211800110001000100001000010380021100010000575094689118610641000100014581167116715861577
900411661101010126400511741213393200010001000100010001087583120114511661166032420001000100020002000116611661180011000100010000100001038003100010000515091689133210101000100011671191116711671908
9004149516100000900416091511252000100010381038100011976831201234116614970132420761038103920002000116615122180011000100010420103721038003100010000515071688116310001000100014911560156011901167

Test 2: throughput

Count: 8

Code:

  stnp s0, s1, [x6]
  stnp s0, s1, [x6]
  stnp s0, s1, [x6]
  stnp s0, s1, [x6]
  stnp s0, s1, [x6]
  stnp s0, s1, [x6]
  stnp s0, s1, [x6]
  stnp s0, s1, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5189

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 37 | 3d | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e5 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160206414923110000002811146414781168116814462516296110082800800001008000080000500190916864860204146742067414932140632145016010020080000800002001600001600004149341492118020110099100100800008000010080000158000072508000000080000080000005110116114149080000080000800001004149341510415054149341494
160204414933100000002862146414771168116813022516296210082862800001008000080000500190912064851004146841493416302140632145116010020080000800002001600001600004149341492118020110099100100800008000010080000148000072508000000080000080000005110116114148980000080000800001004149441501415064150741493
160204414923110000002861146414771168116813022516296110082861800001008000080000500190912064851414146741492414932140632145116010020080000800002001600001600004149341492118020110099100100800008000010080000148000087008000000080000080000005110116114148980000080000800001004149441504415044149441619
160204414933110000002818146414781168116814502516337110082259800001008000080000500190916864860204146741492414932140632145116010020080000800002001600001600004149341492118020110099100100800008000010080000148000087008000000080000080000005110116114148980000080000800001004149441504415064150741494
16020441493311000000286214641477116811681446154916293110083392800001008000080000500190916864860214146741492414932140632145116010020080000800002001600001600004149341492118020110099100100800008000010080005148000072508000000080000080000005110116114149080000080000800001004149341508415064149641493
160204414923110000002832146414781168116813022516352110082862800001008000080000500190912064851404146841492414932140632145116010020080000800002001600001600004149341492118020110099100100800008000010080000148000072508000000080000080000005110116114149080000080000800001004149341504415034149441493
160204414923110000002832146414781168116814462516293110082832800001008000080000500190916864860204146741492414932140632145116010020080000800002001600001600004149341492118020110099100100800008000010080000148000087008000040080000080000005110116114148980000080000800001004149341512415044149341494
160204414933100000002832146414781168116813032516290010083421800001008000080000500190912064851404146841493414922140532145016010020080000800002001600001600004149241493118020110099100100800008000010080000148000072508000000080000080000005110116114149080000080000800001004149341504415064149641493
16020441492310000000957146414771168116813042516296210082767800001008000080000500190912064845704146841493414922140532145016010020080000800002001600001600004149241493118020110099100100800008000010080000148000087008000010080000080000005110116114149080000080000800001004149341503415234149341494
160204414933210000002832146414781168116813032516296210082835800001008000080000500190916864859704146741492414932140632145116010020080000800002001600001600004149341492118020110099100100800008000010080000148000087008000050080000080000005110116114149080000080000800001004149341506415044149541494

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5189

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 37 | 3d | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | dcache store miss (c0) | cf | d0 | d5 | d6 | d8 | db | dd | inst fetch restart (de) | e0 | e5 | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002641492317000114433261464147811641168185725162673108290380000108000080000501909168650466004159542288414932142915214721600102080000801402016028816000042884414932180021109101080000800001080014480012870080017000800008000050340434256034414908000008000080000104149541494414954235441869
160024414923110000029031464147711681168130225162910108290080000108000080064501909120648379004185641493422882188032147216001020801368000020160000160256414924259811800211091010800008000010800000800338700800000008000080000502004160064414898000008000080000104149542536414934149441493
160024417673111100012971494230811621168130225162924108278280000108013280000501909120650352004146741492414932142932303816001020800008006820160000160000414934149211800211091010800008000010800000800007250800000008000080000502007160034414908000008000080000104149542569414934149442447
160024414933200010031951514264811681168144625162804108279380000108000080000501909168647990004146741492414932142932147316001020800008000020160000160000414934149211800211091010800008000010800000800007250800000008000080000502004160034414908000008000080000104149541494414934149441493
160024414923110000033301464147811431168130225162261108248880000108000080000501909120648383004146841493414922175232310116001020800008000020160000160000414934149311800211091010800008000010800000800008700800000008000080000502004160044414898000008000080000104149441493414964149344466
160024414933111000029031464147711681168145025162803108279480000108000080000501909168648711104146741492414932142932147316001020800008000020160000160000414934149211800211091010800008000010800000800008700800000008000080000502004160074414908000008000080000104149441493414944149341494
160024414933110000027931464147811681168144625162803108279380000108000080000501909168648711104146741492414932142932147316001020800008000020160000160000414934149211800211091010800008000010800000800008700800000008000080000502004160043414898000008000080000104149341493414944149341494
16002441493311000001604146434041168116819526331626731081405800001080000800005019160816483670041468414934149221428321473160010208000080000201600001600004149341492118002110910108000080000108000008000087028000000158000080000502007160044414898000008000080000104149441495414944149341494
160024414933110000621131504149911841184146625162337108276180013108000080000501910128648076004148741512415132144932149316001020800008000020160000160000415134151211800211091010800008000010800000800007350800000008000080000502007160076415108000008000080000104151441515415144151341514
160024415133110000021221484149711841184146625162746108323680000108000080000501910128646340004148741512415132144932149316001020800008000020160000160000415134151211800211091010800008000010800000800007350800000008000080000502047160076415108000008000080000104151541514415134151441513