Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STXRH

Test 1: uops

Code:

  stxrh w0, w1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 1.000

Issues: 1.000

Integer unit issues: 0.001

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed)
7148340191143728811492451000400010001000200011000
710043389410011100001000400010001000200011000
710043428410011100001000400310001000200011000
710043433810011100001000400010001000200011000
710043406510011100001000400010001000200011000
710043417410011100001000400010001000200011000
710043408810011100001000400010001000200011000
710043409210011100001000400010001000200011000
710043395710011100001000400010001000200011000
710043391810011100001000400010001000200011000

Test 2: throughput

Code:

  stxrh w0, w1, [x6]
  add x6, x6, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.1099

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
20206214572019910163100361016510005355172217082011010205100051020520010100041000010100
20204210702010310103100001010410034358092244842016810234100341020420008100031000010100
20204212112010410104100001010310004354732228032010810204100041020520010100041000010100
20204211092010310103100001010410004354732234862010810204100041020420008100031000010100
20204210902010310103100001010410004354732242622010810204100041020420008100031000010100
20204210612010310103100001010410004354732227472010810204100041020420008100031000010100
20204211032010310103100001010410034357942234822017010236100341020420008100031000010100
20204210692010310103100001010410004354732224872010810204100041020420008100031000010100
20204210702010310103100001010410004354732227632010810204100041020420008100031000010100
20204210802010310103100001010410004354732230172010810204100041020420008100031000010100

1000 unrolls and 10 iterations

Result (median cycles for code): 2.1074

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
20026213552010610070100361007310004352642231912001810024100041002020000100011000010010
20024210702001110011100001001010000352592229562001010020100001002020000100011000010010
20024210132001110011100001001010000352592221642001010020100001002020000100011000010010
20024210552001110011100001001010000352592218712001010020100001002020000100011000010010
20024210342001110011100001001010000352592223712001010020100001002020000100011000010010
20024210042001110011100001001010000352592217202001010020100001002020000100011000010010
20024210012001110011100001001010000352592220392001010020100001002020000100011000010010
20024210442001110011100001001010000352592229602001010020100001002020000100011000010010
20024210772001110011100001001010034355882240702007810054100341002020000100011000010010
20024209982001110011100001001010000352592224212001010020100001002020000100011000010010

Test 3: throughput

Code:

  stxrh w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0047

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
1020520158101191011001810010000300528855101002001000420020008110000100
1020430047101011011000010010000300529053101002001000420020008110000100
1020430055101011011000010010000300528855101002001000420020008110000100
1020430047101011011000010010000300528855101002001000420020008110000100
1020430047101011011000010010000300528855101002001000420020008110000100
1020430047101011011000010010000300528855101002001000420020008110000100
1020430047101011011000010010000300528855101002001000420020008110000100
1020430047101011011000010010000300528855101002001000420020008110000100
1020430047101011011000010010000300528855101002001000420020008110000100
1020430047101011011000010010000300528855101002001000420020008110000100

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0047

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
100252015410029110100181001000030528855100102010000202000011000010
100243004710011110100001001000030528855100102010000202000011000010
100243004710011110100001001000030528855100102010000202000011000010
100243004710011110100001001000030528855100102010000202000011000010
100243004710011110100001001000030528855100102010000202000011000010
100243004710011110100001001000030528855100102010000202000011000010
100243004710011110100001001000030528855100102010000202000011000010
100243004710011110100001001000030528855100102010000202000011000010
100243004710011110100001001000030528855100102010000202000011000010
100243004710011110100001001000030528855100102010000202000011000010