Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STXRB

Test 1: uops

Code:

  stxrb w0, w1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 1.000

Issues: 1.000

Integer unit issues: 0.001

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

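| retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 71005 | 60292 | 1003 | 1 | 1002 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000 |
| 71004 | 34389 | 1001 | 1 | 1000 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000 |
| 71004 | 33895 | 1001 | 1 | 1000 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000 |
| 71004 | 34554 | 1001 | 1 | 1000 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000 |
| 71004 | 34227 | 1001 | 1 | 1000 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000 |
| 71004 | 33900 | 1001 | 1 | 1000 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000 |
| 71004 | 33901 | 1001 | 1 | 1000 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000 |
| 71004 | 33897 | 1001 | 1 | 1000 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000 |
| 71004 | 33897 | 1001 | 1 | 1000 | 1000 | 4000 | 1000 | 1000 | 2002 | 1 | 1000 |
| 71004 | 33904 | 1001 | 1 | 1000 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000 |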

Test 2: throughput

Code:

  stxrb w0, w1, [x6]
  add x6, x6, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.1167

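| retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 20206 | 21342 | 20200 | 10164 | 10036 | 10165 | 10005 | 35505 | 221847 | 20110 | 10205 | 10005 | 10205 | 20010 | 10004 | 10000 | 10100 |
| 20204 | 21120 | 20103 | 10103 | 10000 | 10104 | 10004 | 35497 | 224148 | 20108 | 10204 | 10004 | 10204 | 20008 | 10003 | 10000 | 10100 |
| 20204 | 21167 | 20103 | 10103 | 10000 | 10104 | 10004 | 35497 | 223701 | 20108 | 10204 | 10004 | 10204 | 20008 | 10003 | 10000 | 10100 |
| 20204 | 21119 | 20103 | 10103 | 10000 | 10104 | 10004 | 35497 | 224910 | 20108 | 10204 | 10004 | 10204 | 20008 | 10003 | 10000 | 10100 |
| 20204 | 21171 | 20103 | 10103 | 10000 | 10104 | 10004 | 35497 | 224429 | 20108 | 10204 | 10004 | 10204 | 20008 | 10003 | 10000 | 10100 |
| 20204 | 21126 | 20103 | 10103 | 10000 | 10104 | 10004 | 35497 | 223894 | 20108 | 10204 | 10004 | 10204 | 20008 | 10003 | 10000 | 10100 |
| 20204 | 21183 | 20103 | 10103 | 10000 | 10104 | 10004 | 35497 | 223219 | 20108 | 10204 | 10004 | 10204 | 20008 | 10003 | 10000 | 10100 |
| 20204 | 21015 | 20103 | 10103 | 10000 | 10104 | 10004 | 35497 | 225349 | 20108 | 10204 | 10004 | 10204 | 20008 | 10003 | 10000 | 10100 |
| 20204 | 21092 | 20103 | 10103 | 10000 | 10104 | 10004 | 35497 | 222308 | 20108 | 10204 | 10004 | 10204 | 20008 | 10003 | 10000 | 10100 |
| 20204 | 21130 | 20103 | 10103 | 10000 | 10104 | 10034 | 35833 | 224829 | 20168 | 10234 | 10034 | 10204 | 20008 | 10003 | 10000 | 10100 |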

1000 unrolls and 10 iterations

Result (median cycles for code): 2.1066

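| retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 20026 | 21410 | 20106 | 10070 | 10036 | 10073 | 10004 | 35248 | 222651 | 20018 | 10024 | 10004 | 10020 | 20000 | 10001 | 10000 | 10010 |
| 20024 | 21109 | 20011 | 10011 | 10000 | 10010 | 10000 | 35259 | 222326 | 20010 | 10020 | 10000 | 10054 | 20068 | 10035 | 10000 | 10010 |
| 20024 | 21648 | 20204 | 10132 | 10072 | 10131 | 10000 | 35259 | 222814 | 20010 | 10020 | 10000 | 10020 | 20000 | 10001 | 10000 | 10010 |
| 20024 | 21053 | 20011 | 10011 | 10000 | 10010 | 10000 | 35259 | 222423 | 20010 | 10020 | 10000 | 10020 | 20000 | 10001 | 10000 | 10010 |
| 20024 | 21053 | 20011 | 10011 | 10000 | 10010 | 10000 | 35259 | 222277 | 20010 | 10020 | 10000 | 10020 | 20000 | 10001 | 10000 | 10010 |
| 20024 | 21058 | 20011 | 10011 | 10000 | 10010 | 10000 | 35259 | 221982 | 20010 | 10020 | 10000 | 10020 | 20000 | 10001 | 10000 | 10010 |
| 20024 | 21048 | 20011 | 10011 | 10000 | 10010 | 10000 | 35259 | 221807 | 20010 | 10020 | 10000 | 10020 | 20000 | 10001 | 10000 | 10010 |
| 20024 | 21017 | 20011 | 10011 | 10000 | 10010 | 10000 | 35259 | 221536 | 20010 | 10020 | 10000 | 10020 | 20000 | 10001 | 10000 | 10010 |
| 20024 | 21036 | 20011 | 10011 | 10000 | 10010 | 10000 | 35259 | 222146 | 20010 | 10020 | 10000 | 10020 | 20000 | 10001 | 10000 | 10010 |
| 20024 | 21006 | 20011 | 10011 | 10000 | 10010 | 10000 | 35259 | 221888 | 20010 | 10020 | 10000 | 10020 | 20000 | 10001 | 10000 | 10010 |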

Test 3: throughput

Code:

  stxrb w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0047

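| retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 10205 | 20159 | 10119 | 101 | 10018 | 100 | 10000 | 300 | 528855 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100 |
| 10205 | 30087 | 10121 | 103 | 10018 | 102 | 10000 | 300 | 528855 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100 |
| 10204 | 30047 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 528855 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100 |
| 10204 | 30054 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 529071 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100 |
| 10204 | 30076 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 528855 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100 |
| 10204 | 30050 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 528891 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100 |
| 10204 | 30066 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 528927 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100 |
| 10204 | 30047 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 528909 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100 |
| 10204 | 30047 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 528855 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100 |
| 10204 | 30047 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 528855 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100 |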

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0047

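| retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 10025 | 20162 | 10029 | 11 | 10018 | 10 | 10000 | 30 | 528963 | 10010 | 20 | 10004 | 20 | 20000 | 1 | 10000 | 10 |
| 10024 | 30047 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528945 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10 |
| 10024 | 30047 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10 |
| 10024 | 30047 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10 |
| 10024 | 30047 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10 |
| 10024 | 30047 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10 |
| 10024 | 30047 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10 |
| 10024 | 30047 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10 |
| 10024 | 30047 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10 |
| 10024 | 30047 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10 |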