Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STXR (32-bit)

Test 1: uops

Code:

  stxr w0, w1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 1.000

Issues: 1.000

Integer unit issues: 0.001

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

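retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed)
71005 | 35665 | 1003 | 1 | 1002 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000
71004 | 34394 | 1001 | 1 | 1000 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000
71004 | 34266 | 1001 | 1 | 1000 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000
71004 | 34391 | 1001 | 1 | 1000 | 1001 | 4007 | 1001 | 1001 | 2000 | 1 | 1000
71004 | 36077 | 1001 | 1 | 1000 | 1000 | 4003 | 1000 | 1000 | 2000 | 1 | 1000
71004 | 33972 | 1001 | 1 | 1000 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000
71004 | 33953 | 1001 | 1 | 1000 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000
71004 | 34266 | 1001 | 1 | 1000 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000
71004 | 33926 | 1001 | 1 | 1000 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000
71004 | 33961 | 1001 | 1 | 1000 | 1000 | 4000 | 1000 | 1000 | 2000 | 1 | 1000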

Test 2: throughput

Code:

  stxr w0, w1, [x6]
  add x6, x6, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.1857

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
20207 | 22248 | 20247 | 10193 | 10054 | 10196 | 10005 | 35497 | 233445 | 20110 | 10205 | 10005 | 10205 | 20010 | 10004 | 10000 | 10100
20204 | 21911 | 20104 | 10104 | 10000 | 10105 | 10004 | 35497 | 232347 | 20108 | 10204 | 10004 | 10204 | 20008 | 10003 | 10000 | 10100
20204 | 21857 | 20103 | 10103 | 10000 | 10104 | 10004 | 35497 | 232474 | 20108 | 10204 | 10004 | 10204 | 20008 | 10003 | 10000 | 10100
20204 | 21854 | 20103 | 10103 | 10000 | 10104 | 10004 | 35497 | 232542 | 20108 | 10204 | 10004 | 10204 | 20008 | 10003 | 10000 | 10100
20204 | 21861 | 20103 | 10103 | 10000 | 10104 | 10004 | 35497 | 232613 | 20108 | 10204 | 10004 | 10204 | 20008 | 10003 | 10000 | 10100
20204 | 21856 | 20103 | 10103 | 10000 | 10104 | 10004 | 35497 | 232575 | 20108 | 10204 | 10004 | 10204 | 20008 | 10003 | 10000 | 10100
20204 | 21857 | 20103 | 10103 | 10000 | 10104 | 10004 | 35497 | 232742 | 20108 | 10204 | 10004 | 10204 | 20008 | 10003 | 10000 | 10100
20204 | 21866 | 20103 | 10103 | 10000 | 10104 | 10034 | 35833 | 233304 | 20168 | 10234 | 10034 | 10204 | 20008 | 10003 | 10000 | 10100
20204 | 21857 | 20103 | 10103 | 10000 | 10104 | 10004 | 35497 | 232593 | 20108 | 10204 | 10004 | 10204 | 20008 | 10003 | 10000 | 10100
20204 | 21860 | 20103 | 10103 | 10000 | 10104 | 10004 | 35497 | 232486 | 20108 | 10204 | 10004 | 10204 | 20008 | 10003 | 10000 | 10100

1000 unrolls and 10 iterations

Result (median cycles for code): 2.1966

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
20027 | 22313 | 20152 | 10098 | 10054 | 10103 | 10004 | 35248 | 233907 | 0 | 20018 | 10024 | 10004 | 0 | 10020 | 20000 | 10001 | 10000 | 10010
20024 | 21963 | 20011 | 10011 | 10000 | 10010 | 10000 | 35259 | 233626 | 0 | 20010 | 10020 | 10000 | 0 | 10020 | 20000 | 10001 | 10000 | 10010
20024 | 21961 | 20011 | 10011 | 10000 | 10010 | 10000 | 35259 | 232816 | 0 | 20010 | 10020 | 10000 | 0 | 10023 | 20008 | 10004 | 10000 | 10010
20024 | 21912 | 20011 | 10011 | 10000 | 10010 | 10000 | 35235 | 233483 | 0 | 20010 | 10020 | 10000 | 0 | 10051 | 20060 | 10032 | 10000 | 10010
20024 | 21963 | 20011 | 10011 | 10000 | 10010 | 10000 | 35235 | 234390 | 0 | 20010 | 10020 | 10000 | 0 | 10111 | 20180 | 10092 | 10000 | 10010
20024 | 21995 | 20011 | 10011 | 10000 | 10010 | 10000 | 35235 | 233289 | 0 | 20010 | 10020 | 10000 | 0 | 10020 | 20000 | 10001 | 10000 | 10010
20024 | 21845 | 20011 | 10011 | 10000 | 10010 | 10030 | 35566 | 234281 | 0 | 20071 | 10051 | 10030 | 0 | 10020 | 20000 | 10001 | 10000 | 10010
20024 | 22017 | 20011 | 10011 | 10000 | 10010 | 10000 | 35235 | 234543 | 0 | 20010 | 10020 | 10000 | 0 | 10020 | 20000 | 10001 | 10000 | 10010
20024 | 22024 | 20011 | 10011 | 10000 | 10010 | 10000 | 35235 | 234542 | 0 | 20010 | 10020 | 10000 | 0 | 10020 | 20000 | 10001 | 10000 | 10010
20024 | 22025 | 20011 | 10011 | 10000 | 10010 | 10000 | 35235 | 234546 | 0 | 20010 | 10020 | 10000 | 0 | 10020 | 20000 | 10001 | 10000 | 10010

Test 3: throughput

Code:

  stxr w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0040

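retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
10205 | 20154 | 10119 | 101 | 10018 | 100 | 10000 | 300 | 528855 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100
10204 | 30047 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 528855 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100
10204 | 30047 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 528855 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100
10204 | 30047 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 528855 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100
10204 | 30047 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 528855 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100
10204 | 30047 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 528855 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100
10204 | 30047 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 528855 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100
10204 | 30047 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 528855 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100
10204 | 30047 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 528855 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100
10204 | 30047 | 10101 | 101 | 10000 | 100 | 10000 | 300 | 528855 | 10100 | 200 | 10004 | 200 | 20008 | 1 | 10000 | 100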

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0040

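retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
10025 | 20522 | 10029 | 11 | 10018 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10004 | 20 | 20000 | 1 | 10000 | 10
10024 | 30047 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10
10024 | 30251 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 529251 | 10010 | 20 | 10000 | 20 | 20008 | 1 | 10000 | 10
10024 | 30040 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10
10024 | 30040 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10
10024 | 30040 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10
10024 | 30040 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10
10024 | 30040 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10
10024 | 30040 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10
10024 | 30040 | 10011 | 11 | 10000 | 10 | 10000 | 30 | 528855 | 10010 | 20 | 10000 | 20 | 20000 | 1 | 10000 | 10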