Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

SWP (32-bit)

Test 1: uops

Code:

  swp w0, w1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 2.000

Issues: 2.000

Integer unit issues: 0.001

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed)
720053441720051200420001176720002000400012000
720053435120031200220001176020002000400012000
720043441120011200020001176020002000400012000
720043469820011200020001177020002000400012000
720043453120011200020001176220002000400012000
720043413120011200020001176020002000400012000
720043415520011200020001176020002000400012000
720043413020011200020001176020002000400412000
720043413020011200020001176020002000400012000
720043413120011200020001176020002000400012000

Test 2: throughput

Code:

  swp w0, w1, [x6]
  add x6, x6, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0062

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
30208307983027210169201031016920006328941262503010910203200071020340013100032000010100
30204300623010510102200031010220005328911263963010710202200061020240012100022000010100
30204300623010510102200031010220005328911262563010710202200061020240012100022000010100
30204300623010510102200031010220005328911263883010710202200061020240012100022000010100
30204300623010510102200031010220005328911262783010710202200061020240012100022000010100
30204300623010510102200031010220005328881261773010710202200061020240012100022000010100
30204300623010510102200031010220005328881262773010710202200061020240012100022000010100
30204300623010510102200031010220005328881262833010710202200061020240012100022000010100
30204300623010510102200031010220005328881263113010710202200061020240012100022000010100
30204300623010510102200031010220005328881263113010710202200061020240012100022000010100

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0058

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
30029307153023710097201401009720005326251273883001710022200061002040000100012000010010
30024300583001110011200001001020000325921271613001010020200001002040000100012000010010
30024300583001110011200001001020000326061275413001010020200001002040000100012000010010
30024300583001110011200001001020000325921273473001010020200001002040000100012000010010
30024300583001110011200001001020000325921272513001010020200001002040000100012000010010
30024300583001110011200001001020000325921274813001010020200001002040000100012000010010
30024300583001110011200001001020000325921275113001010020200001002040000100012000010010
30024300583001110011200001001020000325921272673001010020200001004340096100232000010010
30024300583001110011200001001020000326161275613001010020200001002040000100012000010010
30024300583001110011200001001020000326141273853001010020200001002040000100012000010010

Test 3: throughput

Code:

  swp w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.4799

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
202051013332013610120035100200005001797223201002002000420040008120000100
202041013262010210120001100200665001779970201662002010020040008120000100
202041000802010110120000100200005001779129201002002000420040008120000100
202041000802010110120000100200005001779129201002002000420040008120000100
202041000802010110120000100200005001779129201002002000420040008120000100
202041000802010110120000100200005001779129201002002000420040008120000100
202041000862010110120000100200005001779129201002002000420040008120000100
202041000802010110120000100200005001779129201002002000420040008120000100
202041000802010110120000100201845001801698202842002043020040828120000100
202041046782017910120078100200195001859121201192002005220040160120000100

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0217

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
200261006122013111201201020000501781143200102020004204000012000010
200241001922001111200001020000501781143200102020000204000012000010
200251001362005811200471020000501782114200102020000204000012000010
200241001692001111200001020000501779003200102020000204016012000010
200241002662002011200091020016501781461200262020048204002412000010
200241003102001411200031020004501783248200142020012204002412000010
200241003102001411200031020029491783263200392020078204015612000010
200241004252002611200151020032501789034200422020088204038812000010
200241004972006711200561020049501785666200592020128204014012000010
200241003652004911200381020017491790960200272020046204014012000010