Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

SWPLB

Test 1: uops

Code:

  swplb w0, w1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 2.000

Issues: 2.000

Integer unit issues: 0.001

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

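retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed)
72005 | 34488 | 2005 | 1 | 2004 | 2000 | 11770 | 2000 | 2000 | 4000 | 1 | 2000
72004 | 34190 | 2001 | 1 | 2000 | 2000 | 11762 | 2000 | 2000 | 4000 | 1 | 2000
72004 | 34200 | 2001 | 1 | 2000 | 2000 | 11762 | 2000 | 2000 | 4000 | 1 | 2000
72004 | 34187 | 2001 | 1 | 2000 | 2000 | 11762 | 2000 | 2000 | 4000 | 1 | 2000
72004 | 34216 | 2001 | 1 | 2000 | 2000 | 11762 | 2000 | 2000 | 4000 | 1 | 2000
72004 | 34200 | 2001 | 1 | 2000 | 2000 | 11762 | 2000 | 2000 | 4000 | 1 | 2000
72004 | 34517 | 2001 | 1 | 2000 | 2000 | 11770 | 2000 | 2000 | 4000 | 1 | 2000
72004 | 34587 | 2001 | 1 | 2000 | 2000 | 11760 | 2000 | 2000 | 4000 | 1 | 2000
72004 | 34544 | 2001 | 1 | 2000 | 2000 | 11760 | 2000 | 2000 | 4000 | 1 | 2000
72004 | 34729 | 2001 | 1 | 2000 | 2000 | 11760 | 2000 | 2000 | 4000 | 1 | 2000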

Test 2: throughput

Code:

  swplb w0, w1, [x6]
  add x6, x6, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 6.0057

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
30206 | 60630 | 30181 | 10128 | 20053 | 10129 | 20004 | 32894 | 132726 | 30106 | 10202 | 20004 | 10202 | 40008 | 10001 | 20000 | 10100
30205 | 60110 | 30163 | 10123 | 20040 | 10123 | 20002 | 32891 | 132731 | 30103 | 10201 | 20003 | 10201 | 40005 | 10001 | 20000 | 10100
30204 | 60057 | 30101 | 10101 | 20000 | 10101 | 20002 | 32891 | 132700 | 30103 | 10201 | 20003 | 10201 | 40005 | 10001 | 20000 | 10100
30204 | 60057 | 30101 | 10101 | 20000 | 10101 | 20002 | 32891 | 132697 | 30103 | 10201 | 20003 | 10201 | 40005 | 10001 | 20000 | 10100
30204 | 60057 | 30101 | 10101 | 20000 | 10101 | 20002 | 32891 | 132703 | 30103 | 10201 | 20003 | 10201 | 40005 | 10001 | 20000 | 10100
30204 | 60057 | 30101 | 10101 | 20000 | 10101 | 20002 | 32891 | 132704 | 30103 | 10201 | 20003 | 10201 | 40005 | 10001 | 20000 | 10100
30204 | 60057 | 30101 | 10101 | 20000 | 10101 | 20002 | 32891 | 132686 | 30103 | 10201 | 20003 | 10201 | 40005 | 10001 | 20000 | 10100
30204 | 60057 | 30101 | 10101 | 20000 | 10101 | 20002 | 32891 | 132700 | 30103 | 10201 | 20003 | 10201 | 40005 | 10001 | 20000 | 10100
30204 | 60057 | 30101 | 10101 | 20000 | 10101 | 20002 | 32900 | 133510 | 30103 | 10201 | 20003 | 10202 | 40008 | 10001 | 20000 | 10100
30204 | 60057 | 30103 | 10101 | 20002 | 10102 | 20002 | 32891 | 132697 | 30103 | 10201 | 20003 | 10201 | 40005 | 10001 | 20000 | 10100

1000 unrolls and 10 iterations

Result (median cycles for code): 6.0061

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
30024 | 60061 | 30011 | 10011 | 20000 | 10011 | 20000 | 32612 | 133005 | 30010 | 10020 | 20000 | 10020 | 40000 | 10001 | 20000 | 10010
30024 | 60061 | 30011 | 10011 | 20000 | 10010 | 20000 | 32581 | 132843 | 30010 | 10020 | 20000 | 10020 | 40000 | 10001 | 20000 | 10010
30024 | 60054 | 30011 | 10011 | 20000 | 10010 | 20000 | 32581 | 132843 | 30010 | 10020 | 20000 | 10020 | 40000 | 10001 | 20000 | 10010
30024 | 60054 | 30011 | 10011 | 20000 | 10010 | 20000 | 32581 | 132864 | 30010 | 10020 | 20000 | 10020 | 40000 | 10001 | 20000 | 10010
30024 | 60054 | 30011 | 10011 | 20000 | 10010 | 20000 | 32581 | 132849 | 30010 | 10020 | 20000 | 10020 | 40000 | 10001 | 20000 | 10010
30024 | 60054 | 30011 | 10011 | 20000 | 10010 | 20000 | 32581 | 132856 | 30010 | 10020 | 20000 | 10020 | 40000 | 10001 | 20000 | 10010
30024 | 60054 | 30011 | 10011 | 20000 | 10010 | 20000 | 32581 | 132829 | 30010 | 10020 | 20000 | 10020 | 40000 | 10001 | 20000 | 10010
30024 | 60054 | 30011 | 10011 | 20000 | 10010 | 20000 | 32581 | 132868 | 30010 | 10020 | 20000 | 10020 | 40000 | 10001 | 20000 | 10010
30024 | 60054 | 30011 | 10011 | 20000 | 10010 | 20000 | 32581 | 132869 | 30010 | 10020 | 20000 | 10020 | 40000 | 10001 | 20000 | 10010
30024 | 60054 | 30011 | 10011 | 20000 | 10010 | 20000 | 32581 | 132845 | 30010 | 10020 | 20000 | 10020 | 40000 | 10001 | 20000 | 10010

Test 3: throughput

Code:

  swplb w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 9.8142

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
20205100171209701120208581110205265221755618206272022164020243536120000100
2020498186201141010200131000211365001759220212362002345620040640120000100
2020498381205991010204981000207045001754426208042002197620041056120000100
2020499154207241270205971260207616021725167209002782238220040872120000100
2020496954201011010200001000205895001741212206892002168220041144120000100
2020497194201931010200921000200425321739911201432022014020041424120000100
2020497296207501480206021470201235031724288202242022035420040732120000100
2020495944204371080203291070200475131733813201472022015420041216120000100
2020498878205431010204421000203055001766896204052002098020045776120000100
20204100050214481010213471000213825001785376214822002444820044140120000100

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0363

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
200261007412110813210951221832561774538218452825364244806012000010
200241001712112612211141121674551780101216862424986225096012000010
200241007762146412214521121282451776644212922424042264948012000010
200241005772144213214291221751651782947217663025406305025612000010
200241005842157415215591421566491778436215772424898224960012000010
200241004852152312215111121454571775796214662424292224432412000010
200241005272143114214171322032571780364220442426108224883212000010
200241004762141812214061121777551784720217892825496204725212000010
200241008052154811215371021753481777992217632225256264838012000010
200241003342124813212351221691481779211217012225306224955212000010