Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

SWPL (32-bit)

Test 1: uops

Code:

  swpl w0, w1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 2.000

Issues: 2.000

Integer unit issues: 0.001

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed)
720063445420111201020001177020002000400012000
720043421020011200020001178020002000400012000
720043494420011200020001176920002000400012000
720043425620011200020001177020002000400012000
720043411420011200020001177020002000400012000
720043410920011200020001177020002000400012000
720043414220011200020001177020002000400012000
720043414020011200020001177020002000400012000
720043411520011200020001177020002000400012000
720043410920011200020001177020002000400012000

Test 2: throughput

Code:

  swpl w0, w1, [x6]
  add x6, x6, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 6.0061

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
30207603623019010134200561013520004328911329043010610202200041020240008100012000010100
30204600613010110101200001010120002328561328623010310201200031020140005100012000010100
30204600613010110101200001010120002328871329663010310201200031020140005100012000010100
30204600613010110101200001010120002328871329673010310201200031020140005100012000010100
30204600613010110101200001010120002328871329533010310201200031020140005100012000010100
30204600613010110101200001010120002328871329583010310201200031020140005100012000010100
30204600613010110101200001010120045329681334193016610221200451020140005100012000010100
30204600543010110101200001010120002328871329353010310201200031020140005100012000010100
30204600543010110101200001010120002328871329433010310201200031020140005100012000010100
30204600613010110101200001010120002328561327793010310201200031020140005100012000010100

1000 unrolls and 10 iterations

Result (median cycles for code): 6.0061

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
30027603663010110044200571004420002326241332843001310021200031002040000100012000010010
30024600643001110011200001001020000325951331563001010020200001002040000100012000010010
30024600573001110011200001001020000325951331853001010020200001002040000100012000010010
30024600573001110011200001001020000325961331973001010020200001002040000100012000010010
30024600573001110011200001001020000325971331563001010020200001002140005100012000010010
30024600573001310011200021001220000326411335173001010020200001002040000100012000010010
30024600573001110011200001001020000326431335023001010020200001002040000100012000010010
30024600573001110011200001001020000325991332343001010020200001002040000100012000010010
30024600573001110011200001001020000326211332683001010020200001002040000100012000010010
30024600643001110011200001001020000325991330833001010020200001002040000100012000010010

Test 3: throughput

Code:

  swpl w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 9.7685

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
202051006782091010320807102213895111773322214922102438626049184120000100
20204988352073710320634102202884991751270203882002086220041636120000100
20204974572011010120009100203175091726866204172002075020040656120000100
20204991972065610220554101208695001729767209692002278020243424220000100
20204980152014410120043100203105001742832204102002091221440908120000100
20204963982020110120100100200405001714335201402002010820040504120000100
20205971812020510320102102202975001764579203972002099620041092120000100
20204976242028810120187100200654181718954201662102019620041724120000100
20204972832033410120233100201484061737006202482002045420040088120000100
20204990622089410120793100207223601761442208222002232020048804120000100

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0461

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
200251001752020712201951121787491774480217972025174244972012000010
200251007682135712213451121887511781135218982425658205064812000010
200241004612125212212401120958521773318209692223042264866812000010
200241004902140713213941221345531773621213572423976205106012000010
200241005752141214213981321599501775651216092024754245008812000010
200241004332136713213541221608521776805216192224816244918412000010
200241004802141812214061121573491775950215832024878265121212000010
200241005572133013213171221791471784384218012025664245008412000010
200241004652143815214231421850501782189218612225674244972812000010
200241002822085711208461020976501773723209862023066305114012000010