Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

SWPLH

Test 1: uops

Code:

  swplh w0, w1, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 2.000

Issues: 2.000

Integer unit issues: 0.001

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef)
720053456320051200420001176520002000040001200000
720043433920011200020001176620002000297424615321181217
720043418520011200020001176220002000040041200000
720043418820011200020001176220002000040001200000
720043441120011200020021178920022002040001200000
720043436020011200020001176020002000040001200000
720043452320011200020001176020002000040001200000
720043429320011200020001176020002000040001200000
720043447120011200020001176020002000040001200000
720043420420011200020001176020002000040001200000

Test 2: throughput

Code:

  swplh w0, w1, [x6]
  add x6, x6, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 6.0064

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef)
302066061930181101282005310129200043288813271630106102022000410202400081000120000010100
302046006430101101012000010101200043289413272430106102022000410222400901002220000010100
302046006430103101012000210102200023289513275730103102012000310201400051000120000010100
302046006430101101012000010101200023289313270630103102012000310201400051000120000010100
302046006430101101012000010101200023289313272730103102012000310201400051000120000010100
302046006430101101012000010101200023289313274930103102012000310201400051000120000010100
302046006430101101012000010101200023289313274430103102012000310201400051000120000010100
302046006430101101012000010101200023289113268530103102012000310201400051000120000010100
302046006430101101012000010101200023289313275130103102012000310223400891002320000010100
302046006430101101012000010101200023289313274030103102012000310201400051000120000010100

1000 unrolls and 10 iterations

Result (median cycles for code): 6.0057

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
30026602603007110035200361003520002326151329583001310021200031004140086100212000010010
30024600613001110011200001001020000325831328633001010020200001002040000100012000010010
30024600613001110011200001001020000325811328123001010020200001002040000100012000010010
30024600583001110011200001001020000326141329963001010020200001002040000100012000010010
30024600543001110011200001001020000325811328183001010020200001004140089100222000010010
30024600543001110011200001001020000325831328933001010020200001002040000100012000010010
30024600543001110011200001001020000326121329523001010020200001002040000100012000010010
30024600543001110011200001001020000325831328593001010020200001002040000100012000010010
30024600543001110011200001001020000325831328483001010020200001002040000100012000010010
30024600543001110011200001001020002326481334453001310021200031002240008100012000010010

Test 3: throughput

Code:

  swplh w0, w1, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 9.8428

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
202051002472110510920996108205673791759585206672082184220643508120000100
20204995522095110620845105200893981732332201892002027820040232120000100
20204993132080710120706100208413951760597209412002273020043296120000100
20204993662082110120720100201833281747920202832002062020043312120000100
20204984282049610120395100201133321763266202132002037820045904120000100
20204981442018910120088100206375631727853207532902202426247440120000100
20204993912094111820823117202365121728515203482582073620043856120000100
20204983182050410120403100203193961729837204192002104420041384120000100
20204981162016810120067100204075001752948205072002128629242588120000100
20204965932038210120281100201945001726916202942002059620440276120000100

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0391

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef)
2002510071921531112152010215645217746752157522247722451868120000010
2002510070521624122161211214824917815982149220246222647476120000010
2002410002521278132126512217565717819002176826255362448084120000010
2002410057421520122150811218965017804492190722258042050744120000010
2002410044121405152139014213785617766762139126241382648372120000010
2002410049121249152123414217865417831852179824255302851112120000010
2002410039421329172131216219396317782912195328256942049664120000010
2002410042121453132144012216455217802522165622250302648768120000010
2002410018721126132111312216704817747212168020251922247036120000010
2002410040221324142131013210515617753232106134233342451108120000010