Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STCLRH

Test 1: uops

Code:

  stclrh w0, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 3.000

Issues: 3.001

Integer unit issues: 1.002

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
73005346843018101420041002200077601051130001000200020004000100220001000
73004342473002100220001000200077601051130001000200020004000100220001000
73005342953005100320021001200077601051130001000200020004000100220001000
73004342133002100220001000200077601051130001000200020004000100220001000
73004342203002100220001000200077601051130001000200020004000100220001000
73004342403002100220001000200077601051130001000200020004000100220001000
73004342163002100220001000200077601051130001000200020004000100220001000
73004342153002100220001000200077601051130001000200020004000100220001000
73004342213002100220001000200077601051130001000200020004000100220001000
73004342793002100220001000200077601051130001000200020004000100220001000

Test 2: throughput

Code:

  stclrh w0, [x6]
  add x6, x6, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0056

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef)
4020830636403782027120107202302000411586410586040108202042000430211400132001020000020100
4020430063401102010820002201042000411573410575140108202042000430206400082000720000020100
4020430056401092010720002201042000411572710573740108202042000430206400082000720000020100
4020430056401092010720002201042000411570010568940108202042000430206400082000720000020100
4020430056401092010720002201042000411572810573940108202042000430206400082000720000020100
4020430056401092010720002201042000411572210572740108202042000430206400082000720000020100
4020430056401102010820002201042000411571910572140108202042000430206400082000720000020100
4020430056401092010720002201042000411572510573340108202042000430206400082000720000020100
4020430056401092010720002201042003511317311232740172202372003530206400082000820000020100
4020430056401092010720002201042000411571210570940108202042000430206400082000820000020100

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0059

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
400263036440155201152004020074200041157411059434001820024200043002040000200082000020010
400243006640018200182000020010200041160751062804001820024200043002040000200072000020010
400243005940017200172000020010200001155761057674001020020200003002040000200072000020010
400243005940017200172000020010200001155631057464001020020200003002040000200072000020010
400243005940017200172000020010200001155631057474001020020200003002040000200072000020010
400243005940017200172000020010200001155711057574001020020200003002040000200072000020010
400243005940017200172000020010200001155691057534001020020200003002040000200072000020010
400243005940017200172000020010200001155771057754001020020200003002040000200072000020010
400243005940017200172000020010200001155611057414001020020200003002040000200072000020010
400243005940017200172000020010200001155761057694001020020200003002040000200072000020010

Test 3: throughput

Code:

  stclrh w0, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 12.6476

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
302051343604175321698200551013020050239433522649753017810228200562020040000211602000010100
302041272464128621260200261010020000239405722663383010010200200002024040077213962000010100
302041273594125321247200061010020007245823823208343011410207200132020040000211022000010100
302041264764120221202200001010020000238260022524523010010200200002020040000211012000010100
302051247294093820892200461013020000238243622523083010010200200002020040000211012000010100
302041264764120221202200001010020000238243622523083010010200200002020040000211012000010100
302041264764120221202200001010020000238243622523083010010200200002020040000211012000010100
302041264764120221202200001010020000238243622523083010010200200002020040000211012000010100
302041264764120221202200001010020051242863322944713018110230200582020040000211012000010100
302041264764120221202200001010020000238243622523083010010200200002020040000211002000010100

1000 unrolls and 10 iterations

Result (median cycles for code): 12.9761

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
300251322404157821542200361004020000245624823130833001010020200002002040000212732000010010
300241297614128321283200001001020000245624823130833001010020200002002040000212732000010010
300241297614128321283200001001020000245624823130833001010020200002006040080212942000010010
300241298004128021280200001001020000245624823130833001010020200002002040000212732000010010
300241297764128721287200001001020000245628323131443001010020200002002040000212732000010010
300241297614128321283200001001020000245624823130833001010020200002002040000212742000010010
300241297614128321283200001001020000245624823130833001010020200002002040000212732000010010
300241297554126121261200001001020050240088522644933008810048200562002040000212782000010010
300241297344125621256200001001020000245544723124413001010020200002002040000212732000010010
300241297614128321283200001001020000245624823130833001010020200002002040000212732000010010