Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STCLRL (64-bit)

Test 1: uops

Code:

  stclrl x0, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 70 nops): 3.000

Issues: 3.001

Integer unit issues: 1.002

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
73005345923019101520041002200077621051330001000200020004000100320001000
73004342293003100320001000200077621051330001000200020004000100220001000
73004341873002100220001000200077621051330001000200020004000100220001000
73004341893002100220001000200077621051330001000200020004000100220001000
73004350993002100220001000200077621051330001000200020004000100220001000
73004342823002100220001000200077621051330001000200020004000100220001000
73004344173002100220001000200077621051330001000200020004000100220001000
73004342953002100220001000200077621051330001000200020004000100220001000
73004343393002100220001000200077691052730001000200020004000100220001000
73004350313002100220001000200077621051330001000200020004000100220001000

Test 2: throughput

Code:

  stclrl x0, [x6]
  add x6, x6, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 6.0058

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
40209606224046320372200912020120005115941964634011020205200053020340004200072000020100
40205601104017220143200292013420002115941961344010420202200023025440070200432000020100
40204600784010720107200002010220002115899960674010420202200023020340004200032000020100
40204600554010320103200002010220002115907960914010420202200023020340004200032000020100
40204600554010320103200002010220002115917961094010420202200023020340004200032000020100
40204600554010320103200002010220002115913961054010420202200023020340004200032000020100
40204600554010320103200002010220036116176966624017220236200363020340004200062000020100
40204600874011720117200002010220005116016965254011020205200053020840009200192000020100
40204600584011720117200002010220002116151963184010420202200023020340004200172000020100
40204600584011720117200002010220002116151963164010420202200023020340004200162000020100

1000 unrolls and 10 iterations

Result (median cycles for code): 6.0055

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
40029606244036420277200872010720002115848961524001420022200023002340004200062000020010
40024600554001320013200002001020000115717960514001020020200003002040000200032000020010
40024600554001320013200002001020000115711960344001020020200003002040000200032000020010
40025601034008320051200322004420000115739960904001020020200003002040000200032000020010
40024600554001320013200002001020000115713960404001020020200003002040000200032000020010
40024600554001320013200002001020000115714960454001020020200003002040000200032000020010
40024600554001320013200002001020000115727960694001020020200003002040000200032000020010
40024600554001320013200002001020000115711960394001020020200003002040000200032000020010
40024600554001320013200002001020000115727960704001020020200003002040000200032000020010
40025601034008620052200342004620000115676959784001020020200003002040000200082000020010

Test 3: throughput

Code:

  stclrl x0, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.7704

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef)
30205111788442002125022950131602234120222671957274345041231823877228824481202043820000010100
30204109567421262067121455115222076719706631922152315161085021290217264280001971120000010100
30204106879395951931520280101492060819622601912382312411073321042210524154601983620000010100
30204107222395801935220228101782002419277821901230301431021920036209404140401955120000010100
30204107704409341997120963108812040319494191903631308481054520670206004074201933420000010100
30204106986400841951120573106212034219334371893900307231048120567215844261801967220000010100
30204106382398061934020466103012050419432331898973309851058120743207144095601940720000010100
30204107553405011987820623106562045519371841898715309291057420698217204287001982420000010100
30204108106404521992220530105362035019734191917407307601054420590208564095701952320000010100
30204105867396491920820441103262067219425391895899314131084521265211504184101959320000010100

1000 unrolls and 10 iterations

Result (median cycles for code): 11.4154

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
300251146084630322649236541419024791208160120046693879314015277142895455824222332000010010
300241139394613422379237551371024366207918520041483821013860274192707353005224242000010010
300241136004611722179239381405424544207642720010133853514004275882824054878222412000010010
300251147784652422348241761418624820208604520100713922114414281152778054067221522000010010
300241139894599622063239331396124553206893019951373857214033276712814154286223962000010010
300241146344670722392243151427824550208041820043683861114075276012768253517222582000010010
300241139534611422351237631385624892207647820009103934614468282702781953825221792000010010
300241139934647622291241851423324782207461620000573906814300280612942656698221232000010010
300241145264694422783241611391523270210113320225623604712787252912828355116222992000010010
300241139344646922557239121391724734207830320024653897614257279932753853330223902000010010