Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STURH

Test 1: uops

Code:

  sturh w0, [x6, #1]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005553411100181015391616625100010001000230525545633673412100010002000552554111001100010001015154400101600191002164414073116115491000555554552553564
1004564411010171015391616525100010001000234385545523673421100010002000554552111001100010001015144300101600171002164414173116115511000564555555555555
1004554410110191015481616125100010001000229565525543643412100010002000551554111001100010001014154401101600181002164414073116115501000555554552553564
1004563411010171015381616225100010001000230285545633673421100010002000554551111001100010001015164401101601171002164414173116115511000555553553565565
1004554411010221015391615025100010001000230045515543663412100010002000553554111001100010001015154401101601171002164214173116115511000555555555554553
1004552411110191015391616525100010001000230045525543653412100010002000552554111001100010001014154402101600181002164414173116115511000555555554553553
1004552411100181015391616625100010001000234385545523673409100010002000554554111001100010001016154401101600171002164414073116115501000565555555555555
1004554410110190015391616325100010001000230285525543653412100010002000552554111001100010001014144400101601181002164414173116115511000553565555555555
1004554411110171015391616225100010001000230045545523673410100010002000554552111001100010001015144400101602161002164414073116115511000553564556555555
1004554411000181015361616125100010001000230045545633673421100010002000554552111001100010001014144302101600161002164414173116115501000548555555553553

Test 2: throughput

Count: 8

Code:

  sturh w0, [x6, #1]
  sturh w0, [x6, #1]
  sturh w0, [x6, #1]
  sturh w0, [x6, #1]
  sturh w0, [x6, #1]
  sturh w0, [x6, #1]
  sturh w0, [x6, #1]
  sturh w0, [x6, #1]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205400423119300400271616025801001008000010080007500183945504936962400404004229961729994801072008001620016003240040319951180201100991008000010080000100800003428280002058000223411151180160040048800001004004340043400434004340051
8020440042300000040025160025801001008000010080007500183945504936970400404004029961730001801072008001620016003240042319931180201100991008000010080000100800000008000015800020011151180160040039800001004005140041400514004340043
802044004230018610400271616025801001008000010080000500183942414936970400424004029955330007801002008000020016000040042320021180201100991008000010080000100800000008000208800022000051101161140037800001004004340041400434005040041
8020440050300030040034161602580100100800001008000050018394240493696940042400422995533000880100200800002001600004004231995118020110099100800001008000010080000340080002028000003400051101161140037800001004004140043400434004340051
80204400423000310400270160408010010080000100800005001839424049369624004240042299533300088010020080000200160000400403199511802011009910080000100800001008000034008000200800002000051101161140039800001004004140043400514004140052
802044004230003104002516002580100100800001008000050018398561493696940042400502995332999880100200800002001600004004231993118020110099100800001008000010080000340080002028000223400051101161140039800001004004340050400414004340043
8020440050300031040027161602580100100800001008000050018398561493697040042400512996432999880100200800002001600004004231993118020110099100800001008000010080000340080002158000003400051101161140037800001004004140043400434004340043
8020440050300030040027161602580100100800001008000050018393521493696240042400422995533000980100200800002001600004004231995118020110099100800001008000010080000340080002028000223400051101161140047800001004004340050400434004140043
802044005030003004002716160258010010080000100800005001839424049369604005040040299553300008010020080000200160000400423199311802011009910080000100800001008000034008000008800022000051101161140039800001004004340043400434004140043
802044004230060004002716002580100100800001008000050018422180493697140040400512995532999880100200800002001600004004032004118020110099100800001008000010080000340080000088000223400051101161140039800001004004140043400434005040043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002540061300111102010014003501652580010108000010800005018399561493697240047400532998733003380010208000020160000400614004711800211091080000108000010800151536280016012180002234005021116051140039080000104005140041400514004340052
800244004230000000310004003500025800101080000108000050183942404936960400494004929985330022800102080000201600004005040042118002110910800001080000108000003408000000580002034005020116041140039080000104005140043400414004140041
800244004030000006610004002700025800101080000108000050183985614936971400424005129977330022800102080000201600004004240051118002110910800001080000108000003408000000280000234005020116041140040080000104004340041400434005240043
80024400493000000000000400351616225800101080000108000050183935204936962400424004229975330020800102080000201600004004240040118002110910800001080000108000003408000200880002034005020116041140039080000104004340041400434005240043
80024400423000000000000400251616025800101080000108000050183935214936962400424004029977330022800102080000201600004004240040118002110910800001080000108000003408000200580002234005020116041140039080000104004140043400414004340051
80024400423000000031000400351616025800101080000108000050183942414936962400404004029977330022800102080000201600004004040042118002110910800001080000108000003408000200880002234005020116041140037080000104004340041400434004340041
800244005030000000310004002700025800101080000108000050183980814936962400514005029985330022800102080000201600004005140042118002110910800001080000108000003408000200080002234005020116041140037080000104004340051400434005140041
8002440049299000000100040036016025800101080000108000050183935204936962400404004229977330022800102080000201600004004040042118002110910800001080000108000003408000000280000234005020116051140039080000104004140043400434004340041
8002440042300000000000040027161602580010108000010800005018397600493696240040400422997533002280010208000020160000400424004011800211091080000108000010800000008000200280002234005020116041140039080000104004140051400434005140043
8002440049300000006100040027161602580010108000010800005018394240493696240042400422997733002080010208000020160000400424004211800211091080000108000010800000340800021008000220005020116041140039080000104004140052400434004140043