Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STR (register, uxtw, 32-bit)

Test 1: uops

Code:

  str w0, [x6, w7, uxtw]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 22 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005558410010191154516165251000100010002293215525613603416100010003000552557111001100010001014153600101601191002163514373116115471000548553548561561
1004542400000300535160025100010001000224241542540355340010001000300054054211100110001000100003400100200210022340073116115391000541552543543543
1004542500000000527161602510001000100022352154054235334001000100030005425401110011000100010000000100200210002340073116115391000541551543543543
10045424000006005251600251000100010002242405425493533400100010003000542550111001100010001000000010000021002000073116115481000541543552552543
10045424000000005271616025100010001000223521542542355340010001000300054054211100110001000100003400100210210002340073116115471000543551552541543
1004540400000900527161602510001000100022424154254935334081000100030005425421110011000100010000340010020001002200073116115391000551550541541541
10045404000009005271616225100010001000224241542550353340810001000300054254211100110001000100003400100000210022340073116115391000543541543541550
100454040000030052501602510001000100022424154055135534001000100030005405401110011000100010000000100200010022340073116115371000541551550541543
10045424000000105251616025100010001000223521542551355339810001000300054054011100110001000100003400100200510020340073116115391000543541541552552
100455140000030052701602510001000100022352154255035534001000100030005425511110011000100010000340010020001002000073116115391000541543543551541

Test 2: throughput

Count: 8

Code:

  str w0, [x6, w7, uxtw]
  str w0, [x6, w7, uxtw]
  str w0, [x6, w7, uxtw]
  str w0, [x6, w7, uxtw]
  str w0, [x6, w7, uxtw]
  str w0, [x6, w7, uxtw]
  str w0, [x6, w7, uxtw]
  str w0, [x6, w7, uxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802054005130009104002516160258010010080000100800075001839455149369694004240049299687299928010720080016200240048400493199511802011009910080000100800001008000034080002028000223401115118016040120800001004005040043400514004340051
80204400403000910400250160258010010080000100800065001839455149369604004040040299597299948010720080016200240048400403199311802011009910080000100800001008000034080002028000203401115118016040037800001004004340052400434004340041
8020440042300000040035161602580100100800001008000750018398101493696040042400422995972999280106200800162002400484004231995118020110099100800001008000010080000008000200800022001115118016040039800001004004340043400434004140043
802044005029903104002716160258010010080000100800075001839810149369624004240040299617299928010720080016200240432400423199511802011009910080000100800001008000034080000008000223401115118016040037800001004004140051400434004340043
80204400423000310400341616025801001008000010080007500183945514936960400404004929968729992801062008001620024004840179319951180201100991008000010080000100800003408000202800022001115118016040039800001004004140041400504004140041
8020440042300090040027161602580100100800001008000750018394551493696040040400422995973000280107200800162002400484004031993118020110099100800001008000010080000008000202800020001115118016040037800001004004340041400434004340043
802044004230003104003616160258010010080000100800065001839455149369694004240049299597299948010720080016200240048400423199511802011009910080000100800001008000034080002008000003401115118016040039800001004004340043400434005140041
802044004030003104002701602580100100800001008000750018394551493696040042400502995972999480107200800162002400484004231995118020110099100800001008000010080000008000202800022001115118016040039800001004004140043400514004340051
80204400422990010400271616325801001008000010080007500183985814936960400424004229961729992801072008001620024004840051319931180201100991008000010080000100800003408000002800022001115118016040039800001004004340043400524004340052
80204400423000000400271600258010010080000100800065001839455149369604004040042299617299928010720080502200240048401793199511802011009910080000100800001008000034080002028000023401115118016040047800001004004340050400434004340043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800254005130000000091400271616025800101080000108000050183935214936960400404004229977330022800102080000202400004004240040118002110910800001080000108000003480002088000223405020271620254003780000104004340043400434004340052
8002440042304000000314003416160258001010800001080000501839352149369604004240042299773300228001020800002024000040042400401180021109108000010800001080000034800020855800020005020281614254003980000104004340043400434004340051
80024400423000000003140027161602580010108000010800005018398561493696040042400502998533002280010228000020240000400494004911800211091080000108000010800000348000201948000023405020221625154003980000104005040043400434004340051
80024400423000000000040583160025800101080000108000050183942414936962400424004229975330030800102080000202400004004240042118002110910800001080000108000003480002008000223405020131625154003980000104004140043400504004340050
800244004330000000061400271616025800101080000108000050183942414936970400424004029977330029800102080000202400004004240049118002110910800001080000108000003480002138000223405020151627154003780000104004340043400434004140051
8002440040300000000314002716160258001010800001080000501839424149369624004240042299753300228001020800002024000040042400421180021109108000010800001080000034800027118000223405020201627214003980000104005040043400514004340043
800244004230000004091400271616355800701080060108000050183942414936962400504004229984330022800102080000202400004005040040118002110910800001080000108000003480000028000223405020261614264003980000104005140043400514004340043
800244004230000000661400270160258001010800001080000501839856149369624004240051299863300228001020800002024000040051400501180021109108000010800001080000034800001228000223405020151627274003780000104004340041400524004340043
800244004030000000031400361600258001010800001080000501839424149369714004240040299775530031800102080000202400004004040040118002110910800001080000108000003480002028000223405020281627264004880000104004340043400434059640051
8002440040299000000304002716160145800101080000108000050183985604936962400424004029977293002280010208000020240000400424004211800211091080000108000010800000080002458000223405020201627184003780000104004340054400434005240050