Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STRB (register, uxtw)

Test 1: uops

Code:

  strb w0, [x6, w7, uxtw]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 1f | 22 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005543400003005271616125100010001000224480542554355340010001000300054354211100110001000100004200100200210022420073216115391000543544544544544
1004543400003005271616125100010001000224720542542356340010001000300054354311100110001000100004200100200210022420073116115391000543543543543543
1004542400003005271616125100010001000224721542542355340010001000300054254211100110001000100004200100200210022420073116115391000543543543543544
1004543400003005391616025100010001000224481542542355340010001000300054254211100110001000100004200100200210022420073116115391000543543543543543
1004542400003005271616025100010001000224481542542355340010001000300054254211100110001000100004200100200510022420073116115391000543544544544544
1004542400003005271616025100010001000224480543542355340110001000300054254211100110001000100004210100200210022420073116115391000544544544544544
1004543400003005271616125100010001000224720542542356340110001000300054354311100110001000100004200100200510022420073116115391000544544544544544
1004543300003105281616125100010001000224720542543356340010001000300054254211100110001000100004200100200210022420073116115391000543543543543543
10045424000930052716165251000100010002244805425423553400100010003000543543111001100010001000042001002002100216420073116115391000543544543543543
1004542400003005271616125100010001000224720542543356340010001000300054354211100110001000100004200100200210022420073116115391000543543543543543

Test 2: throughput

Count: 8

Code:

  strb w0, [x6, w7, uxtw]
  strb w0, [x6, w7, uxtw]
  strb w0, [x6, w7, uxtw]
  strb w0, [x6, w7, uxtw]
  strb w0, [x6, w7, uxtw]
  strb w0, [x6, w7, uxtw]
  strb w0, [x6, w7, uxtw]
  strb w0, [x6, w7, uxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 19 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802054005830000030040027161602580100100800001008000750018393780493696940042400502996873000380107200800162002400484004231995118020110099100800001008000010080000000800000280002034111511801600400390800001004004140050400434004340041
8020440050300000000400271616025801001008000010080007500183937804936962400404004229961729994801072008001620024004840042319931180201100991008000010080000100800000340800020080002034111511801600400390800001004004340041400434004340051
8020440042300000010400251616425801001008000010080007500183945504936960400514004029969730000801072008001620024004840040319951180201100991008000010080000100800000340800020280002034111511801600400370800001004005140041400414004340041
8020440040299006610400271616025801001008000010080007500183937804936960400424004029961729994801072008001620024004840042319931180201100991008000010080000100800000340800000080002234111511801600400390800001004004340050400434004340043
802044005030000000040025016025801001008000010080007500183945504936960400404005129959730001801062008001620024004840042319931180201100991008000010080000100800000340800020080002034111511801600400390800001004005240043400524004340041
8020440042300000300400271616025801001008000010080007500183945504936962400404004229961729994801072008001620024004840040319931180201100991008000010080000100800000340800020880002234111511801600400390800001004004140043400504004340041
8020440040300000000405661604028580220100801801008000650018394551493696240042400402996120303638010720080016200240048400403199511802011009910080000100800001008000023488801200109080122234111511801600409450800001004074041430414214129240735
8020441427310100600400270160258010010080000100800135001839381049369624004240050299501029980801122008002220024006640040394811180201100991008000010080000100800000340800020280002234222512812311400390800001004004340043400524004340043
802044004230000030040027000258010010080000100800125001839316049369604004240040299521029980801132008002220024006640042319951180201100991008000010080000100800000340800020280000234222512812311400390800001004005140043400514004340041
8020440040300000001400270002580100100800001008001350018393810493696240049400422995210299808011320080022200240066400403199511802011009910080000100800001008000000080002088000020222512812311400370800001004004140043400434004340043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002540043300000000000004002716012580010108000010800005018393521493696240040400422997533002080010208000020240000400424004011800211091080000108000010800000420080000009800022000502002516151540039080000104004140041400434004140041
8002440042300000000031004002716160258001010800001080000501839448149369604004240042299783300208001020800002024000040042400401180021109108000010800001080000000080002100800022000502001916141340040080000104005540041400434004340043
800244004030000000000000400251600258001010800001080000501839448149369624004240042299753300228001020800002024000040042400541180021109108000010800001080000042008000200080000200050200151691940037080000104004440043400414004440043
80024400433000000000300040027016025800101080000108000050183935214936960400424004229977330022800102080000202400004004240042118002110910800001080000108000000008000200280002200050200191615940039080000104004140043400414004140043
800244004030000000093100400271616025800101080000108000050183944804936963400434004329977330023800102080000202400004004040043118002110910800001080000108000000008000000280000000050200151614840039080000104004340043400414004340043
8002440054300000000030004002716012580010108000010800005018394720493696240043400422997533002280010208000020240000400424004011800211091080000108000010800000000800000028000024200502001516151340037080000104004140041400434004140041
80024400423000000000310040025160025800101080000108000050183944814936974400424004229977330022800102080000202400004004240042118002110910800001080000108000004210800020028000200005020091671540037080000104004340041400434004340041
800244004230000000003000400251600258001010800001080000501839448149369624004040042299773300348001020800002024000040040400401180021109108000010800001080000042008000000280002000050200151691940039080000104018340182400414004340041
8002440040300000000031004002716161258001010800001080000501839448149369624004240042299893300228001020800002024000040040400421180021109108000010800001080000042008000000280000242005020081691540039080000104004340043400414004340044
800244004330000000000100400251616025800701080300108021650183944814936962400424004229978330022800102080000202400004004240042118002110910800001080000108000000008000000380002042005020091681440039080000104004340043400414004340041