Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STR (32-bit)

Test 1: uops

Code:

  str w0, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1f | 22 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005542431527161602510001000100022448542554355340110001000200054254211100110001000100042010022100224273316115391000543543543543543
1004542331528161602510001000100022448542542355340010001000200054254311100110001000100042010022100224273216125391000543543543543543
1004542430527161612510001000100022448542542367340010001000200054354311100110001000100042010025100224273216115391000543543543544544
1004543430527161602510001000100022448542542355341210001000200054354211100110001000100042010022100224273216115401000543544544544544
1004543430527161602510001000100022448543543355340010001000200054254211100110001000100042010022100224273116115391000543543543543543
1004542470528161612510001000100022472542543356340010001000200054254311100110001000100042010022100224273116115401000544544544544543
1004542430527161612510001000100022472542542355340010001000200054254211100110001000100042010022100224273116115391000543543543543543
1004542430528161602510001000100022472542543356340010001000200054254211100110001000100042010022100224273116115391000543543543543543
1004542430527161612510001000100022472542543356340010001000200054354311100110001000100042010022100224273216115391000543543543543543
1004542430527161602510001000100023000542542355340010001000200054254211100110001000100042010022100224273216115401000544543543543544

Test 2: throughput

Count: 8

Code:

  str w0, [x6]
  str w0, [x6]
  str w0, [x6]
  str w0, [x6]
  str w0, [x6]
  str w0, [x6]
  str w0, [x6]
  str w0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | l2 tlb miss data (0b) | 19 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205400543000000310400271600258010010080000100800005001839448149369634004240040299553300128010020080000200160000400403199511802011009910080000100800001008000000008000202800020420051102162240037800001004004140043400444004140044
8020440040300000030040027161612580100100800001008000050018393521493696340042400432995333000080100200800002001600004004231995118020110099100800001008000010080000042008000202800020420051102162240039800001004004340055400444004340044
802044004230010003004002716160258010010080000100800005001839472149369634004240043299533300018010020080000200160000400433199311802011009910080000100800001008000000008000002800020420051102162240039800001004004340043400444004140041
802044004230000003104002801602580100100800001008000050018393520493696340054400432995533000180100200800002001600004004231995118020110099100800001008000010080000042008000202800022420151102162240040800001004004340043400414004340043
8020440042299000031040027161612580100100800001008000050018394480493696240042400422995532999880100200800002001600004004231993118020110099100800001008000010080000042008000200800022420051102162240039800001004004340041400444005540044
8020440042300000630040028161602580100100800001008000050018394480493696040040400422995332999880100200800002001600004004231996118020110099100800001008000010080000042008000202800022420051102162240039800001004004340043400434004440041
8020440040300000030040027161602580100100800001008000050018393520493696240040400422995332999880100200800002001600004004331995118020110099100800001008000010080000042008006360800022420051102162240039800001004004340043400434004340043
80204400403000000010400391616125801001008000010080000500183935204936962400404004229955330000801002008000020016000040042319961180201100991008000010080000100800000000800020580002200051102162240039800001004004140043400434004440043
8020440042299000000140028161602580100100800001008000050018393521493696240040400422995333001280100200800002001600004005431996118020110099100800001008000010080000042008000200800002420051102162240039800001004004440041400434004340043
8020440043300000030040027160292580100100800001008000050018393521493696040042400422995633000080100200800002001600004004231993118020110099100800001008000010080000042008000002800020420051102162240037800001004005540043400434004140043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025400423000000003000040027161602580010108000010800005018394240149369624004040042299773300228001020801392016000040040400511180021109108000010800001080000000080000002800002005020014160601210400482080000104004340043400434004340041
8002440042300000000600004002516160258001010800001080000501839352014936962400404004029985330020800102080000201600004004840042118002110910800001080000108000000008000200080002234050200121606081340039080000104004340051400434004140043
80024400493000000000000040027160025800101080000108000050183942400493696040042400402998433002280010208000020160000400424005011800211091080000108000010800000340080002002800000340502007160609940047080000104004140043400414004140041
80024400403000000000100040027161602580010108000010800005018394240149369624004240042299773300298001020800002016000040048400421180021109108000010800001080000034008000010880000034050200141606091240039080000104004140043400514004140043
8002440042300000000300004002516160258001010800001080000501839352014936960400424004029977330022800102080000201600004004240051118002110910800001080000108000000008000010280000200502001116060151440039080000104004340043400434004340051
8002440042300000000300004002516042580010108000010800005018394240049369624004040040299753300208001020800002016000040049400401180021109108000010800001080000000080002000800020340502001016050101240046080000104004140043400514004340043
8002440042300000000300004002716160258001010800001080000501839352004936962400404004229977330020800102080000201600004004240042118002110910800001080000108000000008000200580002234050200716060101040039080000104004140043400514004140052
800244004030000000001000400250160258001010800001080000501839808014936960400424005129977330022800102080000201600004004240051118002110910800001080000108000003400800000058000220050200121605261040039080000104004140043400434004340043
80024400503000000003000040025160025800101080000108000050183935211493696240042400422997533002080010208000020160000400424004211800211091080000108000010800000000800020028000220050200111605011640047080000104004340043400504004140043
8002440042300000006300004009716160258001010800001080000501839424014936962400404004229975330022800102080000201600004004040042118002110910800001080000108000000008000200280002000502001216050101040039080000104005140041400514004340043