Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STR (register, lsl, Q)

Test 1: uops

Code:

  str q0, [x6, x7, lsl #4]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 23 | 3f | 46 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | st unit uop (a7) | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200654240000527002520001000100010001000500022488051754254223032752000100010001000300054254211100110001000100010001000007311611539100010001000543543543543545
200454240010527002520001000100010001000500022488151754254223032782000100010001000300054254211100110001000100010001000007311611539100010001000543543543543543
200454240000527002520001000100010001000500022488151754254223032752000100010001000300054254211100110001000100010001000007311611539100010001000543543543543543
200454240000527002520001000100010001000500022488051754254223032752000100010001000300054254211100110001000100010001000007311611539100010001000543543543543543
200454240000527002520001000100010001000500022488051754254223032752000100010001000300054254211100110001000100010001000207311611539100010001000546543543543543
200454240000527002520001000100010001000500022488051754254223032752000100010001000300054254211100110001000100010001000007311611539100010001000543543543543543
200454240000527002520001000100010001000500022488151754254223032752000100010001000300054254211100110001000100010001000007311611539100010001000545543543543543
2004542400005271602520001000100010001000500022488051754254423232752000100010001000300054254211100110001000100010001000007311611539100010001000543543543543543
200454240000527002520001000100010001000500022488051754254223032752000100010001000300054254211100110001000100010001000007311611539100010001000543543543543543
200454243000527002520001000100010001000500022488151754254223032752000100010001000300054254211100110001000100010001000007311611539100010001000543543543543543

Test 2: throughput

Count: 8

Code:

  str q0, [x6, x7, lsl #4]
  str q0, [x6, x7, lsl #4]
  str q0, [x6, x7, lsl #4]
  str q0, [x6, x7, lsl #4]
  str q0, [x6, x7, lsl #4]
  str q0, [x6, x7, lsl #4]
  str q0, [x6, x7, lsl #4]
  str q0, [x6, x7, lsl #4]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020640042299114020140027016025160112801088000080116800064005521839639140017400424004219963619993160118802168001680216240048400444004411802011009910010080000800001008000000800000080000242111511711611400418000880000801004004540043400434004340045
1602044005630011717314002716161251601088010880000801128000640055218395141400194004240044199616199931601188021680016802162400484004240044118020110099100100800008000010080000420800020080000042111511711611400398000880000801004004340043400434004340043
160204400423001174130400290012516010880108800008011680006400568183962014001740042400421996861999416011880216800168021624004840042400421180201100991001008000080000100800000080000008000200111511711611400398000880000801004004340043400434004540043
160204400443001124304002900025160108801088000080112800064005521839514140017400424004219967619993160124802168001680216240048400424004211802011009910010080000800001008000000800000080000042111511711611400418001280000801004004340043402714026840043
1602044004230011120040027016025160108801088000080112800064005521839514140017400424004219959619993160118802168001680336240048400424004411802011009910010080000800001008000042080002008000020111511711611400398001280000801004004340043400434004340045
160204401852991133004002700025160108801088000080112800084005521839620140017400424004219961619993160118802168001680216240048400424004211802011009910010080000800001008000000800000080000042111511711611400398001280000801004004340043400434004540043
16020440042300115283040027016125160108801088000080112800064005521839514140017400424004219969619993160119802168013680216240048400424004411802011009910010080000800001008006242080002038000200111511711611400418001280000801004004340043400434004340043
16020440042300115700400270002516011280108800008011280008400552183951414001740042400421996761999516011880216800168021624004840042400421180201100991001008000080000100800000080000008000020111511711611400398001280000801004004340043400434004340045
16020440042300114800400290002516011280108800008011280006400552183951414001940044400421996761999616011880216800168021624004840042400421180201100991001008000080000100800000080000108000200111511711611400398001280000801004004340259400454004540043
160204400443001132700400271616025160108801088000080112800084005521839514040019400424004219962619993160118802168001680216240048400444004211802011009910010080000800001008000000800001080000042111511711611400398000880000801004004340043400454005740045

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002640061310101000027010040027161602516001080010800008001080000400050183956014001740042400421997803200241600108015880000800202400004005140044118002110910108000080000108000000008000010580002234005020001116711400398000080000800104005240043400454004340053
16002440042300000000042961004002916160251600108001080000800108000040005018394880400174004240042199780320024160010800208000080020240000400444005311800211091010800008000010800000340080000000800002340050200012161212400418000080000800104005440043400534004540043
16002440042300000000088500004002700025160010800108000080010800004000501839488140017400424004219978032002216001080020800008002024000040044400441180021109101080000800001080000034008000200580002234005020006161110400418000080000800104004340043400434004340043
1600244004230000000003314001400340002516001080010800008001080000400050183982404002440063400531998503200221600108002080000800202400004004240042118002110910108000080000108000000008000215088000020005020009161111400418000080000800104004340043400434004340043
1600244004430000000000000040027161602516001080010800008001080000400050183956014001940042400421997903200221600108002080000800202400004004240042118002110910108000080000108000100008000000080000234005020009161212400398000080000800104004340043400434004340043
160024400423000000000549010040029161602516001080010800008001080000400050183948804002740042400441997903200241600108002080000800202400004004240042118002110910108000080000108000000008000000080000000050200012161110402288011680000800104004540043400454004540043
1600244004230000000008130000400290002516001080010800008001080000400050183948814001940044400421997903200331600108002080000800202400004004240042118002110910108000080000108000000008000000080000000050200012161114400398000080000800104004340043400434004340043
160024400423000000000150000400290160251600108001080000800108000040005018394880400284004240044199780320024160010800208000080020240000400444004411800211091010800008000010800000000800000008000000005098001216106400398000080000800104004340043400434004340043
1600244004229900000000010040027161602516001080010800008001080000400050183948804001740042400421997803200271600108002080000800202400004004440044218002110910108000080000108000000008000000080000000050200011161312400398000080000800104004340043400434004340043
1600244004230000000009150000400270002516001080010800008001080000400050183956014001740042400421997803200241600108002080000800202400004005340044118002110910108000080000108000003400800000008000000005020001116911400398000080000800104004340043400454004540043