Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STR (register, lsl, Q)

Test 1: uops

Code:

  str q0, [x6, x7, lsl #4]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 1e | 1f | 22 | 23 | 3f | 46 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a7 | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
200654240000527002520001000100010001000500022488051754254223032752000100010001000300054254211100110001000100010001000007311611539100010001000543543543543545
200454240010527002520001000100010001000500022488151754254223032782000100010001000300054254211100110001000100010001000007311611539100010001000543543543543543
200454240000527002520001000100010001000500022488151754254223032752000100010001000300054254211100110001000100010001000007311611539100010001000543543543543543
200454240000527002520001000100010001000500022488051754254223032752000100010001000300054254211100110001000100010001000007311611539100010001000543543543543543
200454240000527002520001000100010001000500022488051754254223032752000100010001000300054254211100110001000100010001000207311611539100010001000546543543543543
200454240000527002520001000100010001000500022488051754254223032752000100010001000300054254211100110001000100010001000007311611539100010001000543543543543543
200454240000527002520001000100010001000500022488151754254223032752000100010001000300054254211100110001000100010001000007311611539100010001000545543543543543
2004542400005271602520001000100010001000500022488051754254423232752000100010001000300054254211100110001000100010001000007311611539100010001000543543543543543
200454240000527002520001000100010001000500022488051754254223032752000100010001000300054254211100110001000100010001000007311611539100010001000543543543543543
200454243000527002520001000100010001000500022488151754254223032752000100010001000300054254211100110001000100010001000007311611539100010001000543543543543543

Test 2: throughput

Count: 8

Code:

  str q0, [x6, x7, lsl #4]
  str q0, [x6, x7, lsl #4]
  str q0, [x6, x7, lsl #4]
  str q0, [x6, x7, lsl #4]
  str q0, [x6, x7, lsl #4]
  str q0, [x6, x7, lsl #4]
  str q0, [x6, x7, lsl #4]
  str q0, [x6, x7, lsl #4]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 07 | 0a | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a2 | a4 | a7 | a8 | ac | af | bc | dcache store miss (c0) | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020640042299114020140027016025160112801088000080116800064005521839639140017400424004219963619993160118802168001680216240048400444004411802011009910010080000800001008000000800000080000242111511711611400418000880000801004004540043400434004340045
1602044005630011717314002716161251601088010880000801128000640055218395141400194004240044199616199931601188021680016802162400484004240044118020110099100100800008000010080000420800020080000042111511711611400398000880000801004004340043400434004340043
160204400423001174130400290012516010880108800008011680006400568183962014001740042400421996861999416011880216800168021624004840042400421180201100991001008000080000100800000080000008000200111511711611400398000880000801004004340043400434004540043
160204400443001124304002900025160108801088000080112800064005521839514140017400424004219967619993160124802168001680216240048400424004211802011009910010080000800001008000000800000080000042111511711611400418001280000801004004340043402714026840043
1602044004230011120040027016025160108801088000080112800064005521839514140017400424004219959619993160118802168001680336240048400424004411802011009910010080000800001008000042080002008000020111511711611400398001280000801004004340043400434004340045
160204401852991133004002700025160108801088000080112800084005521839620140017400424004219961619993160118802168001680216240048400424004211802011009910010080000800001008000000800000080000042111511711611400398001280000801004004340043400434004540043
16020440042300115283040027016125160108801088000080112800064005521839514140017400424004219969619993160119802168013680216240048400424004411802011009910010080000800001008006242080002038000200111511711611400418001280000801004004340043400434004340043
16020440042300115700400270002516011280108800008011280008400552183951414001740042400421996761999516011880216800168021624004840042400421180201100991001008000080000100800000080000008000020111511711611400398001280000801004004340043400434004340045
16020440042300114800400290002516011280108800008011280006400552183951414001940044400421996761999616011880216800168021624004840042400421180201100991001008000080000100800000080000108000200111511711611400398001280000801004004340259400454004540043
160204400443001132700400271616025160108801088000080112800084005521839514040019400424004219962619993160118802168001680216240048400444004211802011009910010080000800001008000000800001080000042111511711611400398000880000801004004340043400454005740045

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 0a | 0b | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | 74 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | cf | d0 | d2 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002640061310101000027010040027161602516001080010800008001080000400050183956014001740042400421997803200241600108015880000800202400004005140044118002110910108000080000108000000008000010580002234005020001116711400398000080000800104005240043400454004340053
16002440042300000000042961004002916160251600108001080000800108000040005018394880400174004240042199780320024160010800208000080020240000400444005311800211091010800008000010800000340080000000800002340050200012161212400418000080000800104005440043400534004540043
16002440042300000000088500004002700025160010800108000080010800004000501839488140017400424004219978032002216001080020800008002024000040044400441180021109101080000800001080000034008000200580002234005020006161110400418000080000800104004340043400434004340043
1600244004230000000003314001400340002516001080010800008001080000400050183982404002440063400531998503200221600108002080000800202400004004240042118002110910108000080000108000000008000215088000020005020009161111400418000080000800104004340043400434004340043
1600244004430000000000000040027161602516001080010800008001080000400050183956014001940042400421997903200221600108002080000800202400004004240042118002110910108000080000108000100008000000080000234005020009161212400398000080000800104004340043400434004340043
160024400423000000000549010040029161602516001080010800008001080000400050183948804002740042400441997903200241600108002080000800202400004004240042118002110910108000080000108000000008000000080000000050200012161110402288011680000800104004540043400454004540043
1600244004230000000008130000400290002516001080010800008001080000400050183948814001940044400421997903200331600108002080000800202400004004240042118002110910108000080000108000000008000000080000000050200012161114400398000080000800104004340043400434004340043
160024400423000000000150000400290160251600108001080000800108000040005018394880400284004240044199780320024160010800208000080020240000400444004411800211091010800008000010800000000800000008000000005098001216106400398000080000800104004340043400434004340043
1600244004229900000000010040027161602516001080010800008001080000400050183948804001740042400421997803200271600108002080000800202400004004440044218002110910108000080000108000000008000000080000000050200011161312400398000080000800104004340043400434004340043
1600244004230000000009150000400270002516001080010800008001080000400050183956014001740042400421997803200241600108002080000800202400004005340044118002110910108000080000108000003400800000008000000005020001116911400398000080000800104004340043400454004540043