Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STR (register, lsl, S)

Test 1: uops

Code:

  str s0, [x6, x7, lsl #2]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 19 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a2 | a7 | a8 | ac | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
1005542400010534161602510001000100022856151754254035334001000100030005515421110011000100010023410020210022073116115391000550543543543543
10045424006005271616025100010001000224240526542542355340810001000300054255011100110001000100034100205100223473116115371000550551543543543
1004542400300527160025100010001000223521515542542355340010001000300054255111100110001000100034100208100223473116115391000543551541543543
1004542400300527161602510001000100022424151754254035534001000100030005425421110011000100010000100202100223473116115391000543543543551551
10045514000005271616025100010001000228561517542550353340810001000300054254911100110001000100034100208100003473116115461000543543543551551
1004549400310527016025100010001000223520517551542353340010001000300054054211100110001000100034100002100223473116115481000543543552552543
10045424003005341616025100010001000224241524540542363340010001000300054254911100110001000100034100202100223473116115391000550551543543543
1004542400300527161622510001000100022424051754254035534001000100030005425421110011000100010003410020210022073116115391000543543543550551
10045504003005351616025100010001000228080517542542355340010001000300054954211100110001000100034100202100223473116115461000543550551543543
10045423003005271616225100010001000224241524542542363340010001000300054254011100110001000100034100200100023473116115391000543543543543543

Test 2: throughput

Count: 8

Code:

  str s0, [x6, x7, lsl #2]
  str s0, [x6, x7, lsl #2]
  str s0, [x6, x7, lsl #2]
  str s0, [x6, x7, lsl #2]
  str s0, [x6, x7, lsl #2]
  str s0, [x6, x7, lsl #2]
  str s0, [x6, x7, lsl #2]
  str s0, [x6, x7, lsl #2]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 18 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80205400523001110015103400370160258010010080000100800075001839719140015400404004229959729992801072008001620024004840042400401180201100991001008000080000100800000000800020028000223400111512291688400390800001004004340041400414004140043
802044004230000000310240025161602580100100800001008000750018394551400154004240042299617299928010620080016200240048400424004011802011009910010080000800001008000003400800020058000203400111512281688400470800001004004340051400434004340043
80204400423000000012102400270002580100100800001008000750018394551400174004240040299697299948010720080016200240048400424004211802011009910010080000800001008000003400800020028000223400111512281688400370800001004004340052400414005140043
80204400403000000030024002716160258010010080060100800075001839810040025400424004229961729992801072008001620024004840042400421180201100991001008000080000100800000000800020088000223400111512281688400390800001004004140043400414004340051
802044004230000000910240025161602580100100800001008000750018393780400264004240042299597299928010720080016200240048400404004011802011009910010080000800001008000003400800000088000223400111512261688400390800001004004340050400434005140041
80204400423000000030024002716160258010010080000100800075001839839140017400404004229959729994801072008001620024004840040400421180201100991001008000080000100800000000800020008000223400111512281688400390800001004004140041400434004340041
802044004030000000310240025000258010010080000100800065001839378040017400424004229961730002801072008001620024004840040400511180201100991001008000080000100800000000800000028000203400111512281698400390800001004004340043400414004140043
80204400423000000000024002716002580100100800001008000750018398870400154004240042299617299948022120080016200240048400424004011802011009910010080000800001008000003400800020008000023400111512281688400390800001004004340043400414004340052
80204400423000000030024003501602580100100800001008000650018393780400264004240040299617299948010620080016200240048400514004211802011009910010080000800001008000003400800000028001223400111512281688400390800001004004140043400414004340041
802044004029900000900240027161602580100100800001008000750018393780400174004240048299597300028010720080016200240048400404004011802011009910010080000800001008000003400800020008000203400111512281688400390800001004004340043400434004340043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 07 | 0b | 18 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a7 | a8 | ac | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80025400423000000304002716160258001010800001080000501839784040015400404004029984330020800102080000202400004004240042118002110910108000080000108000003480002028000023450322162240039080000104004140041400514004340043
8002440040300000031400251616025800101080000108000050183935204001740040400422997733002280010208000020240000400424004211800211091010800008000010800000080002028000223450302162240046080000104005040041400514004140041
800244004030000000040027016025800101080000108000050183942404002440042400422997733003180010208000020240000400404004011800211091010800008000010800000080002008000003450322162240047080000104004140043400414004340043
800244004030000003040027016025800101080000108000050183942404001740040400422997733002280010208000020240000400504004211800211091010800008000010800000080000008000023450302162240039080000104005140041400514004340043
8002440042299000901400351616025800101080000108000050183935204001540040400402997533002080010208000020240000400424004011800211091010800008000010800000348000002800020050282162240048080000104004340043400414004340041
800244004030000000140025160025800101080000108000050183935204001540042400402997733002080010208000020240000400494004211800211091010800008000010800000348000208800002050302162240037080000104004340041400414004140041
8002440042300000090400270002580010108000010800005018394240400154005040042299753300228001020800002024000040049400421180021109101080000800001080000008000202800000050303162240037080000104004340050400414004340041
800244004030000003040027160025800101080000108000050183942404002540042400422997533003180010208000020240000400404004011800211091010800008000010800000080000008000223450322162240039080000104004140043400504004340050
800244004230000003040027160025800101080000108000050183942404001540042400422997533002080010208000020240000400424004211800211091010800008000010800000348000202800022050302162240039080000104004340041400434004340043
8002440049300000031400271616025800101080000108000050183935204001540040400422997533003080010208000020240000400424004211800211091010800008000010800000348000202800020050302162240039080000104004340041400434004140043