Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STR (register, uxtw, Q)

Test 1: uops

Code:

  str q0, [x6, w7, uxtw]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a2 | a4 | a7 | a8 | ac | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
100554240315271616125100010001000224485175425423553400100010003000542543111001100010001000420100202100224273216225391000555544543544543
100454240315271616025100010001000224725175425433563400100010003000543542111001100010001000420100202100224273216225401000543543543543543
1004542333315281616125100010001000224485175425423553401100010003000542542111001100010001000420100202100224273216225391000543543543543543
100454240315281616125100010001000224485175545423553400100010003000543542111001100010001000420100202100224273216225391000544543543544543
100454240315271616025100010001000224485175425423553401100010003000542542111001100010001000420100212100224273216225401000543543555544555
100454240315281616125100010001000224485175425423553400100010003000542543111001100010001000420100202100224273216225391000544543544543544
100454340315271616025100010001000224485175425423553401100010003000542542111001100010001000420100202100224273216225401000543543543543543
100454240315271616125100010001000224725175425423563401100010003000542542111001100010001000420100202100224273216225391000544543544543544
100454340315281616125100010001000224485175425423673400100010003000543542111001100010001000420100202100224273216225391000543544543543544
100454340315271616025100010001000224725295425433563400100010003000542543111001100010001000420100202100224273216225401000543543543543543

Test 2: throughput

Count: 8

Code:

  str q0, [x6, w7, uxtw]
  str q0, [x6, w7, uxtw]
  str q0, [x6, w7, uxtw]
  str q0, [x6, w7, uxtw]
  str q0, [x6, w7, uxtw]
  str q0, [x6, w7, uxtw]
  str q0, [x6, w7, uxtw]
  str q0, [x6, w7, uxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 1e | 1f | 22 | 24 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a7 | a8 | ac | af | bc | dcache store miss (c0) | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8020540050300031040025016025801001008000010080007500183945514001704005240040299597299928010720080016200240048400424004211802011009910010080000800001008000003480000008000223401115118616400480800001004004340041400434004340041
80204400423003331040035160025801001008000010080007500183937814002504021240189299617299948010720080016200240048400424004211802011009910010080000800001008000003480002008000223401115118016400370800001004004340041400434004140043
8020440042300001040025016025801001008000010080007500183937814002504004040040299617299948010620080137200240048400424004211802011009910010080000800001008000003480002028000223601115118016400390800001004004340043400414004340050
80204400403003600040027160025801001008000010080006500183937814001704004240040299717299948010720080016200240048400514004211802011009910010080000800001008000003480002028000223601115118016400370800001004004340050400434004140043
802044004030024300400270160258010010080000100800065001839378140017040042400402995973000280107200800162002400484004040040118020110099100100800008000010080000008000002800022001115118016400470800001004004140043400524004340043
8020440040300369004002701602580100100800001008000750018394551400170400474004029961729992801072008001620024004840049400401180201100991001008000080000100800000080002028000223401115118016400390800001004004340041400434005040041
80204400503001231040036161602580100100800001008000650018394551400150400584004229961729992801062008001620024004840042400401180201100991001008000080000100800000348000205800002001115118016400390800001004004140041400414005040043
80204400423004531040025161602580100100800001008000650018394551400250400424004929959729994801072008001620024004840042400421180201100991001008000080000100800000348000202800002001115118016400390800001004004340043400434004340052
8020440042300393104002716002580100100800001008000750018394551400170400424004029959729992801062008001620024004840042400401180201100991001008000080000100800000348000002800000001115118016400390800001004004140052400414005240043
802044012330000104002501622580100100800001008000750018397911400170400424004029959729994801062008001620024004840042400501180201100991001008000080000100800000080000008000223401115118016400390800001004004340051400414005040043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a2 | a4 | a6 | a7 | a8 | ac | af | bc | dcache store miss (c0) | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8002540042299423102400271616025800101080000108000050183935214001504004040042299773300208001020800002024000040043400431180021109101080000800001080000420080002028000204205024131615154003980000104004340043400414004140043
80024400403002583002400271616025800101080000108000050183944814001704004240042299783300228001020800002024000040040400431180021109101080000800001080000420080002008000204205024151614134004080000104004140044400434004340044
80024400403007270024002716012580010108000010800005018394481400170400404004229977330022800102080000202400004004240040118002110910108000080000108000042008000202800000005024171615174003980000104004140043400444004140043
8002440040299300024003916002580010108000010800005018400001400170400424004229977330022800102080000202400004004040042118002110910108000080000108000000080000028000204205024161616144004080000104004440041400434004340043
80024400423009901024002716002580010108000010800005018393521400170400424004229977330022800102080000202400004005440042118002110910108000080000108000042008000202800002005024181614174003780000104004140043400414004440043
80024400423001110102400271616025800101080000108000050183944814014604004240042299773300228001020800002024000040042400421180021109101080000800001080000420080002028000204205024171617144003780000104004340044400414004340041
800244004029972000240028160025800101080000108000050183944814001704004240042299783300228001020800002024000040043400431180021109101080000800001080000000800020280000000502414169184004080000104004440041400434004440041
8002440043300357000240025016140800101080000108000050183944814001704004040042299893300208001020800002024000040042400401180021109101080000800001080000420080002008000024205024131614164004080000104004440043400434004440043
8002440040300480102400250002580010108000010800005018400001400150400424004229977330020800102080000202400004004240040118002110910108000080000108000000080000028000224205024131618164003980000104004140044400434004340043
80024400423008701024002801602580010108000010800005018394481400150400424004029977330022800102080000202400004004240042118002110910108000080000108000042008000000800000005024171618144003980000104004340044400444004340044