Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STR (register, uxtw, Q)

Test 1: uops

Code:

  str q0, [x6, w7, uxtw]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100554240315271616125100010001000224485175425423553400100010003000542543111001100010001000420100202100224273216225391000555544543544543
100454240315271616025100010001000224725175425433563400100010003000543542111001100010001000420100202100224273216225401000543543543543543
1004542333315281616125100010001000224485175425423553401100010003000542542111001100010001000420100202100224273216225391000543543543543543
100454240315281616125100010001000224485175545423553400100010003000543542111001100010001000420100202100224273216225391000544543543544543
100454240315271616025100010001000224485175425423553401100010003000542542111001100010001000420100212100224273216225401000543543555544555
100454240315281616125100010001000224485175425423553400100010003000542543111001100010001000420100202100224273216225391000544543544543544
100454340315271616025100010001000224485175425423553401100010003000542542111001100010001000420100202100224273216225401000543543543543543
100454240315271616125100010001000224725175425423563401100010003000542542111001100010001000420100202100224273216225391000544543544543544
100454340315281616125100010001000224485175425423673400100010003000543542111001100010001000420100202100224273216225391000543544543543544
100454340315271616025100010001000224725295425433563400100010003000542543111001100010001000420100202100224273216225401000543543543543543

Test 2: throughput

Count: 8

Code:

  str q0, [x6, w7, uxtw]
  str q0, [x6, w7, uxtw]
  str q0, [x6, w7, uxtw]
  str q0, [x6, w7, uxtw]
  str q0, [x6, w7, uxtw]
  str q0, [x6, w7, uxtw]
  str q0, [x6, w7, uxtw]
  str q0, [x6, w7, uxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 24 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020540050300031040025016025801001008000010080007500183945514001704005240040299597299928010720080016200240048400424004211802011009910010080000800001008000003480000008000223401115118616400480800001004004340041400434004340041
80204400423003331040035160025801001008000010080007500183937814002504021240189299617299948010720080016200240048400424004211802011009910010080000800001008000003480002008000223401115118016400370800001004004340041400434004140043
8020440042300001040025016025801001008000010080007500183937814002504004040040299617299948010620080137200240048400424004211802011009910010080000800001008000003480002028000223601115118016400390800001004004340043400414004340050
80204400403003600040027160025801001008000010080006500183937814001704004240040299717299948010720080016200240048400514004211802011009910010080000800001008000003480002028000223601115118016400370800001004004340050400434004140043
802044004030024300400270160258010010080000100800065001839378140017040042400402995973000280107200800162002400484004040040118020110099100100800008000010080000008000002800022001115118016400470800001004004140043400524004340043
8020440040300369004002701602580100100800001008000750018394551400170400474004029961729992801072008001620024004840049400401180201100991001008000080000100800000080002028000223401115118016400390800001004004340041400434005040041
80204400503001231040036161602580100100800001008000650018394551400150400584004229961729992801062008001620024004840042400401180201100991001008000080000100800000348000205800002001115118016400390800001004004140041400414005040043
80204400423004531040025161602580100100800001008000650018394551400250400424004929959729994801072008001620024004840042400421180201100991001008000080000100800000348000202800002001115118016400390800001004004340043400434004340052
8020440042300393104002716002580100100800001008000750018394551400170400424004029959729992801062008001620024004840042400401180201100991001008000080000100800000348000002800000001115118016400390800001004004140052400414005240043
802044012330000104002501622580100100800001008000750018397911400170400424004029959729994801062008001620024004840042400501180201100991001008000080000100800000080000008000223401115118016400390800001004004340051400414005040043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002540042299423102400271616025800101080000108000050183935214001504004040042299773300208001020800002024000040043400431180021109101080000800001080000420080002028000204205024131615154003980000104004340043400414004140043
80024400403002583002400271616025800101080000108000050183944814001704004240042299783300228001020800002024000040040400431180021109101080000800001080000420080002008000204205024151614134004080000104004140044400434004340044
80024400403007270024002716012580010108000010800005018394481400170400404004229977330022800102080000202400004004240040118002110910108000080000108000042008000202800000005024171615174003980000104004140043400444004140043
8002440040299300024003916002580010108000010800005018400001400170400424004229977330022800102080000202400004004040042118002110910108000080000108000000080000028000204205024161616144004080000104004440041400434004340043
80024400423009901024002716002580010108000010800005018393521400170400424004229977330022800102080000202400004005440042118002110910108000080000108000042008000202800002005024181614174003780000104004140043400414004440043
80024400423001110102400271616025800101080000108000050183944814014604004240042299773300228001020800002024000040042400421180021109101080000800001080000420080002028000204205024171617144003780000104004340044400414004340041
800244004029972000240028160025800101080000108000050183944814001704004240042299783300228001020800002024000040043400431180021109101080000800001080000000800020280000000502414169184004080000104004440041400434004440041
8002440043300357000240025016140800101080000108000050183944814001704004040042299893300208001020800002024000040042400401180021109101080000800001080000420080002008000024205024131614164004080000104004440043400434004440043
8002440040300480102400250002580010108000010800005018400001400150400424004229977330020800102080000202400004004240040118002110910108000080000108000000080000028000224205024131618164003980000104004140044400434004340043
80024400423008701024002801602580010108000010800005018394481400150400424004029977330022800102080000202400004004240042118002110910108000080000108000042008000000800000005024171618144003980000104004340044400444004340044