Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STR (register, sxtw, Q)

Test 1: uops

Code:

  str q0, [x6, w7, sxtw]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a2 | a7 | a8 | ac | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
100554240715281616125100010001000224720517543540355341210001000300054254211100110001000100042100202100224273116115371000541543544543543
10045424030528160025100010001000224720515543542355340110001000300054254211100110001000100044100202100224273116115401000544544543543543
1004554403152716002510001000100022448051754254235534001000100030005425421110011000100010000100202100224273116115401000543543555555543
10045424030527016025100010001000224481517542543356340010001000300054354311100110001000100044100202100224273116115401000543543564564543
100454240305271616125100010001000224720518543543353340010001000300054254011100110001000100044100002100224273116115391000544544555543543
10045424030527016525100010001000224480517542540355340010001000300054354211100110001000100044100202100224273116115391000555543555548543
10045423030527160025100010001000224481517543543355340010001000300054254211100110001000100044100202100024273116115391000543543543543543
1004542400152716161251000100010002244805175425543553398100010003000542542111001100010001000441002023100204273116115391000543543555555541
10045424030527161612510001000100022472051754354335534001000100030005425421110011000100010000100202100224273116115371000543543555555543
100454240305251616025100010001000224480517542554355340010001000300054254211100110001000100044100202100224273116115371000543555555564541

Test 2: throughput

Count: 8

Code:

  str q0, [x6, w7, sxtw]
  str q0, [x6, w7, sxtw]
  str q0, [x6, w7, sxtw]
  str q0, [x6, w7, sxtw]
  str q0, [x6, w7, sxtw]
  str q0, [x6, w7, sxtw]
  str q0, [x6, w7, sxtw]
  str q0, [x6, w7, sxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 08 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a2 | a7 | a8 | ac | af | bc | dcache store miss (c0) | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
802054004230000314002700425801001008000010080006500183945504002640042400422996172999280107200800162002400484004040040118020110099100100800008000010080000080002028000223411151182163340048800001004004340041400434004340051
802044004030003304003416160258010010080000100800075001839378040015400424004229961730001801062008001620024004840040400401180201100991001008000080000100800003480002028000203411151185164440039800001004004340043400434004340051
80204400422990001400251600258010010080000100800065001839455040015400404004029961729992801062008001620024004840042400421180201100991001008000080000100800003480002058000003411151184163340039800001004005240043400414004340052
802044004230000904002716160258010010080000100800075001839455140015400404004229961730003801062008001620024004840042400421180201100991001008000080000100800003480002028000003411151184164440048800001004004340043400414004140041
8020440040300003140027160025801001008000010080007500183945514001540050400422996172999480107200800162002400484005040042118020110099100100800008000010080000080002028000023411151184164340047800001004004140043400414004340041
802044004229900014003416160258010010080000100800065001839455140017400424004929961729992801062008001620024004840043400421180201100991001008000080000100800003480002028000023411151183163440046800001004005040041400514004140043
8020440042299000040035160025801001008000010080007500183945504002640042400402996172999480107200800162002400484004240040118020110099100100800008000010080000080002028000023411151183163440037800001004004340041400434005140043
802044004929900914002516160258010010080000100800075001839455040017400404004029961232999280107200800162002400484005040042118020110099100100800008000010080000348000208800022011151182163440039800001004004140041400414004140043
802044004030000904002701602580100100800001008000750018394550400154004240042299617299948010620080016200240048400514005011802011009910010080000800001008000008000202800022011151184164440037800001004004340043400434004340041
802044004230000014002716160258010010080000100800065001839378040017400514004229969729992801072008001620024004840040400421180201100991001008000080000100800003480002028000203411151184164440048800001004004340043400434004140043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5006

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80025400543001000170014004016165258001010800001080000501840004004002840054400632998933004380010208000020240000400544005411800211091010800008000010800141644018001601178000216441415020316114006180000104004840053400554005240055
8002440047300110017001400421616225800101080000108000050184000430400294005240054299893300278001020800002024000040053400471180021109101080000800001080015164400800160216800021601415020116114004980000104005540053400554005340055
800244005230011101410140032150225800101080000108000050184000400400284005440063299893300318001020800002024000040054400631180021109101080000800001080000042008000200080002242005020116114003980000104005540053400554004840055
80024400523001000190014004816012580010108000010800005018400040040022400524005429989330034800102080000202400004005340047118002110910108000080000108001514001800160119800021601425020116114006080000104005540052400554004840055
8002440047300111991710140042000258001010800001080000501839934004002940064400522998733003480010208000020240000400484005211800211091010800008000010800000420080002600580002242005020116114003980000104004340041400434004340044
800244004230000003000400331616025800101080000108000050183944800400154004040040299773300228001020800002024000040043400421180021109101080000800001080000042008000200280000242005020116114003980000104004340044400444004340041
8002440042300000030004002816160258001010800001080000501839448004001740042400422997733002280010208000020240000400424004211800211091010800008000010800151642018007601280002242005020116114003980000104005540064400554005340053
8002440054300110020001400271616025800101080000108000050183947200400184004340042299783300228001020800002024000040042400421180021109101080000800001080000042008000200280002242005020116114003980000104004340043400414004340043
8002440042300000030004004000025800101080000108000050183990800400294005240047299873300348001020800002024000040047400531180021109101080000800001080016164410800160019800001401415020116114005180000104004140044400414004340044
800244004229900003100400501616225800101080000108000050184050800400294005440063299993300328001020800002024000040047400481180021109101080000800001080000042008000200280002242005020116114003980000104006540055400644005540055