Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STR (register, sxtw, D)

Test 1: uops

Code:

  str d0, [x6, w7, sxtw]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 1e | 1f | 22 | 24 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a2 | a4 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
100554240010527161612510001000100022472051854354235534001000100030005425421110011000100010004201002002100204273216115401000543541543541543
100454249310528161602510001000100022448051754254235334001000100030005405421110011000100010004201002002100024273116115391000544544543543543
100454240300528161602510001000100022352051754254235334011000100030005405431110011000100010004201002002100224273116115391000543543543543541
10045424030052816012510001000100022448051754254335334001000100030005425541110011000100010004201002002100224273116115401000541543543544544
100454330710527161612510001000100022448051754254235534001000100030005425421110011000100010004201002012100224273116115401000544544544544544
100454340710527161612510001000100022448051754254235634001000100030005405421110011000100010004201002002100024273116115391000555543543543543
100454250310527161612510001000100022448051754255435534001000100030005425421110011000100010004201002002100004273116115511000585637543543543
100454240300539161602510001000100022448051554054235534001000100030005405421110011000100010004201002002100224273116115391000541541543543543
100454240300527161612510001000100022448051755454235334001000100030005425421110011000100010004201002002100024273116115401000543543541543543
100454240310527161602510001000100022472051754354335634001000100030005435421110011000100010004201002005100224273116115391000544544544544544

Test 2: throughput

Count: 8

Code:

  str d0, [x6, w7, sxtw]
  str d0, [x6, w7, sxtw]
  str d0, [x6, w7, sxtw]
  str d0, [x6, w7, sxtw]
  str d0, [x6, w7, sxtw]
  str d0, [x6, w7, sxtw]
  str d0, [x6, w7, sxtw]
  str d0, [x6, w7, sxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5007

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cd | cf | d6 | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
802054005430010010001800140039161632580100100800001008000750018397180400384005340054299737300068010620080016200240048400524005411802011009910010080000800001008001514440080016211680002164414011151181640051800001004005540053400544006140054
802044005429910010001710140032161662580100100800001008000750018400540400274005440054299737300008010720080016200240048400544005411802011009910010080000800001008001415440180016202480000164414011151181640044800001004019340053400584006440053
80204402023011020106371101400391616225801001008000010080006500183998704002940052400472997273000680106200800162002400484005240054118020110099100100800008000010080077144401800161187480002144414011151181640051800001004005540053400544006540053
802044005430011110061800140039161622580100100800001008000650018400350400384005340054299737300068010620080016200240048400544005411802011009910010080000800001008001614460180016001980002164214211151181640051800001004005440055400584005540055
8020440054300100100019101400391502258010010080000100800075001840035040029400474005429973730006801062008001620024004840053400541180201100991001008000080000100800161444028001611238000216014211151181640044800001004005540053400734006440053
8020440054300110000015001400381601258010010080000100800075001840054040027400544005429973730015801062008001620024004840054400541180201100991001008000080000100800151744018001600198006216014011151181640049800001004005340055402034005540191
80204400522991212020200014003201512580100100800001008000650018400540400234005440054299737300068010620080016200240048400544005411802011009910010080000800001008001615440080016411780002144414111151181640049800001004005540055400554005440055
802044005130011110001800140039161632580100100800001008000650018397180400274005440054299737300158010720080016200240048400474005411802011009910010080000800001008001414440180016001680002164414111151181640044800001004005540053400584006540053
8020440054300101000021101400390163258010010080000100800075001839959040029400634005229971730006801072008001620024004840063400511180201100991001008000080000100800151600180016002180000164414211151181640051800001004005340055400684005640055
80204400473001110000210014003916164258010010080000100800075001839959040029400634005329971729999801072008001620024004840063400471180201100991001008000080000100800161400180016001480002164414211151181640052800001004005540053400654005540053

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 07 | 08 | 0a | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | c2 | cf | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8002540042300000000300004002701602580010108000010800005018393521400174004040042299773300228001020800002024000040040400401180021109101080000800001080000000080000002800022340502002316222240048080000104004340181401804004340182
8002440319311000000300004002516160258001010800001080000501839352140017400424004229977330022800102080000202400004004240042118002110910108000080000108000003400800020011800022340502002216112240039080000104004340052400434005140043
8002440040299000000310004002516002580010108000010800005018394721400174004240042299773300228001020800002024000040040400401180021109101080000800001080000034008000200880000234050200916222240039080000104004140041400414005240043
80024400422990000015661000400270160258001010800001080000501839808140024400424004229977330022800102080000202400004004240042118002110910108000080000108000000008000210080000000502002216221140047080000104020240043400434004140041
800254005029900000001000400271616025800101080000108000050183935214001540049400492998533002280010208000020240000400504004011800211091010800008000010800000000800020008000220050200221692240039080000104004340043400434004340041
80024400423000000000000040025160025800101080000108000050183985614002640042400422997533002280010208000020240000400424004011800211091010800008000010800000340080002005800022340502002216112240039080000104004140050400434004340043
8002440042300000000000004002716002580010108000010800005018393521400154004240040299773300228001020800002024000040042400421180021109101080000800001080000034008000200880000200502002216112240037080000104004340043400414004340041
8002440049299000000600004003516160370806101280600108108050188782614060741418412903081319831747809822081815202458084208641847141800211091010800008000010809020344333810220212578809622340533302996223241462080000104226141570421124231942251
80024425443162111717225315871001423721601049565810901081020108162050190874214131840040400492998533002280010208000020240000400404004211800211091010800008000010800000340080000003280002200502001016112240039080000104004340050400434005140043
800244004230000000030000400250160258001010800001080000501839424140015400424004929975330029800102080000202404204004240051118002110910108000080000108000003400800000015080002234050200221619940047080000104004140051400434004340043