Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STR (register, uxtw, D)

Test 1: uops

Code:

  str d0, [x6, w7, uxtw]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a2 | a7 | a8 | ac | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
1005550400152516160251000100010002235251754054235333981000100030005425401110011000100010000100000100223473116115371000541543541541541
1004550403053616160251000100010002235251554254935333981000100030005425401110011000100010003410020210022073116115371000541543543543543
1004540403152701602510001000100022424515540540355340810001000300054254011100110001000100034100200100203473116115481000543543543543541
100454040305271600251000100010002242451754254035533981000100030005405421110011000100010000100205100223473116115391000543550551551541
1004542403052716160251000100010002242451754254235533981000100030005425421110011000100010000100000100223473116115391000541543543543543
10045424030535161602510001000100022352515542551355339810001000300054254211100110001000100034100200101223473116115481000543543543543541
1004540403052701602510001000100022424517542542353340010001000300055154011100110001000100034100000100203473116115371000541543543543543
1004542400152716002510001000100022352517542542353340010001000300054254211100110001000100034100002100203473116115471000543543543543541
100455040305271600251000100010002242451754854035333981000100030005405421110011000100010003410020810020073116115471000543543543541541
1004550300052701602510001000100022352525540540353339810001000300054254011100110001000100034100200100203473116115461000541541543543541

Test 2: throughput

Count: 8

Code:

  str d0, [x6, w7, uxtw]
  str d0, [x6, w7, uxtw]
  str d0, [x6, w7, uxtw]
  str d0, [x6, w7, uxtw]
  str d0, [x6, w7, uxtw]
  str d0, [x6, w7, uxtw]
  str d0, [x6, w7, uxtw]
  str d0, [x6, w7, uxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a2 | a4 | a6 | a7 | a8 | ac | af | bc | dcache store miss (c0) | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80205400423002401400270160258010010080000100800075001839378140017400424005129961729994801072008001620024004840042400421180201100991001008000080000100800003400800000280000234011151180160040037800001004004340041400414004340052
802044004229915631400271616025801001008000010080007500183937814001540051400402995972999480107200800162002400484004040042118020110099100100800008000010080000000800020280000034011151181160040037800001004004140052400414004140041
80204400423001203140027016025801001008000010080007500183945514001740040400492996972999280106200800162002400484004040040118020110099100100800008000010080000340080002008000020011151180160040039800001004005140041400414004340041
8020440042299031400271616025801001008000010080007500183945504001540040400422996173000280106200800162002400484004240051218020110099100100800008000010080000000800000280002234011151180160040039800001004005140041400514004340041
8020440042299000400251600258010010080000100800075001839378140015400424004229961729994801072008001620024004840042400421180201100991001008000080000100800003400800020080002234011151180160040037800001004004140041400434004340043
8020440042300108014002716160258010010080000100800075001839455140024400424004029959729992801072008001620024004840042400421180201100991001008000080000100800003400800000280000234011151180160040039800001004005140041400414004140050
80204400423001119140027160025801001008000010080013500183938104001540042400422995210299828011320080022200240066400424004011802011009910010080000800001008000000080002028000020022251281231140039800001004004340043400434004140052
8020440042300441614002701602580100100800001008001250018393811400264004240040299501029980801122008002220024006640051400401180201100991001008000080000100800003400800000080000034022251281231140047800001004005240043400524004340043
8020440042300999140027161602580100100800001008001250018393161400174004040042299521029990801132008002220024006640042400401180201100991001008000080000100800003400800020280002234022251281231140037800001004005140041400514004340043
8020440042300138914003516160258010010080000100800135001839381140015400514005029960102998280112200800222002400664004940049118020110099100100800008000010080000029080002028000220022251281231140037800001004004340043400434005240043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a7 | a8 | ac | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | eb | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80025400423000000140027161602580010108000010800005018394484001740042400422997733002480010208000020240000400424004011800211091010800008000010800000420800000280002242502031642400391080000104004140043400434004440041
800244004329900030400281601258001010800001080000501839352400154004040042299783300208001020800002024000040054400401180021109101080000800001080000000800020080002242502041624400510080000104004340043400414004440044
8002440043299000314002716161258001010800001080000501839448400184004340040299773300208001020800002024000040043400431180021109101080000800001080000042080000028000000502041623400370080000104004340043400414004340043
8002440042299000304002516160258001010800001080000501839448400154004040040299773300258001020800002024000040040400421180021109101080000800001080000042080000028001220502021643400390080000104004340043400444004440044
80024400433000000040025161602580010108000010800005018394484001740042400402997733002580010208000020240000400424004011800211091010800008000010800000420800020080000242502021622400390080000104004140041400434004340043
800244004030000031400250160258001010800001080000501839448400154004040042299753300228001020800002024000040042400421180021109101080000800001080000000800001280002042502041622400390080000104004340041400434004340043
8002440040299000314002716160258001010800001080000501839448400154004040042299773300278001020800002024000040042400401180021109101080000800001080000042080002008000220502021623400390080000104004140041400434004340043
80024400423000003140027161602580010108000010800005018400004001740042400402998933002080010208000020240000401224004211800211091010800008000010800000420800020280000242502021624400370080000104004340041400434004340043
80024400423000103140028161612580010108000010800005018394484001740042400402997733002480010208000020240000400404004211800211091010800008000010800000420800020080002242502021622400370080000104004340041400444004140044
80024400432990000140028161612580010108000010800005018394484001740040400422997533002280010208000020240000400404004011800211091010800008000010800000420800000080002242502021633400370080000104004340043400434004140041