Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STR (register, uxtw, S)

Test 1: uops

Code:

  str s0, [x6, w7, uxtw]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
1005543411110190015391616125100010001000230040526554554376341010001000300055455311100110001000101614440010160119100216014073316335371000543543543543543
100454240000031005271616025100010001000224480517542542367340010001000300055454211100110001000100004200101602161002164414073316335511000553565555555555
10045544111032100153716161251000100010002300405385545473683409100010003000555553111001100010001014144401101600181002164414273316335511000553555555548555
10045544101101900153916162251000100010002288405295545633673412100010003000554552111001100010001016144400101600171002164414073316235511000555555553553549
10045644110001900153916161251000100010002302805295525543653412100010003000552554111001100010001014144400101600161002164414073316335511000553553564555556
1004554410100180015391616125100010001000226920526554554376341010001000300055455311100110001000101414000101400161002164414173316335491000555555553553553
10045634111001700153816165251000100010002290805295545483673412100010003000554552111001100010001014164401101602171000164214173216335511000555555553552553
10045634100001700153716166251000100010002293305225525543673412100010003000553554111001100010001015154410101602141002164414073316335511000555553552564564
10045544100101910153716166251000100010002293305295525543673412100010003000554554111001100010001015144402101400181002164414073316335481000553553564555556
10045544100101900153916150251000100010002300405385545513673406100010003000554552111001100010001015144301101600161000164414173316335491000555548553564555

Test 2: throughput

Count: 8

Code:

  str s0, [x6, w7, uxtw]
  str s0, [x6, w7, uxtw]
  str s0, [x6, w7, uxtw]
  str s0, [x6, w7, uxtw]
  str s0, [x6, w7, uxtw]
  str s0, [x6, w7, uxtw]
  str s0, [x6, w7, uxtw]
  str s0, [x6, w7, uxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80205400423000003100400271616025801001008000010080006500183937804001804004240042299627299948010620080016200240048400404004011802011009910010080000800001008000004200800020028000020011151181160140043800001004004340043400444004140044
80204400433000003000400270160258010010080000100800065001839474140017340042400422997372999480106200800162002400484004240040118020110099100100800008000010080000042008000200580002242011151180160040041800001004004440044400434004140043
8020440040300000000040025160025801001008000010080006500183947414001704004240040299617299928010620080016200240048400434004311802011009910010080000800001008000000008000000580002242011151180160040041800001004004440041400434004340043
80204400422990003100400271600258010010080000100800065001839503140015040040400422995210299808011220080022200240066400404004211802011009910010080000800001008000004200800021008000020022251281231140039800001004004340180403224019540184
80204400402990063000400270002580100100800001008001250018394121400150400424004229952102998280112200800222002400664004040040118020110099100100800008000010080000042008000200680002242022251281231140041800001004004140043400444004340044
8020440042300000300040028161602580100100800001008001350018394121400170400424004229953102998280113200800222002400664004240040118020110099100100800008000010080000042008000230580002242022251281231140040800001004004440041400434004340043
80204400423000000000400271601258010010080000100800125001839429140015040043400432995210299828011320080022200240066400434004311802011009910010080000800001008000000008000200080002242022251281231140037800001004012340041400444004340046
8020440042300000310040027161602580100100800001008001250018394121400170400424012229964102998080112200800222002400664004040040118020110099100100800008000010080000042008000200280002242022251281231140037800001004004140043400434004340043
80204400423000000000400271600258010010080000100800125001839412040017040042400422995210299808011220080022200240066400404004211802011009910010080000800001008000004200800000028000220022251281231240037800001004004340043400534004140043
802044004230000070004002801602580100100800001008001250018394121400150400404004329952102998380112200800222002400664004340043118020110099100100800008000010080000042008000000280002042022251281231240039800001004004140044400414004340041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 1f | 22 | 24 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | c2 | c5 | cf | d5 | d6 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80025400423000000310400271616025800101080000108000050183935204001540043400432997533002080010208000020240000400424004211800211091010800008000010800000008000200080002242005020516004340040080000104004340044400434004340043
8002440054300000031040027160025800101080000108000050183944804001740042400422997733002280010208000020240000400424004211800211091010800008000010800004200800020028000020005020316003440039080000104004340043400414004140044
80024400403000000310400271616125800101080000108000050183935204001740042400422997733002280010208000020240000400424004211800211091010800008000010800004200800020028000200005020316004440040080000104004440053400414004140043
8002440043299000030040027161622580010108000010800005018394480400174004240042299773300228001020800002024000040042400421180021109101080000800001080000000800020008000020005020316004440039080000104004340043400434004140043
800244004230000000004002516160258001010800001080000501840000040015400424004229989330022800102080000202400004004040040118002110910108000080000108000042008000000280000242005020416004440039080000104004140041400434004340043
80024400543000000300400251616025800101080000108000050183944804001540042400422997533002280010208000020240000400404004211800211091010800008000010800004200800000008000020005020416003440039080000104004340041400414004340043
80024400423000000310400250160258001010800001080000501839448040015400424004029977330020800102080000202400004004240042118002110910108000080000108000042008000200080002042005020416004440039080000104004340041400414004140041
800244005430000003004002501602580010108000010800005018393521400154004240040299753300228001020800002024000040042400401180021109101080000800001080000000800000008000020005020416004440037080000104004140043400414004140043
800244004030000000004002800125800101080000108000050183947204001840042400422997533002080010208000020240000400424004011800211091010800008000010800000008000010080002042005020416003440039080000104004340055400434004140043
800244004030000003004002716160258001010800001080000501839448040017400404004229987330022800102080000202400004004240040118002110910108000080000108000042008000200280002242005020416003340037080000104004140043400444004340044