Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STR (register, uxtw, D)

Test 1: uops

Code:

  str d0, [x6, w7, uxtw]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005550400152516160251000100010002235251754054235333981000100030005425401110011000100010000100000100223473116115371000541543541541541
1004550403053616160251000100010002235251554254935333981000100030005425401110011000100010003410020210022073116115371000541543543543543
1004540403152701602510001000100022424515540540355340810001000300054254011100110001000100034100200100203473116115481000543543543543541
100454040305271600251000100010002242451754254035533981000100030005405421110011000100010000100205100223473116115391000543550551551541
1004542403052716160251000100010002242451754254235533981000100030005425421110011000100010000100000100223473116115391000541543543543543
10045424030535161602510001000100022352515542551355339810001000300054254211100110001000100034100200101223473116115481000543543543543541
1004540403052701602510001000100022424517542542353340010001000300055154011100110001000100034100000100203473116115371000541543543543543
1004542400152716002510001000100022352517542542353340010001000300054254211100110001000100034100002100203473116115471000543543543543541
100455040305271600251000100010002242451754854035333981000100030005405421110011000100010003410020810020073116115471000543543543541541
1004550300052701602510001000100022352525540540353339810001000300054254011100110001000100034100200100203473116115461000541541543543541

Test 2: throughput

Count: 8

Code:

  str d0, [x6, w7, uxtw]
  str d0, [x6, w7, uxtw]
  str d0, [x6, w7, uxtw]
  str d0, [x6, w7, uxtw]
  str d0, [x6, w7, uxtw]
  str d0, [x6, w7, uxtw]
  str d0, [x6, w7, uxtw]
  str d0, [x6, w7, uxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205400423002401400270160258010010080000100800075001839378140017400424005129961729994801072008001620024004840042400421180201100991001008000080000100800003400800000280000234011151180160040037800001004004340041400414004340052
802044004229915631400271616025801001008000010080007500183937814001540051400402995972999480107200800162002400484004040042118020110099100100800008000010080000000800020280000034011151181160040037800001004004140052400414004140041
80204400423001203140027016025801001008000010080007500183945514001740040400492996972999280106200800162002400484004040040118020110099100100800008000010080000340080002008000020011151180160040039800001004005140041400414004340041
8020440042299031400271616025801001008000010080007500183945504001540040400422996173000280106200800162002400484004240051218020110099100100800008000010080000000800000280002234011151180160040039800001004005140041400514004340041
8020440042299000400251600258010010080000100800075001839378140015400424004229961729994801072008001620024004840042400421180201100991001008000080000100800003400800020080002234011151180160040037800001004004140041400434004340043
8020440042300108014002716160258010010080000100800075001839455140024400424004029959729992801072008001620024004840042400421180201100991001008000080000100800003400800000280000234011151180160040039800001004005140041400414004140050
80204400423001119140027160025801001008000010080013500183938104001540042400422995210299828011320080022200240066400424004011802011009910010080000800001008000000080002028000020022251281231140039800001004004340043400434004140052
8020440042300441614002701602580100100800001008001250018393811400264004240040299501029980801122008002220024006640051400401180201100991001008000080000100800003400800000080000034022251281231140047800001004005240043400524004340043
8020440042300999140027161602580100100800001008001250018393161400174004040042299521029990801132008002220024006640042400401180201100991001008000080000100800003400800020280002234022251281231140037800001004005140041400514004340043
8020440042300138914003516160258010010080000100800135001839381140015400514005029960102998280112200800222002400664004940049118020110099100100800008000010080000029080002028000220022251281231140037800001004004340043400434005240043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025400423000000140027161602580010108000010800005018394484001740042400422997733002480010208000020240000400424004011800211091010800008000010800000420800000280002242502031642400391080000104004140043400434004440041
800244004329900030400281601258001010800001080000501839352400154004040042299783300208001020800002024000040054400401180021109101080000800001080000000800020080002242502041624400510080000104004340043400414004440044
8002440043299000314002716161258001010800001080000501839448400184004340040299773300208001020800002024000040043400431180021109101080000800001080000042080000028000000502041623400370080000104004340043400414004340043
8002440042299000304002516160258001010800001080000501839448400154004040040299773300258001020800002024000040040400421180021109101080000800001080000042080000028001220502021643400390080000104004340043400444004440044
80024400433000000040025161602580010108000010800005018394484001740042400402997733002580010208000020240000400424004011800211091010800008000010800000420800020080000242502021622400390080000104004140041400434004340043
800244004030000031400250160258001010800001080000501839448400154004040042299753300228001020800002024000040042400421180021109101080000800001080000000800001280002042502041622400390080000104004340041400434004340043
8002440040299000314002716160258001010800001080000501839448400154004040042299773300278001020800002024000040042400401180021109101080000800001080000042080002008000220502021623400390080000104004140041400434004340043
80024400423000003140027161602580010108000010800005018400004001740042400402998933002080010208000020240000401224004211800211091010800008000010800000420800020280000242502021624400370080000104004340041400434004340043
80024400423000103140028161612580010108000010800005018394484001740042400402997733002480010208000020240000400404004211800211091010800008000010800000420800020080002242502021622400370080000104004340041400444004140044
80024400432990000140028161612580010108000010800005018394484001740040400422997533002280010208000020240000400404004011800211091010800008000010800000420800000080002242502021633400370080000104004340043400434004140041