Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STR (register, sxtw, Q)

Test 1: uops

Code:

  str q0, [x6, w7, sxtw]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100554240715281616125100010001000224720517543540355341210001000300054254211100110001000100042100202100224273116115371000541543544543543
10045424030528160025100010001000224720515543542355340110001000300054254211100110001000100044100202100224273116115401000544544543543543
1004554403152716002510001000100022448051754254235534001000100030005425421110011000100010000100202100224273116115401000543543555555543
10045424030527016025100010001000224481517542543356340010001000300054354311100110001000100044100202100224273116115401000543543564564543
100454240305271616125100010001000224720518543543353340010001000300054254011100110001000100044100002100224273116115391000544544555543543
10045424030527016525100010001000224480517542540355340010001000300054354211100110001000100044100202100224273116115391000555543555548543
10045423030527160025100010001000224481517543543355340010001000300054254211100110001000100044100202100024273116115391000543543543543543
1004542400152716161251000100010002244805175425543553398100010003000542542111001100010001000441002023100204273116115391000543543555555541
10045424030527161612510001000100022472051754354335534001000100030005425421110011000100010000100202100224273116115371000543543555555543
100454240305251616025100010001000224480517542554355340010001000300054254211100110001000100044100202100224273116115371000543555555564541

Test 2: throughput

Count: 8

Code:

  str q0, [x6, w7, sxtw]
  str q0, [x6, w7, sxtw]
  str q0, [x6, w7, sxtw]
  str q0, [x6, w7, sxtw]
  str q0, [x6, w7, sxtw]
  str q0, [x6, w7, sxtw]
  str q0, [x6, w7, sxtw]
  str q0, [x6, w7, sxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802054004230000314002700425801001008000010080006500183945504002640042400422996172999280107200800162002400484004040040118020110099100100800008000010080000080002028000223411151182163340048800001004004340041400434004340051
802044004030003304003416160258010010080000100800075001839378040015400424004229961730001801062008001620024004840040400401180201100991001008000080000100800003480002028000203411151185164440039800001004004340043400434004340051
80204400422990001400251600258010010080000100800065001839455040015400404004029961729992801062008001620024004840042400421180201100991001008000080000100800003480002058000003411151184163340039800001004005240043400414004340052
802044004230000904002716160258010010080000100800075001839455140015400404004229961730003801062008001620024004840042400421180201100991001008000080000100800003480002028000003411151184164440048800001004004340043400414004140041
8020440040300003140027160025801001008000010080007500183945514001540050400422996172999480107200800162002400484005040042118020110099100100800008000010080000080002028000023411151184164340047800001004004140043400414004340041
802044004229900014003416160258010010080000100800065001839455140017400424004929961729992801062008001620024004840043400421180201100991001008000080000100800003480002028000023411151183163440046800001004005040041400514004140043
8020440042299000040035160025801001008000010080007500183945504002640042400402996172999480107200800162002400484004240040118020110099100100800008000010080000080002028000023411151183163440037800001004004340041400434005140043
802044004929900914002516160258010010080000100800075001839455040017400404004029961232999280107200800162002400484005040042118020110099100100800008000010080000348000208800022011151182163440039800001004004140041400414004140043
802044004030000904002701602580100100800001008000750018394550400154004240042299617299948010620080016200240048400514005011802011009910010080000800001008000008000202800022011151184164440037800001004004340043400434004340041
802044004230000014002716160258010010080000100800065001839378040017400514004229969729992801072008001620024004840040400421180201100991001008000080000100800003480002028000203411151184164440048800001004004340043400434004140043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025400543001000170014004016165258001010800001080000501840004004002840054400632998933004380010208000020240000400544005411800211091010800008000010800141644018001601178000216441415020316114006180000104004840053400554005240055
8002440047300110017001400421616225800101080000108000050184000430400294005240054299893300278001020800002024000040053400471180021109101080000800001080015164400800160216800021601415020116114004980000104005540053400554005340055
800244005230011101410140032150225800101080000108000050184000400400284005440063299893300318001020800002024000040054400631180021109101080000800001080000042008000200080002242005020116114003980000104005540053400554004840055
80024400523001000190014004816012580010108000010800005018400040040022400524005429989330034800102080000202400004005340047118002110910108000080000108001514001800160119800021601425020116114006080000104005540052400554004840055
8002440047300111991710140042000258001010800001080000501839934004002940064400522998733003480010208000020240000400484005211800211091010800008000010800000420080002600580002242005020116114003980000104004340041400434004340044
800244004230000003000400331616025800101080000108000050183944800400154004040040299773300228001020800002024000040043400421180021109101080000800001080000042008000200280000242005020116114003980000104004340044400444004340041
8002440042300000030004002816160258001010800001080000501839448004001740042400422997733002280010208000020240000400424004211800211091010800008000010800151642018007601280002242005020116114003980000104005540064400554005340053
8002440054300110020001400271616025800101080000108000050183947200400184004340042299783300228001020800002024000040042400421180021109101080000800001080000042008000200280002242005020116114003980000104004340043400414004340043
8002440042300000030004004000025800101080000108000050183990800400294005240047299873300348001020800002024000040047400531180021109101080000800001080016164410800160019800001401415020116114005180000104004140044400414004340044
800244004229900003100400501616225800101080000108000050184050800400294005440063299993300328001020800002024000040047400481180021109101080000800001080000042008000200280002242005020116114003980000104006540055400644005540055