Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STR (register, lsl, S)

Test 1: uops

Code:

  str s0, [x6, x7, lsl #2]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 19 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005542400010534161602510001000100022856151754254035334001000100030005515421110011000100010023410020210022073116115391000550543543543543
10045424006005271616025100010001000224240526542542355340810001000300054255011100110001000100034100205100223473116115371000550551543543543
1004542400300527160025100010001000223521515542542355340010001000300054255111100110001000100034100208100223473116115391000543551541543543
1004542400300527161602510001000100022424151754254035534001000100030005425421110011000100010000100202100223473116115391000543543543551551
10045514000005271616025100010001000228561517542550353340810001000300054254911100110001000100034100208100003473116115461000543543543551551
1004549400310527016025100010001000223520517551542353340010001000300054054211100110001000100034100002100223473116115481000543543552552543
10045424003005341616025100010001000224241524540542363340010001000300054254911100110001000100034100202100223473116115391000550551543543543
1004542400300527161622510001000100022424051754254035534001000100030005425421110011000100010003410020210022073116115391000543543543550551
10045504003005351616025100010001000228080517542542355340010001000300054954211100110001000100034100202100223473116115461000543550551543543
10045423003005271616225100010001000224241524542542363340010001000300054254011100110001000100034100200100023473116115391000543543543543543

Test 2: throughput

Count: 8

Code:

  str s0, [x6, x7, lsl #2]
  str s0, [x6, x7, lsl #2]
  str s0, [x6, x7, lsl #2]
  str s0, [x6, x7, lsl #2]
  str s0, [x6, x7, lsl #2]
  str s0, [x6, x7, lsl #2]
  str s0, [x6, x7, lsl #2]
  str s0, [x6, x7, lsl #2]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205400523001110015103400370160258010010080000100800075001839719140015400404004229959729992801072008001620024004840042400401180201100991001008000080000100800000000800020028000223400111512291688400390800001004004340041400414004140043
802044004230000000310240025161602580100100800001008000750018394551400154004240042299617299928010620080016200240048400424004011802011009910010080000800001008000003400800020058000203400111512281688400470800001004004340051400434004340043
80204400423000000012102400270002580100100800001008000750018394551400174004240040299697299948010720080016200240048400424004211802011009910010080000800001008000003400800020028000223400111512281688400370800001004004340052400414005140043
80204400403000000030024002716160258010010080060100800075001839810040025400424004229961729992801072008001620024004840042400421180201100991001008000080000100800000000800020088000223400111512281688400390800001004004140043400414004340051
802044004230000000910240025161602580100100800001008000750018393780400264004240042299597299928010720080016200240048400404004011802011009910010080000800001008000003400800000088000223400111512261688400390800001004004340050400434005140041
80204400423000000030024002716160258010010080000100800075001839839140017400404004229959729994801072008001620024004840040400421180201100991001008000080000100800000000800020008000223400111512281688400390800001004004140041400434004340041
802044004030000000310240025000258010010080000100800065001839378040017400424004229961730002801072008001620024004840040400511180201100991001008000080000100800000000800000028000203400111512281698400390800001004004340043400414004140043
80204400423000000000024002716002580100100800001008000750018398870400154004240042299617299948022120080016200240048400424004011802011009910010080000800001008000003400800020008000023400111512281688400390800001004004340043400414004340052
80204400423000000030024003501602580100100800001008000650018393780400264004240040299617299948010620080016200240048400514004211802011009910010080000800001008000003400800000028001223400111512281688400390800001004004140043400414004340041
802044004029900000900240027161602580100100800001008000750018393780400174004240048299597300028010720080016200240048400404004011802011009910010080000800001008000003400800020008000203400111512281688400390800001004004340043400434004340043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss data (0b) | 18 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025400423000000304002716160258001010800001080000501839784040015400404004029984330020800102080000202400004004240042118002110910108000080000108000003480002028000023450322162240039080000104004140041400514004340043
8002440040300000031400251616025800101080000108000050183935204001740040400422997733002280010208000020240000400424004211800211091010800008000010800000080002028000223450302162240046080000104005040041400514004140041
800244004030000000040027016025800101080000108000050183942404002440042400422997733003180010208000020240000400404004011800211091010800008000010800000080002008000003450322162240047080000104004140043400414004340043
800244004030000003040027016025800101080000108000050183942404001740040400422997733002280010208000020240000400504004211800211091010800008000010800000080000008000023450302162240039080000104005140041400514004340043
8002440042299000901400351616025800101080000108000050183935204001540040400402997533002080010208000020240000400424004011800211091010800008000010800000348000002800020050282162240048080000104004340043400414004340041
800244004030000000140025160025800101080000108000050183935204001540042400402997733002080010208000020240000400494004211800211091010800008000010800000348000208800002050302162240037080000104004340041400414004140041
8002440042300000090400270002580010108000010800005018394240400154005040042299753300228001020800002024000040049400421180021109101080000800001080000008000202800000050303162240037080000104004340050400414004340041
800244004030000003040027160025800101080000108000050183942404002540042400422997533003180010208000020240000400404004011800211091010800008000010800000080000008000223450322162240039080000104004140043400504004340050
800244004230000003040027160025800101080000108000050183942404001540042400422997533002080010208000020240000400424004211800211091010800008000010800000348000202800022050302162240039080000104004340041400434004340043
8002440049300000031400271616025800101080000108000050183935204001540040400422997533003080010208000020240000400424004211800211091010800008000010800000348000202800020050302162240039080000104004340041400434004140043