Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm: Overview | Base Instructions | SIMD and FP Instructions

STR (signed offset, Q)

Test 1: uops

Code:

  str q0, [x6, #0x10]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a2 | a4 | a6 | a7 | a8 | ac | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
10055424031528161652510001000100022472151854354335534001000100020005425431110011000100010004200100202100224273116115391000544544543543543
10045424030528161612510001000100022472151854254235534011000100020005425431110011000100010004200100202100224273116115391000543543543555543
10045424030527161612510001000100022448151754254235534001000100020005425421110011000100010004200100202100224273116115391000543543543543543
10045424030527161602510001000100022472151754254235534001000100020005425421110011000100010004200100202100224273116115391000543543543543543
10045424030527161652510001000100022448151754354335534001000100020005425421110011000100010004200100202100224273116115391000543543555543543
10045424030527161602510001000100022448151754254235634001000100020005435431110011000100010004200100202100224273116115511000543555543543543
10045424030527161612510001000100022448151754254235534001000100020005425421110011000100010004200100202100224273116115391000543543543544544
10045424030527161612510001000100022448151754254235534011000100020005425421110011000100010004200100202100224273116115391000543543543543543
10045424030527161602510001000100022448151754254235534001000100020005425431110011000100010004200100202100224273116115391000543543543543543
10045544030527161602510001000100022448151754254235534001000100020005435431110011000100010004200100202100224273116115391000543543543543543

Test 2: throughput

Count: 8

Code:

  str q0, [x6, #0x10]
  str q0, [x6, #0x10]
  str q0, [x6, #0x10]
  str q0, [x6, #0x10]
  str q0, [x6, #0x10]
  str q0, [x6, #0x10]
  str q0, [x6, #0x10]
  str q0, [x6, #0x10]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a2 | a6 | a7 | a8 | ac | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | df | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80205400503000000091040036160025801001008000010080000500183942404001540042400402995533000080100200800002001600004004240042118020110099100100800008000010080000340800020280002234511211612240037800001004004340043400434004340052
802044004229900000310400351616025801001008000010080000500185060204002640042400423032433000080100200800002001600004004240464118020110099100100800008000010080000340800020280002234511211611240037800001004004340043400434005040043
802044004230000000600400271616025801001008000010080000500183978404001740050400422996233000080100200800002001600004004040049118020110099100100800008000010080000340800000280002234511211611240039800001004004340604400434005140043
802044004030000000610400271616325801001008000010080000500183942404001740040400422995333000880100200800002001600004032140182118020110099100100800008000010080000340800620280002234511211611240048800001004004340043400524004340043
80204400423000000031040036016025801001008000010080000500183935204001740042400512996433000080100200800002001600004004040042118020110099100100800008000010080000340800020280002234511211611240039800001004004340043400434005040043
80204400423000000091040027161602580100100800001008000050018394240400154004240040299553300098010020080000200160000400424004211802011009910010080000800001008000000800020280002234511211611240039800001004004340051400434004340598
802044005130000000300400341616025801001008000010080000500183942424001740042400512995532999880100200800002001600004004040042118020110099100100800008000010080000340800000280002034511211611240039800001004004140041400414004140041
802044004030000000300400251616025801001008000010080000500183983204001740049400492995333000080100200800002001600004004040042118020110099100100800008000010080000340800020280002234511211611240039800001004005140041400514004140041
8020440042300000003004002701602580100100800001008000050018393520400174004040050299633300008010020080000200160000400494004211802011009910010080000800001008006234080002108000200511211613240039800001004004340041400434004340043
80204400403000000031040025161602580100100800001008000050018398560400174004240050299643299988010020080000200160000400514004211802011009910010080000800001008000000800023280002234511211611240039800001004004340041400434005040043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | cf | d5 | d6 | d9 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
800254005829910110001910014004316168425800101080000108000050183969204003540052400582999433003880010208000020160000400524005211800211091010800008000010800141536018001600188000216361405020716010640050080000104006140060400514005940060
8002440052299111000017100140032160225800101080000108000050183981204003340058400502998533003280010208000020160000400604005111800211091010800008000010800151536018001612208000016361405020616081240045080000104005940048400604005240053
800244006030011100001910014004401682580010108000010800005018401990400274005940059299933300318001020800002016000040059400611180021109101080000800001080015143601800141019800001401405020616061040044080000104005240060400534005940051
80024400473001001000200001400321516125800101080000108000050184031604002740057400472998233003380010208000020160000400474004711800211091010800008000010800161534008001602188000216361415020616071040055080000104004840060400534005940061
80024400582991010000171001400350160258001010800001080000501840340040027400574004729996330032800102080000201600004004740047118002110910108000080000108001514360180016011480002163614050201016010640055080000104006040053400484005340059
800244005030011010001410014003516162992580010108000010800005018403160400274005740060299823300278001020800002016000040058400471180021109101080000800001080014160208001600188000216014150201016010640054080000104005440059400524006040061
8002440059300111000018100140045016625800101080000108021650184022104015640205400612999333003280010208000020160000400574006011800211091010800008000010800151436018001402188000214361415020616061040055080000104004840048400534004840061
8002440047300111100019000140032016525800101080000108000050183969204003340053400582998733003880010208000020160000400474005811800211091010800008000010800151536018001600208000016361405020616010640044080000104005240059400484006040048
80024400593001001000141001400430162258001010800001080000501840340040022400574006029996330027800102080000201600004005940049118002110910108000080000108001515360180014001480002163614150201016010640049080000104004840059400484004840059
80024400593001010000190001400321616925800101080000108000050183988404003340060400522998633003980010208000020160000400504004711800211091010800008000010800151536028001610208000216361405020716061040044080000104005340048400604005140053