Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STR (Q)

Test 1: uops

Code:

  str q0, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
1005542400006181015391616725100010001000230285275645543653409100010002000555553111001100010001016154401101600161002164414073116115501000555555555553553
1004552411100190015391616625100010001000229335285525563643412100010002000551554111001100010001014154400101601181002164414173116115511000555555555555555
1004554411010170015491616225100010001000230285285525543653412100010002000552554111001100010001015154401101600181002164414073116115601000553552564555555
1004554411110180015391615025100010001000230045275635543653412100010002000552554111001100010001014144400101601181002164414173116115511000552564555555555
1004554410000200015391616425100010001000230045295545643673412100010002000554552111001100010001014154400101601161002164414073116115511000555555555555555
1004552410000180015391615025100010001000230045295545523673422100010002000554552111001100010001015154200101601161002164414073116115511000553564555555555
1004554411100191015361616125100010001000230045385545513673412100010002000554552111001100010001014154401101600171002164414273116115511000555555555555554
1004552410110180015391615025100010001000230045285525543643412100010002000551554111001100010001015154400101600161002164414073116115491000555555554553553
1004563410010190015391616025100010001000230285395545523673410100010002000554554111001100010001015144401101601181002164414173116115511000553564556555555
1004554410100171015391616225100010001000229085295515543663412100010002000553554111001100010001014144401101601161002164414173116115491000555554552553564

Test 2: throughput

Count: 8

Code:

  str q0, [x6]
  str q0, [x6]
  str q0, [x6]
  str q0, [x6]
  str q0, [x6]
  str q0, [x6]
  str q0, [x6]
  str q0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 08 | 0a | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a2 | a7 | a8 | ac | af | bc | dcache store miss (c0) | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8020540054300000300400271616125801001008000010080000500183947240017400434004329955330000801002008000020016000040040400401180201100991001008000080000100800004280002028000220051103163340037800001004004340043400434004340043
80204400423000000104002816002580100100800001008000050018394484001540042400422995632999880100200800002001600004004240040118020110099100100800008000010080000080002008000220051102163340039800001004004140055400434004140043
80204400423000003104002716012580100100800001008000050018393524001740040400402995332999880100200800002001600004004240042118020110099100100800008000010080000080002008000220051102163340051800001004005140043400434004140043
8020440040300000300400270160258010010080000100800005001839448400174004240043299553300018010020080000200160000400424004311802011009910010080000800001008000042800000080002242051103163340040800001004004140044400414004340041
802044004230000030040025160125801001008000010080000500183944840017400424004229953329998801002008000020016000040042400421180201100991001008000080000100800000800020880000242051103162340039800001004004340043400434004340041
802044004230000030040027160025801001008000010080000500183944840017400424004229967330000801002008000020016000040042400421180201100991001008000080000100800004280000108000200051103163340037800001004004440041400434004440043
8020440043300006300400271600258010010080000100800005001839472400174004040040299563300018010020080000200160000400424004211802011009910010080000800001008000042800020280012242051103163240039800001004004140043400434004340043
802044004230000031040027001258010010080000100800005001839472400184004040043299553300018010020080000200160000400434004311802011009910010080000800001008000042800000280002242051103163340040800001004004340055400434004140043
80204400403001063004002716160258010010080000100800005001839448400154004240042299533300008010020080000200160000400424004211802011009910010080000800001008000042800020280000242051103163340040800001004004340044400434004340044
80204400423000003004002716012580100100800001008000050018394484001540040400402995633000180100200800002001600004004240042118020110099100100800008000010080000080002008000220051103162340040800001004004440044400434004340041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8002540052301100001410140044160725800101080000108000050183983614002740061400512998633003280010208000020160000400524005811800211091010800008000010800151436028001401198000214361405020116114005680000104004840059400514006040048
800244005730010010200014003716167408001010800001080000501839860140027400614004729986330038800102080000201600004005840060118002110910108000080000108001415369408019611198006216361415020116114004480000104006040048400484006040059
8002440047300110101710140037161692580010108000010800005018399081400224006040052299823300288001020800002016000040048400471180021109101080000800001080014150018001601148000216361425020216114004480000104004840060400484005940052
80024400583001111019101400320002580010108000010800005018398601400364004740058299943300278001020800002016000040052400591180021109101080000800001080014140018001400208000216361415020116114004980000104004940053400624005940062
8002440058300111101900140044161612580010108000010800005018396921400224004740052299883300288001020800002016000040059400471180021109101080000800001080014143601800140021800001601415020116114005080000104004940053400624005940062
80024400583001101014101400320162258001010800001080000501839692140033400514004729987330027800102080000201600004005940047118002110910108000080000108001416000800160116800021401415020116114004880000104005940052400604005440060
8002440058300101102010140033161602580010108000010800005018399081400284005940047299853300398001020800002016000040059400591180021109101080000800001080015160018001601178000214361415020116114004580000104004840208400534004840053
800244005030011110191014003216162258001010800001080000501839692140033400524005929987330027800102080000201600004006040047118002110910108000080000108001414001800160120800001601405020116114005780000104006040061400594005240048
8002440047300111101510140032161622580010108000010800005018402681400334005140052299873300278001020800002016000040059400591180021109101080000800001080014140008001601148000016361415020116114004480000104005340049400534004840048
8002440047300100001400140044160525800101080000108000050184022114002240058400602998233003980010208000020160000400594004711800211091010800008000010800151536018001411188000216361425020116114004880000104004840048400484005440060