Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STR (Q)

Test 1: uops

Code:

  str q0, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005542400006181015391616725100010001000230285275645543653409100010002000555553111001100010001016154401101600161002164414073116115501000555555555553553
1004552411100190015391616625100010001000229335285525563643412100010002000551554111001100010001014154400101601181002164414173116115511000555555555555555
1004554411010170015491616225100010001000230285285525543653412100010002000552554111001100010001015154401101600181002164414073116115601000553552564555555
1004554411110180015391615025100010001000230045275635543653412100010002000552554111001100010001014144400101601181002164414173116115511000552564555555555
1004554410000200015391616425100010001000230045295545643673412100010002000554552111001100010001014154400101601161002164414073116115511000555555555555555
1004552410000180015391615025100010001000230045295545523673422100010002000554552111001100010001015154200101601161002164414073116115511000553564555555555
1004554411100191015361616125100010001000230045385545513673412100010002000554552111001100010001014154401101600171002164414273116115511000555555555555554
1004552410110180015391615025100010001000230045285525543643412100010002000551554111001100010001015154400101600161002164414073116115491000555555554553553
1004563410010190015391616025100010001000230285395545523673410100010002000554554111001100010001015144401101601181002164414173116115511000553564556555555
1004554410100171015391616225100010001000229085295515543663412100010002000553554111001100010001014144401101601161002164414173116115491000555554552553564

Test 2: throughput

Count: 8

Code:

  str q0, [x6]
  str q0, [x6]
  str q0, [x6]
  str q0, [x6]
  str q0, [x6]
  str q0, [x6]
  str q0, [x6]
  str q0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss instruction (0a) | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020540054300000300400271616125801001008000010080000500183947240017400434004329955330000801002008000020016000040040400401180201100991001008000080000100800004280002028000220051103163340037800001004004340043400434004340043
80204400423000000104002816002580100100800001008000050018394484001540042400422995632999880100200800002001600004004240040118020110099100100800008000010080000080002008000220051102163340039800001004004140055400434004140043
80204400423000003104002716012580100100800001008000050018393524001740040400402995332999880100200800002001600004004240042118020110099100100800008000010080000080002008000220051102163340051800001004005140043400434004140043
8020440040300000300400270160258010010080000100800005001839448400174004240043299553300018010020080000200160000400424004311802011009910010080000800001008000042800000080002242051103163340040800001004004140044400414004340041
802044004230000030040025160125801001008000010080000500183944840017400424004229953329998801002008000020016000040042400421180201100991001008000080000100800000800020880000242051103162340039800001004004340043400434004340041
802044004230000030040027160025801001008000010080000500183944840017400424004229967330000801002008000020016000040042400421180201100991001008000080000100800004280000108000200051103163340037800001004004440041400434004440043
8020440043300006300400271600258010010080000100800005001839472400174004040040299563300018010020080000200160000400424004211802011009910010080000800001008000042800020280012242051103163240039800001004004140043400434004340043
802044004230000031040027001258010010080000100800005001839472400184004040043299553300018010020080000200160000400434004311802011009910010080000800001008000042800000280002242051103163340040800001004004340055400434004140043
80204400403001063004002716160258010010080000100800005001839448400154004240042299533300008010020080000200160000400424004211802011009910010080000800001008000042800020280000242051103163340040800001004004340044400434004340044
80204400423000003004002716012580100100800001008000050018394484001540040400402995633000180100200800002001600004004240042118020110099100100800008000010080000080002008000220051103162340040800001004004440044400434004340041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002540052301100001410140044160725800101080000108000050183983614002740061400512998633003280010208000020160000400524005811800211091010800008000010800151436028001401198000214361405020116114005680000104004840059400514006040048
800244005730010010200014003716167408001010800001080000501839860140027400614004729986330038800102080000201600004005840060118002110910108000080000108001415369408019611198006216361415020116114004480000104006040048400484006040059
8002440047300110101710140037161692580010108000010800005018399081400224006040052299823300288001020800002016000040048400471180021109101080000800001080014150018001601148000216361425020216114004480000104004840060400484005940052
80024400583001111019101400320002580010108000010800005018398601400364004740058299943300278001020800002016000040052400591180021109101080000800001080014140018001400208000216361415020116114004980000104004940053400624005940062
8002440058300111101900140044161612580010108000010800005018396921400224004740052299883300288001020800002016000040059400471180021109101080000800001080014143601800140021800001601415020116114005080000104004940053400624005940062
80024400583001101014101400320162258001010800001080000501839692140033400514004729987330027800102080000201600004005940047118002110910108000080000108001416000800160116800021401415020116114004880000104005940052400604005440060
8002440058300101102010140033161602580010108000010800005018399081400284005940047299853300398001020800002016000040059400591180021109101080000800001080015160018001601178000214361415020116114004580000104004840208400534004840053
800244005030011110191014003216162258001010800001080000501839692140033400524005929987330027800102080000201600004006040047118002110910108000080000108001414001800160120800001601405020116114005780000104006040061400594005240048
8002440047300111101510140032161622580010108000010800005018402681400334005140052299873300278001020800002016000040059400591180021109101080000800001080014140008001601148000016361415020116114004480000104005340049400534004840048
8002440047300100001400140044160525800101080000108000050184022114002240058400602998233003980010208000020160000400594004711800211091010800008000010800151536018001411188000216361425020116114004880000104004840048400484005440060