Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STR (register, Q)

Test 1: uops

Code:

  str q0, [x6, x7]
  mov x0, 0
  mov x7, 8

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 22 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10055504111002111537161452510001000100022956542558550372341610001000300055255711100110001000101415360110160181002163614273316225491000560553553552551
10045604101101701545161652510001000100022956533558550372341810001000300055955111100110001000101514360110160171002163614173216225481000553554553551561
100456141101020015431616125100010001000228365345525593653417100010003000553558111001100010001014163501101606141002163614273216225551000551561560559559
10045584101001801535161652510001000100022908533558551372340810001000300055955111100110001000101516360110160171002163614273216225501000559553554553552
10045504100101901544161652510001000100023340527551559365341610001000300055255711100110001000101614360010160171002163414173216225501000554553551562559
10045574111101801537161452510001000100023220524558553374341010001000300056155911100110001000101614362010160181002163614173216225551000559559559559554
10045524101002001546161622510001000100022932533552561372341610001000300055954911100110001000101416360110160201002163614073216225491000559559553553553
10045514111101901543161662510001000100023221527550558365341710001000300055255911100110001000101414360010161181000163614073216225471000560553552550561
10045584101102101546161602510001000100023220525557553374341010001000300056155911100110001000101514364210160191002163614073216225551000560559559553553
10045534111001701544161672510001000100023246528550559364341610001000300055155811100110001000101415360010160171002163414273216225501000559559553554553

Test 2: throughput

Count: 8

Code:

  str q0, [x6, x7]
  str q0, [x6, x7]
  str q0, [x6, x7]
  str q0, [x6, x7]
  str q0, [x6, x7]
  str q0, [x6, x7]
  str q0, [x6, x7]
  str q0, [x6, x7]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 1f | 22 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802054005230011119114003716010258010010080000100800075001839939040033400524005929971730011801062008014420024004840052400521180201100991001008000080000100800151436408001626016800021436142111511801600400490800001004004840059400504006040060
802044004730011120114003201611258010010080000100800065001840246140033400584005029979730004801062008001620024004840060400511180201100991001008000080000100800141636108001634020800021636141111511801601400470800001004005940048400484005940059
80204400523001112001400381616125801001008000010080006500183971814002240052400592997772999980107200800162002400484005940059118020110099100100800008000010080014140108001419092801201636140111511801600400490800001004004840060400534004840062
80204400473001011401400431616925801001008000010080006500183971814002540059400592997773001380106200800162002400484005940059118020110099100100800008000010080014153600800162014800021636142111511801600400540800001004005940052400604004840054
80204400473001001401400321616425801001008000010080006500184020014003440051400472996673001080106200800162002400484004740052118020110099100100800008000010080015140008001664014800021636141111511801600400440800001004006040048400594005240053
80204400582991101411400321616725801001008000010080006500183971814003440060400522997073001080107200800162002400484005040047118020110099100100800008000010080014153600800141011480002160141111511801600400440800001004005840048400594004840061
8020440047300100210140037140625801001008000010080006500183971814002740059400592996772999980106200800162002400484005940061118020110099100100800008000010080014153500800169125800021636141111511801600400490800001004005340059400484005840062
802044005830010019014003516160258010010080000100800065001839939140033400524004729966730011801062008001620024004840052400591180201100991001008000080000100800151436008001401323380002160141111511801600400550800001004004840060400484004840048
8020440051300100210140032016025801001008000010080006500183971814002240052400582996673001080107200800162002400484004740058118020110099100100800008000010080014153600800142401780000160140111511801600400550800001004005440049400484006240048
80204400473001001901400431616625801001008000010080007500183971814003440058400502996673000580106200800162002400484006140051118020110099100100800008000010080015153600800141114800021636140111511801600400570800001004005940053400604005340060

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 23 | 24 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800254004530003100400251600258001010800001080000501839808140017040040400502997533002280010208000020240000400424004011800211091010800008000010800000000800020020380002034050201516564003980000104004340041400434005040041
8002440053300060004002516002580010108000010800005018394240400170400404004229977330020800102080000202400004004040050118002110910108000080000108000003400800000024380002200502011165114003780000104005240041400414004340043
80024400422990010040025000258001010800001080000501839856040025040042400402997733002280010208000020240000400424004011800211091010800008000010800000340080002002248000223405020516454003980000104004340043400434005140043
800244005230003000400271600258001010800001080000501839808040015040042400402997533002280010208000020240000400424004211800211091010800008000010800000000800000017080002200502011165124003780000104005240043400434004140051
80024400433000300040027161602580010108000010800005018398081400170400514004229977330020800102080000202400004004240040118002110910108000080000108000003400800020024580002034050201116544003980000104004340043400434005140041
80024400402990010040025161602580010108000010800005018394240400150400514004029977330022800102080000202400004004240040118002110910108000080000108000003400800020015880000234050204161054003980000104004340041400434004340041
80024400422990300040027161602580010108000010800005018393521400170400404005129975330020800102080000202400004004240042118002110910108000080000108000003400800020024280002200502012161044003980000104004140041400434004340043
8002440042300030004002716160258001010800001080000501839808040015040042400512998633002280010208000020240000400424004211800211091010800008000010800000340080002002638000223405020716464003980000104005040043400434004340043
800244004530000000400271616025800101080000108000050183942404001704004240042299753300308001020800002024000040050400421180021109101080000800001080000034008000000207800021234050204161064003980000104004140043400434004340043
80024400403000910040025160225800101080000108000050183942414001504004040042299773300228001020800002024000040040400421180021109101080000800001080000000080000002348000223405020516584004780000104004140041400414004140043