Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STR (unsigned offset, 64-bit)

Test 1: uops

Code:

  str x0, [x6, #8]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 22 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005559411010201153516160251000100010002319715475583603405100010002000551547111001100010001014143600101411181002163614273116115491000559548548548548
1004547410100211153616160251000100010002269215525583603416100010002000552558111001100010001014143401101601181002163614073116115571000553551548558548
100454741111017115330160251000100010002269215585503723417100010002000558547111001100010001014143601101610141000143614073116115471000560553548550561
1004558411010141154516165251000100010002324415475473603416100010002000559547111001100010001015143600101601201002143614173116115561000548548560548560
100455941000021115321602251000100010002271815475583643419100010002000558553111001100010001014163600101600181000163614073116115551000553548562558560
100454741110020115320162251000100010002329215525473653409100010002000560559111001100010001015163600101600141000143614173116115441000548551548549559
10045584111101401532005251000100010002269215475523723416100010002000547549111001100010001015143600101401161002163514073116115551000558559559553548
100455341010014015461616225100010001000232681547547371341010001000200054754711100110001000101514001101600171000163614273116115551000553551562558560
10045474111101411544005251000100010002271815475593643417100010002000559552111001100010001014143601101600141002163614273116115541000550548559559559
10045474101102011532016225100010001000232921551547366341110001000200055155811100110001000101515000101600181002143614073116115441000553552550559560

Test 2: throughput

Count: 8

Code:

  str x0, [x6, #8]
  str x0, [x6, #8]
  str x0, [x6, #8]
  str x0, [x6, #8]
  str x0, [x6, #8]
  str x0, [x6, #8]
  str x0, [x6, #8]
  str x0, [x6, #8]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020540049300091040027016225801001008000010080000500183942404936962400514004229963330000801002008000020016000040058320041180201100991008000010080000100800003480000008000223451102162240039800001004004140050400434004140043
80204400503000310400251616025801001008000010080121500183980804936962400404004229955330000801002008000020016000040042319951180201100991008000010080000100800003480002088000223451102162240039800001004004140041400434005040043
8020440042300091040027161602580100100800001008000050018394241493397040050400422995532999880100200800002001600004004231995118020110099100800001008000010080000348000202800022051102162240046800001004005040043400514004340043
802044004230003004002716002580100100800001008000050018394241493696240042400422995533000080100200800002001600004005131995118020110099100800001008000010080000348000208800022051102162240037800001004004340051400434012440043
80204400493000310400271616025801001008012010080216500183980814937107400424014629963330000801002008000020016000040202401991180201100991008000010080000100800003480002028000223451102162240037800001004005140043400434004340043
8020440042300031040027161602580100100800001008000050018397841493696240042400502996433000080100200800002001600004005032003118020110099100800001008000010080000348000202800020051102162240048800001004004340043400434004340043
8020440042299001040027161602580100100800001008000050018394240493696240049400402995533000080100200800002001600004005931995118020110099100800001008000010080000080002058000223451102162240037800001004005040043400414004340043
8020440042299030040027016025801001008000010080000500183942414936962400424004029955330000801002008000020016000040045319951180201100991008000010080000100800003480002028000223451102162240039800001004005140043400514004340041
80204400403000310400251616025801001008000010080000500183942414936962400504004229953330000801002008000020016000040042320031180201100991008000010080000100800003480002028000223451102162240048800001004004340043400524004340041
80204400402990310400271616025801001008000010080000500183942404936970400424004229955330007801002008000020016024240052319951180201100991008000010080000100800003480002008000223451102162240039800001004005140043400514004340043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 18 | 19 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800254004230000066140035016025800101080000108000050183942414936960400404004229977330022800102080000201600004004240042118002110910800001080000108000034800021128000223405020023161784003980000104004140050400434005140051
800244004030000009140027016282580010108000010800005018398560493696240042400422997733002080010208000020160000400424004011800211091080000108000010800003480002118800022340502008168174004780000104012140043400524004340043
80024400423000000304002716160258001010800001080000501839424049369624004040042299773300228001020800002016000040049400421180021109108000010800001080000348000239280002234050200171617174004880000104004340043400434004340043
800244005030000000140027161602580010108000010800005018394240493696240040400492998533002280010208000020160000400504004211800211091080000108000010800003480000998000223405020017161764003980000104004340051400434004340043
800244005029900003140027161697258001010800001080000501839808049369624004240042299773300228001020800002016000040040400421180021109108000010800001080000348000211148000223405020017161764003980000104004140043400414004340043
8002440042300000030400251616025800101080000108000050183935204936960400424004229977330031800102080000201600004004240042118002110910800001080000108000034800027178000223405020081617174003980000104004340043400434004340043
80024400423000000314002716160258001010800001080000501839424049369624004240042299753300318001020800002016000040042400421180021109108000010800001080000348000295800020015020017161764003980000104004340041400434004140041
8002440042300000031400361616225800101080000108000050183942404936962400424004229977330022800102080000201600004004240040118002110910800001080000108000008000210238000223405020017161464003980000104004340051400434005140043
8002440042300000091400271616025800101080000108000050183942404936962400504004229975330022800102080000201600004005040042118002110910800001080000108000008000210178000223405020017161764003980000104004340041400434004340052
800244004230000003040027016025800101080000108000050183942404936962400424004229977330031800102080000201600004004240040118002110910800001080000108000034800025680002234050200111617174004780000104004340043400504004340043