Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STP (signed offset, 32-bit)

Test 1: uops

Code:

  stp w0, w1, [x6, #8]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100555440000315271616025100010001000224485425423553401100010003000552542111001100010001000420100202100224273216225391000543543543543543
100454240000315271616025100010001000224485425423553410100010003000542542111001100010001000420100202100224273216225391000544543543543564
100455440000315281616025100010001000224725435433563400100010003000542542111001100010001000420100202100224273216225391000543543543543543
100454240000305271616125100010001000224725425433563400100010003000542542111001100010001000420100202100224273216225401000555544544554543
100454241010315271616125100010001000224485425423553400100010003000542542111001100010001000420100202100224273216225391000544544544543543
100454240000315271616025100010001000224485435423553400100010003000542542111001100010001000420100202100224273216225391000544544544546543
100454240000315271616125100010001000224485425423673400100010003000542543111001100010001000420100202100224273216225391000543543543543543
100454240000315281616025100010001000224725425433563400100010003000554542111001100010001000420100202100224273216225391000543543543543543
100454230000305271616025100010001000224485425433563400100010003000554542111001100010001000420100202100224273216225391000543543555543543
100454230000315281616125100010001000224725435433553400100010003000542543111001100010001000420100202100224273216225391000543543543543543

Test 2: throughput

Count: 8

Code:

  stp w0, w1, [x6, #8]
  stp w0, w1, [x6, #8]
  stp w0, w1, [x6, #8]
  stp w0, w1, [x6, #8]
  stp w0, w1, [x6, #8]
  stp w0, w1, [x6, #8]
  stp w0, w1, [x6, #8]
  stp w0, w1, [x6, #8]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205400543001100006171014003216161258010010080000100800005001839908049369830400544005429967330012801002008000020024000040063320071180201100991008000010080000100800161544018001601208000216441410511021622400510800001004005240048400534005640053
8020440047300010000018001400321616125801001008000010080000500184002804936972040064400522996133001280100200800002002400004005432001118020110099100800001008000010080014144401800140214800021601410511021623400490800001004004940048400534005540055
80204400543000101000191004002701602580100100800001008000050018393520493696304004040043299553299988010020080000200240000400543201611802011009910080000100800001008000004200800020058000020000511021622400390800001004004340044400444004340044
80204400423000000000000040027161602580100100800001008000050018394480493696004004240043299553300018010020080000200240000400543200011802011009910080000100800001008000004200800020028000000000511021622400390800001004004440043400434004140043
80204400433000000000300040027161602580100100800001008000053518394960493696004004340043299556300008010020080000200240000400543200611802011009910080000100800001008000000008000210080002042000511021622400390800001004004140043400554004140043
80204400403000000000300040028016025801001008000010080000500183944804936963040042400422995333000180100200800002002400004005432010118020110099100800001008000010080000042008000200280002042000511021622400390800001004004340043400444004140044
802044004230000000003100400251616125801001008000010080000500184000004936962040042400422995533000080100200800002002400004005432000118020110099100800001008000010080000042008000000280002242000511021622400390800001004004340043400434004340043
802044005430000000003010400251616025801001008000010080000500183944804936963040043400432995533000180100200800002002400004005432007118020110099100800001008000010080000042008000200080002242000511021622400390800001004004140041400434004140043
802044004030000000003000400281616125801001008000010080000500183944804936960040043400432995533000180100200800002002400004005232007118020110099100800001008000010080000042008000200280002242000511021622400370800001004004340043400434004340041
80204400423000000000300040027016025801001008000010080000500183944804936960040040400402995332999880100200800002002400004005440132118020110099100800001008000010080000042008000200280000242000511021622400510800001004004340043400434004140041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025400423000304002716160258001010800001080000501839352049369700400424005129975330030800102080000202400004004240042118002110910800001080000108000034008000200800022005020516424004880000104004140041400414004140043
80024400503004293140025161602580010108000010800005018398081493696904004040040299843300228001020800002024000040042400501180021109108000010800001080000340080002008000023605051216244003980000104004340041400434005140043
800244004030006040035000258001010800001080000501839424049369690400424005029975330022800102080000202404204004240050118002110910800001080000108000000080002028000203405020216424003780000104005340041400414004340041
80024400423000314003516162258001010800001080000501839760049369620400424004229977330020800102080000202400004004040040118002110910800001080000108000003008000000800020005020416244004880000104004340043400434004340043
8002440049300000400271600258001010800001080000501839424049369620400504004229984330020800102080000202400004005040042118002110910800001080000108000000080002008000223415020216244003980000104004140043400434004340051
80024400403000314002516002580010108000010800005018398080493696204004240042299753300228001020800002024000040042400421180021109108000010800001080000340080002008000203405020416424003780000104004140043400524004340041
8002440042300000400360160258001010800001080000501839808049369620400424004229977330020800102080000202400004004240042118002110910800001080000108000034008000208800020005020216244003980000104005140041400514004340043
80024400423000314002501602580010108000010800005018394240493696004004240042299773300228001020800002024000040042400421180021109108000010800001080000340080002088000223405020416244004780000104004340041400504004340043
80024400403000314002516164258001010800001080000501839424049369620400424005129975330022800102080000202400004004240051118002110910800001080000108000000080000028000223405020216424003780000104005240043400434004340052
8002440042300000400251600258001010800001080000501839352049369620400404004029975330031800102080000202400004004240042118002110910800001080000108000034008000205800022005020416444003780000104004140051400434005040043