Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STP (64-bit)

Test 1: uops

Code:

  stp x0, x1, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 22 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005547411110210154316169251000100010002317415475593643410100010003000547552111001100010001014143600101600181000163614173316445441000553553551548559
100455741110015115451616025100010001000232920559552371341010001000300054755211100110001000101415362110140117100214014173316445551000562560560560548
1004547411000140153200225100010001000226921547560360340510001000300054754711100110001000101415000101401181002163614173416445441000549559548553554
1004552410110170153716167251000100010002319705475513613408100010003000547547111001100010001016143602101400181002143614073416445491000548560553548552
100455041010014015321616525100010001000227181560547360340510001000300055155911100110001000101414000101600181002163614173416445441000548559548554554
10045524111102101532160025100010001000229321547549360341810001000300055855211100110001000101414360110140020100014014173416445561000559553554548552
100454741110019016271616325100010001000227880559551361340910001000300055854711100110001000101514001101421181002163614173416445551000552561559560548
10045475111001411543161652510001000100022692054755237334051000100030005605471110011000100010151500010140018100216014073416445491000548548548548561
100456041011014015321606251000100010002269205585533603410100010003000558547111001100010001015163601101400141002163514173416445501000560548548551548
100454741101018115361616525100010001000232680553557360340510001000300054754711100110001000101415000101601171000163614173416445551000560560548553552

Test 2: throughput

Count: 8

Code:

  stp x0, x1, [x6]
  stp x0, x1, [x6]
  stp x0, x1, [x6]
  stp x0, x1, [x6]
  stp x0, x1, [x6]
  stp x0, x1, [x6]
  stp x0, x1, [x6]
  stp x0, x1, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 18 | 1e | 1f | 22 | 23 | 24 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020540042299424310040025161612580100100800001008000650018394741493696004004240042299627299948010620080016200240048400403199311802011009910080000100800001008000042080002008000224211151180160140037800001004004140054400414004340044
8020440040300003000400271600258010010080000100800065001839936149369600400424004229961729992801062008001620024004840042319951180201100991008000010080000100800004208000202800020011151180160140037800001004004340043400444004440044
80204400433000031004002701602580100100800001008000650018393781493696204005440040299617299948010620080016200240048400423199311802011009910080000100800001008000042080002008000224211151180160140039800001004004140043400414004140043
802044004029900310040025161602580100100800001008000650018394741493696204004040042299597299948010620080016200240048400423199311802011009910080000100800001008000042080000028000224211151180160140041800001004004140043400444004440041
802044004029900000040027160025801001008000010080006500183947414936962040042400422996172999580106200800162002400484004231996118020110099100800001008000010080000008000202800022011151180160140040800001004004140043400434004340055
8020440040300003000400271601258010010080000100800065001839474149369620400424004229967330000801002008000020024000040042319951180201100991008000010080000100800000080002008000224200051101161240037800001004004340044400444004340044
80204400422990030004002716002580100100800001008000050018394481493696204004240042299533299988010020080000200240000400403200711802011009910080000100800001008000042080000028000024200051101161240040800001004004140055400414004340043
802044004030000300040027161602580100100800001008000050018394481493696204004240042299563300008023020080000200240000400433199611802011009910080000100800001008000042080002068000004200051101161240040800001004004340044400444004340041
80204400423000031004002701602580100100800001008000050018393521493696204004340042299553300008010020080000200240000400433199511802011009910080000100800001008000042080002028000224200051101161240037800001004004340041400434004340043
8020440042300012300040027161612580100100800001008000050018394481493696204004040040299553299988010020080000200240000400423199511802011009910080000100800001008000042080002028000224200051101161240037800001004004140043400434004340043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800254004229900000918100400270002580010108000010800005018394484936960400424004329977330020800102080000202400004004340043118002110910800001080000108001515440080016001680002144414205020014163240050080000104005240048400524005540064
800244005430011100020000400280160258001010800001080000501839448493696040043400432997733003480010208000020240000400534005411800211091080000108000010800141544018001600168000214441400502002162240039080000104004340043400414004140041
80024400402990000003000400271616025800101080000108000050183944849369634004340043299773300228001020800002024000040040400401180021109108000010800001080000042008000000080000242000502003162240039080000104004340043400414004340043
800244004230000000031004002501602580010108000010800005018400004936962400424004229977330024800102080000202400004004240040118002110910800001080000108000004200800020028000220000502003163240037080000104004140041400444004340044
80024400423000000002210040027016025800101080000108000050183944849369604004240040299773300208001020800002024000040043400421180021109108000010800001080000042008000200080002042000502002162640037080000104004440044400434004340044
8002440040300000000191004003916161258001010800001080000551839448493696040042400402997733002080442208000020240000400404004211800211091080000108000010800000000800000028000020000502002166240037080000104004440043400434004440043
8002440043300000000300040025160025800101080000108000050183935249369624004040042299783300238001020800002024000040040400421180021109108000010800001080000042008000000580000242001502002162240050080000104005340048400534004840064
800244005430011000018101400391602258001010800001080000501840508493697440047400542998233002780010208000020240000400544005211800211091080000108000010800151443018001600148000216441400502006162640039080000104004140041400434004340044
80024400423000000012170014003915161258001010800001080000501839956493698340054400542998933004380010208000020240000400544005511800211091080000108000010800151544028001611198000214441400502002162240039080000104004440043400414004140043
8002440043300000000310040027161602580010108000010800005018393524936974400424004329977330034800102080000202400004004040040118002110910800001080000108000004200800021008000020000502002162240039080000104004340041400414004340041