Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STP (32-bit)

Test 1: uops

Code:

  stp w0, w1, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int store (96) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10055634111101810153916161251000100010002350805545523673412100010003000554552111001100010001015164400101602181002164214273216115501000555555553553553
10045644101101710153916162251000100010002300405515543663412100010003000553554111001100010001014154401101600181002164414173116115511000555555555555555
10045534111102010153816162251000100010002302805545633673421100010003000554551111001100010001016144411101601171002164414173116115511000555555555555554
10045524110001710153916166251000100010002295605635543653412100010003000552554111001100010001014144402101602181002164414173116115511000555553553564564
10045544110101710153916166251000100010002343805545523673409100010003000554554111001100010001014144400101610161002164414273116115511000555555555554553
10045524100102110153916164251000100010002290805545533673410100010003000554554111001100010001014144400101600191002164414173116115491000555555555554553
10045514111102010153916161251000100010002302805545523673410100010003000554553111001100010001016144400101602161002164314173116115511000553553565555555
10045544111001910153716161251000100010002300405545633673412100010003000554552111001100010001016154401101602161002164414073116115511000555555555555555
10045534111101710153916161251000100010002300405525543643412100010003000551554111001100010001014154401101601181002164414273116115511000555555555553553
10045524111101710153715161251000100010002300415545533763410100010003000563554111001100010001015154401101600171002164214073116115491000552553564556555

Test 2: throughput

Count: 8

Code:

  stp w0, w1, [x6]
  stp w0, w1, [x6]
  stp w0, w1, [x6]
  stp w0, w1, [x6]
  stp w0, w1, [x6]
  stp w0, w1, [x6]
  stp w0, w1, [x6]
  stp w0, w1, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | cd | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802054004229900012900400250160258010010080000100800005001839424049369714004240042299553300008010020080000200240000400423199311802011009910080000100800001008000034080000008000223400511001161140037800001004005240043400524004340041
80204400403000000010400271600258010010080000102800005001839424049369604004240042299533300008010020080000200240000400423200411802011009910080000100800001008000034080002008000223400511001161140039800001004004140052400434005140043
802044004030000009004003500025801001008000010080000500183935214936962400494004229955330000801002008000020024000040042319951180201100991008000010080000100800003408000208800002000511001161140039800001004004340050400434004340043
80204400403000000010400271616025801001008000010080000500183942404936962400424004229953330007801002028012120024000040042319951180201100991008000010080000100800003408000202800000000511001161140037800001004004140043400414004140043
8020440040300000031040036016025801001008000010080000500183935214936970400424004229955330007801002008000020024000040042319931180201100991008000010080000100800000080000058000223400511001161140039800001004004340043400414004340041
8020440042300000000040027016025801001008000010080000500183942414936962400424004229953330007801002008000020024000040042319951180201100991008000010080000100800003408000212800022000511001161140037800001004004140043400414004340052
80204400423001000600400271616025801001008000010080000500183986004936962400424004029955330000801002008000020024000040042319931180201100991008000010080000100800000080002008000223400511011161140039800001004004340050400414005140043
80204400423000000900400271616025801001008000010080000500183935204936969400424004229953330008801002008000020024000040042319931180201100991008000010080000100800003408000200800002000511001161140039800001004005140043400414004140043
80204400423000000301400251600258010010080000100800005001839352049369604004240042299553300098010020080000200240000400423199511802011009910080000100800001008000034080002008000223400511001161140039800001004004340043400434005040041
802044004230000003004002501602580100100800001008000050018396920493696240051400422996333000080100200800002002400004005131995118020110099100800001008000010080000342880002008000203400511001161140037800001004004140041400514004340052

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025400543001110171001400391616525800101080000108000050183988414936973400634005129987330032800102080000202400004005440063118002110910800001080000108001615440180016001680002164414105020716224005180000104005440055400554004840052
8002440054300110019100140039160225800101080000108000050184000404936974400524005229982330034800102080000202400004005240053118002110910800001080000108001416440180016013080002164414105020316264005180000104005440055400554006540053
80024400543001110151001400391616725800101080000108000050184002804936974400524005229987330034800102080000202400004005240053118002110910800001080000108001514440080016003580002164414205020216264004980000104005340048400484004840065
8002440054300111618100140038016525800101080000108000050184002804936972400544006329999330032800102080000202400004005440052118002110910800001080000108001614440180016005180240144414005020216224005180000104005540053400554005340055
80024400573001000171001400391616025800101080000108000050183995604936975400544004729989330043800102080000202400004005440054118002110910800001080000108001514440080014011880002144414005020216624004980000104006540055400644005540055
8002440047299100020100140039161652580010108000010800005018400281493697440054400542998933002780010208000020240000400544005411800211091080000108000010800141544008001411178000014014005020216224004980000104006440055400644005540055
80024400523001119171001400371616625800101080000108000050184002804936974400524005229987330034800102080000202400004005240053118002110910800001080000108001414440080016012680002164414105020216224006180000104005540053400564005440055
80024400523001100140001400391516125800101080000108000050183990804936974400544005429989330028800102080000202400004005440054118002110910800001080000108001415440180016002280002164414105020216224004580000104005640054400484005540055
80024400473001110190001400361616625800101080000108000050184043904936967400534005429989330032800102080000202400004005440052118002110910800001080000108001515440180016004880242164414005020216624004980000104005540053400554005340055
80024400543001110221001400391616025800101080000108000050184048604936974400534005429989330027800102080000202400004004740054118002110910800001080000108001515440180016011780002164414005020216224005280000104004840053400554004840048