Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

MOV (bitmask immediate, 32-bit)

Test 1: uops

Code:

  mov w0, #0xaaaaaaaa
  nop ; nop ; nop

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 3 nops): 1.000

Issues: 0.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | 60 | 6d | 6e | map rewind (75) | map stall (76) | map int uop (7c) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int alu (97) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | e0 | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4004528402852515285283101000528951140011000002602165251000529529529529529
4004528402852515285283101000528951140011000102600165251000529529529529529
4004528402852515285283101000528951140011000002600165251000529529529529529
40045284122852515285283101000528951140011000002600165251000529529529529529
4004528402852515285283101000528951140011000002600165251000529529529529529
4004528402852515285283101000528951140011000002600165251000529529529529529
40045284122852515285283101000528951140011000002600165251000529529529529529
40045284152852515285283101000528951140011000002600165251000529529529529529
4004528402852515285283101000528951140011000002600165251000529529529529529
4004528402852505285283101000528951140011000002600165251000529529529529529

Test 2: throughput

Count: 8

Code:

  mov w0, #0xaaaaaaaa
  mov w1, #0xaaaaaaaa
  mov w2, #0xaaaaaaaa
  mov w3, #0xaaaaaaaa
  mov w4, #0xaaaaaaaa
  mov w5, #0xaaaaaaaa
  mov w6, #0xaaaaaaaa
  mov w7, #0xaaaaaaaa

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.1258

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | a9 | ac | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020410091780000000352560050600506005030025014969801006010060318600508020020010060351180201100991008010010000305115516551005759950801001107010061100611006110061
8020410060800000090352560050600506005030025014969801006010060318600508020020010060351180201100991008010010000005115316351005759950801001006110061100611006110061
8020410060780000000352560050600506005030025004969801006010060318600508020020010060351180201100991008010010000005115516551005759950801001006110061100611006110061
8020410060780000000352560050600506005030025014969801006010060318600508020020010060351180201100991008010010000005115516551005759950801001006110061100611006110061
8020410060770000000632560050600506005030025014969801006010060318600508020020010060351180201100991008010010000005113316551005759950801001006110061100611006110061
8020410060780000000352560050600506005030025014969801006010060318600508020020010060351180201100991008010010010005113316551005759950801001006110061100611006110061
8020410060780000000352560050600506005030025014969801006010060318600508020020010060351180201100991008010010000305113316551005759950801001006110061100611006110061
8020410060780000000352560050600506005030025014969801006010060318600508020020010060351180201100991008010010000015115516551005759950801001006110061100611006110061
80204100607800000120352560050600506005030025014969801006010060318600508020020010060351180201100991008010010040005115516551005759950801001006110061100611006110061
8020410060780000000352560050600506005030025014969801006010060318600508020020010060351180201100991008010010000005115516531005759950801001006110061100611006110061

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.1255

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | 19 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002410053800012875256000460004600043000200496958100381003831860004800202010038351180021109108001010035025616361003559994800101003910039100391003910039
80024100387800035256000460004600043000200496958100381003831860004800202010038351180021109108001010105023316361003559994800101003910039100391003910039
80024100387800035256000460004600043000200496958100381003831860004800202010038351180021109108001010005025316531003559994800101003910039100391003910039
8002410038782012352560004600046000430002014969581003810038318600048002020100383511800211091080010102535026516531003559994800101003910039100391003910039
800241003877000352560004600046000430002004969581003810038318600048002020100383511800211091080010103545025316461003559994800101003910039100391003910039
800241003881001235256000460004600043000200496958100381003831860004800202010038351180021109108001010005025516641003559994800101003910039100391003910039
80024100387800035256000460004600043000200496958100381003831860004800202010038351180021109108001010035025616531003559994800101003910039100391003910039
80024100387800035256000460004600043000200496958100381003831860004800202010038351180021109108001010005025416461003559994800101003910039100391003910039
800241003878013510256000460004600043000200496958100381003831860004800202010038351180021109108001010105023516551003559994800101003910039100391003910039
80024100387700035256000460004600043000200496958100381003831860004800202010038351180021109108001010005026316351003559994800101003910039100391003910039