Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

MOVI (vector, 8H, zero)

Test 1: uops

Code:

  movi v0.8h, #0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 0.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd alu (9a) | a9 | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
10041521027150013415215231015215211100110000075215221491000153153153153153
10041521050150013415215231015215211100110000075215221491000153153153153153
10041521027150013415215231015215211100110000075215221491000153153153153153
10041521027150013415215231015215211100110000075215221491000153153153153153
10041521027150013415215231015215211100110000075215221491000153153153153153
10041521027150013415215231015215211100110000075215221491000153153153153153
10041521027150113415215231015215211100110000075215221491000153153153153153
10041521027150113415215231015215211100110000075215221491000153153153153153
10041521027150013415215231015215211100110000075215221491000153153153153153
10041521027150013415215231015215211100110000075215221491000153153153153153

Test 2: throughput

Count: 8

Code:

  movi v0.8h, #0
  movi v1.8h, #0
  movi v2.8h, #0
  movi v3.8h, #0
  movi v4.8h, #0
  movi v5.8h, #0
  movi v6.8h, #0
  movi v7.8h, #0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.1258

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80204100777800000420035955410010010050001003410060100603181002002001025010060118020110099100100800001000000000000511221622100570800001001006110061100611006110061
8020410060780000060035955410010010050001003410060100603181002002001006010060118020110099100100800001000000000000511221622100570800001001006110061100611006110061
80204100607700000240035955410010010050001003410060100603181002002001006010060118020110099100100800001000000000000511221622100570800001001006110061100611006110061
80204100607800000240035955410010010050001003410060100603181002002001006010060118020110099100100800001000000000000511221622100570800001001006110061100611006110061
8020410060780000000035955410010010050001003410060100603181002002001006010060118020110099100100800001000000000000511221622100570800001001006110061100611006110061
80204100608100000600175955410010010050001003410060100603181002002001006010060118020110099100100800001000000000000512821622100570800001001006110061100611006110061
8020410060780000000035955410010010050001003410060100603181002002001006010060118020110099100100800001000000000000511221622100570800001001006110061100611006110061
8020410060780000000035955410010010050001003410060100603181002002001006010060118020110099100100800001000000000000511221622100570800001001006110061100611006110061
8020410060780000000035955410010010050011003410060100603181002002001006010060118020110099100100800001000000000000511221622100570800001001006110061100611006110061
80204100607800000420035955410010010050011003410060100603181002002001006010060118020110099100100800001000000000000511221622100570800001001006110061100611006110739

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.1255

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80024100447830359982101010500100121003810038318102020100381003811800211091010800001000050240616341003580000101003910039100391003910039
80024100387812359982101010500100121003810038318102020100381003811800211091010800001000050280416431003580000101003910039100391003910039
8002410038780359982101010500100121003810038318102020100381003811800211091010800001003050230416341003580000101003910039100391003910039
8002410038779359982101010500100121003810038318102020100381003811800211091010800001000050260716771003580000101003910039100391003910039
8002410038770359982101010501100121003810038318102020100381003811800211091010800001000050270416431003580000101003910039100391003910039
8002410038780359982101010500100121003810038318102020100381003811800211091010800001000050240716471003580000101003910039100391003910039
8002410038786359982101010500100121003810038318102020100381003811800211091010800001000050260616441003580000101003910039100391003910039
8002410038780639982101010500100121003810038318102020100381003811800211091010800001000050240416471003580000101003910039100391003910039
8002410038786359982101010500100121003810038318102020100381003811800211091010800001000050240416671003580000101003910039100391003910039
80024100387803599821010105001001210038100381718102020100381003811800211091010800001000050270416431003580000101003910039100391003910039