Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

MOVI (vector, 2S, zero)

Test 1: uops

Code:

  movi v0.2s, #0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 0.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 3f | 51 | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd alu (9a) | l1d cache writeback (a8) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
100415212715001341521523101521521110011000074115111491000153153153153153
100415212715001341521523101521521110011000074115111491000153153153153153
100415212715011341521523101521521110011000074115111491000153153153153153
100415212715001341521523101521521110011000074115111491000153153153153153
100415212715001341521523101521521110011000174115111491000153153153153153
100415212715001341521523101521521110011000074115111491000153153153153153
100415212715001341521523101521521110011000074115111491000153153153153153
100415222715001341521523101521521110011000074115111491000153153153153153
100415212715011341521523101521521110011000074115111491000153153153153153
100415212715001341521523101521521110011000074115111491000153153153153153

Test 2: throughput

Count: 8

Code:

  movi v0.2s, #0
  movi v1.2s, #0
  movi v2.2s, #0
  movi v3.2s, #0
  movi v4.2s, #0
  movi v5.2s, #0
  movi v6.2s, #0
  movi v7.2s, #0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.1258

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 1e | 1f | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d cache writeback (a8) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80204100648100660035955410010010050011003410060100603181002002001006010060118020110099100100800001000051133163310057800001001006110061100611006110061
8020410060780000035955410010010050011003410060100603181002002001006010060118020110099100100800001000051133163310057800001001006110061100611006110061
8020410060780000035955410010010050011003410060100603181002002001006010060118020110099100100800001000051133163310057800001001006110061100611006110061
80204100607800000415955410010010050011003410060102233181002002001006010060118020110099100100800001000051133163310057800001001006110061100611006110061
8020410060780000035955410010010050011003410060100603181002002001006010060118020110099100100800001000051133163310057800001001006110061100611006110061
8020410060780000035955410010010050011003410060100603181002002001006010060118020110099100100800001000051133163310057800001001006110061100611006110061
8020410060780000035955410010010050011003410060100603181002002001006010060118020110099100100800001000051133163310057800001001006110061100611006110061
8020410060780000098955410010010050011003410060100603181002002001006010060118020110099100100800001000051133163310057800001001006110061100611006110061
80204100607800150035955410010010050011003410060100603181002002001006010060118020110099100100800001000051133163310057800001001006110061100611006110061
8020410060780000035955410010010050011003410060100603181002002001006010060118020110099100100800001000051133163310057800001001006110061100611006110061

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.1255

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 5f | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ec | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002410053804093599821010105001100121003810038318102020100751003811800211091010800001000050211161110035080000101003910039100391003910039
8002410038780003599821010105001100121003810038318102020100381003811800211091010800001000050211161110035080000101003910039100391003910039
8002410038780003599821010105001100121003810038318102020100381003811800211091010800001000050211161110035080000101003910039100391003910039
8002410038780003599821010105001100121003810038318102020100381003811800211091010800001000050211161110035080000101003910039100391003910039
8002410038780003599821010105001100121003810038318102020100381003811800211091010800001000050211161110035080000101003910039100391003910039
8002410038770003599821010105001100121003810038318102020100381003811800211091010800001000050211161110035080000101003910039100391003910039
80024100387800097299821010105001100121003810038318102020100381003811800211091010800001000050211161110035080000101003910039100391003910039
8002410038770003599821010105001100121003810038318102020100381003811800211091010800001000050211161110035080000101003910039100391003910039
80024100387800183599821010105001100121003810038318102020100381003811800211091010800001000050211161110035080000101003910039100391003910039
8002410038780003599821010105001100121003810038318102020100381003811800211091010800001000050211161110035080000101003910039100391003910039