Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

MOVI (vector, 4S, zero)

Test 1: uops

Code:

  movi v0.4s, #0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 0.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 19 | 1e | 3f | 51 | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd alu (9a) | ac | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
10041522000271500134152152310152152111001100000740115111491000153153153153153
10041521000271500134152152310152152111001100000740115111491000153153153153153
10041521000271500134152152310152152111001100000740115111491000153153153153153
10041521000271500134152152310152152111001100000740115111491000153153153153153
10041521000271500134152152310152152111001100000740115111491000153153153153153
10041521000271500134152152310152152111001100000740115111491000153153153153153
10041521000271500134152152310152152111001100000740115111491000153153153153153
10041522000271500134152152310152152111001100000740115111491000153153153153153
10041521000271501134152152310152152111001100000740115111491000153153153153153
10041521000271500134152152310152152111001100000740115111491000153153153153153

Test 2: throughput

Count: 8

Code:

  movi v0.4s, #0
  movi v1.4s, #0
  movi v2.4s, #0
  movi v3.4s, #0
  movi v4.4s, #0
  movi v5.4s, #0
  movi v6.4s, #0
  movi v7.4s, #0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.1258

retire uop (01) | cycle (02) | 03 | 18 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020410076780035955410010010050001003410060100603181002002001006010060118020110099100100800001000051156165510057800001001006110061100611006110061
8020410060780035955410010010050001003410060100603181002002001006010060118020110099100100800001001051155163510057800001001006110061100611006110061
8020410060770035955410010010050001003410060100603181002002001006010060118020110099100100800001000051155165510057800001001006110061100611006110061
8020410060780035955410010010050001003410060100603181002002001006010060118020110099100100800001000051154165510057800001001006110061100611006110061
80204100607800359554100100100500010034100601006031810020020010060100601180201100991001008000010029051133163510057800001001006110061100611006110065
80204100607800676955410010010050001003410060100603181002002001006010060118020110099100100800001000051163165510057800001001006110061100611006110061
80204100607800359554100100100500010034100601006031810020020010060100601180201100991001008000010026051166165510057800001001006110061100611006110061
8020410060780035955410010010050011003410060100603181002002001006010060118020110099100100800001000051133164510057800001001006110061100611006110061
8020410060780035955410010010050001003410060100603181002002001006010060118020110099100100800001000051146164610057800001001006110061100611006110061
8020410060780035955410010010050001003410060100603181002002001006010060118020110099100100800001000351165164610057800001001006110061100611006110061

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.1255

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 18 | 19 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 5f | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002410053780012077998210101050001001210038100383181020201003810038118002110910108000010035034171608171003580000101003910039100391003910039
80024100387800000359982101010500010012100381003831810202010038100381180021109101080000102650361016017151003580000101003910039100391003910039
800241003878000003599821010105000100121003810038318102020100381003811800211091010800001001250361616014161003580000101003910039100391003910039
800241003878000003599821010105001100121003810038318102020100381003811800211091010800001001250341416017131003580000101003910039100391003910039
800241003878000003599821010105001100121003810038318102020100381003811800211091010800001002150361616013161003580000101003910039100391003910039
80024100387720000359982101010560110012100381003831810202010038100381180021109101080000100050321616016171003580000101003910039100391003910118
8002410038781000035998210121050011001210038100383181020201003810038118002110910108000010065029916017141003580000101003910039100391003910104
800241003878010003599821012105000100121003810038318102020100381003811800211091010800001009050341416017141003580000101003910039100391003910104
800241003878000003599821012105001100121003810038318102020100381003811800211091010800001020050361616016161003580000101003910039100391003910104
800241003877000003599821012105001100121003810038318102020100381003811800211091010800001070503091601781003580000101003910039100391003910120