Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

MOVI (vector, 4H, zero)

Test 1: uops

Code:

  movi v0.4h, #0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 0.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd alu (9a) | l1d cache writeback (a8) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
1004152102715001341521523101521521110011000074215111491000153153153153153
1004152102715001341521523101521521110011000074115111491000153153153153153
1004152102715001341521523101521521110011000074115111491000153153153153153
1004152102715001341521523101521521110011000074115111491000153153153153153
1004152102715001341521523101521521110011000074115111491000153153153153153
1004152102715001341521523101521521110011000074115111491000153153153153153
1004152102715001341521523101521521110011000074115111491000153153153153153
1004152102715011341521523101521521110011000074115111491000153153153153153
1004152102715011341521523101521521110011000074115111491000153153153153153
1004152102715001341521523101521521110011000074115111491000153153153153153

Test 2: throughput

Count: 8

Code:

  movi v0.4h, #0
  movi v1.4h, #0
  movi v2.4h, #0
  movi v3.4h, #0
  movi v4.4h, #0
  movi v5.4h, #0
  movi v6.4h, #0
  movi v7.4h, #0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.1258

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 1e | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802041009078015028956110010010050011004210064100646101002002001006410064118020110099100100800001000000000051133163310057800001001006110061100611006110061
80204100607800139955410010010050011003410060100603181002002001006010060118020110099100100800001000000000051133163310057800001001006110061100611006110061
8020410060780258139955410010010050001003410060100603181002002001006010060118020110099100100800001000000000051133163310057800001001006110061100611006110061
80204100607800139955410010010050011003410060100603181002002001006010060118020110099100100800001000000000051133163310057800001001006110061100611006110061
8020410060780360139955410010010050011003410060100603181002002001006010060118020110099100100800001000000000051133163310057800001001006110061100611006110061
80204100607700139955410010010050011003410060100603181002002001006010060118020110099100100800001000000000051133163310057800001001006110061100611006110061
8020410060780129139955410010010050001003410060100603181002002001006010060118020110099100100800001000000000051133163310057800001001006110061100611006110061
80204100608106195956110010010050001004210064100646821002002001006410064118020110099100100800001000010011151191161110061800001001006510065100651006510065
80204100647800035955410010010050011003410060100603181002002001006010060118020110099100100800001000000000051133163310057800001001006110061100611006110061
802041006078045035955410010010050011003410060100603181002002001006010060118020110099100100800001000000000051133163310057800001001006110061100611013410061

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.1255

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800241005478000321035998212101050110012100381003831810202010038100381180021109101080000100000502404162410035080000101003910039100391003910039
8002410038780000035998210101050110012100381003831810202010038100381180021109101080000100030502402162410035080000101003910039100391003910039
8002410038780000035998210101050110012100381003831810202010038100383180021109101080000109000502402164210035080000101003910039100391003910039
8002410038770000035998210101050110012100381003831810202010038100381180021109101080000100000502404162510035080000101003910039100391003910039
8002410038780003035998210101050110012100381003831810202010038100381180021109101080000100020502302162410035080000101003910039100391003910039
80024100387800027035998210101050110012100381003831810202010038100381180021109101080000104000502204162410035080000101003910039100391003910039
8002410038770000035998210101050110012100381003831810202010038100384180021109101080000100000502204164210035080000101003910039100391003910039
8002410038800007503599821010105011001210038100883181020201003810042218002110910108000010001405024021642100351880000101003910039100391003910039
8002410038780000035998210101050010012100381003831810202010038100381180021109101080000100000502405162410035080000101003910039100391003910039
8002410038780000035998210101050010012100381003831810202010038100381180021109101080000100000502404167510035080000101003910039100391003910039