Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

DUP (general, 8B)

Test 1: uops

Code:

  dup v0.8b, w0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | ac | cf | d0 | d5 | d6 | d9 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
200437520360025200010001000100010001407522820003563753757231082000100010001000100037537511100110001000073011601137210001000376376376376376
200437520360025200010001000100010001407522820003563753757231082000100010001000100037537511100110001000073011601137210001000376376376376376
200437530360025200010001000100010001407522820003563753757231082000100010001000100037537511100110001000073011601137210001000376376376376376
200437530360025200010001000100010001407522820003563753757231082000100010001000100037537511100110001000073011601137210001000376376376376376
200437530360025200010001000100010001407522820003563753757231082000100010001000100037537511100110001000073011601137210001000376376376376376
200437530360025200010001000100010001407522820003613753757231082000100010001000100037537511100110001000073011601137210001000376376376376376
200437530360025200010001000100010001407522820003563943777231082000100010001000100037537511100110001000073011601137210001000376379376376376
200437530360025200010001000100010001407522820003563753757231082000100010001000100037537511100110001000073011601137210001000376376376376376
200437530360025200010001000100010001407522820003563753757231082000100010001000100038037511100110001000073011601137210001000376376376376376
200437530360025200010001000100010001407522820003563753757231082000100010001000100037537511100110001000373011601137210001000376376376376376

Test 2: Latency 1->2 roundtrip

Code:

  dup v0.8b, w0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 12.0032

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3020412003289910000001200171094562540100101002000010000100200001000050057356721367205301200171200321200321155253116240301002001000020000200100612000012003212003211202011009910010100100001000010000000131014162311957410000100001000010100120033120033120033120033120033
30204120032899000101201200171094562540100101002000010000100201131000050057356721367205301200131200321200321155253116240301002001000020000200100002000012003212003211202011009910010100100001000010000000131012162311957610000100001000010100120033120033120033120033120033
3020412003289900000001200171094562540100101002000010000100200001000050057356721367205301200131200321200321155253116240301002001000020000200100002000012003212003211202011009910010100100001000010000003131012162311957410000100001000010100120033120033120033120033120033
3020412003289900000001200171094562540100101002000010000100200001000050057356721367205301200131200321200321155253116240301002001000020000200100002000012003212003211202011009910010100100001000010000000131013163211957410000100001000010100120033120033120033120033120033
3020412003589900000001200171094562540100101002000010000102200001000050057356721367205311200131200321200321155253116240301002001000020000200100002000012003212003211202011009910010100100001000010000009131012163211957410000100001000010100120033120033120033120033120033
302041200328990000012881200171094562540100101002000010000100200001000050057356721367205311200131200321200321155253116240301002001000020000200100002000012003212003211202011009910010100100001000010000003131012163211957410000100001000010100120033120033120033120033120033
3020412003289900000001200171094572540133101002000010000100200001000050057356721367205301200131200321200321155253116240301002001000020000200100002000012003212003211202011009910010100100001000010000000131012163311957410002100001000010100120033120033120033120033120033
3020412003289900000001200171094562540100101002000010000100200001000050057356721367205301200131200321200321155253116240301002021000020000200100002000012003212003211202011009910010100100001000010005000131013162311957410000100001000010100120033120116120037120033120033
30204120033899000000012001710949325401001010020000100001002000010000500573567213672053012001312003212003211552531162403010020010000200002001000020000120032120032112020110099100101001000010000100000024131013162211957910000100001000010100120033120033120033120033120033
3020412003289910000001200171094562540100101002000010000100200001000050057356721367205301200131200321200321155253116240301002001000020000200100002000012003212003211202011009910010100100001000010000000131012163311957410000100001000010100120033120033120033120034120033

1000 unrolls and 10 iterations

Result (median cycles for code): 12.0032

retire (01) | cycle (02) | 03 | 08 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
300241200328990001200171094562540010100102000010000102000010000505735672136700291120013120033120032115548311626230010201000020000201000020000120032120068112002110910100101000010100000012703161111957410000100001000010010120033120033120033120033120033
300241200328990001200171094562540010100102000010000102000010000505735672136700291120013120032120032115548311626230010201000020000201000020000120032120032112002110910100101000010100000012701161111957410000100001000010010120033120033120033120033120033
300241200328990001200171094562540010100102000010000122000010000505737040136704801120013120032120032115548311626230010201000020000201000020000120032120032112002110910100101000010100000012701161111957410000100001000010010120033120033120033120033120033
300241200328990001200171094562540010100102000010000102000010000505735672136700291120013120032120032115548311626230010201000020000201000020000120032120032112002110910100101000010100000012701161111957410000100001000010010120033120033120033120033120033
300241200328990001200171094562540010100102000010000102000010000505735672136700290120013120032120032115550311626230010201000020000201000020000120032120032112002110910100101000010100000012701162111957410000100001000010010120033120033120033120033120033
3002412003289900012001710945625400101001020000100001020000100005057356721367002911200131200321200321155643116262300102010000200002010000200001200321200321120021109101001010000101000002249012951161111957410000100001000010010120033120033120033120033120033
300241200328990001200171094562540010100102000010000102000010000505735672136700291120013120032120032115548311626230010201000020000201000020000120032120032112002110910100101000010100000012701161111957410000100001000010010120033120033120033120033120033
300241200328990001200171094562540010100102000010000102000010000505735672136700291120013120032120032115548311626230010201000020000201000020000120032120032112002110910100101000010100000012701161111957410000100001000010010120033120033120033120033120033
300241200328990001200171094562540010100102000010000102000010000505735672136700291120013120032120032115548311626230010201000020000201000020000120032120032112002110910100101000010100003012701161111957410000100001000010010120033120033120033120033120033
300241200328990001200171094562540010100102000010000102000010000505735672136700291120013120032120032115548311626230010201000020000201000020000120032120032112002110910100101000010100000012701161111957410000100001000010010120033120033120033120033120033

Test 3: throughput

Count: 8

Code:

  dup v0.8b, w8
  dup v1.8b, w8
  dup v2.8b, w8
  dup v3.8b, w8
  dup v4.8b, w8
  dup v5.8b, w8
  dup v6.8b, w8
  dup v7.8b, w8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3338

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | a9 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204267092000266930251601001008000080000100800208001550011689511884163126689267082670866326665716013520080020800202008002080020268222685011802011009910010080000100800000001115117016002671180000800001002673926709267092671126709
160204267082000266930251601001008000080000100800208001550011689511884163126689267082670866326665716013520080020800202008002080020267082682911802011009910010080000100800000001115140016002672080000800001002670926709267092670926709
160204267082000266930251601001008000080000100800208001550011689511884163126689267082670866326665716013520080020800202008002080020269122674211802011009910010080000100800000001115117016002670580000800001002670926709267092670926709
160204267082000266930251601001008000080000100800208001550011689511884163126689267082670866326665716013520080020800202008002080020267082677211802011009910010080000100800000001115117016002671180000800001002670926709267092670926709
160204267082000266930251601001008000080000100800208001550011689511884163126689267082670866326665716013520080020800202008002080020267082684411802011009910010080000100800000001115117016002670680000800001002670926709267092670926709
160204267082000266930251601001008000080000100800208001550011689511884163026689267112670866326665716050520080020800202008002080020267082676911802011009910010080000100800000001115117016002681480000800001002671526709267092670926709
160204267082000266930251601001008000080000100800208001550011689511884163126689267082670866326665716013520080020800202008002080020267082676111802011009910010080000100800000001115117016002670580000800001002670926709267092670926709
160204267082000266930251601001008000080000100800208001550011689511884163126689267082670866326665716013520080020800202008002080020267082670811802011009910010080000100800000001115117016002671780000800001002670926709267092670926709
160204267082000266930251601001008000080000100800208001550011689511884163126689267082670866326665716013520080020800202008002080020267082677611802011009910010080000100800000001115117016002670580000800001002670926709267092670926709
160204267082000266930251601001008000080000100800208001550011689511884163126689267082670866326665716013520080020800202008002080020267082679311802011009910010080000100800000001115117016102670580000800001002670926709267092670926709

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3338

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | ac | cd | cf | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600242670819992669302516001010800008000010800008000050116888018840320266892670826708665336688160010208000080000208000080000267142677111800211091010800001080000005020631611267058000080000102670926709267092670926709
1600242670820002669302516001010800008000010800008000050116888018840321266892670826708665536688160010208000080000208000080000267112670821800211091010800001080000005020611611267108000080000102670926713267092688926709
1600242670820002669302516001010800008000010800008000050116888018840321266892670826708665336688160010208000080000208000080000267142680411800211091010800001080000005020611611267058000080000102670926709267092670926709
1600242670820102669302516001010800008000010800008000050116888018840320266892670826708665336688160010208000080000208000080000267112671611800211091010800001080000005020621611267178000080000102670926709267092670926709
1600242670820002669302516001010800008000010800008000050116888018840321266892670826708665336688160010208000080000208000080000267082670811800211091010800001080000005020511611267138000080000102670926709267092670926709
1600242670820062671302516001010800008000010800008000050116888018840320266892670826708665336688160010208000080000208000080000267982671611800211091010800001080000005020621611267058000080000102670926709267092670926709
1600242670820002669312516001010800008000010800008000050116888018840321266892670826708665336688160010208000080000208000080000267112670951800211091010800001080000005020611612267118000080000102670926709267092670926709
1600242670820002669302516001010800008000010800008000050116888018840320266892670826708665336688160010208000080000208000080000267132679911800211091010800001080000005020621611267058000080000102670926709267092670926709
1600242670820002669302516001010800008000010800008000050116888018840320266892670826708665336688160010208000080000208000080000267112671411800211091010800001080000005020411611267058000080000102670926709267092670926709
1600242670820002669302516001010800008000010800008000050116888018840321266892670826708665336688160010208000080000208000080000267082671411800211091010800001080000015020421611267058000080000102670926709267092670926709