Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMOV (H from W)

Test 1: uops

Code:

  fmov h0, w0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 04 | 1e | 3a | 3f | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
100437430023590251000100010001406003493743741973232100010001000374374111001100010000077416443711000375375375375375
100437421023590251000100010001406003493743741973232100010001000374374111001100010000077416443711000375375375375375
100437431023590251000100010001406003493743761973232100010001000374374111001100010000077416443711000375378387375375
100437431023593251000100010001406003493743741973232100010001000374374111001100010000077416443711000375375375375375
100437431023642251000100010001406013523743741973232100010001000374374111001100010000077416443711000375375375375375
100437431033590251000100010001412113493743791973232100010001000374374111001100010000077416443711000375375375375375
100437431023590251000100010001406013493743741973232100010001000374374111001100010000077416443711000375375375375375
100437431023590251000100010001406013493743791983232100010001000374374111001100010000077416443711000375375388380375
100437420023590251000100010001406013533743741973232100010001000374374111001100010000077416443711000375375375375375
100437431023590251000100010001406013493743741973236100010001000374374111001100010000077416443751000375375375379375

Test 2: Latency 1->2 roundtrip

Code:

  fmov h0, w0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.0032

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c5 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
2020410003277600000001000178955525301001010010000100001001000010000500477813456799300410001310003210003296900697486201002001000410004200100041000410003210003211202011009910010100100001000010000000111131801161199640100001000010100100120100374100116100034100033
2020410003277500000001000178955825301001010010000100001001000010000500477813456799300010001310003210003596900697489201002001000410004200100041000410005810003211202011009910010100100001000010000000111131701161299640100001000010100100033100033100033100033100033
2020410003277600000001000178955544301001010010000100001001005910000500477813456799300010012510003210037296894397493202232001000010000200100001000010005910008311202011009910010100100001000010000000000131113172299631100001000010100100034100033100033100033100033
2020410003277600000011000178955525301001010010000100001001000010000500477813456799300010001310003310003296893397490201002001000010000200100001000010004610003411202011009910010100100001000010000100000131113163399631100001000010100100033100033100033100035100033
2020410003277500009001000178955825301001010010000100001001000010000500477813456799300010001310003210003296893397491201002001000010000200100001000010006510003311202011009910010100100001000010000000000131013483299631100001000010100100033100033100033100033100033
2020410003277500000011000178955525301001010010000100001001000010000500477813456799300010001310003210003596893397490201002001000010000200100001000010003410003211202011009910010100100001000010000000000131112163399631100001000010100100033100033100033100033100033
2020410003277500000001000178955525301001010010000100001001000010000500477813456799300010001310003510003296893397490201002001000010000200100001000010007010003511202011009910010100100001000010000200000131013163399631100001000010100100033100033100033100033100033
2020410003277500000011000178955525301001010010000100001001000010050500477813456799300010001310003210003296893397490201002001000010000200100001000010005910003211202011009910010100100001000010000003000131112163299631100001000010100100033100033100034100033100033
2020410003277600000011000178955525301001010010000100001041000010000581477813456800960010017410012710003296893397490202062001000010000200100001000010005510006511202011009910010100100001000010008103426000131012162299631100001000010100100033100033100033100033100033
2020410003277500100011000178955525301001010010000100001001000010000500477813456799300010001310003210003396893397490201002001000010000200100001000010007310003611202011009910010100100001000010000000000131013483399631100001000010100100033100033100135100033100034

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0032

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 50 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | c5 | cf | d0 | d5 | d6 | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
20024100039775000000120010001789555025300101001010000100001010000100005047781345678535001000130100032100032969153975122001020100001000020100001000010003210003211200211091010010100001000100001000012700316023996311000001000010010100033100033100033100033100033
2002410003277500000000010001789555025300101001010000100001010000100005047781345678535101000130100032100032969153975122001020100001000020100001000010003210003211200211091010010100001000100000000012700216043996311000001000010010100034100033100033100033100033
2002410003277600000000010001789555025300101001010000100001010000100005047781345678535001000130100032100032969163975142001020100001000020100001000010003210003511200211091010010100001000100000000012700316023996311000001000010010100033100033100033100035100033
20024100032775000100120010001789555025300101001010000100001010000100005047781345678535001000140100032100032969163975122001020100001000020100001000010003210003211200211091010010100001000100001000012700316032996311000001000010010100035100033100033100062100033
2002410003277500000000010001789555025300101001010000100001010000100005047781345678535001000130100032100036969153975122001020100001000020100001000010003210003211200211091010010100001000100001000012700316033996311000001000010010100033100033100033100033100033
2002410003277500000000010001789555025300101001010000100001010000100005047781345678535001000130100032100032969153975122001020100001000020100001000010003210003311200211091010010100001000100000000012700316033996311000001000010010100033100033100033100033100033
2002410003277600000000010001789555025300101001010000100001010000100005047781345679047101000130100033100032969153975122001020100001000020100001000010003210003211200211091010010100001000100000000012700316033996311000001000010010100033100033100033100033100033
2002410003277600000000010001789557025300101001010000100001010000100005047790435678535011000130100032100032969153975122001020100001000020100001000010003210003211200211091010010100001000100001000012700216033996991000001000010010100033100033100033100033100033
2002410003377501000000010001789555025300101001010000100001010000100005047781345678535011000130100032100032969183975122001020100001006620100001000010003210003211200211091010010100001000100000000012700316023996311000001000010010100033100033100033100033100033
20024100032775000000120010001789555025300101001010000100001010000100005047782785678535011000130100032100032969153975122001020100001000020100001000010003210003211200211091010010100001000100000030012700316033996311000001000010010100033100033100033100033100033

Test 3: throughput

Count: 8

Code:

  fmov h0, w8
  fmov h1, w8
  fmov h2, w8
  fmov h3, w8
  fmov h4, w8
  fmov h5, w8
  fmov h6, w8
  fmov h7, w8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3338

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a5 | a6 | a8 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
802042671220700000002669202580100100800001008001550011665962668226707267071663561665980115200800242008002426833267151180201100991001008000010008000000011151180160026704800001002671226708267082670826708
8020426707206000000026692292580100100800001008001550011665962668226707267071663561665980115200800242008002426707267181180201100991001008000010008000000011151180160026704800001002670826708267082670826708
802042670720700000002669222580100100800001008001550011665962668226707267071663561665980115200800242008002426713267131180201100991001008000010008000000011151180160026704800001002670826708267082670826708
802042670720700000002669502580100100800001008001450011665962668626707267071663961666380115200800242008002426707267071180201100991001008000010008000000011151180160026704800001002670826708267082670826708
802042670720700000002669202580100100800001008001550011665962668226707267071663561665980115200800242008002426709272041180201100991001008000010008000000011151180160026704800001002670826708267082670826708
802042670720700000002669202580100100800001008001550011665962668226707267071663561665980115200800242008002426707267141180201100991001008000010008000000011151180160026704800001002670826708267082670826708
802042670720700000002669202580100100800001008001550011665962668226707267071663561665980115200800242008002426707267221180201100991001008000010008000000011151180161026704800001002670826708267082670826708
802042670720600000002669202580100100800001008001550011665962668226707267071663561665980115200800242008002426708267221180201100991001008000010008000000011151180160026704800001002670826708267082670826708
802042670720700000002669202580100100800001008001550011665962668226707267071663561665980115200800242008002426707267161180201100991001008000010008000000011151180160026704800001002670826708267082670826708
802042670720700000002669202580100100800001008001550011665962668226707267071663561665980115200800242008002427304267111180201100991001008000010008000000011151180160026704800001002670826708267082670826708

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3338

retire (01) | cycle (02) | 03 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | c2 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | df | e0 | ea | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
800242670820700026693025800101080000108000050116675001266832671826708166523166888001020800002080000267082670811800211091010800001080000000502330041600044326715080000102670926709267092670926709
800242670820700026693025800101080000108000050116675000266832671226708166523166888001020800002080000267082670811800211091010800001080000030502330031600044326705080000102670926709267092670926709
800242670820700026693025800101080000108000050116675001266832670826708166523166888001020800002080000267082670811800211091010800001080000000502330031600034326705080000102670926709267092670926709
800242670820700026693025800101080000108000050116675001266872670826708166523166928001020800002080000267082670811800211091010800001080000000502330041600044326705080000102670926709267092670926709
800242670820700026696025800101080000108000050116675001266832670826708166523166888001020800002080000267082670811800211091010800001080000000502330041600044326705080000102670926709267092670926709
800242670820700026693025800101080000108000050116675011266832670826708166523166888001020800002080000267082670811800211091010800001080000000502330041600034326705080000102670926712267092670926709
800242670820700026693025800101080000108000050116675000266832670826708166523166888001020800002080000267082670811800211091010800001080000000502330031600034326705080000102670926709267092670926709
800242670820700026697025800101080000108000050116660000266832670826708166523166888001020800002080000267082670811800211091010800001080000000502330031600044326705080000102671326709267122670926709
800242670820600026693025800101080000108000050116675011266832670826708166523166888001020800002080000267082670811800211091010800001080000000502330031600044326705080000102670926712267092670926709
800242671120700026693025800101080000108000050116675000266832670826708166523166888001020800002080000267112670811800211091010800001080000000502330041600043326705080000102670926709267092670926709