Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMOV (H from X)

Test 1: uops

Code:

  fmov h0, x0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 08 | 1e | 3f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a5 | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
10043743003592510001000100014060134937437419732321000100010003743741110011000010000073116113711000375375375375375
10043743003592510001000100014102034937437419732321000100010003743741110011000010000073116113711000375375375375375
10043743003592510001000100014060034937437419732321000100010003743741110011000010000073116113711000375375375376375
10043763003592510001000100014060034937437419732321000100010003743741110011000010000373116113711000375375375375375
10043743003592510001000100014060034937437419732321000100010003743741110011000010000073116113711000375375375375375
10043743003592510001000100014072034937437419732321000100010003743741110011000010000073116113711000375375375375375
10043742003592510001000100014060034937437419732321000100010003743741110011000010000073116113711000375375375375375
10043743003612510001000100014060034937437419732321000100010003743741110011000010000073116113711000375375375375375
10043743003592510001000100014060034937437419732321000100010003743771110011000010000373116113711000375375375375375
100437430213592510001000117814108034937437419732321000100010003743741110011000010002073116113711000375375375375375

Test 2: Latency 1->2 roundtrip

Code:

  fmov h0, x0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.0032

retire (01) | cycle (02) | 03 | 09 | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a7 | a8 | ac | c2 | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
20204100032775000000100017895552530100101001000010000100100001000050047781345679930010001310003210003296893397490201002001000010000200100001000010003210003211202011009910010100100001000100000030131013172299631100001000010100100033100033100033100033100033
20204100032775000000100017895552530100101001000010000100100001000050047782785679930010001310003210003296893397490201002001000010000200100001000010003210003211202011009910010100100001000100000000131012162299631100001000010100100033100033100068100033100033
20204100032775000000100017895562530100101001000010000100100001000050047781345679930010001310003210003296893797490201002001000010000200100001000010003210003211202011009910010100100001000100000030131012162299631100001000010100100033100033100033100033100033
20204100032775000600100017895552530100101001000010000100100001000050047781345679930010001310003210003296893397490201002001000010000200100001000010003210003211202011009910010100100001000100000130131012162299631100001000010100100035100033100033100033100035
20204100035776000000100017895552530100101001000010000100100001000050047781345679930010001310003210003296893397491201002001000010000200100001000010003210003211202011009910010100100001000100000000131012162299631100001000010100100033100033100033100033100033
20204100032775000000100017895552530100101001000010000100100001000052147781345679930010001310003410003496893397490201002001000010000200100001000010003210003211202011009910010100100001000100000100131012163299631100001000010100100033100033100033100033100033
202041000327750002100100017895552530100101001000010000100100001000050047781345679930010001310003210003296893397490201002001000010000200100001000010003210003211202011009910010100100001000100000100131012162299631100001000010100100033100033100033100033100033
20204100032776000000100017895552530100101001000010000100100001000050047781345679930010001310003210003296893397490201002001000010000200100001000010003210003211202011009910010100100001000100000000131012332299631100001000010100100033100033100033100033100033
2020410003277602013201100017895562530100101001000010000100100001000050047781345679930010001310003210003296893397490201002001000010000200100001000010003210003211202011009910010100100001000100000030131013162299631100001000010100100033100033100033100033100033
20204100032775000000100020895552530100101001000010000100100001000050047781345680095010001310003210003296893397490201002001000010000200100001000010003210003211202011009910010100100001000100000000131012242299836100001000010100100123100034100225100033100126

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0032

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
2002410003277600000000100017895552530010100101000010000101000010000504778134567853511000131000321000329691539751220010201000010000201000010000100032100032112002110910100101000010001000000000012703161199631100001000010010100036100033100033100033100033
2002410003277500000000100017895552530010100101000010000101000010000504778134567853501000131000321000329691539751220010201000010000201000010000100032100032112002110910100101000010001000000000012701161199631100001000010010100059100033100033100033100033
2002410003277500051000100017895552530010100101000010000101000010000504778134567853511000131000321000329691539751220010201000010000201000010000100032100032112002110910100101000010001000001000012701161199631100001000010010100033100033100033100033100033
2002410003277500000000100017895552530010100101000010000101000010000504778134567853511000131000321000329691539751220010201000010000201000010000100032100032112002110910100101000010001000000000012701162199631100001000010010100033100033100033100033100033
20024100033775000000001000178955525300101001010000100001010000100005047781345678927110001310012710003296915189772420010201024210000201000010000100032100377512002110910100101000010401000400000012881162199797100001000010010100033100033100034100033100033
20024100032776000002400100017895552530010100101000010000101000010000504778134567853511000131000321000349694439751220010201000010000201000010000100032100032112002110910100101000010001000000030012701161199631100001000010010100035100034100033100033100033
2002410003277500000000100017895552530010100101000010000101000010000504778134567853511000131000321000329691539751220010201000010000201000010000100032100032112002110910100101000010001000001000012701161199631100001000010010100033100068100035100033100033
2002410003277600000000100017895552530010100101000010000101000010000504778134567853511000131000321000329691539751220010201000010000201000010000100032100032112002110910100101000010001000000000012701161199631100001000010010100033100033100033100033100033
2002410003277600000000100017895552530010100101000010000101000010000504778134567853501000131000331000329691539751420010201000010000201000010000100032100032112002110910100101000010001000000030012701161199631100001000010010100033100033100033100033100033
2002410003277500000000100017895552530010100101000010000101000010000504778134567853511000131000321000329691539751220010201000010000201000010000100032100032112002110910100101000010001000001000012701161199660100001000010010100033100033100033100034100033

Test 3: throughput

Count: 8

Code:

  fmov h0, x8
  fmov h1, x8
  fmov h2, x8
  fmov h3, x8
  fmov h4, x8
  fmov h5, x8
  fmov h6, x8
  fmov h7, x8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3338

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8020426709207000000266940258010010080000100800155001166596126682267072670716635616732801142008002420080024267072670711802011009910010080000100008000000011151182160026704800001002670826708267082670826708
8020426707207000000266960258010010080000100800155001166596126682267122670716635616726801152008002420080024267072670711802011009910010080000100008000000011151180160026704800001002670826708267082670826708
8020426707207000000266920258010010080000100800155001166596126682267112670716639616733801152008002420080024267072670711802011009910010080000100008000000011151180160026704800001002670826708267082670826712
8020426707207000000266920258010010080000100800155001166596126686267072670716635616776801142008002420080024267072670711802011009910010080000100008000000011151180160026704800001002670826708267082670826715
8020426707207000000266920258010010080000100800155001166596026682267072670716639616775801152008002420080024267072670711802011009910010080000100008000000011151180160026704800001002670826708267082670826708
8020426707207000000266922258010010080000100800155001166596126682267072670716635616728801152008002420080024267072670711802011009910010080000100008000003011151180160026704800001002670826708267082670826708
8020426707207000000266920258010010080000100800155001168008026682267072670716635616786801162008002420080024267072670711802011009910010080000100008000003011151180160026704800001002671226708267082670826708
8020426707207000000266920258010010080000100800155001166596026682267072671116635616659801152008002420080024267162670711802011009910010080000100008000010011151180160026704800001002685726708267082671226708
8020426860207000000266920258010010080000100800155001166596027096267072670716635616890801152008002420080024273212670711802011009910010080000100208000000011151180160027110800001002670826708267082670826708
8020426707207000060266920258010010080000100800155001166596126686267072715716635616779801152008002420080024267072671111802011009910010080000100008000000011151180160026704800001002670826708271572670826712

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3338

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8002426708207026693025800101080000108000050116675001266912670826708166523166928001020800002080000267082670811800211091010800001080000005020116112670580000102670926709267092670926709
8002426708207026693025800101080000108000050116675001266912670826711166523166888001020800002080000267082670811800211091010800001080000005020116112670580000102670926709267092671226709
8002426708206026693025800101080000108000050116675001266902670826708166523166888001020800002080000267082670811800211091010800001080000105020116112670580000102670926709267092670926709
80024267082076026693025800101080000108000050116675001267262670826711166583166888001020800002080000268822686611800211091010800001080000005020116112670580000102670926709267092670926709
8002426708207026693025800101080000108000050116675001266902670826708166523166888001020800002080000267082670811800211091010800001080000005020116112670580000102670926709267092670926709
8002426708207026693025800101080000108000050116675001266922670826708166523166928001020800002080000267082670811800211091010800001080000005020116112670580000102671326709267092670926709
8002426711207026693025800101080000108000050116675001266922671126709166523166768001020800002080000267082670811800211091010800001080000005020116112670580000102670926709267092671226709
8002426708207026693025800101080000108000050116675001266932670826709166523166918001020800002080000267112670811800211091010800001080000005020116112670880000102670926709267102670926709
800242670820738426693325800101080000108000050116675001268242670826708166573166888018820800002080000269162685811800211091010800001080000005020116112670580000102671326709267092671226709
8002426708207026693025800101080000108000050116675001266902670826708166523166888001020800002080000267112670811800211091010800001080000105020116112670580000102670926709267092670926709