Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMOV (S from W)

Test 1: uops

Code:

  fmov s0, w0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
1004374303590251000100010001406003493743741973232100010001000374374111001100010000073116113711000381375376375375
1004374203590251000100010001406013493743742413232100010001000374374111001100010000073116113711000379375375375375
1004374303590251000100010001406003493743741973232100010001000374374111001100010000073116113711000375375375375375
1004374203590251000100010001406013493743741973232100010001000374374111001100010001373116113711000375375375375375
1004374303590251000100010001406013493743741973232100010001000374374111001100010000073116113711000375375378375375
1004377203590251000100010001441803493743741973232100010001000374374111001100010000073116113711000375375375375375
1004374303590251000100010001406003493743741973232100010001000374374111001100010000073116113711000375375375375375
1004374303590251000100010001406014393773741973232100010001000374374111001100010000073116113711000375375375375375
1004374303590251000100010001406003493743741973232100010001000374374111001100010007373116113711000375375375375375
1004374303590251000100010001406003493743741973232100010001000374374111001100010000073116113711000375375375375375

Test 2: Latency 1->2 roundtrip

Code:

  fmov s0, w0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.0032

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | 7a | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a8 | a9 | ac | c5 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
2020410003277500000990100020895552530100101001000010000100100001000050047781345679930110001310003210003296900697486201000200100041000420010004100041000321000321120201100991001010010000100010000100111131702161199640100001000010100100035100033100033100033100033
202041000327750000000100017895552530100101001000010000100100001000050047781345679988110001310003210003296900697486201000200100041000420010004100041000341000321120201100991001010010000100010000100111131701161199640100001000010100100033100033100033100033100033
202041000327750000000100017895552530100101001000010000100100001000050047782305679930110001610003210003296893397490201000200100001000020010000100001000321000321120201100991001010010000100010000006000131011162299631100001000010100100034100033100033100033100033
202041000327760000000100019895552530100101001000010000100100001000050047781345679930110001310003210003296893397490201000200100001000020010000100001000321000321120201100991001010010000100010000000000131012162299631100001000010100100070100033100033100033100036
2020410003377500000088100017895552530100101001000010000100100001000050047781345679930010001310003210003296893397490201000200100001000020010000100001000321000321120201100991001010010000100010000000000131012162299631100001000010100100033100033100033100033100033
202041000327750000000100017895552530100101001000010000100100001000050047781345679988110001310003210003296893397490201000200100001000020010000100001000321000321120201100991001010010000100010000000000131012162299631100001000010100100033100033100033100033100037
202041000327750000000100017895552530100101001000010000100100001000050047781345679930110001310003210003396895397490201000200100001000020010000100001000341000351120201100991001010010000100010000100000131012162299631100001000010100100033100033100035100033100033
2020410003277500000120100017895552530100101001000010000100100001005050047781345679930010001310003210003296893397490201000200100001000020010000100001000321000321120201100991001010010000100010000000000131012162299631100001000010100100033100033100033100033100033
2020410003277500000120100017895552530100101001000010000100100001000050047781345679930110001310003210003296893397490202150200100001000020010000100001000351000321120201100991001010010000100010000100000131012162299711100171000010100100033100033100033100033100033
20204100032775000000108100017895552530100101001000010000100100001000050047781345679930010001310003310003296893397490201000200100001000020010000100001000321000321120201100991001010010000100010000003000131012162299631100001000010100100033100033100033100033100033

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0032

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | a9 | ac | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
200241000327750000000100017895572530010100101000010000101000010000504778134567853510001310003210003296915397512200102010000100002010183100001000351000321120021109101001010000100100089000012701161199631100001000010010100082100074100033100033100033
200241000327750000000100017895552530010100101000010000101000010000504778134567853510001410003210003296915397512200102010000100002010000100001000321000321120021109101001010000100100000000012701161199631100001000010010100048100034100037100033100033
200241000327760000000100017895552530010100101000010000101000010000504778134567853510001310003310003296915397512200102010000100002010000100001000321000341120021109101001010000100100001000012701162199631100051000010010100064100033100033100033100033
200241000337750000000100017895552530010100101000010000101000010000504778182567853510001310003210003296915397512200102010000100002010000100001000321000321120021109101001010000100100000000012701161199631100001000010010100034100034100033100033100033
200241000327760000000100019895552530010100101000010000101000010000504778134567853510001410003210003296915397512200102010000100002010000100001000321000321120021109101001010000100100000000012701161199634100091000010010100403100365100390100391100388
200241003807781044528352010036789556253001010010100031000811104001020066478947456053211002671003761003719711417977662044520102411030120103041024110037610041261200211091010010100001041001002135950012701161299634100001000010010100105100039100033100033100033
200241000387750000000100017895572530010100101000010000101000010000504778134567853510013510003210003296915397517202252010000100002010000100001000321000321120021109101001010000100100000000012701321199631100071000010010100033100033100033100033100033
200241000327760000000100017895562530010100161000010000101000010000504778134567853510001310003310020996915397512200102010000100002010000100601000321000321120021109101001010000100100000000012701161199631100001000010010100033100033100033100033100033
2002410003277500001200100017895552530010100101000010000101000010100504778134567853510001310003210003296917397512200102010000100002010000100001000321000321120021109101001010000100100001000012701251199631100001000010010100033100034100033100033100036
20024100032776000000010001789555253001010010100001000010100001000050477813456785351000131000321000329691539762920010201000010000201000010000100032100035212002110910100101000010010000101530012701161199631100001000010010100033100033100033100033100037

Test 3: throughput

Count: 8

Code:

  fmov s0, w8
  fmov s1, w8
  fmov s2, w8
  fmov s3, w8
  fmov s4, w8
  fmov s5, w8
  fmov s6, w8
  fmov s7, w8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3338

retire (01) | cycle (02) | 03 | 08 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a5 | a6 | a8 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80204267072070042602669202580100100800001008001550011640381266822670726707166396166648011520080024200800242670726707118020110099100100800001000008000000011151180160026704800001002670826712267082670826708
8020426707207001802669202580100100800001008001550011665961266862670726707166356166608011520080024200800242670726707118020110099100100800001000008000000011151180160026704800001002670826708267082670826708
80204267072070018902669202580100100800001008001550011665961266822670726707166356167518011520080024200800242670726711118020110099100100800001000008000000011151180160026704800001002670826708267082670826708
80204267072070036902669202580100100800001008001550011665961266822670726707166356166598011420080024200800242670726707118020110099100100800001000008000000011151180160126704800001002670826708267082670826708
80204267072070036002669202580100100800001008001450011665961266862670726707166396167778011620080024200800242670726707118020110099100100800001000008000000011151180160026704800001002670826708267082670826708
80204267072070010202669202580100100800001008001550011665961266822670726710166356166598011520080024200800242670726707118020110099100100800001000008000000011151180160026704800001002670826708267082670826708
80204267072070038102669202580100100800001008001550011665961266872670726707166356167658011420080024200800242670726707118020110099100100800001000008000000011151180160026704800001002670826712267082670826708
80204267072070037502669202580100100800001008001550011665961266822670726707166356168088011520080024200800242670726707118020110099100100800001000008000000011151180160026704800001002670826708267082670826708
80204267112070028502669202580100100800001008001450011665960266822670726711166356167448011520080024200800242670726707118020110099100100800001000008000010011151180160026704800001002671226708267082670826708
80204267072070036902669202580100100800001008001550011665961266822670726707166356167788011520080024200800242670726707118020110099100100800001000008000000011151180160026704800001002670826708267082670826708

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3338

retire (01) | cycle (02) | 03 | 18 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
800242670820700026693025800101080000108000050116675001266942670826712166523166888001020800002080000267082670811800211091010800001080000005020141611122670580000102670926709267092670926709
80024267082070002669302580010108000010800005011667500126694267122670816652316688800102080000208000026708267081180021109101080000108000000502010161092670580000102670926709267092670926712
800242670820700026693025800101080000108000050116675001266832670826708166523166888001020800002080000267082670811800211091010800001080000035020111611122670580000102670926709267092670926709
800242670820600026693025800101080000108000050116675001266832670826708166523166888001020800002080000267112670811800211091010800001080000135020101612102670580000102670926709267092670926709
800242670820700026693025800101080000108000050116675001266832670826708166523166928001020800002080000267082670811800211091010800001080000005020101611112670580000102670926709267092671226709
800242670820701202669302580010108000010800005011667500126683267082670816652316688800102080000208000026708267081180021109101080000108000000502010161182670580000102670926709267092670926709
800242670820700026693025800101080000108000050116675001266832670826708166523166888001020800002080000267082670811800211091010800001080000005020121611122670580000102670926709267092670926709
80024267082070002669302580010108000010800005011667500126683267082670816652316688800102080000208000026708267081180021109101080000108000000502011161082670580000102670926709267092670926709
80024267082070002669302580010108000010800005011667500126683267082670816652316688800102080000208000026708267111180021109101080000108000000502011161172670580000102670926709267092670926709
80024267112070002669302580010108000010800005011667500126762267212670816654316688800102080000208000026708267111180021109101080000108000010502013167142670580000102670926712267092671326709