Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

FMOV (S from W)

Test 1: uops

Code:

  fmov s0, w0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd alu (9a) | ld unit uop (a6) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1004374303590251000100010001406003493743741973232100010001000374374111001100010000073116113711000381375376375375
1004374203590251000100010001406013493743742413232100010001000374374111001100010000073116113711000379375375375375
1004374303590251000100010001406003493743741973232100010001000374374111001100010000073116113711000375375375375375
1004374203590251000100010001406013493743741973232100010001000374374111001100010001373116113711000375375375375375
1004374303590251000100010001406013493743741973232100010001000374374111001100010000073116113711000375375378375375
1004377203590251000100010001441803493743741973232100010001000374374111001100010000073116113711000375375375375375
1004374303590251000100010001406003493743741973232100010001000374374111001100010000073116113711000375375375375375
1004374303590251000100010001406014393773741973232100010001000374374111001100010000073116113711000375375375375375
1004374303590251000100010001406003493743741973232100010001000374374111001100010007373116113711000375375375375375
1004374303590251000100010001406003493743741973232100010001000374374111001100010000073116113711000375375375375375

Test 2: Latency 1->2 roundtrip

Code:

  fmov s0, w0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.0032

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | 7a | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2020410003277500000990100020895552530100101001000010000100100001000050047781345679930110001310003210003296900697486201000200100041000420010004100041000321000321120201100991001010010000100010000100111131702161199640100001000010100100035100033100033100033100033
202041000327750000000100017895552530100101001000010000100100001000050047781345679988110001310003210003296900697486201000200100041000420010004100041000341000321120201100991001010010000100010000100111131701161199640100001000010100100033100033100033100033100033
202041000327750000000100017895552530100101001000010000100100001000050047782305679930110001610003210003296893397490201000200100001000020010000100001000321000321120201100991001010010000100010000006000131011162299631100001000010100100034100033100033100033100033
202041000327760000000100019895552530100101001000010000100100001000050047781345679930110001310003210003296893397490201000200100001000020010000100001000321000321120201100991001010010000100010000000000131012162299631100001000010100100070100033100033100033100036
2020410003377500000088100017895552530100101001000010000100100001000050047781345679930010001310003210003296893397490201000200100001000020010000100001000321000321120201100991001010010000100010000000000131012162299631100001000010100100033100033100033100033100033
202041000327750000000100017895552530100101001000010000100100001000050047781345679988110001310003210003296893397490201000200100001000020010000100001000321000321120201100991001010010000100010000000000131012162299631100001000010100100033100033100033100033100037
202041000327750000000100017895552530100101001000010000100100001000050047781345679930110001310003210003396895397490201000200100001000020010000100001000341000351120201100991001010010000100010000100000131012162299631100001000010100100033100033100035100033100033
2020410003277500000120100017895552530100101001000010000100100001005050047781345679930010001310003210003296893397490201000200100001000020010000100001000321000321120201100991001010010000100010000000000131012162299631100001000010100100033100033100033100033100033
2020410003277500000120100017895552530100101001000010000100100001000050047781345679930110001310003210003296893397490202150200100001000020010000100001000351000321120201100991001010010000100010000100000131012162299711100171000010100100033100033100033100033100033
20204100032775000000108100017895552530100101001000010000100100001000050047781345679930010001310003310003296893397490201000200100001000020010000100001000321000321120201100991001010010000100010000003000131012162299631100001000010100100033100033100033100033100033

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0032

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200241000327750000000100017895572530010100101000010000101000010000504778134567853510001310003210003296915397512200102010000100002010183100001000351000321120021109101001010000100100089000012701161199631100001000010010100082100074100033100033100033
200241000327750000000100017895552530010100101000010000101000010000504778134567853510001410003210003296915397512200102010000100002010000100001000321000321120021109101001010000100100000000012701161199631100001000010010100048100034100037100033100033
200241000327760000000100017895552530010100101000010000101000010000504778134567853510001310003310003296915397512200102010000100002010000100001000321000341120021109101001010000100100001000012701162199631100051000010010100064100033100033100033100033
200241000337750000000100017895552530010100101000010000101000010000504778182567853510001310003210003296915397512200102010000100002010000100001000321000321120021109101001010000100100000000012701161199631100001000010010100034100034100033100033100033
200241000327760000000100019895552530010100101000010000101000010000504778134567853510001410003210003296915397512200102010000100002010000100001000321000321120021109101001010000100100000000012701161199634100091000010010100403100365100390100391100388
200241003807781044528352010036789556253001010010100031000811104001020066478947456053211002671003761003719711417977662044520102411030120103041024110037610041261200211091010010100001041001002135950012701161299634100001000010010100105100039100033100033100033
200241000387750000000100017895572530010100101000010000101000010000504778134567853510013510003210003296915397517202252010000100002010000100001000321000321120021109101001010000100100000000012701321199631100071000010010100033100033100033100033100033
200241000327760000000100017895562530010100161000010000101000010000504778134567853510001310003310020996915397512200102010000100002010000100601000321000321120021109101001010000100100000000012701161199631100001000010010100033100033100033100033100033
2002410003277500001200100017895552530010100101000010000101000010100504778134567853510001310003210003296917397512200102010000100002010000100001000321000321120021109101001010000100100001000012701251199631100001000010010100033100034100033100033100036
20024100032776000000010001789555253001010010100001000010100001000050477813456785351000131000321000329691539762920010201000010000201000010000100032100035212002110910100101000010010000101530012701161199631100001000010010100033100033100033100033100037

Test 3: throughput

Count: 8

Code:

  fmov s0, w8
  fmov s1, w8
  fmov s2, w8
  fmov s3, w8
  fmov s4, w8
  fmov s5, w8
  fmov s6, w8
  fmov s7, w8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3338

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 19 | 1e | 1f | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80204267072070042602669202580100100800001008001550011640381266822670726707166396166648011520080024200800242670726707118020110099100100800001000008000000011151180160026704800001002670826712267082670826708
8020426707207001802669202580100100800001008001550011665961266862670726707166356166608011520080024200800242670726707118020110099100100800001000008000000011151180160026704800001002670826708267082670826708
80204267072070018902669202580100100800001008001550011665961266822670726707166356167518011520080024200800242670726711118020110099100100800001000008000000011151180160026704800001002670826708267082670826708
80204267072070036902669202580100100800001008001550011665961266822670726707166356166598011420080024200800242670726707118020110099100100800001000008000000011151180160126704800001002670826708267082670826708
80204267072070036002669202580100100800001008001450011665961266862670726707166396167778011620080024200800242670726707118020110099100100800001000008000000011151180160026704800001002670826708267082670826708
80204267072070010202669202580100100800001008001550011665961266822670726710166356166598011520080024200800242670726707118020110099100100800001000008000000011151180160026704800001002670826708267082670826708
80204267072070038102669202580100100800001008001550011665961266872670726707166356167658011420080024200800242670726707118020110099100100800001000008000000011151180160026704800001002670826712267082670826708
80204267072070037502669202580100100800001008001550011665961266822670726707166356168088011520080024200800242670726707118020110099100100800001000008000000011151180160026704800001002670826708267082670826708
80204267112070028502669202580100100800001008001450011665960266822670726711166356167448011520080024200800242670726707118020110099100100800001000008000010011151180160026704800001002671226708267082670826708
80204267072070036902669202580100100800001008001550011665961266822670726707166356167788011520080024200800242670726707118020110099100100800001000008000000011151180160026704800001002670826708267082670826708

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3338

retire uop (01) | cycle (02) | 03 | 18 | 1e | 1f | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | ld unit uop (a6) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800242670820700026693025800101080000108000050116675001266942670826712166523166888001020800002080000267082670811800211091010800001080000005020141611122670580000102670926709267092670926709
80024267082070002669302580010108000010800005011667500126694267122670816652316688800102080000208000026708267081180021109101080000108000000502010161092670580000102670926709267092670926712
800242670820700026693025800101080000108000050116675001266832670826708166523166888001020800002080000267082670811800211091010800001080000035020111611122670580000102670926709267092670926709
800242670820600026693025800101080000108000050116675001266832670826708166523166888001020800002080000267112670811800211091010800001080000135020101612102670580000102670926709267092670926709
800242670820700026693025800101080000108000050116675001266832670826708166523166928001020800002080000267082670811800211091010800001080000005020101611112670580000102670926709267092671226709
800242670820701202669302580010108000010800005011667500126683267082670816652316688800102080000208000026708267081180021109101080000108000000502010161182670580000102670926709267092670926709
800242670820700026693025800101080000108000050116675001266832670826708166523166888001020800002080000267082670811800211091010800001080000005020121611122670580000102670926709267092670926709
80024267082070002669302580010108000010800005011667500126683267082670816652316688800102080000208000026708267081180021109101080000108000000502011161082670580000102670926709267092670926709
80024267082070002669302580010108000010800005011667500126683267082670816652316688800102080000208000026708267111180021109101080000108000000502011161172670580000102670926709267092670926709
80024267112070002669302580010108000010800005011667500126762267212670816654316688800102080000208000026708267111180021109101080000108000010502013167142670580000102670926712267092671326709