Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

FMOV (D to X)

Test 1: uops

Code:

  fmov x0, d0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 1e | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch simd uop (57) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map simd uop (7e) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int alu (97) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10045384110250252000100010001000800015195385383703396100010001000538538111001100000764164453510001000539539539539539
10045384110250252000100010001000800005195385383703396100010001000538538111001100000764164453510001000539539539539539
100453841112250252000100010001000800005195385383703396100010001000538538111001100000764164453510001000539539539539539
10045384110250252000100010001000800015195385383703396100010001000538538111001100000764164453510001000539539539539539
10045384110250252000100010001000800015195385383703396100010001000538538111001100000764164453510001000539539539539539
10045384110250252000100010001000800005195385383703396100010001000538538111001100000764164453510001000539539539539539
10045384110250252000100010001000800005195385383703396100010001000538538111001100000764164453510001000539539539539539
10045384110271252000100010001000800015195385383703396100010001000538538111001100000764164453510001000539539539539539
10045384110250252000100010001000800005195385383703396100010001000538538111001100000764164453510001000539539539539539
10045384110250252000100010001000800005195385383703396100010001000538538111001100000764164453510001000539539539539539

Test 2: Latency 1->2 roundtrip

Code:

  fmov x0, d0
  fmov d0, x0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.0038

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202041000387750000000010002389568253010010100100001000010010000100005004778481558034901000131000381000389690769749220100200100041000420010004100041000391000381120201100991001010010000100001000000000111131701161199646100001000010100100039100039100039100039100039
202041000387750000000010002389615253010010100100001000010010000100005004778529558034901000131000671000389690969749220100200100041000420010004100041000411000381120201100991001010010000100001000000000111131701161199646100001000010100100039100039100039100039100040
20204100038776000003870010002389603253010010100100001000010010000100005004778481558034901000161000381000389690039749620100200100001000020010000100001000381000381120201100991001010010000100001000000030000131012163299640100001000010100100039100042100039100039100039
202041000387760000000010002389622253010010100100001000010010000100005004778481558034901000131000381000389690139749620100200100001000020010000100001000381000381120201100991001010010000100001000000000000131012162399637100001000010100100039100039100039100039100039
2020410003877500000120010002389632253010010100100001000010010000100005004778481558034901000131000381000399690039749620100200100001000020010000100001000381000381120201100991001010010000100001000000000000131012162299637100001000010100100039100039100039100039100039
202041000387750000000010002389627253010010100100001000010010000100005004778481558034901000131000381000389690039749620100200100001000020010000100001000381000381120201100991001010010000100001000000000000131012162299637100001000010100100039100040100039100123100039
202041000387750000000010002389628253010010100100001000010010000100005004778481558034901000131000381000389690039749720100200100001000020010000100001000381000381120201100991001010010000100001000000000000131012162299637100001000010100100039100039100039100039100039
2020410003877600000120010002389592253010010100100001000410010000100005004778481558034911000131000381000389690039749920100200100001000020010000101271000381001242120201100991001010010000100021000001000000132812162399637100001000010100100039100039100039100039100042
202041000387750000000010002389593253010010100100001000010010000100005004778577558034901000131000381000419690039767120100200100001000020010000100001000381000381120201100991001010010000100001005025013646000002089131644102855101431000010100103578104174103910104049104139
202041038618320103056763784010002389607253010010104100001000410010000100005004778481567901501000151000391000389690839749620100200100001000020010000100001000381000441120201100991001010010000100001000000000000131012162299637100001000010100100073100046100039100042100039

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0038

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200241000387760000000010002389561025300101001010000100001010000100005047784815578954001000130100038100038969223975182001020100001000020100001000010003810003811200211091010010100001000100000000012701161199637100001000010010100039100039100039100039100039
2002410003877500000120010002389561025300101001010000100001010000100005047784815578954011000130100038100038969223975182001020100001000020100001000010003810003811200211091010010100001000100000000012701161199637100001000010010100040100039100039100039100039
200241000387750000000010002389561025300101001010000100001010000100005047818705606034001000170100038100038969223975182001020100001000020100001000010003810006711200211091010010100001000100000000012701162199640100001000010010100039100042100039100039100039
200241000387750000000010002389561025300101001010000100001010000100005047784815578954011000130100038100040969223975182001020100001000020100001000010003810003811200211091010010100001000100000000012701161199637100001000010010100039100039100040100039100039
2002410003877500000120010002389561025300101001010000100001010000100005047784815578954011000130100038100038969233975182001020100001000020100001000010003810003811200211091010010100001000100000000012701161199637100001000010010100039100039100039100039100039
2002410004177500000120010002389561025300101001010000100001010000100005047784815578954001000130100038100038969223975182001020100001000020100001000010003810004011200211091010010100001000100000100012701161199637100001000010010100039100041100039100039100039
2002410003877500000180010002389561025300101001010000100001010000100005047784815578954001000130100038100038969223975182001020100001000020100001000010003810003811200211091010010100001000100000000012701161199637100001000010010100039100039100039100040100039
200241000387760000000010002389565025300101001010000100001010000100005047784815578954001000140100039100038969253975182001020100001000020100001000010003810003811200211091010010100001000100000000012701161199637100001000010010100039100039100039100039100204
200241000387750000000010002389561025300101001010000100001010000100005047784815578954001000130100038100038969223975182001020100001000020100001000010003810003911200211091010010100001000100000060012701161299637100001000010010100039100039100039100039100039
200241000387750000000010002389561025300101001010000100001010000100005047784815578954011000130100038100038969223975182001020100001000020100001000010003810003911200211091010010100001000100000000012701161199637100001000010010100039100042100039100039100039

Test 3: throughput

Count: 8

Code:

  fmov x0, d8
  fmov x1, d8
  fmov x2, d8
  fmov x3, d8
  fmov x4, d8
  fmov x5, d8
  fmov x6, d8
  fmov x7, d8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | l1i cache miss demand (d3) | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020440051310000032251601008010080000100800045006400240400190400384003829976062999180104200800162008001640038400381180201100991008010010000000001115117016004003580000801004003940039400394003940039
8020440038310000032251601008010080000100800045006400240400190400384003829976062999180104200800162008001640038400381180201100991008010010000000001115117016004003580000801004003940039400394003940039
8020440038310000032251601008052080000100800045006400240400190400384003829976062999180104200800162008001640038400381180201100991008010010000000001115117016004003580000801004003940039400394003940039
8020440038311000032251601008010080000100800045006400240400190400384003829976062999180104200800162008001640038400381180201100991008010010000060301115117016004003580000801004003940039400394003940039
802044003831000120102251601008010080000100800045006400240401260400384003829976062999180104200800162008001640038400381180201100991008010010000040001115117016004003580000801004003940039400394003940039
80204400383100012032251601008010080000100800045006400240400190400384003829976062999180104200800162008001640038400381180201100991008010010000020001115117016004003580000801004003940039400394003940039
802044003831100120118621602688010080000100800045006400240400190401054003829976062999180104200800162008001640038401042180201100991008010010000000001115117016004003580000801004003940039400394010840238
80204400383120002648546216010080100800001008000450064002404001934023640038299760630177801042008001620080016400384003841802011009910080100100000400001115117048004003580000801004003940234400394003940039
80204400383100000742516010080348800001008000450064002404001904003840174299760629991803582008001620080016400384003811802011009910080100100020340001115134016004003580000801004003940039400394030740039
802044024032213624074821601008010080082100800045006400240400190400384003829976063012980104200800162008001640038400381180201100991008010010000030301115117016004019580000801004033240039402414003940039

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | c2 | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002440039310064251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100003050207160434003580000800104003940039400394003940039
8002440038310043251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100000050203160454003580000800104003940039400394003940039
80024400383110113251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100003050204160434003580000800104003940039400394003940039
8002440038310043251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100000050204160244003580000800104003940039400394003940039
8002440038310043251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100000050202160424003580000800104003940039400394003940039
80024400383100723251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100000050204165634003580000800104003940039400394003940039
8002440038310043251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100000050204160424003580000800104003940039400394003940039
8002440038310043251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100010050202160444003580000800104003940039400394003940039
800244003831112218251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100000050202160644003580000800104003940039400394003940039
80024400383110708251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100000050204160364003580000800104003940039400394003940039