Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMOV (D to X)

Test 1: uops

Code:

  fmov x0, d0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 07 | 0a | 1e | 3a | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst integer (97) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10045384110250252000100010001000800015195385383703396100010001000538538111001100000764164453510001000539539539539539
10045384110250252000100010001000800005195385383703396100010001000538538111001100000764164453510001000539539539539539
100453841112250252000100010001000800005195385383703396100010001000538538111001100000764164453510001000539539539539539
10045384110250252000100010001000800015195385383703396100010001000538538111001100000764164453510001000539539539539539
10045384110250252000100010001000800015195385383703396100010001000538538111001100000764164453510001000539539539539539
10045384110250252000100010001000800005195385383703396100010001000538538111001100000764164453510001000539539539539539
10045384110250252000100010001000800005195385383703396100010001000538538111001100000764164453510001000539539539539539
10045384110271252000100010001000800015195385383703396100010001000538538111001100000764164453510001000539539539539539
10045384110250252000100010001000800005195385383703396100010001000538538111001100000764164453510001000539539539539539
10045384110250252000100010001000800005195385383703396100010001000538538111001100000764164453510001000539539539539539

Test 2: Latency 1->2 roundtrip

Code:

  fmov x0, d0
  fmov d0, x0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.0038

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
202041000387750000000010002389568253010010100100001000010010000100005004778481558034901000131000381000389690769749220100200100041000420010004100041000391000381120201100991001010010000100001000000000111131701161199646100001000010100100039100039100039100039100039
202041000387750000000010002389615253010010100100001000010010000100005004778529558034901000131000671000389690969749220100200100041000420010004100041000411000381120201100991001010010000100001000000000111131701161199646100001000010100100039100039100039100039100040
20204100038776000003870010002389603253010010100100001000010010000100005004778481558034901000161000381000389690039749620100200100001000020010000100001000381000381120201100991001010010000100001000000030000131012163299640100001000010100100039100042100039100039100039
202041000387760000000010002389622253010010100100001000010010000100005004778481558034901000131000381000389690139749620100200100001000020010000100001000381000381120201100991001010010000100001000000000000131012162399637100001000010100100039100039100039100039100039
2020410003877500000120010002389632253010010100100001000010010000100005004778481558034901000131000381000399690039749620100200100001000020010000100001000381000381120201100991001010010000100001000000000000131012162299637100001000010100100039100039100039100039100039
202041000387750000000010002389627253010010100100001000010010000100005004778481558034901000131000381000389690039749620100200100001000020010000100001000381000381120201100991001010010000100001000000000000131012162299637100001000010100100039100040100039100123100039
202041000387750000000010002389628253010010100100001000010010000100005004778481558034901000131000381000389690039749720100200100001000020010000100001000381000381120201100991001010010000100001000000000000131012162299637100001000010100100039100039100039100039100039
2020410003877600000120010002389592253010010100100001000410010000100005004778481558034911000131000381000389690039749920100200100001000020010000101271000381001242120201100991001010010000100021000001000000132812162399637100001000010100100039100039100039100039100042
202041000387750000000010002389593253010010100100001000010010000100005004778577558034901000131000381000419690039767120100200100001000020010000100001000381000381120201100991001010010000100001005025013646000002089131644102855101431000010100103578104174103910104049104139
202041038618320103056763784010002389607253010010104100001000410010000100005004778481567901501000151000391000389690839749620100200100001000020010000100001000381000441120201100991001010010000100001000000000000131012162299637100001000010100100073100046100039100042100039

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0038

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 50 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | ac | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
200241000387760000000010002389561025300101001010000100001010000100005047784815578954001000130100038100038969223975182001020100001000020100001000010003810003811200211091010010100001000100000000012701161199637100001000010010100039100039100039100039100039
2002410003877500000120010002389561025300101001010000100001010000100005047784815578954011000130100038100038969223975182001020100001000020100001000010003810003811200211091010010100001000100000000012701161199637100001000010010100040100039100039100039100039
200241000387750000000010002389561025300101001010000100001010000100005047818705606034001000170100038100038969223975182001020100001000020100001000010003810006711200211091010010100001000100000000012701162199640100001000010010100039100042100039100039100039
200241000387750000000010002389561025300101001010000100001010000100005047784815578954011000130100038100040969223975182001020100001000020100001000010003810003811200211091010010100001000100000000012701161199637100001000010010100039100039100040100039100039
2002410003877500000120010002389561025300101001010000100001010000100005047784815578954011000130100038100038969233975182001020100001000020100001000010003810003811200211091010010100001000100000000012701161199637100001000010010100039100039100039100039100039
2002410004177500000120010002389561025300101001010000100001010000100005047784815578954001000130100038100038969223975182001020100001000020100001000010003810004011200211091010010100001000100000100012701161199637100001000010010100039100041100039100039100039
2002410003877500000180010002389561025300101001010000100001010000100005047784815578954001000130100038100038969223975182001020100001000020100001000010003810003811200211091010010100001000100000000012701161199637100001000010010100039100039100039100040100039
200241000387760000000010002389565025300101001010000100001010000100005047784815578954001000140100039100038969253975182001020100001000020100001000010003810003811200211091010010100001000100000000012701161199637100001000010010100039100039100039100039100204
200241000387750000000010002389561025300101001010000100001010000100005047784815578954001000130100038100038969223975182001020100001000020100001000010003810003911200211091010010100001000100000060012701161299637100001000010010100039100039100039100039100039
200241000387750000000010002389561025300101001010000100001010000100005047784815578954011000130100038100038969223975182001020100001000020100001000010003810003911200211091010010100001000100000000012701161199637100001000010010100039100042100039100039100039

Test 3: throughput

Count: 8

Code:

  fmov x0, d8
  fmov x1, d8
  fmov x2, d8
  fmov x3, d8
  fmov x4, d8
  fmov x5, d8
  fmov x6, d8
  fmov x7, d8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 0b | 19 | 1e | 1f | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | icache miss (d3) | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8020440051310000032251601008010080000100800045006400240400190400384003829976062999180104200800162008001640038400381180201100991008010010000000001115117016004003580000801004003940039400394003940039
8020440038310000032251601008010080000100800045006400240400190400384003829976062999180104200800162008001640038400381180201100991008010010000000001115117016004003580000801004003940039400394003940039
8020440038310000032251601008052080000100800045006400240400190400384003829976062999180104200800162008001640038400381180201100991008010010000000001115117016004003580000801004003940039400394003940039
8020440038311000032251601008010080000100800045006400240400190400384003829976062999180104200800162008001640038400381180201100991008010010000060301115117016004003580000801004003940039400394003940039
802044003831000120102251601008010080000100800045006400240401260400384003829976062999180104200800162008001640038400381180201100991008010010000040001115117016004003580000801004003940039400394003940039
80204400383100012032251601008010080000100800045006400240400190400384003829976062999180104200800162008001640038400381180201100991008010010000020001115117016004003580000801004003940039400394003940039
802044003831100120118621602688010080000100800045006400240400190401054003829976062999180104200800162008001640038401042180201100991008010010000000001115117016004003580000801004003940039400394010840238
80204400383120002648546216010080100800001008000450064002404001934023640038299760630177801042008001620080016400384003841802011009910080100100000400001115117048004003580000801004003940234400394003940039
80204400383100000742516010080348800001008000450064002404001904003840174299760629991803582008001620080016400384003811802011009910080100100020340001115134016004003580000801004003940039400394030740039
802044024032213624074821601008010080082100800045006400240400190400384003829976063012980104200800162008001640038400381180201100991008010010000030301115117016004019580000801004033240039402414003940039

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | 9f | a1 | a6 | a8 | ac | c2 | cf | d5 | d6 | da | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8002440039310064251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100003050207160434003580000800104003940039400394003940039
8002440038310043251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100000050203160454003580000800104003940039400394003940039
80024400383110113251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100003050204160434003580000800104003940039400394003940039
8002440038310043251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100000050204160244003580000800104003940039400394003940039
8002440038310043251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100000050202160424003580000800104003940039400394003940039
80024400383100723251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100000050204165634003580000800104003940039400394003940039
8002440038310043251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100000050204160424003580000800104003940039400394003940039
8002440038310043251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100010050202160444003580000800104003940039400394003940039
800244003831112218251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100000050202160644003580000800104003940039400394003940039
80024400383110708251600108001080000108000050640000400194003840038299923300188001020800002080000400384003811800211091080010100000050204160364003580000800104003940039400394003940039