Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMOV (S to W)

Test 1: uops

Code:

  fmov w0, s0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 2.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst integer (97) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10045384043252000100010001000800005195385383703396100010001000538538111001100000733163353510001000539539539539539
10045384043252000100010001000800005195385383703396100010001000538538111001100000733163353510001000539539539539539
10045384043252000100010001000800015195385383703396100010001000538538111001100010733163353510001000539539539539539
10045384043252000100010001000800005195385383703396100010001000538538111001100000733163353510001000539539539539539
10045384043252000100010001000800015195385383703396100010001000538538111001100000733163353510001000539539539539539
10045384043252000100010001000800015195385383703396100010001000538538111001100000733163353510001000539539539539539
10045384043252000100010001000800015195385383703396100010001000538538111001100000733163353510001000539539539539539
10045384043252000100010001000800005195385383703396100010001000538538111001100010733163353510001000539539539539539
10045384043252000100010001000800015195385383703396100010001000538538111001100003733163353510001000539539539539539
10045384043252000100010001000800015195385383703396100010001000538538111001100000733163353510001000539539539539539

Test 2: Latency 1->2 roundtrip

Code:

  fmov w0, s0
  fmov d0, x0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.0038

retire (01) | cycle (02) | 03 | 09 | 18 | 19 | 1e | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | branch mispredict (cb) | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
2020410003877600001100029895612530100101001000010000100100001000050047784815580349100013100038100038969003974962010020010000100002001000010000100038100038112020110099100101001000010040100000000131012162399637100001000010100100039100039100039100039100039
2020410003877500000100024895612530100101001000010000100100001000050047784815580349100013100038100038969003974992010020010000100002001000010000100038100038112020110099100101001000010000100001000131012162299637100001000010100100039100039100039100039100039
2020410003877500000100056895612530100101001000010000100100001000050047784815580349100013100038100039969003974962010020010000100002001000010000100038100038112020110099100101001000010000100000000131012162299637100001000010100100042100039100039100039100039
2020410003877500000100024895612530100101001000010000100100001000050047784815580349100013100039100038969003974962010020010000100002001000010000100038100038112020110099100101001000010000100000000131012163399637100001000010100100041100039100039100039100039
2020410003877600000100046895612530100101001000010000100100001000062647784815580349100042100038100038969003974972010020010000100002001000010000100038100038112020110099100101001000010000100000000131014172499637100251000010100100040100039100039100039100040
2020410003877500000100370895612530100101001000010000125100001000062647784815580349100013100038100038969003974962010020010000100002001000010000100038100038112020110099100101001000010000100000030131212162399640100001000010100100039100039100039100039100039
2020410003877600000100027895612530100101001000010000100100001000050047784815580349100013100038100038969003974972010020010000100002001000010000100038100038112020110099100101001000010000100000000131013174399637100001000010100100039100039100039100039100039
2020410003877600000100029895612530100101001000010000100100001000050047784815580349100013100038100038969003974962010020010000100002001000010000100038100038112020110099100101001000010000100000001131012162299637100001000010100100039100039100039100039100039
2020410003877500000100045895612530100101001000010000100100001000050047784815580404100013100041100038969003974962010020010000100002001000010000100038100038112020110099100101001000010000100001000131012162299637100001000010100100210100044100119100208100039
2020410003877600000100153895612530100101001000010000100100001000050047784815580349100013100038100038969003974962010020010000100002001000010000100038100038112020110099100101001000010000100000000131012162299637100001000010100100039100039100039100039100039

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0038

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
20024100039776000216010002389561253001010010100001000010100001000050477848155789541100013100038100038969253975182001020100001000020100001000010003810003811200211091010010100001000100000000012701161299637100001000010010100039100039100039100039100039
20024100038776000546010002389561253001010010100001000010100001000050477848155789540100013100038100038969223975182001020100001000020100661000010003810003811200211091010010100001000100000000012701161199637100001000010010100039100039100039100039100039
20024100038775000453010002389561253001010010100001000010100001000050477848155790620100013100038100038969223975182001020100001000020100001000010003810003811200211091010010100001000100000100012701161199637100001000010010100039100039100039100039100039
2002410004077500048010002389561253001010010100001000010100001000050477848155789540100013100038100038969223975182001020100001000020100001000010003810003811200211091010010100001000100000000012701161299637100001000010010100039100039100039100039100039
2002410003877500054010002389561253001010010100001000010100001000050477848155789540100013100038100038969223975182001020100001000020100001000010003810003811200211091010010100001000100000003012701161199696100001000010010100039100039100039100039100039
20024100038775000279010002389561253001010010100001000010100001000050477848155789540100013100038100038969223975182001020100001000020100001000010003810003811200211091010010100001000100000000012701161199637100001000010010100039100042100039100039100042
20024100067776000567010002389562253001010010100001000010100001000050477848155789540100015100038100038969223975182001020100001000020100001000010003810003811200211091010010100001000100000000012702161199637100001000010010100039100039100039100039100040
2002410003877600054010002389565253001010010100001000010100001000050477848156280561100013100038100038969223975182001020100001000020100001000010003810003811200211091010010100001000100000100012701161199637100001000010010100039100039100039100039100039
20024100038776000258010002389561253001010010100001000010100001000050477848155789541100013100038100038969273975182001020100001000020100001000010003810003911200211091010010100001000100000000014211491199637100001000010010100039100039100039100039100039
20024100038775000267010002389561253001010010100001000010100001000050477848155789541100013100038100038969223975182001020100001000020100001000010003910003811200211091010010100001000100000100012701161299637100001000010010100039100039100040100039100039

Test 3: throughput

Count: 8

Code:

  fmov w0, s8
  fmov w1, s8
  fmov w2, s8
  fmov w3, s8
  fmov w4, s8
  fmov w5, s8
  fmov w6, s8
  fmov w7, s8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 09 | 0b | 18 | 19 | 1e | 1f | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | 9f | a6 | a8 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80204400543100000003225160100801008000010080004500640024140019400384003829976629991801042008001620080016400384003811802011009910080100100079301115117016004003580000801004003940039400394003940039
8020440038311000000742516010080100800001008000450064002404001940038400382997662999180104200800162008001640038400381180201100991008010010000601115117016004003580000801004003940039400394003940039
8020440038310000000322516010080100800001008000450064002404001940038400382997662999180104200800162008001640038400381180201100991008010010000301115117016004003580000801004003940039400394003940039
802044003831000001201022516010080100800001008000450064002404001940038400382997662999180104200800162008001640038400381180201100991008010010000001115117016004003580000801004003940039400394003940039
802044003831000001202222516010080100800001008000450064002404001940038400382997662999180104200800162008001640038400381180201100991008010010000301115117016004003580000801004003940039400394003940039
8020440038310000000322516010080100800001008000450064002404001940038400382997662999180104200800162008001640038400381180201100991008010010000001115117016004003580000801004003940039400394003940039
8020440038310000000322516010080100800001008000450064002404001940038400382997662999180104200800162008001640038400381180201100991008010010000228101115117016004003580000801004003940039400394003940039
8020440038310000030742516010080100800001008000450064002404001940038400382997662999180104200800162008001640038400381180201100991008010010000001115117016004003580000801004003940039400394003940039
8020440038311000000322516010080100800001008000450064002404001940038400382997662999180104200800162008001640038400381180201100991008010010000001115117016004003580000801004003940039400394010440039
80204400383100000120322516010080100800001008000450064002404001940038400382997662999180104200800162008001640038400381180201100991008010010000001115117016304003580000801004003940039400394003940039

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | 9f | a0 | a1 | a7 | a8 | a9 | ac | c2 | cf | d0 | d5 | d6 | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
800244004731000000004325160010800108000010800005064000014001940038400382999233001880010208000020800004003840038118002110910800101000000005020011601140035800000800104003940039400394003940039
800244003831000000304325160010800108000010800005064000014001940038400382999233001880010208000020800004003840038118002110910800101000000005020011601140035800000800104003940039400394003940039
800244003831000000004325160010800108000010800005064000014001940038400382999233001880010208000020800004003840038118002110910800101000000005020011602140035800000800104003940039400394003940039
800244003831000000004325160010800108000010800005064000014001940038400382999233001880010208000020800004003840038118002110910800101000000005020011601140035800000800104003940039400394003940039
800244003831000000004325160010800108000010802505064000014001940038400382999233001880010208000020800004003840038118002110910800101000000025020011601140035800000800104003940039402344003940039
800244003831000000004325160010800108000010800005064000014001940038400382999233001880010208000020800004003840038118002110910800101000000005020011601140035800000800104003940039400394003940039
800244003831000000004325160010800108000010800005064000014001940038402332999233001880010208000020800004003840038118002110910800101000000305020011601140035800000800104003940039400394003940039
800244003831000000004325160010800108000010800005064000014001940038400382999233001880010208000020800004003840038118002110910800101000000005020012401140035800000800104003940039400394003940039
800244003831000000004325160010800108000010800005064000014001940038400382999233001880010208000020800004003840038118002110910800101000000170805020011601240357800000800104022940231400394003940236
800244003831200000546043621600108001080000108000050640000140019400384003829992183001880010208000020800004003840038118002110910800101000010005020011601440035800000800104003940039400394003940039