Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMOV (D from X)

Test 1: uops

Code:

  fmov d0, x0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 3f | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
10043743359025100010001000140603493743741973232100010001000374374111001100010000073116113711000375375375375375
10043743359025100010001000140603493743741973232100010001000374374111001100010000073116113711000375375375375387
10043743359025100010001000148333493743741973232100010001000374374111001100010002073116113711000375375375375375
10043742359025100010001000140603493743741973232100010001000374374111001100010000073116113711000375375375375375
10043742359025100010001000142003493743741973232100010001000374374111001100010000073116113711000375375375375375
10043743359025100010001000140603493743741973232100010001000374374111001100010000073116113711000375375375375375
10043743359025100010001000140603493743841973232100010001000374374111001100010000073116113711000375375375375375
10043743359025100010001000140603493823741973232100010001000374374111001100010000073116113711000378375375375375
10043743359025100010001000140603493743741973232100010001000374374111001100010000073116113711000375375375375375
10043742359025100010001000140603493743741973232100010001000374374111001100010000073116113711000375375375375375

Test 2: Latency 1->2 roundtrip

Code:

  fmov d0, x0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.0032

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | branch mispredict (cb) | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
2020410003577600000010001789555253010010100100001000010010000100005004778134567993001000641000321000329689339749020100200100001000020010000100001000321000331120201100991001010010000100001000000000131012162299631100001000010100100033100033100033100033100033
2020410003277500000010001789555253010010100100001000010010000100005004778134567993001000131000321000329690439749020100200100001000020010000100001000321000321120201100991001010010000100001000000300131012162299632100001000010100100033100033100033100033100033
2020410003277500000010001789555253010010100100001000010010000100005004778134567993001000761000321000329689339749020100200100001000020010000100001000351000321120201100991001010010000100001000000000131012163299631100001000010100100033100033100033100033100033
2020410003277500000010001789555253010010100100001000010010000100005004778134567993001000131000321000329689339749020100200100001000020010000100001000321000321120201100991001010010000100001000000300131012162299634100001000010100100033100033100036100033100033
2020410003277500000010001789555253010010100100001000010010000100005004778134567993001000871000721000359689339749020100200100001000020010000100001000321000321120201100991001010010000100001000000000131012162299632100001000010100100033100033100033100033100033
2020410003477500000010001789555253010010100100001000010010000100005004778134567993001000731000321000329689339749020100200100001000020010000100001000321000321120201100991001010010000100001000010000131012162299631100001000010100100033100033100033100033100033
2020410003277600000010002089555253010010100100001000010010000100005004778134567993001000851000321000329689339749020100200100001000020010000100001000321000321120201100991001010010000100001000010000131012162399631100001000010100100033100033100033100033100033
2020410003277600000010001789555253010010100100001000010010000100005004778134567993001000131000321000329689339749020100200100001000020010000100001000321000351120201100991001010010000100001000000000131012162299631100001000010100100033100033100033100033100033
2020410003277500000010027889555253010010100100001000010010000100005004778182568279401000871000941001219689339755720344200100001000020010000100001000321000321120201100991001010010000100001000004300131012162299631100001000010100100033100033100033100033100033
2020410003277500000010001789555253010010100100001000110010000100005004778134567993001000651000331000329689339749020100200100001000020010000100001000321000321120201100991001010010000100001000000000131012162299633100001000010100100033100033100033100033100033

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0032

retire (01) | cycle (02) | 03 | 09 | 0b | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a7 | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
200241000457750000010001789555253001010010100001000010100001000050477813456785351100013100032100032969153975122001020100001000020100001000010003210003211200211091010010100001010000010121270116219963110000001000010010100033100033100033100033100033
20024100032775000001000178955525300101001010000100001010000100005047781345678535010001310003210003296915397512200102010000100002010000100001000321000321120021109101001010000101000606001270216229963110000001000010010100033100033100033100033100036
200241000327750001201000178955525300101001010000100001010000100005047781345678591010001610003210003296915397512200102010060100002010000100001000321001251120021109101001010000101000006031270216339963110000001000010010100034100033100033100033100033
20024100032776000001000178955525300101001010000100001010000100005047781345678702110001310003210003296915397512200102010000100002010000100001000321000321120021109101001010000101000002001270316439963110000001000010010100033100033100033100033100038
200241000327750001201000208955525300101001010000100001010000100005047782785678535010001310003210003296915397512200102010000100002010000100001000321000321120021109101001010000101000006001270316319963110000011000010010100033100033100033100033100042
20024100032776000001000178955525300101001010000100001010000100005047781345678535110001310003210003296915397512200102010000100002010000100661000841000321120021109101001010000101000000001270316329963110000001000010010100033100033100033100033100033
200241000327750101201000178955525300101001010000100001010000100005047781345678535110001310003210003296915397512200102010000100002010000100001000321000321120021109101001010000101000002034911270416439963210000001000010010100033100033100033100033100033
20024100032775000001000178955525300101001010000100001210059100005047782785678535010001310003210003296915397512200102010121100002010000100001000321000321120021109101001010000101000002031270816349963110000001000010010100034100034100033100033100039
20024100032776000001000178955525300101001010000100001010000100005047781345678535010001310003210003296915397512200102010000100002010000100001000321000321120021109101001010000101000002001270216419963110000001000010010100033100035100033100036100080
200241000327750000010001789555253001010010100001000010100001000050477813456785350100013100061100032969153975122001020100001000020100001000010003210003411200211091010010100001010006076034781288316339963110000001000010010100033100033100033100033100048

Test 3: throughput

Count: 8

Code:

  fmov d0, x8
  fmov d1, x8
  fmov d2, x8
  fmov d3, x8
  fmov d4, x8
  fmov d5, x8
  fmov d6, x8
  fmov d7, x8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3338

retire (01) | cycle (02) | 03 | 08 | 0b | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a5 | a6 | a8 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8020426707207000026697025801001008000010080015500116659612668226707267071663561665980115200800242008002426707267071180201100991001008000010008000000011151180160026704800001002670826708267132670826708
8020426707207000026695025801001008000010080015500116659612668226711267071663561666380115200800242008002426711267071180201100991001008000010008000003011151180160026704800001002670826708267082670826708
8020426707207000026696025801001008000010080015500116659612668226707267071663561665980115200800242008002426707267071180201100991001008000010008000000011151180160026704800001002670826708267082670826708
8020426723208000026694025801001008000010080015500116659612668226707267071663961665980115200800242008002426707267071180201100991001008000010008000000011151180160026704800001002670826708267082670826708
8020426707207000026704025801001008000010080015500116659612668626707267071663561665980115200800242008002426707267071180201100991001008000010008000000011151181160026704800001002670826708267082670826708
8020426707207000026692225801001008000010080015500116659612668226711267071663561665980115200800242008002426707267071180201100991001008000010008000000011151180160026704800001002670826708267082670826708
8020426707207000026692025801001008000010080015500116659612668226707267071663561665980115200800242008002426707267071180201100991001008000010008000003011151180160026704800001002671326708267082670826708
8020426707207000026698025801001008000010080016500116659612668226707267071663961665980114200800242008002426707267071180201100991001008000010008000000011151180160026704800001002670826708267082670826708
8020426707207000026816025801001008000010080015500116659612668626707267071663561665980115200800242008002426712267071180201100991001008000010008000000011151180160026704800001002670826708267082670826708
8020426707207000026692025801001008000010080015500116659612668226707267071663561665980115200800242008002426707267071180201100991001008000010008000000011151180160026704800001002670826708271582670826852

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3338

retire (01) | cycle (02) | 03 | 18 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a5 | a6 | a8 | a9 | ac | cf | d2 | icache miss (d3) | d5 | d6 | da | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
800242670820700026693025800101080000108000050116675001266832670826708166523166888001020800002080000267192670811800211091010800001000800000005020001160112670580000102670926709267122670926709
800242670820700026693025800101080000108000050116675000266832671126711166523166888001020800002080000267082671211800211091010800001000800000005020001160112670880000102671226712267092670926709
800242670820700026693025800101080000108000050116675000266832670826708166523166888001020800002080000267082670811800211091010800001000800000005020001160112670880000102670926709267112670926709
800242670820700026693025800101080000108000050116675000266832670826708166523166888001020800002080000267082671111800211091010800001000800000035020001160112670580000102670926709267092670926709
800242671120700026693025800101080000108000050116675000266832670826708166523166888001020800002080000267082670811800211091010800001000800000005020101160112670580000102670926712267092670926712
800242670820700026693225800101080000108000050116599401266832670826708166523166888001020800002080000267082670811800211091010800001000800000005020001160112670580000102671226709267092670926709
800242671120700026693025800101080000108000050116675011266832670826708166523166888001020800002080000267082670811800211091010800001000800000005020001160112670580000102670926709267092670926709
800242670820700026693025800101080000108000050116675000266832670826708166523166888001020800002080000267082670811800211091010800001000800000005020001160112670580000102670926709267092670926709
800242670820700026693025800101080000108000050116675000266832670826708166523166888001020801932080000267092671511800211091010800001000800000005020001160112670580000102670926709267092670926709
800242670820700026693025800101080000108000050116675011266832670826708166523166888001020800002080000267082671111800211091010800001000800000005020001160112670880000102670926709267092671326709