Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

FMOV (D from X)

Test 1: uops

Code:

  fmov d0, x0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 3f | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd alu (9a) | ld unit uop (a6) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10043743359025100010001000140603493743741973232100010001000374374111001100010000073116113711000375375375375375
10043743359025100010001000140603493743741973232100010001000374374111001100010000073116113711000375375375375387
10043743359025100010001000148333493743741973232100010001000374374111001100010002073116113711000375375375375375
10043742359025100010001000140603493743741973232100010001000374374111001100010000073116113711000375375375375375
10043742359025100010001000142003493743741973232100010001000374374111001100010000073116113711000375375375375375
10043743359025100010001000140603493743741973232100010001000374374111001100010000073116113711000375375375375375
10043743359025100010001000140603493743841973232100010001000374374111001100010000073116113711000375375375375375
10043743359025100010001000140603493823741973232100010001000374374111001100010000073116113711000378375375375375
10043743359025100010001000140603493743741973232100010001000374374111001100010000073116113711000375375375375375
10043742359025100010001000140603493743741973232100010001000374374111001100010000073116113711000375375375375375

Test 2: Latency 1->2 roundtrip

Code:

  fmov d0, x0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 10.0032

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2020410003577600000010001789555253010010100100001000010010000100005004778134567993001000641000321000329689339749020100200100001000020010000100001000321000331120201100991001010010000100001000000000131012162299631100001000010100100033100033100033100033100033
2020410003277500000010001789555253010010100100001000010010000100005004778134567993001000131000321000329690439749020100200100001000020010000100001000321000321120201100991001010010000100001000000300131012162299632100001000010100100033100033100033100033100033
2020410003277500000010001789555253010010100100001000010010000100005004778134567993001000761000321000329689339749020100200100001000020010000100001000351000321120201100991001010010000100001000000000131012163299631100001000010100100033100033100033100033100033
2020410003277500000010001789555253010010100100001000010010000100005004778134567993001000131000321000329689339749020100200100001000020010000100001000321000321120201100991001010010000100001000000300131012162299634100001000010100100033100033100036100033100033
2020410003277500000010001789555253010010100100001000010010000100005004778134567993001000871000721000359689339749020100200100001000020010000100001000321000321120201100991001010010000100001000000000131012162299632100001000010100100033100033100033100033100033
2020410003477500000010001789555253010010100100001000010010000100005004778134567993001000731000321000329689339749020100200100001000020010000100001000321000321120201100991001010010000100001000010000131012162299631100001000010100100033100033100033100033100033
2020410003277600000010002089555253010010100100001000010010000100005004778134567993001000851000321000329689339749020100200100001000020010000100001000321000321120201100991001010010000100001000010000131012162399631100001000010100100033100033100033100033100033
2020410003277600000010001789555253010010100100001000010010000100005004778134567993001000131000321000329689339749020100200100001000020010000100001000321000351120201100991001010010000100001000000000131012162299631100001000010100100033100033100033100033100033
2020410003277500000010027889555253010010100100001000010010000100005004778182568279401000871000941001219689339755720344200100001000020010000100001000321000321120201100991001010010000100001000004300131012162299631100001000010100100033100033100033100033100033
2020410003277500000010001789555253010010100100001000110010000100005004778134567993001000651000331000329689339749020100200100001000020010000100001000321000321120201100991001010010000100001000000000131012162299633100001000010100100033100033100033100033100033

1000 unrolls and 10 iterations

Result (median cycles for code): 10.0032

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 19 | 1e | 1f | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200241000457750000010001789555253001010010100001000010100001000050477813456785351100013100032100032969153975122001020100001000020100001000010003210003211200211091010010100001010000010121270116219963110000001000010010100033100033100033100033100033
20024100032775000001000178955525300101001010000100001010000100005047781345678535010001310003210003296915397512200102010000100002010000100001000321000321120021109101001010000101000606001270216229963110000001000010010100033100033100033100033100036
200241000327750001201000178955525300101001010000100001010000100005047781345678591010001610003210003296915397512200102010060100002010000100001000321001251120021109101001010000101000006031270216339963110000001000010010100034100033100033100033100033
20024100032776000001000178955525300101001010000100001010000100005047781345678702110001310003210003296915397512200102010000100002010000100001000321000321120021109101001010000101000002001270316439963110000001000010010100033100033100033100033100038
200241000327750001201000208955525300101001010000100001010000100005047782785678535010001310003210003296915397512200102010000100002010000100001000321000321120021109101001010000101000006001270316319963110000011000010010100033100033100033100033100042
20024100032776000001000178955525300101001010000100001010000100005047781345678535110001310003210003296915397512200102010000100002010000100661000841000321120021109101001010000101000000001270316329963110000001000010010100033100033100033100033100033
200241000327750101201000178955525300101001010000100001010000100005047781345678535110001310003210003296915397512200102010000100002010000100001000321000321120021109101001010000101000002034911270416439963210000001000010010100033100033100033100033100033
20024100032775000001000178955525300101001010000100001210059100005047782785678535010001310003210003296915397512200102010121100002010000100001000321000321120021109101001010000101000002031270816349963110000001000010010100034100034100033100033100039
20024100032776000001000178955525300101001010000100001010000100005047781345678535010001310003210003296915397512200102010000100002010000100001000321000321120021109101001010000101000002001270216419963110000001000010010100033100035100033100036100080
200241000327750000010001789555253001010010100001000010100001000050477813456785350100013100061100032969153975122001020100001000020100001000010003210003411200211091010010100001010006076034781288316339963110000001000010010100033100033100033100033100048

Test 3: throughput

Count: 8

Code:

  fmov d0, x8
  fmov d1, x8
  fmov d2, x8
  fmov d3, x8
  fmov d4, x8
  fmov d5, x8
  fmov d6, x8
  fmov d7, x8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3338

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 1f | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | a5 | ld unit uop (a6) | l1d cache writeback (a8) | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020426707207000026697025801001008000010080015500116659612668226707267071663561665980115200800242008002426707267071180201100991001008000010008000000011151180160026704800001002670826708267132670826708
8020426707207000026695025801001008000010080015500116659612668226711267071663561666380115200800242008002426711267071180201100991001008000010008000003011151180160026704800001002670826708267082670826708
8020426707207000026696025801001008000010080015500116659612668226707267071663561665980115200800242008002426707267071180201100991001008000010008000000011151180160026704800001002670826708267082670826708
8020426723208000026694025801001008000010080015500116659612668226707267071663961665980115200800242008002426707267071180201100991001008000010008000000011151180160026704800001002670826708267082670826708
8020426707207000026704025801001008000010080015500116659612668626707267071663561665980115200800242008002426707267071180201100991001008000010008000000011151181160026704800001002670826708267082670826708
8020426707207000026692225801001008000010080015500116659612668226711267071663561665980115200800242008002426707267071180201100991001008000010008000000011151180160026704800001002670826708267082670826708
8020426707207000026692025801001008000010080015500116659612668226707267071663561665980115200800242008002426707267071180201100991001008000010008000003011151180160026704800001002671326708267082670826708
8020426707207000026698025801001008000010080016500116659612668226707267071663961665980114200800242008002426707267071180201100991001008000010008000000011151180160026704800001002670826708267082670826708
8020426707207000026816025801001008000010080015500116659612668626707267071663561665980115200800242008002426712267071180201100991001008000010008000000011151180160026704800001002670826708267082670826708
8020426707207000026692025801001008000010080015500116659612668226707267071663561665980115200800242008002426707267071180201100991001008000010008000000011151180160026704800001002670826708271582670826852

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3338

retire uop (01) | cycle (02) | 03 | 18 | 1e | 1f | 3f | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | cf | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800242670820700026693025800101080000108000050116675001266832670826708166523166888001020800002080000267192670811800211091010800001000800000005020001160112670580000102670926709267122670926709
800242670820700026693025800101080000108000050116675000266832671126711166523166888001020800002080000267082671211800211091010800001000800000005020001160112670880000102671226712267092670926709
800242670820700026693025800101080000108000050116675000266832670826708166523166888001020800002080000267082670811800211091010800001000800000005020001160112670880000102670926709267112670926709
800242670820700026693025800101080000108000050116675000266832670826708166523166888001020800002080000267082671111800211091010800001000800000035020001160112670580000102670926709267092670926709
800242671120700026693025800101080000108000050116675000266832670826708166523166888001020800002080000267082670811800211091010800001000800000005020101160112670580000102670926712267092670926712
800242670820700026693225800101080000108000050116599401266832670826708166523166888001020800002080000267082670811800211091010800001000800000005020001160112670580000102671226709267092670926709
800242671120700026693025800101080000108000050116675011266832670826708166523166888001020800002080000267082670811800211091010800001000800000005020001160112670580000102670926709267092670926709
800242670820700026693025800101080000108000050116675000266832670826708166523166888001020800002080000267082670811800211091010800001000800000005020001160112670580000102670926709267092670926709
800242670820700026693025800101080000108000050116675000266832670826708166523166888001020801932080000267092671511800211091010800001000800000005020001160112670580000102670926709267092670926709
800242670820700026693025800101080000108000050116675011266832670826708166523166888001020800002080000267082671111800211091010800001000800000005020001160112670880000102670926709267092671326709