Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FMOV (D[1] from X)

Test 1: uops

Code:

  fmov v0.d[1], x0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
200420401502025169025200010001000100010006342265147120212040204014503177320001000100010002000204020401110011000100000731161120371000100020412041204120412041
200420401502025169025200010001000100010006342265147020212040204014503177320001000100010002000204020401110011000100000731161120371000100020412041204120412041
200420401502025169025200010001000100010006342265147020212040204014503177320001000100010002000204020401110011000100000731161120371000100020412041204120412041
200420401502025169025200010001000100010006342265147020212040204014503177320001000100010002000204020401110011000100000731161120371000100020412041204120412041
200420401602025169025200010001000100010006342265147120212101204014503177320001000100010002000204020401110011000100000731161120371000100020412041204120412041
20042040161862025169025200010001000100010006342265147120212040204014503177320001000100010002000204020401110011000100000731161120371000100020412041204120412041
200420401502025169025200010001000100010006342265147020212040204014503177320001000100010002000204020401110011000100000731161120371000100020412041204120412041
200420401502025169025200010001000100010006342265147020212040204014503177320001000100010002000204020401110011000100000731161120371000100020412041204120412041
200420401602025169025200010001000100010006342265147020212040204014503177320001000100010002000204020401110011000100003731161120371000100020412041204120412041
200420401602025169025200010001000100010006342265147120212040204014503177320001000100010002000204020401110011000100000731161120371000100020412041204120412041

Test 2: Latency 1->2 roundtrip

Code:

  fmov v0.d[1], x0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 12.0032

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3020412003593000000120012001710945625401001010020000100001002000010000500573572013672053012001301200321200321155346116236301002001000320005200100033000812003212003211202011009910010100100001000010000000001111317011611119586100000100001000010100120033120033120033120033120033
302041200329310000000012001710945625401001010020000100001002000010000500573567213672053112001301200881200361155273116240301002001000020000200100003000012003212003211202011009910010100100001000010004010000001310121622119574100000100001000010100120033120033120033120033120033
302041200329300000000012001710945625401001010020000100001002000010000500573567213672053112001301200321200321155253116240301002001000020000200100003000012003312003211202011009910010100100001000010000000000001310121622119574100000100001000010100120033120033120033120033120033
302041200329310000000012001710945625401001010020000100001002000010000500573567213672053012001401200321200321155253116240301002001000020000200100003000012003212003211202011009910010100100001000010000000000001310121622119574100000100001000010100120033120033120033120033120033
3020412003293000000120012001710945625401001010020000100001002000010000500573567213672053012001301200321200321155253116240301002001000020000200100003000012003212003211202011009910010100100001000010000000000001310121622119574100000100001000010100120034120033120033120033120033
302051200329310000000012001710945725401001010020000100001002000010000500573567213672053012001301200321200321155253116240301002001000020000200100003000012003212003211202011009910010100100001000010000000000011310121622119574100000100001000010100120033120033120033120033120037
302041200329310000000012001710945625401001010020000100001002000010000500573567213672053012001301200351200321155253116240301002001000020000200100003000012003212003211202011009910010100100001000010004000813820001310131622119574100000100001000010100120033120041120033120033120033
30204120032930011200001200171096591284014010125200041000811220460101985635750320136994900120382012050112021011552527116297301002001000020000200100003000012003412003211202011009910010100100001000010000000000001310121622119574100000100001000010100120043120033120033120033120033
302041200329310000000012001710945625401001010020000100001002000010000500573567213672053012001301200321200321155253116240301002001000020000200100003000012003212003211202011009910010100100001000010000000300011310121622119574100000100001000010100120033120033120125120039120034
3020412003293100000120012001710945625401001010020000100001002000010000500573567213672053112001301200321200321155253116240301002001000020000200100003000012003212003211202011009910010100100001000010000000000001310121622119574100000100001000010100120033120033120034120033120033

1000 unrolls and 10 iterations

Result (median cycles for code): 12.0032

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3002412003293000001201200221094562540010100102000010000102000010000505735672136700290120013012003212003211554831162623001020100002000020100003000012003212003211200211091010010100001000100001000127021611119574100000100001000010010120033120033120033120033120033
300241200329310000001200171094562540010100102000010000102000010000505735672136700291120013012003212003211562531162623017720100002000020100663000012003712019821200211091010010100001000100001000127011611119574100000100001000010010120033120033120033120033120033
300241200349300000001200171094562540010100102000010000102000010000505735672136700291120013012003212003211558131162623001020100002000020100003000012003212003211200211091010010100001000100000000127011611119574100000100001000010010120033120033120033120033120033
300241200329310000001200171094562540010100102000010000102000010000505735672136700291120014012003212003211554831162623001020100002000020100003000012003212003211200211091010010100001000100000000127011611119574100000100001000010010120033120033120033120033120033
300241200329300000001200171094562540010100102000010000102000010000505736378136700291120013012003212003211554831162623001020100002000020100003000012003212003211200211091010010100001000100001000127011611119574100000100001000010010120033120033120033120033120033
300241200329310004001200171094562540042100102000010008102000010197505735672136700291120307012003212003211554831164863001020102432000020102453000012003212039511200211091010010100001020100060000127011611119574100000100001000010010120033120033120386120033120392
300241200329310000035212001710952625400101002020000100001220000100005057356721367002911203210120032120032115548261162623001020100002000022100003000012035912003211200211091010010100001000100000000127011611119574100000100001000010010120033120033120033120033120033
300241200329300000001200171094562540010100102000010000102000010000505735672136700291120013012003212003211554831162623001022100002000020100003000012003212003211200211091010010100001000100000000127011611119574100000100001000010010120033120033120033120033120033
300241200329310000001200171094562540010100102000010000102000010000505737592136718300120013012003212003211554831162623001020100002000020109213454212221912205727120021109101001010000107410047109160301786326711119574100000100001000010010120625120033121876122000122187
300241220849431123283708193612245711048163540242100652011610020102000010000505735672136700290120013012195712197511645316711782834633241171720482201000030000120102122342341200211091010010100001042100491213147101664523711119574100070100001000010010121894121998121986122050120033

Test 3: throughput

Count: 8

Code:

  fmov v0.d[1], x8
  fmov v1.d[1], x8
  fmov v2.d[1], x8
  fmov v3.d[1], x8
  fmov v4.d[1], x8
  fmov v5.d[1], x8
  fmov v6.d[1], x8
  fmov v7.d[1], x8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3338

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 67 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a5 | a6 | a7 | a8 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020426711207000327026693025160100100800008000010080020800155001168951189801600266892670826708663206666116013520080020800202008002016004026708267081180201100991001008000010000080000010011151170160026705080000800001002670926713267092670926709
160204267082070003026693025160100100800008000010080020800155001168951189801600266902670826708663206666616013620080024800242008002016004026708267081180201100991001008000010000080000000011151170160026705080000800001002670926709267092670926709
160204267082070003026693025160100100800008000010080020800155001168951189801600266892670826708663206666616013520080020800202008002016004026708267081180201100991001008000010000080000000011151170160026705080000800001002670926709267092670926709
16020426708207000348026693025160100100800008000010080020800155001168951189801600266892670826708663206666016013520080020800202008002016004026708267081180201100991001008000010000080000000011151170160026709080000800001002670926709267092670926709
16020426708207000330026693325160100100800008000010080020800155001168951189801600266892670826708663506666516013520080020800202008002016004026708267081180201100991001008000010000080000010011151170160026705080000800001002670926709267092670926709
16020426708207000303026693025160100100800008000010080020800155001168951189801600266892670826708663206665716013420080020800242008002016004026708267081180201100991001008000010000080000000011151170160026705080000800001002670926709267092670926709
16020426708207000282026694225160100100800008000010080020800155001168951189801600266932670826708663206665916013520080024800202008002016004826708267081180201100991001008000010000080000003011151170160026706080000800001002670926709267092670926709
16020426708207000384026693025160100100800008000010080020800155001168951189801600266892670826708663206665716013620080020800202008002416004026708267081180201100991001008000010000080000000011151170160026705080000800001002671426709267122670926713
160204267182060000026693025160100100800008000010080020800155001168951189801600266892670826708663206666016013520080020800202008002016004026708267081180201100991001008000010000080000000011151170160026705080000800001002670926709267092670926709
1602042670820710033637226693025160884100800008000010080020800155001168951189801600266892670826708663206665916013420080020800202008002016004026708267081180201100991001008000010020080000002598011151610160026705080000800001002670927199267092670926709

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3338

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a5 | a6 | a8 | ac | c2 | cf | d0 | d2 | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002426708200004802669302516001010800008000010800008000050116888018979313102668926708267086653036688160010208000080000208000016000026708267081180021109101080000100008000000050200031611267058000080000102670926709267092670926709
160024267082000000266930251600101080000800001080000800005011688801890591415267062670826708667303668816001020800008000020800001600002670826708118002110910108000010401368000030050200011611267058000080000102670926709267092670926709
1600242670820000002669602516001010800008000010800008000050116888018979318102669126708267086653036688160010208000080000208000016000026708267081180021109101080000100008000000050205011611267058000080000102670926709267092670926709
1600242670820000144882669302516001010800008000010800008000050116888018912297102668926708267086653036688160010208000080000208000016000026708267081180021109101080000100008000000050200011611267058000080000102720526718267162672226709
1600242670820000002669302516001010800008000010800008000050116888018979317152669626719267086653036688160010208000080000208000016000026708267081180021109101080000100008000000050205411612267128000080000102670926709267092670926709
1600242670820000002669302516001010800008000010800008000050116902518976227102669126708267086653036688160010208000080000208000016000026708267081180021109101080000100008000000050205011611267218000080000102670926709267092670926709
1600242670820000002669302516001010800008000010800008000050116888018979317102669526709267086653036688160010208000080000208000016037826708267081180021109101080000100008000000050205411611267058000080000102670926709267092670926709
1600242670820000002669302516001010800008000010800008000050116888018979318102668926708267086653036688160010208000080000208000016000026708267121180021109101080000100008000000050200031611267058000080000102670926709267092670926709
16002426708200000026693025160010108000080000108000080000501168880189793171026689267082670866530366931600102080000800002080000160000267082670811800211091010800001000080000112050200011611267058000080000102670926709267092670926709
16002426708200001202669302516001010800008000010800008000050116888018979317102668926708267086653036688160010208000080000208000016000026708267081180021109101080000100008000000050200011611267058000080000102670926709267092670926709