Apple Microarchitecture Research by Dougall Johnson M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
casp w0, w1, w2, w3, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 6.000
Issues: 3.000
Integer unit issues: 0.001
Load/store unit issues: 3.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
76011 | 34743 | 3019 | 1 | 3018 | 3000 | 11000 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
76004 | 34492 | 3001 | 1 | 3000 | 3000 | 11000 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
76004 | 34419 | 3001 | 1 | 3000 | 3000 | 11000 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
76004 | 34420 | 3001 | 1 | 3000 | 3000 | 11000 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
76004 | 34426 | 3001 | 1 | 3000 | 3000 | 11000 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
76004 | 34423 | 3001 | 1 | 3000 | 3000 | 11000 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
76004 | 34414 | 3001 | 1 | 3000 | 3000 | 11000 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
76004 | 34400 | 3001 | 1 | 3000 | 3003 | 11012 | 3003 | 2002 | 4004 | 2000 | 7000 | 1 | 3000 | 3000 |
76004 | 34645 | 3001 | 1 | 3000 | 3000 | 11000 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
76004 | 34418 | 3001 | 1 | 3000 | 3000 | 11000 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
Code:
casp w0, w1, w2, w3, [x6] ; add x6, x6, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 17.0061
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
70225 | 171119 | 54074 | 23942 | 30132 | 23895 | 30002 | 75548 | 1997005 | 55056 | 30202 | 40003 | 30202 | 70004 | 24960 | 30000 | 40100 |
70204 | 170059 | 55060 | 25060 | 30000 | 25054 | 30002 | 75548 | 1997110 | 55056 | 30202 | 40003 | 30202 | 70004 | 24960 | 30000 | 40100 |
70204 | 170058 | 55060 | 25060 | 30000 | 25054 | 30002 | 75578 | 1997104 | 55056 | 30202 | 40003 | 30238 | 70088 | 24090 | 30000 | 40100 |
70204 | 170055 | 55060 | 25060 | 30000 | 25054 | 30002 | 75548 | 1997032 | 55056 | 30202 | 40003 | 30202 | 70004 | 24962 | 30000 | 40100 |
70204 | 170100 | 55060 | 25060 | 30000 | 25054 | 30002 | 75548 | 1997156 | 55056 | 30202 | 40003 | 30202 | 70004 | 24960 | 30000 | 40100 |
70204 | 170058 | 55060 | 25060 | 30000 | 25054 | 30002 | 75548 | 1997087 | 55056 | 30202 | 40003 | 30202 | 70004 | 24960 | 30000 | 40100 |
70204 | 170058 | 55060 | 25060 | 30000 | 25054 | 30002 | 75548 | 1997031 | 55056 | 30202 | 40003 | 30202 | 70004 | 24960 | 30000 | 40100 |
70204 | 170058 | 55060 | 25060 | 30000 | 25054 | 30002 | 75548 | 1997031 | 55056 | 30202 | 40003 | 30202 | 70004 | 24960 | 30000 | 40100 |
70204 | 170059 | 55060 | 25060 | 30000 | 25054 | 30038 | 75734 | 1997171 | 55116 | 30238 | 40051 | 30202 | 70004 | 24960 | 30000 | 40100 |
70204 | 170060 | 55060 | 25060 | 30000 | 25054 | 30002 | 75548 | 1997018 | 55056 | 30202 | 40003 | 30202 | 70004 | 24960 | 30000 | 40100 |
Result (median cycles for code): 17.0055
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
70045 | 170803 | 54026 | 23899 | 30127 | 23865 | 30002 | 75446 | 1999568 | 55006 | 30022 | 40003 | 30020 | 70000 | 24999 | 30000 | 40010 |
70024 | 170055 | 55009 | 25009 | 30000 | 25002 | 30000 | 75383 | 1999464 | 55002 | 30020 | 40000 | 30020 | 70000 | 24999 | 30000 | 40010 |
70024 | 170055 | 55009 | 25009 | 30000 | 25002 | 30036 | 71950 | 1999127 | 53848 | 30056 | 40048 | 30020 | 70000 | 24999 | 30000 | 40010 |
70024 | 170055 | 55009 | 25009 | 30000 | 25002 | 30000 | 75383 | 1999464 | 55002 | 30020 | 40000 | 30020 | 70000 | 24999 | 30000 | 40010 |
70024 | 170055 | 55011 | 25011 | 30000 | 25002 | 30000 | 75383 | 1999464 | 55002 | 30020 | 40000 | 30020 | 70000 | 24999 | 30000 | 40010 |
70024 | 170055 | 55009 | 25009 | 30000 | 25002 | 30000 | 75383 | 1999464 | 55002 | 30020 | 40000 | 30058 | 70088 | 24550 | 30000 | 40010 |
70024 | 170061 | 55011 | 25011 | 30000 | 25002 | 30000 | 75383 | 1999464 | 55002 | 30020 | 40000 | 30020 | 70000 | 24999 | 30000 | 40010 |
70024 | 170055 | 55009 | 25009 | 30000 | 25002 | 30000 | 75383 | 1999464 | 55002 | 30020 | 40000 | 30020 | 70000 | 24999 | 30000 | 40010 |
70024 | 170055 | 55009 | 25009 | 30000 | 25002 | 30000 | 75383 | 1999464 | 55002 | 30020 | 40000 | 30020 | 70000 | 24999 | 30000 | 40010 |
70024 | 170055 | 55009 | 25009 | 30000 | 25002 | 30038 | 72326 | 1999057 | 53984 | 30058 | 40051 | 30020 | 70000 | 24999 | 30000 | 40010 |
Code:
casp w0, w1, w2, w3, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 17.0056
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
70197 | 170227 | 111790 | 57331 | 54459 | 57720 | 104420 | 346622 | 1129873 | 164884 | 70216 | 139228 | 70386 | 244235 | 0 | 60177 | 30000 | 0 | 40062 |
70188 | 170207 | 110091 | 57267 | 52824 | 59298 | 104360 | 352908 | 1135269 | 165000 | 70176 | 139148 | 70182 | 243525 | 0 | 59367 | 30000 | 0 | 40092 |
70196 | 170084 | 113879 | 59667 | 54212 | 60476 | 104670 | 352884 | 1128589 | 165742 | 70382 | 139562 | 70382 | 244231 | 0 | 60565 | 30000 | 0 | 40092 |
70196 | 170084 | 115577 | 60865 | 54712 | 61072 | 104970 | 345300 | 1118027 | 166242 | 70582 | 139962 | 70530 | 244743 | 0 | 59551 | 30000 | 0 | 40082 |
70189 | 170127 | 114245 | 59497 | 54748 | 59706 | 104670 | 348024 | 1124889 | 165736 | 70382 | 139562 | 70382 | 244231 | 0 | 60559 | 30000 | 0 | 40092 |
70196 | 170084 | 115571 | 60859 | 54712 | 61066 | 104970 | 345284 | 1117989 | 166242 | 70582 | 139962 | 70582 | 244931 | 0 | 60965 | 30000 | 0 | 40092 |
70196 | 170084 | 116277 | 61265 | 55012 | 61272 | 104970 | 345284 | 1117989 | 166242 | 70582 | 139962 | 70582 | 244931 | 0 | 60965 | 30000 | 0 | 40092 |
70196 | 170084 | 116277 | 61265 | 55012 | 61272 | 104970 | 345284 | 1117989 | 166242 | 70582 | 139962 | 70618 | 245038 | 0 | 59056 | 30000 | 0 | 40084 |
70196 | 170084 | 116277 | 61265 | 55012 | 61272 | 104970 | 345284 | 1117989 | 166242 | 70582 | 139962 | 70582 | 244931 | 0 | 60965 | 30000 | 0 | 40092 |
70196 | 170084 | 116277 | 61265 | 55012 | 61272 | 104970 | 345284 | 1117989 | 166242 | 70582 | 139962 | 70582 | 244931 | 0 | 60965 | 30000 | 0 | 40092 |
Result (median cycles for code): 17.0082
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
70016 | 170084 | 116905 | 61893 | 55012 | 61902 | 104890 | 345566 | 1118501 | 166742 | 69988 | 139854 | 70016 | 244828 | 61836 | 30000 | 0 | 39998 |
70014 | 170045 | 116847 | 61869 | 54978 | 61866 | 104992 | 342104 | 1120161 | 165434 | 70060 | 139992 | 69988 | 244736 | 61738 | 30000 | 0 | 40000 |
70014 | 170081 | 116607 | 61694 | 54913 | 61754 | 104891 | 345796 | 1120479 | 166665 | 69990 | 139856 | 70010 | 244813 | 61826 | 30000 | 0 | 40000 |
70014 | 170085 | 116831 | 61856 | 54975 | 61872 | 104952 | 346254 | 1119691 | 166826 | 70036 | 139940 | 70036 | 244880 | 61812 | 30000 | 0 | 40000 |
70014 | 170082 | 116883 | 61892 | 54991 | 61890 | 104948 | 345972 | 1119190 | 166838 | 70028 | 139932 | 70060 | 244982 | 61810 | 30000 | 0 | 39992 |
70014 | 170085 | 116829 | 61854 | 54975 | 61872 | 104952 | 346254 | 1119691 | 166826 | 70036 | 139940 | 70036 | 244880 | 61812 | 30000 | 0 | 40000 |
70014 | 170082 | 116831 | 61856 | 54975 | 61872 | 104921 | 346224 | 1119789 | 166793 | 70010 | 139896 | 70036 | 244880 | 61812 | 30000 | 0 | 40000 |
70014 | 170082 | 116883 | 61892 | 54991 | 61890 | 104948 | 346134 | 1119441 | 166838 | 70028 | 139932 | 70028 | 244876 | 61858 | 30000 | 0 | 40000 |
70014 | 170081 | 116888 | 61888 | 55000 | 61890 | 105016 | 343864 | 1119681 | 166134 | 70076 | 140024 | 70008 | 244806 | 61814 | 30000 | 0 | 40000 |
70014 | 170081 | 116814 | 61844 | 54970 | 61866 | 104921 | 346224 | 1119757 | 166793 | 70010 | 139896 | 70010 | 244813 | 61822 | 30000 | 0 | 40000 |