Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
caspl x0, x1, x2, x3, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 6.002
Issues: 3.003
Integer unit issues: 0.001
Load/store unit issues: 3.003
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
76011 | 34651 | 3019 | 1 | 3018 | 3006 | 11022 | 3006 | 2004 | 4008 | 2002 | 7007 | 1 | 3000 | 3002 |
76006 | 34605 | 3004 | 1 | 3003 | 3003 | 11014 | 3003 | 2002 | 4004 | 2002 | 7007 | 1 | 3000 | 3002 |
76006 | 35073 | 3004 | 1 | 3003 | 3003 | 11011 | 3003 | 2002 | 4004 | 2002 | 7007 | 1 | 3000 | 3002 |
76006 | 34596 | 3004 | 1 | 3003 | 3003 | 11011 | 3003 | 2002 | 4004 | 2002 | 7007 | 1 | 3000 | 3002 |
76006 | 35114 | 3004 | 1 | 3003 | 3003 | 11011 | 3003 | 2002 | 4004 | 2002 | 7007 | 1 | 3000 | 3002 |
76006 | 34384 | 3004 | 1 | 3003 | 3003 | 11011 | 3003 | 2002 | 4004 | 2002 | 7007 | 1 | 3000 | 3002 |
76006 | 34398 | 3004 | 1 | 3003 | 3003 | 11011 | 3003 | 2002 | 4004 | 2002 | 7007 | 1 | 3000 | 3002 |
76006 | 34396 | 3004 | 1 | 3003 | 3006 | 11034 | 3006 | 2004 | 4008 | 2002 | 7007 | 1 | 3000 | 3002 |
76006 | 34379 | 3004 | 1 | 3003 | 3003 | 11011 | 3003 | 2002 | 4004 | 2002 | 7007 | 1 | 3000 | 3002 |
76006 | 34399 | 3004 | 1 | 3003 | 3003 | 11011 | 3003 | 2002 | 4004 | 2002 | 7007 | 1 | 3000 | 3002 |
Code:
caspl x0, x1, x2, x3, [x6]
add x6, x6, 16
(fused SUBS/B.cc loop)
Result (median cycles for code): 19.0053
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
70235 | 191112 | 54197 | 24057 | 30140 | 23886 | 30002 | 76074 | 2245829 | 0 | 55056 | 30202 | 40003 | 0 | 30202 | 70004 | 24961 | 30000 | 40100 |
70204 | 190052 | 55061 | 25061 | 30000 | 25054 | 30002 | 75636 | 2245199 | 0 | 55056 | 30202 | 40003 | 0 | 30202 | 70004 | 24965 | 30000 | 40100 |
70204 | 190046 | 55065 | 25065 | 30000 | 25054 | 30002 | 75636 | 2245236 | 0 | 55056 | 30202 | 40003 | 0 | 30202 | 70004 | 24967 | 30000 | 40100 |
70205 | 190100 | 53932 | 23902 | 30030 | 23894 | 30002 | 75648 | 2245274 | 0 | 55056 | 30202 | 40003 | 0 | 30202 | 70004 | 24965 | 30000 | 40100 |
70204 | 190076 | 55065 | 25065 | 30000 | 25054 | 30002 | 75636 | 2245199 | 0 | 55056 | 30202 | 40003 | 0 | 30202 | 70004 | 24965 | 30000 | 40100 |
70204 | 190046 | 55065 | 25065 | 30000 | 25054 | 30002 | 75636 | 2245199 | 0 | 55056 | 30202 | 40003 | 0 | 30202 | 70004 | 24965 | 30000 | 40100 |
70205 | 190109 | 54598 | 24568 | 30030 | 24556 | 30002 | 75636 | 2245199 | 0 | 55056 | 30202 | 40003 | 0 | 30202 | 70004 | 24965 | 30000 | 40100 |
70204 | 190046 | 55065 | 25065 | 30000 | 25054 | 30002 | 75636 | 2245199 | 0 | 55056 | 30202 | 40003 | 0 | 30202 | 70004 | 24965 | 30000 | 40100 |
70204 | 190046 | 55065 | 25065 | 30000 | 25054 | 30002 | 75636 | 2245199 | 0 | 55056 | 30202 | 40003 | 0 | 30202 | 70004 | 24965 | 30000 | 40100 |
70204 | 190046 | 55065 | 25065 | 30000 | 25054 | 30038 | 72307 | 2244585 | 0 | 53938 | 30238 | 40051 | 0 | 30202 | 70004 | 24965 | 30000 | 40100 |
Result (median cycles for code): 19.0055
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
70054 | 191061 | 54088 | 23982 | 30106 | 23804 | 30002 | 75887 | 2249536 | 0 | 55006 | 30022 | 40003 | 0 | 30020 | 70000 | 0 | 25004 | 30000 | 0 | 40010 |
70024 | 190081 | 55009 | 25009 | 30000 | 25002 | 30000 | 75511 | 2249129 | 0 | 55002 | 30020 | 40000 | 0 | 30020 | 70000 | 0 | 25000 | 30000 | 0 | 40010 |
70024 | 190049 | 55014 | 25014 | 30000 | 25002 | 30000 | 75484 | 2249012 | 0 | 55002 | 30020 | 40000 | 0 | 30056 | 70084 | 0 | 23830 | 30000 | 0 | 40010 |
70024 | 190049 | 55014 | 25014 | 30000 | 25002 | 30000 | 75490 | 2249012 | 0 | 55002 | 30020 | 40000 | 0 | 30020 | 70000 | 0 | 25004 | 30000 | 0 | 40010 |
70024 | 190049 | 55014 | 25014 | 30000 | 25002 | 30000 | 75490 | 2249012 | 0 | 55002 | 30020 | 40000 | 0 | 30020 | 70000 | 0 | 25004 | 30000 | 0 | 40010 |
70024 | 190051 | 55014 | 25014 | 30000 | 25002 | 30000 | 75484 | 2249012 | 0 | 55002 | 30020 | 40000 | 0 | 30058 | 70088 | 0 | 24521 | 30000 | 0 | 40010 |
70024 | 190049 | 55014 | 25014 | 30000 | 25002 | 30000 | 75490 | 2249012 | 0 | 55002 | 30020 | 40000 | 0 | 30020 | 70000 | 0 | 25004 | 30000 | 0 | 40010 |
70024 | 190049 | 55014 | 25014 | 30000 | 25002 | 30000 | 75484 | 2249012 | 0 | 55002 | 30020 | 40000 | 0 | 30020 | 70000 | 0 | 25004 | 30000 | 0 | 40010 |
70024 | 190049 | 55014 | 25014 | 30000 | 25002 | 30000 | 75484 | 2249012 | 0 | 55002 | 30020 | 40000 | 0 | 30020 | 70000 | 0 | 25004 | 30000 | 0 | 40010 |
70025 | 190116 | 53902 | 23872 | 30030 | 23860 | 30000 | 75881 | 2249530 | 0 | 55002 | 30020 | 40000 | 0 | 30020 | 70000 | 0 | 25000 | 30000 | 0 | 40010 |
Code:
caspl x0, x1, x2, x3, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 19.0098
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
70197 | 190362 | 113995 | 58929 | 55066 | 58906 | 105045 | 330676 | 1313685 | 164893 | 70634 | 140062 | 70572 | 244896 | 60915 | 30000 | 40090 |
70196 | 190098 | 116274 | 61259 | 55015 | 61274 | 104966 | 334280 | 1312558 | 166154 | 70580 | 139956 | 70584 | 244938 | 60959 | 30000 | 40092 |
70194 | 190092 | 115580 | 60872 | 54708 | 61076 | 104973 | 334586 | 1312736 | 166247 | 70584 | 139966 | 70398 | 244275 | 60377 | 30000 | 40092 |
70194 | 190093 | 115875 | 61068 | 54807 | 61274 | 105025 | 337112 | 1317482 | 165941 | 70620 | 140036 | 70584 | 244938 | 60761 | 30000 | 40092 |
70194 | 190092 | 115877 | 61070 | 54807 | 61274 | 104973 | 334586 | 1312736 | 166247 | 70584 | 139966 | 70584 | 244938 | 60959 | 30000 | 40092 |
70194 | 190092 | 115580 | 60872 | 54708 | 61076 | 104973 | 334586 | 1312736 | 166247 | 70584 | 139966 | 70584 | 244938 | 60959 | 30000 | 40092 |
70194 | 190092 | 116273 | 61268 | 55005 | 61274 | 104973 | 334586 | 1312736 | 166247 | 70584 | 139966 | 70634 | 245101 | 59119 | 30000 | 40086 |
70194 | 190092 | 114473 | 60068 | 54405 | 60474 | 104373 | 341580 | 1329839 | 164847 | 70184 | 139166 | 70584 | 244938 | 60959 | 30000 | 40092 |
70194 | 190092 | 116273 | 61268 | 55005 | 61274 | 104973 | 334600 | 1312753 | 166247 | 70584 | 139966 | 70584 | 244938 | 60959 | 30000 | 40092 |
70189 | 190168 | 116127 | 61070 | 55057 | 61084 | 104973 | 334678 | 1312971 | 166247 | 70584 | 139966 | 70620 | 245058 | 60865 | 30000 | 40082 |
Result (median cycles for code): 19.0094
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
70017 | 190224 | 114967 | 59907 | 55060 | 59894 | 104976 | 336052 | 1314066 | 166882 | 70046 | 139970 | 70026 | 244875 | 61835 | 30000 | 40002 |
70016 | 190101 | 116894 | 61881 | 55013 | 61892 | 104921 | 336207 | 1314716 | 166793 | 70010 | 139896 | 70060 | 244982 | 61580 | 30000 | 39992 |
70016 | 190134 | 116829 | 61843 | 54986 | 61874 | 104921 | 336255 | 1314843 | 166793 | 70010 | 139896 | 70028 | 244876 | 61862 | 30000 | 40000 |
70014 | 190094 | 116892 | 61892 | 55000 | 61890 | 104948 | 335757 | 1314014 | 166838 | 70028 | 139932 | 70028 | 244876 | 61862 | 30000 | 40000 |
70014 | 190094 | 116892 | 61892 | 55000 | 61890 | 104948 | 335757 | 1314014 | 166838 | 70028 | 139932 | 70028 | 244876 | 61862 | 30000 | 40000 |
70007 | 190136 | 115256 | 60236 | 55020 | 60256 | 104918 | 336197 | 1314854 | 166782 | 70008 | 139892 | 70010 | 244813 | 61826 | 30000 | 40000 |
70007 | 190162 | 114924 | 59910 | 55014 | 59952 | 104954 | 336372 | 1314755 | 166828 | 70036 | 139942 | 70036 | 244886 | 61812 | 30000 | 40000 |
70014 | 190094 | 116811 | 61842 | 54969 | 61874 | 104954 | 336372 | 1314755 | 166828 | 70036 | 139942 | 70028 | 244876 | 61831 | 30000 | 40000 |
70014 | 190092 | 116869 | 61872 | 54997 | 61874 | 104948 | 335750 | 1313975 | 166838 | 70028 | 139932 | 70028 | 244876 | 61858 | 30000 | 40000 |
70014 | 190092 | 116888 | 61888 | 55000 | 61890 | 104993 | 334085 | 1315461 | 166091 | 70060 | 139994 | 70008 | 244806 | 61818 | 30000 | 40000 |