Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
casp x0, x1, x2, x3, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 6.000
Issues: 3.042
Integer unit issues: 0.001
Load/store unit issues: 3.042
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
76011 | 36065 | 3043 | 1 | 3042 | 3036 | 11485 | 3036 | 2025 | 4048 | 2026 | 7091 | 1 | 3000 | 3000 |
76004 | 35971 | 3040 | 1 | 3039 | 3045 | 11878 | 3045 | 2030 | 4060 | 2032 | 7112 | 1 | 3000 | 3000 |
76004 | 35804 | 3043 | 1 | 3042 | 3033 | 11377 | 3033 | 2025 | 4045 | 2042 | 7147 | 1 | 3000 | 3000 |
76004 | 36472 | 3049 | 1 | 3048 | 3051 | 11853 | 3051 | 2036 | 4069 | 2038 | 7133 | 1 | 3000 | 3000 |
76004 | 36574 | 3067 | 1 | 3066 | 3052 | 11795 | 3052 | 2037 | 4070 | 2033 | 7112 | 1 | 3000 | 3000 |
76004 | 35744 | 3040 | 1 | 3039 | 3048 | 11576 | 3048 | 2032 | 4064 | 2032 | 7112 | 1 | 3000 | 3000 |
76005 | 36011 | 3049 | 1 | 3048 | 3042 | 11946 | 3042 | 2028 | 4056 | 2034 | 7119 | 1 | 3000 | 3000 |
76004 | 35952 | 3043 | 1 | 3042 | 3039 | 11570 | 3039 | 2026 | 4052 | 2022 | 7077 | 1 | 3000 | 3000 |
76004 | 35893 | 3043 | 1 | 3042 | 3046 | 11732 | 3046 | 2032 | 4062 | 2032 | 7105 | 1 | 3000 | 3000 |
76004 | 35968 | 3043 | 1 | 3042 | 3036 | 11254 | 3036 | 2024 | 4048 | 2029 | 7098 | 1 | 3000 | 3000 |
Code:
casp x0, x1, x2, x3, [x6]
add x6, x6, 16
(fused SUBS/B.cc loop)
Result (median cycles for code): 17.0060
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
70235 | 171136 | 54222 | 24087 | 30135 | 23892 | 30038 | 75524 | 1996682 | 55010 | 30238 | 40051 | 30274 | 70172 | 23899 | 30000 | 40100 |
70204 | 170071 | 55064 | 25064 | 30000 | 25054 | 30038 | 74359 | 1997255 | 54626 | 30238 | 40051 | 30350 | 70328 | 25063 | 30000 | 40100 |
70206 | 170225 | 54353 | 24293 | 30060 | 24280 | 30002 | 75654 | 1997112 | 55056 | 30202 | 40003 | 30202 | 70004 | 24964 | 30000 | 40100 |
70205 | 170151 | 54440 | 24411 | 30029 | 24400 | 30002 | 75654 | 1997088 | 55056 | 30202 | 40003 | 30202 | 70004 | 24964 | 30000 | 40100 |
70205 | 170154 | 54338 | 24308 | 30030 | 24298 | 30002 | 75654 | 1997088 | 55056 | 30202 | 40003 | 30346 | 70340 | 25070 | 30000 | 40100 |
70204 | 171010 | 55515 | 25276 | 30239 | 25265 | 30071 | 76025 | 1997523 | 55178 | 30276 | 40096 | 30202 | 70004 | 24964 | 30000 | 40100 |
70204 | 170065 | 55064 | 25064 | 30000 | 25054 | 30002 | 75654 | 1997137 | 55056 | 30202 | 40003 | 30202 | 70004 | 24964 | 30000 | 40100 |
70205 | 170112 | 54927 | 24897 | 30030 | 24885 | 30002 | 75654 | 1997112 | 55056 | 30202 | 40003 | 30202 | 70004 | 24964 | 30000 | 40100 |
70205 | 170174 | 54757 | 24727 | 30030 | 24716 | 30002 | 75654 | 1997137 | 55056 | 30202 | 40003 | 30202 | 70004 | 24964 | 30000 | 40100 |
70204 | 170061 | 55064 | 25064 | 30000 | 25054 | 30002 | 75654 | 1997088 | 55056 | 30202 | 40003 | 30202 | 70004 | 24964 | 30000 | 40100 |
Result (median cycles for code): 17.0061
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
70055 | 171143 | 54158 | 24017 | 30141 | 23839 | 30002 | 75510 | 1999960 | 55006 | 30022 | 40003 | 30020 | 70000 | 25000 | 30000 | 40010 |
70024 | 170069 | 55010 | 25010 | 30000 | 25002 | 30000 | 75504 | 1999592 | 55002 | 30020 | 40000 | 30020 | 70000 | 25002 | 30000 | 40010 |
70024 | 170060 | 55010 | 25010 | 30000 | 25002 | 30000 | 75504 | 1999591 | 55002 | 30020 | 40000 | 30020 | 70000 | 25000 | 30000 | 40010 |
70024 | 170065 | 55010 | 25010 | 30000 | 25002 | 30000 | 75504 | 1999592 | 55002 | 30020 | 40000 | 30058 | 70088 | 24575 | 30000 | 40010 |
70024 | 170081 | 55010 | 25010 | 30000 | 25002 | 30038 | 75378 | 1999531 | 54944 | 30058 | 40051 | 30020 | 70000 | 25002 | 30000 | 40010 |
70024 | 170221 | 55069 | 25039 | 30030 | 25030 | 30000 | 75500 | 1999725 | 55002 | 30020 | 40000 | 30020 | 70000 | 25005 | 30000 | 40010 |
70024 | 170052 | 55013 | 25013 | 30000 | 25002 | 30000 | 75518 | 1999527 | 55002 | 30020 | 40000 | 30020 | 70000 | 25003 | 30000 | 40010 |
70024 | 170060 | 55013 | 25013 | 30000 | 25002 | 30000 | 75518 | 1999489 | 55002 | 30020 | 40000 | 30020 | 70000 | 25003 | 30000 | 40010 |
70024 | 170054 | 55013 | 25013 | 30000 | 25002 | 30000 | 75518 | 1999489 | 55002 | 30020 | 40000 | 30020 | 70000 | 25003 | 30000 | 40010 |
70024 | 170052 | 55013 | 25013 | 30000 | 25002 | 30000 | 75518 | 1999575 | 55002 | 30020 | 40000 | 30020 | 70000 | 25003 | 30000 | 40010 |
Code:
casp x0, x1, x2, x3, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 17.0045
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
70197 | 170223 | 113958 | 58907 | 55051 | 58894 | 104927 | 344504 | 1117595 | 166273 | 70554 | 139904 | 70574 | 244897 | 60965 | 30000 | 40092 |
70196 | 170045 | 116260 | 61265 | 54995 | 61268 | 104957 | 343386 | 1117691 | 166225 | 70574 | 139944 | 70366 | 244163 | 58645 | 30000 | 40084 |
70196 | 170045 | 116260 | 61265 | 54995 | 61268 | 104957 | 343386 | 1117691 | 166225 | 70574 | 139944 | 70574 | 244897 | 60965 | 30000 | 40092 |
70196 | 170045 | 116260 | 61265 | 54995 | 61268 | 104957 | 343386 | 1117691 | 166225 | 70574 | 139944 | 70574 | 244897 | 60965 | 30000 | 40092 |
70196 | 170045 | 116260 | 61265 | 54995 | 61268 | 104957 | 343386 | 1117691 | 166225 | 70574 | 139944 | 70574 | 244897 | 60965 | 30000 | 40092 |
70196 | 170045 | 116260 | 61265 | 54995 | 61268 | 105006 | 337758 | 1119204 | 163942 | 70608 | 140010 | 70574 | 244897 | 60963 | 30000 | 40092 |
70196 | 170047 | 116260 | 61265 | 54995 | 61268 | 104957 | 343404 | 1117703 | 166225 | 70574 | 139944 | 70574 | 244897 | 60965 | 30000 | 40092 |
70196 | 170045 | 116260 | 61265 | 54995 | 61268 | 104957 | 343386 | 1117691 | 166225 | 70574 | 139944 | 70574 | 244897 | 60965 | 30000 | 40092 |
70196 | 170045 | 116260 | 61265 | 54995 | 61268 | 104957 | 343386 | 1117691 | 166225 | 70574 | 139944 | 70610 | 245017 | 59627 | 30000 | 40084 |
70196 | 170045 | 116260 | 61265 | 54995 | 61268 | 104957 | 343386 | 1117691 | 166225 | 70574 | 139944 | 70574 | 244897 | 60965 | 30000 | 40092 |
Result (median cycles for code): 17.0082
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
70017 | 170589 | 114966 | 59909 | 55057 | 59886 | 104948 | 345972 | 1119190 | 0 | 166838 | 70028 | 139932 | 0 | 70028 | 244876 | 61858 | 30000 | 40000 |
70014 | 170081 | 116825 | 61852 | 54973 | 61872 | 104952 | 346254 | 1119691 | 0 | 166826 | 70036 | 139940 | 0 | 70036 | 244880 | 61812 | 30000 | 40000 |
70007 | 170169 | 115735 | 60721 | 55014 | 60776 | 104938 | 346226 | 1119487 | 0 | 166806 | 70032 | 139924 | 0 | 70036 | 244880 | 61812 | 30000 | 40000 |
70014 | 170081 | 116888 | 61888 | 55000 | 61890 | 104948 | 345972 | 1119190 | 0 | 166838 | 70028 | 139932 | 0 | 70028 | 244876 | 61858 | 30000 | 40000 |
70014 | 170081 | 116888 | 61888 | 55000 | 61890 | 105020 | 344006 | 1119968 | 0 | 166098 | 70078 | 140030 | 0 | 70028 | 244876 | 61858 | 30000 | 40000 |
70014 | 170081 | 116886 | 61886 | 55000 | 61890 | 104948 | 345972 | 1119190 | 0 | 166838 | 70028 | 139932 | 0 | 70022 | 244855 | 68519 | 30000 | 40002 |
70014 | 170082 | 116894 | 61894 | 55000 | 61890 | 104948 | 345937 | 1119216 | 0 | 166838 | 70028 | 139932 | 0 | 70010 | 244813 | 61828 | 30000 | 40000 |
70014 | 170082 | 116894 | 61894 | 55000 | 61890 | 104948 | 345937 | 1119216 | 0 | 166838 | 70028 | 139932 | 0 | 70028 | 244876 | 61864 | 30000 | 40000 |
70014 | 170082 | 116894 | 61894 | 55000 | 61890 | 104921 | 346189 | 1119783 | 0 | 166793 | 70010 | 139896 | 0 | 70072 | 245018 | 60307 | 30000 | 39994 |
70014 | 170082 | 116724 | 61784 | 54940 | 61820 | 104861 | 346239 | 1121203 | 0 | 166639 | 69970 | 139816 | 0 | 70032 | 244866 | 61750 | 30000 | 40000 |