Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
caslh w0, w1, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 4.000
Issues: 3.000
Integer unit issues: 0.001
Load/store unit issues: 3.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
74007 | 34734 | 3013 | 1 | 3012 | 3000 | 15033 | 3000 | 1000 | 3000 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34315 | 3004 | 1 | 3003 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74005 | 34267 | 3001 | 1 | 3000 | 3000 | 15032 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34361 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34244 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34242 | 3001 | 1 | 3000 | 3003 | 15037 | 3003 | 1001 | 3003 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34415 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34243 | 3001 | 1 | 3000 | 3003 | 15037 | 3003 | 1001 | 3003 | 1000 | 6000 | 1 | 3000 | 1000 |
74005 | 34278 | 3004 | 1 | 3003 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34377 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
Code:
caslh w0, w1, [x6]
add x6, x6, 2
(fused SUBS/B.cc loop)
Result (median cycles for code): 9.0060
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
50208 | 90263 | 43925 | 13876 | 0 | 30049 | 13877 | 0 | 30003 | 47269 | 1039038 | 0 | 45728 | 20201 | 30003 | 0 | 20441 | 60726 | 0 | 15787 | 30000 | 0 | 20100 |
50204 | 90082 | 45730 | 15729 | 0 | 30001 | 15725 | 0 | 30003 | 47271 | 1039144 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 0 | 15626 | 30000 | 0 | 20100 |
56432 | 107330 | 51211 | 19458 | 5 | 31748 | 19356 | 3 | 30003 | 47285 | 1039151 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 0 | 15626 | 30000 | 0 | 20100 |
50204 | 90060 | 45727 | 15726 | 0 | 30001 | 15725 | 0 | 30003 | 47295 | 1039178 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 0 | 15626 | 30000 | 0 | 20100 |
50204 | 90060 | 45727 | 15726 | 0 | 30001 | 15725 | 0 | 30003 | 47271 | 1039150 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 0 | 15626 | 30000 | 0 | 20100 |
50204 | 90060 | 45727 | 15726 | 0 | 30001 | 15725 | 0 | 30039 | 43550 | 1039054 | 0 | 44494 | 20225 | 30039 | 0 | 20201 | 60006 | 0 | 15626 | 30000 | 0 | 20100 |
50204 | 90060 | 45727 | 15726 | 0 | 30001 | 15725 | 0 | 30003 | 47271 | 1039143 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 0 | 15626 | 30000 | 0 | 20100 |
50204 | 90060 | 45727 | 15726 | 0 | 30001 | 15725 | 0 | 30075 | 47496 | 1039602 | 0 | 45833 | 20249 | 30075 | 0 | 20201 | 60006 | 0 | 15626 | 30000 | 0 | 20100 |
50204 | 90208 | 45815 | 15754 | 0 | 30061 | 15753 | 0 | 30003 | 47271 | 1039140 | 0 | 45728 | 20201 | 30003 | 0 | 20249 | 60150 | 0 | 15656 | 30000 | 0 | 20100 |
50204 | 90060 | 45727 | 15726 | 0 | 30001 | 15725 | 0 | 30039 | 47365 | 1039326 | 0 | 45781 | 20225 | 30039 | 0 | 20201 | 60006 | 0 | 15626 | 30000 | 0 | 20100 |
Result (median cycles for code): 9.0058
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
50028 | 90254 | 43830 | 13785 | 30045 | 13785 | 30003 | 46994 | 1039375 | 45638 | 20021 | 30003 | 20020 | 60000 | 15626 | 30000 | 0 | 20010 |
50024 | 90058 | 45636 | 15636 | 30000 | 15635 | 30000 | 46994 | 1039359 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 0 | 20010 |
50024 | 90051 | 45636 | 15636 | 30000 | 15635 | 30000 | 46964 | 1039240 | 45635 | 20020 | 30000 | 20045 | 60078 | 14443 | 30000 | 0 | 20010 |
50024 | 90051 | 45636 | 15636 | 30000 | 15635 | 30000 | 46994 | 1039351 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 0 | 20010 |
50024 | 90058 | 45636 | 15636 | 30000 | 15635 | 30000 | 46964 | 1039237 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 0 | 20010 |
50024 | 90051 | 45636 | 15636 | 30000 | 15635 | 30000 | 46964 | 1039237 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 0 | 20010 |
50024 | 90051 | 45636 | 15636 | 30000 | 15635 | 30000 | 46994 | 1039362 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 0 | 20010 |
50024 | 90051 | 45636 | 15636 | 30000 | 15635 | 30000 | 46964 | 1039233 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 0 | 20010 |
50024 | 90051 | 45636 | 15636 | 30000 | 15635 | 30000 | 46994 | 1039348 | 45635 | 20020 | 30000 | 20045 | 60078 | 14465 | 30000 | 0 | 20010 |
50025 | 90104 | 43902 | 13872 | 30030 | 13871 | 30000 | 46964 | 1039220 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 0 | 20010 |
Code:
caslh w0, w1, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 11.5167
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
42936 | 117234 | 67531 | 15611 | 51920 | 12982 | 79036 | 281483 | 1327943 | 96088 | 26841 | 79041 | 26829 | 158106 | 20051 | 30000 | 12831 |
42942 | 117109 | 71932 | 20268 | 51664 | 17075 | 79151 | 277345 | 1301101 | 96209 | 26945 | 79159 | 26797 | 157420 | 19230 | 30000 | 12831 |
42998 | 114192 | 69217 | 18820 | 50397 | 16463 | 79738 | 286278 | 1290451 | 96832 | 26986 | 79742 | 26994 | 159547 | 19898 | 30000 | 12896 |
43000 | 115619 | 71012 | 19802 | 51210 | 17094 | 79566 | 280184 | 1294559 | 96690 | 26992 | 79568 | 26735 | 158010 | 19790 | 30000 | 12832 |
42997 | 114419 | 70656 | 19667 | 50989 | 17181 | 79051 | 277756 | 1319155 | 96077 | 26847 | 79059 | 27266 | 161121 | 18824 | 30000 | 12986 |
43101 | 112618 | 69685 | 19276 | 50409 | 17099 | 79757 | 281871 | 1230426 | 96939 | 26987 | 79761 | 26891 | 158946 | 19005 | 30000 | 12895 |
43001 | 115574 | 70092 | 19326 | 50766 | 16890 | 80047 | 286727 | 1266656 | 97277 | 27090 | 80054 | 26904 | 158954 | 19037 | 30000 | 12895 |
43003 | 114172 | 69367 | 19102 | 50265 | 16880 | 80001 | 273320 | 1275172 | 96932 | 27160 | 80008 | 26907 | 159025 | 18913 | 30000 | 12902 |
43087 | 112889 | 69055 | 18808 | 50247 | 17221 | 79351 | 268215 | 1299450 | 96445 | 27000 | 79408 | 27085 | 158529 | 18674 | 30000 | 12897 |
42978 | 115619 | 70797 | 19688 | 51109 | 17196 | 79363 | 284745 | 1236297 | 96225 | 27008 | 79418 | 27427 | 161312 | 19153 | 30000 | 12945 |
Result (median cycles for code): 11.7255
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
42812 | 117222 | 67588 | 15568 | 52020 | 12977 | 79135 | 275386 | 1329016 | 95788 | 26426 | 79138 | 26405 | 158149 | 20190 | 30000 | 12735 |
42749 | 117342 | 72014 | 20211 | 51803 | 16823 | 79051 | 280924 | 1329628 | 95874 | 26397 | 79051 | 26397 | 158101 | 20188 | 30000 | 12735 |
42749 | 117312 | 72006 | 20211 | 51795 | 16823 | 79051 | 281041 | 1329616 | 95874 | 26397 | 79051 | 26397 | 158101 | 20188 | 30000 | 12735 |
42749 | 117312 | 72006 | 20211 | 51795 | 16823 | 79047 | 281305 | 1329378 | 95870 | 26395 | 79047 | 26397 | 158089 | 20180 | 30000 | 12735 |
42749 | 117342 | 72014 | 20211 | 51803 | 16823 | 79047 | 281271 | 1329190 | 95870 | 26395 | 79047 | 26395 | 158094 | 20190 | 30000 | 12735 |
42749 | 117314 | 72006 | 20213 | 51793 | 16823 | 79148 | 256885 | 1330340 | 95227 | 26433 | 79151 | 26491 | 158672 | 20136 | 30000 | 12751 |
42765 | 117251 | 71989 | 20157 | 51832 | 16827 | 79414 | 282994 | 1322745 | 96261 | 26514 | 79416 | 26417 | 158175 | 20169 | 30000 | 12739 |
42750 | 117356 | 72048 | 20231 | 51817 | 16833 | 79067 | 281105 | 1329578 | 95900 | 26403 | 79067 | 26400 | 158126 | 20193 | 30000 | 12736 |
42810 | 117089 | 72315 | 20351 | 51964 | 17062 | 80135 | 282622 | 1318303 | 97197 | 26759 | 80135 | 26759 | 160268 | 20326 | 30000 | 12796 |
42810 | 117090 | 72307 | 20348 | 51959 | 17062 | 80134 | 282692 | 1318437 | 97196 | 26759 | 80135 | 26881 | 160988 | 20237 | 30000 | 12818 |