Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
casl x0, x1, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 4.000
Issues: 3.000
Integer unit issues: 0.001
Load/store unit issues: 3.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
74007 | 34537 | 3013 | 1 | 3012 | 3000 | 15027 | 3000 | 1000 | 3000 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34342 | 3004 | 1 | 3003 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34319 | 3001 | 1 | 3000 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34351 | 3001 | 1 | 3000 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34289 | 3001 | 1 | 3000 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74005 | 34333 | 3004 | 1 | 3003 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34352 | 3001 | 1 | 3000 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34384 | 3001 | 1 | 3000 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34350 | 3001 | 1 | 3000 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34297 | 3001 | 1 | 3000 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
Code:
casl x0, x1, [x6]
add x6, x6, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 9.0060
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50214 | 90558 | 44016 | 13913 | 30103 | 13893 | 30039 | 43479 | 1039316 | 44455 | 20225 | 30039 | 20201 | 60006 | 15631 | 30000 | 20100 |
50204 | 90058 | 45730 | 15729 | 30001 | 15725 | 30003 | 47381 | 1039602 | 45728 | 20201 | 30003 | 20201 | 60006 | 15629 | 30000 | 20100 |
50204 | 90058 | 45730 | 15729 | 30001 | 15725 | 30003 | 47380 | 1039592 | 45728 | 20201 | 30003 | 20201 | 60006 | 15629 | 30000 | 20100 |
50204 | 90058 | 45730 | 15729 | 30001 | 15725 | 30003 | 47381 | 1039596 | 45728 | 20201 | 30003 | 20201 | 60006 | 15629 | 30000 | 20100 |
50204 | 90058 | 45730 | 15729 | 30001 | 15725 | 30003 | 47382 | 1039593 | 45728 | 20201 | 30003 | 20201 | 60006 | 15629 | 30000 | 20100 |
50204 | 90058 | 45730 | 15729 | 30001 | 15725 | 30003 | 47381 | 1039596 | 45728 | 20201 | 30003 | 20201 | 60006 | 15629 | 30000 | 20100 |
50205 | 90102 | 44699 | 14668 | 30031 | 14664 | 30003 | 47381 | 1039602 | 45728 | 20201 | 30003 | 20201 | 60006 | 15629 | 30000 | 20100 |
50204 | 90058 | 45730 | 15729 | 30001 | 15725 | 30003 | 47380 | 1039590 | 45728 | 20201 | 30003 | 20201 | 60006 | 15629 | 30000 | 20100 |
50204 | 90058 | 45730 | 15729 | 30001 | 15725 | 30003 | 47381 | 1039594 | 45728 | 20201 | 30003 | 20201 | 60006 | 15629 | 30000 | 20100 |
50204 | 90058 | 45730 | 15729 | 30001 | 15725 | 30003 | 47381 | 1039596 | 45728 | 20201 | 30003 | 20201 | 60006 | 15629 | 30000 | 20100 |
Result (median cycles for code): 9.0060
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50034 | 90555 | 43918 | 13817 | 30101 | 13800 | 30003 | 47094 | 1039868 | 45638 | 20021 | 30003 | 20020 | 60000 | 15630 | 30000 | 20010 |
50024 | 90060 | 45640 | 15640 | 30000 | 15635 | 30039 | 43365 | 1039600 | 44414 | 20045 | 30039 | 20020 | 60000 | 15630 | 30000 | 20010 |
50024 | 90060 | 45640 | 15640 | 30000 | 15635 | 30000 | 47093 | 1039766 | 45635 | 20020 | 30000 | 20020 | 60000 | 15630 | 30000 | 20010 |
50024 | 90060 | 45640 | 15640 | 30000 | 15635 | 30000 | 47093 | 1039766 | 45635 | 20020 | 30000 | 20020 | 60000 | 15630 | 30000 | 20010 |
50024 | 90060 | 45640 | 15640 | 30000 | 15635 | 30000 | 47093 | 1039766 | 45635 | 20020 | 30000 | 20020 | 60000 | 15630 | 30000 | 20010 |
50024 | 90060 | 45640 | 15640 | 30000 | 15635 | 30000 | 47093 | 1039766 | 45635 | 20020 | 30000 | 20020 | 60000 | 15630 | 30000 | 20010 |
50024 | 90060 | 45640 | 15640 | 30000 | 15635 | 30000 | 47093 | 1039766 | 45635 | 20020 | 30000 | 20020 | 60000 | 15630 | 30000 | 20010 |
50024 | 90060 | 45640 | 15640 | 30000 | 15635 | 30000 | 47093 | 1039766 | 45635 | 20020 | 30000 | 20020 | 60000 | 15630 | 30000 | 20010 |
50024 | 90060 | 45640 | 15640 | 30000 | 15635 | 30000 | 47093 | 1039766 | 45635 | 20020 | 30000 | 20020 | 60000 | 15630 | 30000 | 20010 |
50024 | 90060 | 45640 | 15640 | 30000 | 15635 | 30000 | 47093 | 1039766 | 45635 | 20020 | 30000 | 20020 | 60000 | 15630 | 30000 | 20010 |
Code:
casl x0, x1, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 11.5132
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
42961 | 115919 | 71065 | 19847 | 51218 | 17094 | 79711 | 273994 | 1259512 | 96978 | 27160 | 79712 | 27202 | 159631 | 19380 | 30000 | 12899 |
42954 | 114862 | 71436 | 20077 | 51359 | 16945 | 78922 | 265443 | 1302440 | 95977 | 27006 | 79026 | 27214 | 159235 | 19695 | 30000 | 12901 |
43006 | 115596 | 71005 | 19891 | 51114 | 17111 | 79260 | 278949 | 1279616 | 96408 | 27047 | 79269 | 26596 | 157176 | 19573 | 30000 | 12798 |
43000 | 114187 | 69634 | 19293 | 50341 | 16993 | 78900 | 272655 | 1293955 | 95836 | 26829 | 78903 | 27195 | 159075 | 19437 | 30000 | 12897 |
43003 | 114156 | 70307 | 19694 | 50613 | 17295 | 79743 | 287510 | 1240965 | 96920 | 26986 | 79750 | 26735 | 156843 | 19367 | 30000 | 12806 |
42911 | 110077 | 68899 | 18813 | 50086 | 16626 | 78945 | 280939 | 1309946 | 95938 | 26922 | 79022 | 26608 | 156687 | 18857 | 30000 | 12833 |
43002 | 115611 | 71120 | 20187 | 50933 | 17296 | 80405 | 259980 | 1182847 | 97860 | 27458 | 80410 | 27113 | 158578 | 19409 | 30000 | 12899 |
43096 | 114231 | 71210 | 19517 | 51693 | 17385 | 79804 | 277948 | 1257155 | 97102 | 27205 | 79813 | 26723 | 157206 | 17984 | 30000 | 12844 |
42997 | 115598 | 70295 | 19642 | 50653 | 17181 | 79069 | 281224 | 1307044 | 96111 | 26905 | 79075 | 27149 | 159167 | 19385 | 30000 | 12900 |
43101 | 115596 | 70375 | 19581 | 50794 | 17493 | 79465 | 282921 | 1287617 | 96673 | 26985 | 79467 | 27103 | 159215 | 19590 | 30000 | 12902 |
Result (median cycles for code): 11.7314
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
42812 | 117244 | 67610 | 15581 | 52029 | 12977 | 79089 | 281028 | 1329592 | 95926 | 26412 | 79092 | 26431 | 158295 | 16914 | 30000 | 12735 |
42810 | 117089 | 72306 | 20348 | 51958 | 17062 | 79361 | 281963 | 1326461 | 96255 | 26499 | 79361 | 26393 | 158084 | 20186 | 30000 | 12735 |
42810 | 117090 | 72306 | 20347 | 51959 | 17062 | 80282 | 269160 | 1318762 | 96923 | 26811 | 80285 | 26417 | 158228 | 20216 | 30000 | 12739 |
42750 | 117356 | 72050 | 20233 | 51817 | 16833 | 79067 | 281143 | 1329489 | 95896 | 26401 | 79067 | 26780 | 160394 | 19790 | 30000 | 12794 |
42810 | 117089 | 72307 | 20349 | 51958 | 17062 | 80135 | 282622 | 1318302 | 97197 | 26759 | 80135 | 26759 | 160268 | 20326 | 30000 | 12796 |
42749 | 117327 | 72028 | 20216 | 51812 | 16827 | 79067 | 281103 | 1329580 | 95900 | 26403 | 79067 | 26401 | 158132 | 20195 | 30000 | 12736 |
42750 | 117356 | 72050 | 20233 | 51817 | 16833 | 79067 | 281143 | 1329489 | 95896 | 26401 | 79067 | 26759 | 160268 | 20326 | 30000 | 12796 |
42810 | 117090 | 72307 | 20348 | 51959 | 17062 | 80134 | 282675 | 1318360 | 97196 | 26759 | 80135 | 26762 | 160286 | 20339 | 30000 | 12796 |
42810 | 117086 | 72301 | 20347 | 51954 | 17062 | 80134 | 282675 | 1318359 | 97196 | 26759 | 80135 | 26772 | 160334 | 19723 | 30000 | 12794 |
42832 | 117001 | 72395 | 20385 | 52010 | 17145 | 80129 | 282610 | 1318152 | 97188 | 26757 | 80129 | 26757 | 160256 | 20311 | 30000 | 12796 |