Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
casl w0, w1, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 4.001
Issues: 3.003
Integer unit issues: 0.001
Load/store unit issues: 3.003
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
74007 | 34998 | 3013 | 1 | 3012 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34679 | 3001 | 1 | 3000 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34813 | 3001 | 1 | 3000 | 3000 | 15012 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34248 | 3001 | 1 | 3000 | 3006 | 15088 | 3006 | 1002 | 3006 | 1000 | 6000 | 1 | 3000 | 1000 |
74005 | 34550 | 3004 | 1 | 3003 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34521 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34723 | 3001 | 1 | 3000 | 3000 | 15027 | 3000 | 1000 | 3000 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34528 | 3004 | 1 | 3003 | 3003 | 15032 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34517 | 3004 | 1 | 3003 | 3003 | 15032 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34354 | 3004 | 1 | 3003 | 3003 | 15032 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
Code:
casl w0, w1, [x6]
add x6, x6, 4
(fused SUBS/B.cc loop)
Result (median cycles for code): 9.0054
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
50212 | 90499 | 44006 | 13907 | 30099 | 13903 | 30003 | 47274 | 1039158 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 0 | 20100 |
50204 | 90051 | 45727 | 15726 | 30001 | 15725 | 30003 | 47234 | 1039103 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 0 | 20100 |
50204 | 90051 | 45727 | 15726 | 30001 | 15725 | 30003 | 47234 | 1039102 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 0 | 20100 |
50204 | 90051 | 45727 | 15726 | 30001 | 15725 | 30003 | 47234 | 1039104 | 45728 | 20201 | 30003 | 20225 | 60078 | 15296 | 30000 | 0 | 20100 |
50204 | 90051 | 45727 | 15726 | 30001 | 15725 | 30003 | 47234 | 1039099 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 0 | 20100 |
50204 | 90051 | 45727 | 15726 | 30001 | 15725 | 30003 | 47234 | 1039105 | 45728 | 20201 | 30003 | 20225 | 60078 | 14070 | 30000 | 0 | 20100 |
50205 | 90095 | 45291 | 15260 | 30031 | 15259 | 30003 | 47236 | 1039107 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 0 | 20100 |
50204 | 90051 | 45727 | 15726 | 30001 | 15725 | 30003 | 47240 | 1039121 | 45728 | 20201 | 30003 | 20297 | 60294 | 15690 | 30000 | 0 | 20100 |
50204 | 90217 | 45783 | 15752 | 30031 | 15743 | 30003 | 47234 | 1039078 | 45728 | 20201 | 30003 | 20225 | 60078 | 15644 | 30000 | 0 | 20100 |
50205 | 90179 | 44707 | 14646 | 30061 | 14644 | 30003 | 47239 | 1039012 | 45728 | 20201 | 30003 | 20249 | 60150 | 15656 | 30000 | 0 | 20100 |
Result (median cycles for code): 9.0060
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50030 | 90347 | 43855 | 13792 | 30063 | 13790 | 30003 | 47001 | 1039531 | 45638 | 20021 | 30003 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 90060 | 45636 | 15636 | 30000 | 15635 | 30000 | 47001 | 1039508 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 90060 | 45636 | 15636 | 30000 | 15635 | 30000 | 47001 | 1039509 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 90060 | 45636 | 15636 | 30000 | 15635 | 30000 | 47001 | 1039509 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50025 | 90104 | 44495 | 14464 | 30031 | 14463 | 30000 | 47001 | 1039511 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 90060 | 45636 | 15636 | 30000 | 15635 | 30000 | 47001 | 1039510 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 90060 | 45636 | 15636 | 30000 | 15635 | 30000 | 47001 | 1039510 | 45635 | 20020 | 30000 | 20045 | 60078 | 15127 | 30000 | 20010 |
50024 | 90060 | 45636 | 15636 | 30000 | 15635 | 30000 | 47016 | 1039538 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 90060 | 45636 | 15636 | 30000 | 15635 | 30000 | 47001 | 1039509 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 90060 | 45636 | 15636 | 30000 | 15635 | 30000 | 47001 | 1039509 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
Code:
casl w0, w1, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 11.5594
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
42953 | 115990 | 66153 | 14939 | 0 | 51214 | 12898 | 0 | 79116 | 281258 | 1263510 | 0 | 96199 | 27033 | 79121 | 0 | 26909 | 157752 | 19002 | 30000 | 12894 |
43001 | 115630 | 70318 | 19689 | 0 | 50629 | 16902 | 0 | 79130 | 281959 | 1313913 | 0 | 96190 | 26885 | 79135 | 0 | 26885 | 158230 | 19948 | 30000 | 12839 |
42998 | 115752 | 71170 | 19776 | 0 | 51394 | 17007 | 0 | 79610 | 287602 | 1292522 | 0 | 96810 | 27205 | 79615 | 0 | 26912 | 157423 | 18863 | 30000 | 12896 |
43002 | 115536 | 69812 | 19036 | 0 | 50776 | 17056 | 0 | 79693 | 281709 | 1263821 | 0 | 96740 | 26970 | 79698 | 0 | 26937 | 159205 | 19105 | 30000 | 12900 |
43002 | 115597 | 70688 | 19684 | 0 | 51004 | 17189 | 0 | 79250 | 280315 | 1281552 | 0 | 96242 | 26851 | 79353 | 0 | 27003 | 159001 | 19292 | 30000 | 12898 |
43003 | 115660 | 70275 | 19320 | 0 | 50955 | 16535 | 0 | 78709 | 263699 | 1320095 | 0 | 95704 | 26902 | 78716 | 0 | 26751 | 157465 | 19317 | 30000 | 12836 |
42961 | 116259 | 70597 | 19602 | 0 | 50995 | 16983 | 0 | 79744 | 270776 | 1295458 | 0 | 96842 | 27019 | 79843 | 0 | 27890 | 164904 | 19781 | 30000 | 13072 |
42980 | 115461 | 70359 | 19530 | 0 | 50829 | 17052 | 0 | 80335 | 188106 | 1205862 | 0 | 93799 | 27417 | 80341 | 0 | 26978 | 158907 | 19093 | 30000 | 12900 |
43004 | 115582 | 69204 | 19104 | 0 | 50100 | 16981 | 0 | 78679 | 256344 | 1306476 | 0 | 95777 | 26924 | 78780 | 0 | 26914 | 158171 | 18878 | 30000 | 12907 |
42943 | 117111 | 71978 | 20309 | 0 | 51669 | 17078 | 0 | 79551 | 283595 | 1294458 | 0 | 96912 | 27166 | 79554 | 0 | 26889 | 158871 | 19427 | 30000 | 12891 |
Result (median cycles for code): 11.7124
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
42815 | 117110 | 67536 | 15544 | 51992 | 12983 | 79097 | 280757 | 1325828 | 95930 | 26407 | 79101 | 26400 | 158160 | 20162 | 30000 | 12737 |
42751 | 117171 | 71909 | 20164 | 51745 | 16824 | 79061 | 280812 | 1329095 | 95888 | 26399 | 79061 | 26763 | 160258 | 19978 | 30000 | 12797 |
42812 | 116940 | 71928 | 20011 | 51917 | 16813 | 80125 | 286019 | 1316117 | 96936 | 26761 | 80125 | 26406 | 158145 | 20147 | 30000 | 12738 |
42752 | 117255 | 71904 | 20142 | 51762 | 16828 | 79049 | 280681 | 1329791 | 95871 | 26395 | 79049 | 26689 | 159780 | 20034 | 30000 | 12785 |
42750 | 117314 | 71996 | 20205 | 51791 | 16827 | 79163 | 271133 | 1330559 | 95698 | 26442 | 79176 | 26715 | 159999 | 16088 | 30000 | 12773 |
42749 | 117375 | 71563 | 19728 | 51835 | 16414 | 80138 | 282442 | 1317976 | 97204 | 26768 | 80146 | 26395 | 158101 | 20165 | 30000 | 12736 |
42810 | 117074 | 72281 | 20334 | 51947 | 17058 | 80148 | 282617 | 1316728 | 97212 | 26766 | 80148 | 26420 | 158200 | 20178 | 30000 | 12739 |
42753 | 117149 | 71966 | 20192 | 51774 | 16842 | 79152 | 280335 | 1324255 | 96011 | 26443 | 79153 | 26397 | 158079 | 20152 | 30000 | 12736 |
42810 | 117059 | 72113 | 20160 | 51953 | 16915 | 80139 | 285674 | 1317751 | 97054 | 26763 | 80139 | 26400 | 158097 | 20171 | 30000 | 12736 |
42810 | 117105 | 69640 | 17656 | 51984 | 14776 | 79482 | 283082 | 1322534 | 96301 | 26544 | 79482 | 26754 | 160221 | 19975 | 30000 | 12795 |