Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
caslb w0, w1, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 4.001
Issues: 3.000
Integer unit issues: 0.001
Load/store unit issues: 3.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
74008 | 34590 | 3013 | 1 | 0 | 3012 | 0 | 0 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34332 | 3004 | 1 | 0 | 3003 | 0 | 0 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34307 | 3004 | 1 | 0 | 3003 | 0 | 0 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34313 | 3004 | 1 | 0 | 3003 | 0 | 0 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34311 | 3004 | 1 | 0 | 3003 | 0 | 0 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34319 | 3004 | 1 | 0 | 3003 | 0 | 0 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34297 | 3004 | 1 | 0 | 3003 | 0 | 0 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34332 | 3004 | 1 | 0 | 3003 | 0 | 0 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34312 | 3004 | 1 | 0 | 3003 | 0 | 0 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34330 | 3004 | 1 | 0 | 3003 | 0 | 0 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
Code:
caslb w0, w1, [x6]
add x6, x6, 2
(fused SUBS/B.cc loop)
Result (median cycles for code): 9.0054
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50208 | 90248 | 43919 | 13875 | 30044 | 13875 | 30003 | 47281 | 1039107 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 90060 | 45727 | 15726 | 30001 | 15725 | 30003 | 47241 | 1039033 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 90054 | 45727 | 15726 | 30001 | 15725 | 30003 | 47241 | 1039033 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 90054 | 45727 | 15726 | 30001 | 15725 | 30003 | 47241 | 1039039 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 90054 | 45727 | 15726 | 30001 | 15725 | 30003 | 47241 | 1039029 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 90054 | 45727 | 15726 | 30001 | 15725 | 30003 | 47241 | 1039035 | 45728 | 20201 | 30003 | 20225 | 60078 | 14975 | 30000 | 20100 |
50204 | 90054 | 45727 | 15726 | 30001 | 15725 | 30003 | 47241 | 1039033 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 90054 | 45727 | 15726 | 30001 | 15725 | 30003 | 47241 | 1039031 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 90054 | 45727 | 15726 | 30001 | 15725 | 30003 | 47241 | 1039031 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 90054 | 45727 | 15726 | 30001 | 15725 | 30003 | 47241 | 1039030 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
Result (median cycles for code): 9.0051
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50028 | 90249 | 43830 | 13785 | 30045 | 13785 | 30003 | 46971 | 1039314 | 45638 | 20021 | 30003 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 90054 | 45636 | 15636 | 30000 | 15635 | 30000 | 46971 | 1039276 | 45635 | 20020 | 30000 | 20045 | 60078 | 15227 | 30000 | 20010 |
50024 | 90054 | 45636 | 15636 | 30000 | 15635 | 30000 | 46971 | 1039267 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 90054 | 45636 | 15636 | 30000 | 15635 | 30000 | 46971 | 1039274 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 90054 | 45636 | 15636 | 30000 | 15635 | 30000 | 46971 | 1039273 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 90054 | 45636 | 15636 | 30000 | 15635 | 30000 | 46971 | 1039282 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 90054 | 45636 | 15636 | 30000 | 15635 | 30000 | 46971 | 1039278 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 90054 | 45636 | 15636 | 30000 | 15635 | 30000 | 46971 | 1039275 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50025 | 90098 | 44854 | 14823 | 30031 | 14821 | 30000 | 46971 | 1039287 | 45635 | 20020 | 30000 | 20021 | 60006 | 15626 | 30000 | 20010 |
50024 | 90051 | 45636 | 15636 | 30000 | 15635 | 30000 | 46964 | 1039238 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
Code:
caslb w0, w1, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 11.5584
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
42944 | 117520 | 66786 | 15167 | 0 | 51619 | 12923 | 0 | 78722 | 271634 | 1335843 | 95677 | 26730 | 78726 | 28414 | 167164 | 19445 | 30000 | 13128 |
43007 | 115595 | 70251 | 19596 | 0 | 50655 | 16911 | 0 | 80111 | 280885 | 1260205 | 97390 | 27255 | 80125 | 27241 | 159431 | 19567 | 30000 | 12905 |
42973 | 114731 | 70433 | 19529 | 0 | 50904 | 17021 | 0 | 79196 | 275767 | 1274315 | 96194 | 26965 | 79233 | 26965 | 158829 | 19571 | 30000 | 12865 |
43001 | 115579 | 70463 | 19547 | 0 | 50916 | 17145 | 0 | 79502 | 296246 | 1304501 | 96601 | 26904 | 79506 | 27105 | 158530 | 19296 | 30000 | 12899 |
43000 | 114289 | 69222 | 18963 | 0 | 50259 | 16730 | 0 | 79004 | 269835 | 1302937 | 95906 | 26998 | 79010 | 27099 | 158494 | 19089 | 30000 | 12898 |
42998 | 113615 | 69112 | 18938 | 0 | 50174 | 16697 | 0 | 78994 | 265405 | 1265886 | 95892 | 27000 | 79000 | 26898 | 158954 | 19224 | 30000 | 12895 |
42991 | 114458 | 70717 | 19779 | 0 | 50938 | 17239 | 0 | 79410 | 259105 | 1262372 | 96510 | 26878 | 79418 | 27010 | 159631 | 19495 | 30000 | 12899 |
43001 | 116869 | 69362 | 18789 | 0 | 50573 | 16470 | 0 | 80514 | 271926 | 1254793 | 97987 | 27272 | 80616 | 27287 | 161322 | 19437 | 30000 | 12996 |
43005 | 115625 | 70309 | 19684 | 0 | 50625 | 17198 | 0 | 79070 | 276510 | 1302852 | 96115 | 26858 | 79110 | 26828 | 158522 | 19011 | 30000 | 12901 |
43007 | 115584 | 65419 | 14763 | 0 | 50656 | 13483 | 0 | 80567 | 275275 | 1214949 | 98167 | 27506 | 80620 | 27300 | 159700 | 18994 | 30000 | 12898 |
Result (median cycles for code): 11.7276
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
42812 | 117190 | 67604 | 15571 | 52033 | 12973 | 79079 | 280943 | 1328862 | 0 | 95916 | 26413 | 79087 | 0 | 26399 | 158103 | 20199 | 30000 | 12735 |
42749 | 117306 | 72033 | 20222 | 51811 | 16827 | 79155 | 214662 | 1330369 | 0 | 93957 | 26437 | 79161 | 0 | 26399 | 158108 | 20187 | 30000 | 12736 |
42760 | 117288 | 71537 | 19669 | 51868 | 16398 | 79814 | 282611 | 1322225 | 0 | 96806 | 26654 | 79814 | 0 | 26403 | 158132 | 20211 | 30000 | 12736 |
42750 | 117366 | 72051 | 20234 | 51817 | 16833 | 79067 | 281095 | 1329591 | 0 | 95896 | 26401 | 79067 | 0 | 26759 | 160251 | 20009 | 30000 | 12796 |
42810 | 117082 | 71963 | 20018 | 51945 | 16818 | 80121 | 286501 | 1318128 | 0 | 96937 | 26757 | 80121 | 0 | 26757 | 160239 | 19987 | 30000 | 12796 |
42812 | 117097 | 67959 | 15967 | 51992 | 13377 | 80821 | 271157 | 1340938 | 1167 | 96836 | 29284 | 81070 | 8 | 26402 | 158138 | 20159 | 30000 | 12736 |
42773 | 117172 | 71923 | 20100 | 51823 | 16824 | 80347 | 288022 | 1313537 | 0 | 97185 | 26851 | 80353 | 0 | 26850 | 160690 | 19935 | 30000 | 12808 |
42822 | 116962 | 71940 | 19961 | 51979 | 16830 | 81207 | 293340 | 1306654 | 0 | 98013 | 27119 | 81207 | 0 | 26449 | 158391 | 20128 | 30000 | 12745 |
42810 | 117097 | 71963 | 20020 | 51943 | 16817 | 80127 | 287000 | 1317793 | 0 | 96947 | 26759 | 80127 | 0 | 26755 | 160237 | 19975 | 30000 | 12796 |
42750 | 117321 | 71981 | 20194 | 51787 | 16824 | 79057 | 280585 | 1328447 | 0 | 95877 | 26399 | 79057 | 0 | 26400 | 158102 | 20158 | 30000 | 12736 |