Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
staddlb w0, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 3.000
Issues: 3.002
Integer unit issues: 1.003
Load/store unit issues: 2.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
73005 | 34949 | 3032 | 1018 | 2014 | 1007 | 2020 | 7959 | 10823 | 3030 | 1010 | 2020 | 2026 | 4052 | 1021 | 2000 | 1000 |
73004 | 35304 | 3047 | 1021 | 2026 | 1013 | 2020 | 8026 | 11008 | 3030 | 1010 | 2020 | 2026 | 4052 | 1024 | 2000 | 1000 |
73004 | 35197 | 3047 | 1023 | 2024 | 1012 | 2024 | 7999 | 10909 | 3036 | 1012 | 2024 | 2024 | 4048 | 1020 | 2000 | 1000 |
73004 | 35369 | 3048 | 1020 | 2028 | 1014 | 2026 | 8226 | 11374 | 3039 | 1013 | 2026 | 2022 | 4044 | 1021 | 2000 | 1000 |
73004 | 35326 | 3053 | 1025 | 2028 | 1014 | 2022 | 7995 | 10854 | 3033 | 1011 | 2022 | 2026 | 4052 | 1020 | 2000 | 1000 |
73004 | 35422 | 3064 | 1032 | 2032 | 1016 | 2028 | 8080 | 11033 | 3042 | 1014 | 2028 | 2028 | 4056 | 1021 | 2000 | 1000 |
73004 | 35159 | 3043 | 1021 | 2022 | 1011 | 2019 | 8016 | 10984 | 3029 | 1010 | 2019 | 2016 | 4032 | 1014 | 2000 | 1000 |
73004 | 34777 | 3031 | 1015 | 2016 | 1008 | 2016 | 7943 | 10832 | 3024 | 1008 | 2016 | 2018 | 4036 | 1017 | 2000 | 1000 |
73004 | 34857 | 3035 | 1017 | 2018 | 1009 | 2018 | 7995 | 10931 | 3027 | 1009 | 2018 | 2018 | 4036 | 1018 | 2000 | 1000 |
73004 | 34169 | 3003 | 1003 | 2000 | 1000 | 2033 | 8233 | 11435 | 3050 | 1017 | 2033 | 2030 | 4060 | 1027 | 2000 | 1000 |
Code:
staddlb w0, [x6]
add x6, x6, 2
(fused SUBS/B.cc loop)
Result (median cycles for code): 6.0065
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40206 | 60285 | 40242 | 20201 | 20041 | 20152 | 20005 | 115561 | 95823 | 40110 | 20205 | 20005 | 30208 | 40009 | 20007 | 20000 | 20100 |
40204 | 60062 | 40105 | 20105 | 20000 | 20102 | 20002 | 115647 | 95521 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40204 | 60062 | 40105 | 20105 | 20000 | 20102 | 20002 | 115650 | 95526 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40204 | 60062 | 40105 | 20105 | 20000 | 20102 | 20002 | 115661 | 95545 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40205 | 60110 | 40173 | 20141 | 20032 | 20134 | 20002 | 115652 | 95530 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40204 | 60062 | 40105 | 20105 | 20000 | 20102 | 20002 | 115645 | 95516 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40204 | 60062 | 40105 | 20105 | 20000 | 20102 | 20002 | 115646 | 95518 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40204 | 60062 | 40105 | 20105 | 20000 | 20102 | 20002 | 115654 | 95537 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40204 | 60062 | 40105 | 20105 | 20000 | 20102 | 20002 | 115636 | 95498 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40204 | 60062 | 40105 | 20105 | 20000 | 20102 | 20002 | 115655 | 95532 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
Result (median cycles for code): 6.0062
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
40026 | 60288 | 40145 | 20107 | 20038 | 20058 | 20002 | 115424 | 95436 | 0 | 40014 | 20022 | 20002 | 0 | 30020 | 40000 | 0 | 20005 | 20000 | 0 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115418 | 95428 | 0 | 40010 | 20020 | 20000 | 0 | 30020 | 40000 | 0 | 20005 | 20000 | 0 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115430 | 95446 | 0 | 40010 | 20020 | 20000 | 0 | 30020 | 40000 | 0 | 20005 | 20000 | 0 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115408 | 95414 | 0 | 40010 | 20020 | 20000 | 0 | 30020 | 40000 | 0 | 20005 | 20000 | 0 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115414 | 95424 | 0 | 40010 | 20020 | 20000 | 0 | 30020 | 40000 | 0 | 20005 | 20000 | 0 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115409 | 95414 | 0 | 40010 | 20020 | 20000 | 0 | 30020 | 40000 | 0 | 20005 | 20000 | 0 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115416 | 95426 | 0 | 40010 | 20020 | 20000 | 0 | 30020 | 40000 | 0 | 20006 | 20000 | 0 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115417 | 95423 | 0 | 40010 | 20020 | 20000 | 0 | 30020 | 40000 | 0 | 20006 | 20000 | 0 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115404 | 95405 | 0 | 40010 | 20020 | 20000 | 0 | 30020 | 40000 | 0 | 20005 | 20000 | 0 | 20010 |
40024 | 60062 | 40016 | 20016 | 20000 | 20010 | 20000 | 115404 | 95405 | 0 | 40010 | 20020 | 20000 | 0 | 30020 | 40000 | 0 | 20006 | 20000 | 0 | 20010 |
Code:
staddlb w0, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 10.7718
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
30205 | 112223 | 44707 | 21631 | 0 | 23076 | 12985 | 0 | 23339 | 2045185 | 1978543 | 36320 | 13127 | 25395 | 24685 | 47810 | 20897 | 20000 | 0 | 10100 |
30682 | 109832 | 40441 | 19801 | 0 | 20640 | 10754 | 0 | 20217 | 1923230 | 1900728 | 30501 | 10385 | 20375 | 29904 | 46120 | 23259 | 22711 | 4 | 15275 |
30204 | 110083 | 42870 | 20922 | 0 | 21948 | 12039 | 0 | 20863 | 1976419 | 1919869 | 31695 | 10932 | 21441 | 21550 | 42519 | 19810 | 20000 | 0 | 10100 |
30204 | 107730 | 39671 | 19519 | 0 | 20152 | 10133 | 0 | 20454 | 1944556 | 1897156 | 30973 | 10619 | 20832 | 21430 | 42349 | 19789 | 20000 | 0 | 10100 |
30204 | 107398 | 40192 | 19567 | 0 | 20625 | 10597 | 0 | 20197 | 1883288 | 1865832 | 30456 | 10360 | 20312 | 23946 | 46716 | 20840 | 20000 | 0 | 10100 |
30204 | 110023 | 42795 | 20956 | 0 | 21839 | 11934 | 0 | 21257 | 1975716 | 1923325 | 32423 | 11273 | 22014 | 21593 | 42595 | 19606 | 20000 | 0 | 10100 |
30204 | 107100 | 41336 | 20133 | 0 | 21203 | 11249 | 0 | 20855 | 1900805 | 1882543 | 31596 | 10842 | 21279 | 20584 | 40695 | 19033 | 20000 | 0 | 10100 |
30204 | 107267 | 40723 | 19694 | 0 | 21029 | 11081 | 0 | 20126 | 1919683 | 1902701 | 30317 | 10292 | 20173 | 20648 | 40870 | 19359 | 20000 | 0 | 10100 |
30204 | 105964 | 39603 | 19324 | 0 | 20279 | 10272 | 0 | 21096 | 1980466 | 1924148 | 32090 | 11098 | 21754 | 20912 | 41376 | 19661 | 20000 | 0 | 10100 |
30204 | 107985 | 40710 | 20035 | 0 | 20675 | 10690 | 0 | 20535 | 1955481 | 1909700 | 31101 | 10669 | 20912 | 20734 | 41003 | 19479 | 20000 | 0 | 10100 |
Result (median cycles for code): 11.4072
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
30025 | 114896 | 46561 | 22561 | 24000 | 14109 | 24959 | 2072131 | 1997412 | 39218 | 14272 | 27925 | 28284 | 54593 | 0 | 22276 | 20000 | 0 | 10010 |
30024 | 114862 | 47045 | 22477 | 24568 | 14584 | 24752 | 2097328 | 2019330 | 38822 | 14081 | 27788 | 28624 | 55383 | 0 | 22440 | 20000 | 0 | 10010 |
30024 | 114489 | 46670 | 22438 | 24232 | 14274 | 24821 | 2092704 | 2015379 | 39073 | 14266 | 28013 | 28184 | 54538 | 0 | 22457 | 20000 | 0 | 10010 |
30024 | 114545 | 47004 | 22243 | 24761 | 14807 | 24809 | 2089181 | 2012585 | 39039 | 14245 | 28009 | 28196 | 54484 | 0 | 22046 | 20000 | 0 | 10010 |
30024 | 114140 | 46235 | 22293 | 23942 | 13892 | 24309 | 2063762 | 1990439 | 38169 | 13872 | 27241 | 28184 | 54191 | 0 | 22319 | 20000 | 0 | 10010 |
30024 | 114274 | 46268 | 22726 | 23542 | 13574 | 23945 | 2084529 | 2008284 | 37436 | 13503 | 26693 | 27962 | 54417 | 0 | 22030 | 20000 | 0 | 10010 |
30024 | 113606 | 46012 | 22109 | 23903 | 13924 | 24580 | 2076470 | 2002123 | 38577 | 14009 | 27531 | 28920 | 55885 | 0 | 22059 | 20000 | 0 | 10010 |
30024 | 113800 | 45968 | 22427 | 23541 | 13467 | 24797 | 2081308 | 2004964 | 38871 | 14086 | 27930 | 28442 | 55246 | 0 | 22431 | 20000 | 0 | 10010 |
30024 | 114083 | 46333 | 22183 | 24150 | 14414 | 24857 | 2084953 | 2008551 | 39226 | 14382 | 28182 | 28426 | 54816 | 0 | 22154 | 20000 | 0 | 10010 |
30024 | 114200 | 46363 | 22132 | 24231 | 14344 | 24529 | 2082394 | 2006494 | 38451 | 13936 | 27554 | 28286 | 54472 | 0 | 22168 | 20000 | 0 | 10010 |