Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
steorlh w0, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 3.000
Issues: 3.002
Integer unit issues: 1.003
Load/store unit issues: 2.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
73005 | 34458 | 3018 | 1014 | 2004 | 1002 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34436 | 3004 | 1004 | 2000 | 1000 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34343 | 3003 | 1003 | 2000 | 1000 | 2000 | 7773 | 10527 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34463 | 3003 | 1003 | 2000 | 1000 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34103 | 3003 | 1003 | 2000 | 1000 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34099 | 3003 | 1003 | 2000 | 1000 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34100 | 3003 | 1003 | 2000 | 1000 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34101 | 3003 | 1003 | 2000 | 1000 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34087 | 3003 | 1003 | 2000 | 1000 | 2000 | 7775 | 10531 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34102 | 3003 | 1003 | 2000 | 1000 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
Code:
steorlh w0, [x6]
add x6, x6, 2
(fused SUBS/B.cc loop)
Result (median cycles for code): 6.0058
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40206 | 60635 | 40282 | 20234 | 20048 | 20150 | 20005 | 115437 | 95713 | 40110 | 20205 | 20005 | 30208 | 40009 | 20006 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20002 | 115506 | 95382 | 40104 | 20202 | 20002 | 30203 | 40004 | 20004 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20002 | 115510 | 95391 | 40104 | 20202 | 20002 | 30203 | 40004 | 20004 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20002 | 115521 | 95410 | 40104 | 20202 | 20002 | 30203 | 40004 | 20004 | 20000 | 20100 |
40204 | 60055 | 40106 | 20106 | 20000 | 20102 | 20002 | 115508 | 95387 | 40104 | 20202 | 20002 | 30203 | 40004 | 20004 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20002 | 115514 | 95396 | 40104 | 20202 | 20002 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20002 | 115514 | 95395 | 40104 | 20202 | 20002 | 30203 | 40004 | 20004 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20036 | 111453 | 97846 | 40172 | 20236 | 20036 | 30203 | 40004 | 20004 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20002 | 115512 | 95392 | 40104 | 20202 | 20002 | 30203 | 40004 | 20004 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20033 | 108024 | 99025 | 40168 | 20235 | 20033 | 30203 | 40004 | 20004 | 20000 | 20100 |
Result (median cycles for code): 6.0058
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40026 | 60282 | 40145 | 20107 | 20038 | 20060 | 20002 | 115496 | 95499 | 40014 | 20022 | 20002 | 30020 | 40000 | 20006 | 20000 | 20010 |
40024 | 60065 | 40016 | 20016 | 20000 | 20010 | 20000 | 115370 | 95384 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 20010 |
40024 | 60058 | 40015 | 20015 | 20000 | 20010 | 20000 | 115362 | 95372 | 40010 | 20020 | 20000 | 30020 | 40000 | 20007 | 20000 | 20010 |
40024 | 60058 | 40015 | 20015 | 20000 | 20010 | 20000 | 115370 | 95385 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 20010 |
40024 | 60058 | 40015 | 20015 | 20000 | 20010 | 20000 | 115360 | 95366 | 40010 | 20020 | 20000 | 30020 | 40000 | 20007 | 20000 | 20010 |
40024 | 60058 | 40015 | 20015 | 20000 | 20010 | 20000 | 115364 | 95372 | 40010 | 20020 | 20000 | 30023 | 40004 | 20006 | 20000 | 20010 |
40024 | 60058 | 40015 | 20015 | 20000 | 20010 | 20000 | 115368 | 95383 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 20010 |
40024 | 60058 | 40015 | 20015 | 20000 | 20010 | 20000 | 115357 | 95360 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 20010 |
40024 | 60058 | 40015 | 20015 | 20000 | 20010 | 20000 | 115354 | 95355 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 20010 |
40024 | 60058 | 40015 | 20015 | 20000 | 20010 | 20000 | 115356 | 95360 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 20010 |
Code:
steorlh w0, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 10.7472
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
30205 | 108933 | 41157 | 20282 | 20875 | 10852 | 20861 | 1958449 | 1911373 | 31693 | 10932 | 21450 | 20526 | 40612 | 18978 | 20000 | 10100 |
30204 | 109209 | 40820 | 19712 | 21108 | 11063 | 20524 | 1919100 | 1898513 | 31036 | 10613 | 20829 | 20304 | 40192 | 19476 | 20000 | 10100 |
30204 | 109763 | 42811 | 20825 | 21986 | 12144 | 21704 | 2000864 | 1937969 | 33303 | 11726 | 22894 | 25041 | 48720 | 21150 | 20000 | 10100 |
30204 | 107600 | 40316 | 19739 | 20577 | 10578 | 20556 | 1953235 | 1901177 | 31097 | 10641 | 20818 | 20254 | 40101 | 18996 | 20000 | 10100 |
30204 | 106710 | 39760 | 19382 | 20378 | 10385 | 20183 | 1952623 | 1908303 | 30417 | 10334 | 20249 | 22524 | 44063 | 20091 | 20000 | 10100 |
30204 | 107650 | 41314 | 20139 | 21175 | 11170 | 20359 | 1927561 | 1888617 | 30776 | 10517 | 20622 | 20262 | 40107 | 19104 | 20000 | 10100 |
30204 | 107993 | 40750 | 20032 | 20718 | 10778 | 20767 | 1969784 | 1913662 | 31512 | 10847 | 21282 | 26524 | 51398 | 21636 | 20000 | 10100 |
30204 | 110350 | 42841 | 20951 | 21890 | 12040 | 22463 | 2033492 | 1967885 | 34717 | 12383 | 24077 | 21447 | 42386 | 20111 | 20000 | 10100 |
30204 | 108566 | 40995 | 20151 | 20844 | 10897 | 21128 | 1951935 | 1916055 | 32162 | 11176 | 21760 | 21116 | 41750 | 19783 | 20000 | 10100 |
30204 | 106318 | 40051 | 19571 | 20480 | 10416 | 20748 | 1951748 | 1904059 | 31446 | 10798 | 21121 | 24434 | 47632 | 20608 | 20000 | 10100 |
Result (median cycles for code): 11.4086
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
30025 | 115045 | 47375 | 22486 | 24889 | 14903 | 25021 | 2094822 | 2017987 | 39496 | 14487 | 28157 | 26820 | 52185 | 22811 | 20000 | 0 | 10010 |
30024 | 114478 | 46183 | 22213 | 23970 | 14730 | 24787 | 2075998 | 2001127 | 39143 | 14371 | 28124 | 27622 | 53267 | 22475 | 20000 | 0 | 10010 |
30024 | 113707 | 46446 | 22213 | 24233 | 14535 | 25253 | 2069260 | 1994689 | 39933 | 14695 | 28784 | 27910 | 54285 | 22476 | 20000 | 0 | 10010 |
30024 | 114396 | 46575 | 22310 | 24265 | 14265 | 24268 | 2098542 | 2020747 | 37963 | 13706 | 26873 | 28067 | 54388 | 22439 | 20000 | 0 | 10010 |
30024 | 113692 | 46019 | 22471 | 23548 | 13560 | 24532 | 2079624 | 2003995 | 38655 | 14136 | 27833 | 27705 | 53881 | 22347 | 20000 | 0 | 10010 |
30024 | 113673 | 46147 | 22091 | 24056 | 14439 | 23667 | 2087218 | 2010155 | 36886 | 13232 | 26146 | 28422 | 54922 | 22153 | 20000 | 0 | 10010 |
30024 | 114071 | 46508 | 22285 | 24223 | 14138 | 24336 | 2084957 | 2008941 | 38160 | 13836 | 27208 | 28094 | 54477 | 22287 | 20000 | 0 | 10010 |
30024 | 114086 | 47057 | 22375 | 24682 | 14563 | 24793 | 2070500 | 1997038 | 39003 | 14223 | 27996 | 27019 | 52876 | 22109 | 20000 | 0 | 10010 |
30024 | 113573 | 45789 | 22196 | 23593 | 13898 | 24668 | 2077940 | 2002505 | 38731 | 14076 | 27921 | 27972 | 54123 | 22267 | 20000 | 0 | 10010 |
30024 | 113658 | 46812 | 22330 | 24482 | 14437 | 24433 | 2071133 | 1997232 | 38340 | 13918 | 27524 | 59038 | 102775 | 43855 | 38682 | 4 | 23078 |