Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
staddl x0, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 3.000
Issues: 3.002
Integer unit issues: 1.003
Load/store unit issues: 2.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
73005 | 34213 | 3018 | 1014 | 2004 | 1002 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34069 | 3003 | 1003 | 2000 | 1000 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34076 | 3003 | 1003 | 2000 | 1000 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34073 | 3003 | 1003 | 2000 | 1000 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34073 | 3003 | 1003 | 2000 | 1000 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34073 | 3003 | 1003 | 2000 | 1000 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34076 | 3003 | 1003 | 2000 | 1000 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34073 | 3003 | 1003 | 2000 | 1000 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34073 | 3003 | 1003 | 2000 | 1000 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34073 | 3003 | 1003 | 2000 | 1000 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
Code:
staddl x0, [x6]
add x6, x6, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 6.0065
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40209 | 60616 | 40458 | 20368 | 20090 | 20203 | 20005 | 115957 | 96491 | 40110 | 20205 | 20005 | 30208 | 40009 | 20008 | 20000 | 20100 |
40204 | 60062 | 40106 | 20106 | 20000 | 20102 | 20002 | 116074 | 96260 | 40104 | 20202 | 20002 | 30251 | 40068 | 20043 | 20000 | 20100 |
40204 | 60062 | 40106 | 20106 | 20000 | 20102 | 20002 | 116084 | 96277 | 40104 | 20202 | 20002 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60062 | 40106 | 20106 | 20000 | 20102 | 20002 | 116078 | 96265 | 40104 | 20202 | 20002 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60062 | 40106 | 20106 | 20000 | 20102 | 20002 | 116082 | 96276 | 40104 | 20202 | 20002 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60062 | 40106 | 20106 | 20000 | 20102 | 20002 | 116078 | 96267 | 40104 | 20202 | 20002 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60062 | 40106 | 20106 | 20000 | 20102 | 20002 | 116074 | 96261 | 40104 | 20202 | 20002 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60062 | 40106 | 20106 | 20000 | 20102 | 20002 | 116146 | 96394 | 40104 | 20202 | 20002 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60062 | 40106 | 20106 | 20000 | 20102 | 20005 | 116144 | 96603 | 40110 | 20205 | 20005 | 30208 | 40009 | 20019 | 20000 | 20100 |
40204 | 60058 | 40117 | 20117 | 20000 | 20102 | 20002 | 116181 | 96341 | 40104 | 20202 | 20002 | 30208 | 40009 | 20018 | 20000 | 20100 |
Result (median cycles for code): 6.0062
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40029 | 60611 | 40356 | 20273 | 20083 | 20106 | 20002 | 115919 | 96188 | 40014 | 20022 | 20002 | 30118 | 40129 | 20073 | 20000 | 20010 |
40024 | 60089 | 40016 | 20016 | 20000 | 20012 | 20005 | 114853 | 99142 | 40020 | 20025 | 20005 | 30020 | 40000 | 20006 | 20000 | 20010 |
40024 | 60062 | 40016 | 20016 | 20000 | 20010 | 20000 | 115825 | 96112 | 40010 | 20020 | 20000 | 30020 | 40000 | 20006 | 20000 | 20010 |
40024 | 60062 | 40016 | 20016 | 20000 | 20010 | 20000 | 115835 | 96128 | 40010 | 20020 | 20000 | 30020 | 40000 | 20006 | 20000 | 20010 |
40024 | 60062 | 40016 | 20016 | 20000 | 20010 | 20000 | 115841 | 96139 | 40010 | 20020 | 20000 | 30020 | 40000 | 20006 | 20000 | 20010 |
40024 | 60062 | 40016 | 20016 | 20000 | 20010 | 20000 | 115813 | 96090 | 40010 | 20020 | 20000 | 30020 | 40000 | 20006 | 20000 | 20010 |
40024 | 60062 | 40016 | 20016 | 20000 | 20010 | 20000 | 115790 | 96049 | 40010 | 20020 | 20000 | 30113 | 40122 | 20068 | 20000 | 20010 |
40024 | 60062 | 40016 | 20016 | 20000 | 20010 | 20060 | 91283 | 106620 | 40131 | 20081 | 20060 | 30020 | 40000 | 20006 | 20000 | 20010 |
40024 | 60062 | 40016 | 20016 | 20000 | 20010 | 20000 | 115835 | 96129 | 40010 | 20020 | 20000 | 30074 | 40070 | 20042 | 20000 | 20010 |
40024 | 60062 | 40016 | 20016 | 20000 | 20010 | 20000 | 115841 | 96138 | 40010 | 20020 | 20000 | 30020 | 40000 | 20006 | 20000 | 20010 |
Code:
staddl x0, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 10.7726
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
30205 | 114218 | 46442 | 22379 | 24063 | 14151 | 20881 | 1961680 | 1909648 | 31771 | 10990 | 21530 | 22376 | 44080 | 20268 | 20000 | 10100 |
30205 | 107956 | 41080 | 20021 | 21059 | 11064 | 20354 | 1943910 | 1899278 | 30763 | 10513 | 20590 | 21150 | 41741 | 19632 | 20000 | 10100 |
30204 | 107983 | 41201 | 20162 | 21039 | 10884 | 21337 | 1960480 | 1920765 | 32601 | 11368 | 22242 | 23170 | 45363 | 20340 | 20000 | 10100 |
30204 | 107449 | 40886 | 19872 | 21014 | 10987 | 20543 | 1926832 | 1894327 | 31130 | 10701 | 20934 | 20795 | 41064 | 19404 | 20000 | 10100 |
30204 | 107557 | 39478 | 19255 | 20223 | 10153 | 20231 | 1947478 | 1901512 | 30537 | 10406 | 20386 | 21628 | 42632 | 19815 | 20000 | 10100 |
30205 | 106824 | 40403 | 19664 | 20739 | 10708 | 20549 | 1949479 | 1898175 | 31094 | 10645 | 20824 | 20826 | 41213 | 19490 | 20000 | 10100 |
30204 | 106836 | 40164 | 19628 | 20536 | 10608 | 20533 | 1960653 | 1907994 | 31078 | 10646 | 20870 | 20384 | 40340 | 19319 | 20000 | 10100 |
30204 | 108757 | 41522 | 20332 | 21190 | 11117 | 20560 | 1947298 | 1898424 | 31133 | 10673 | 20920 | 22402 | 44021 | 19834 | 20000 | 10100 |
30205 | 107384 | 40250 | 19649 | 20601 | 10561 | 22489 | 1923527 | 1855428 | 36935 | 15951 | 23966 | 22464 | 43968 | 20113 | 20000 | 10100 |
30204 | 109997 | 42358 | 20721 | 21637 | 11730 | 21602 | 1988804 | 1931981 | 33123 | 11650 | 22694 | 21444 | 42231 | 19776 | 20000 | 10100 |
Result (median cycles for code): 11.4086
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
30025 | 115208 | 47896 | 22550 | 25346 | 14489 | 24502 | 2089961 | 2013078 | 38414 | 13923 | 27644 | 29437 | 56569 | 22400 | 20000 | 10010 |
30024 | 114547 | 46842 | 22617 | 24225 | 14161 | 24835 | 2084767 | 2008334 | 39209 | 14388 | 28231 | 27512 | 53665 | 22219 | 20000 | 10010 |
30024 | 113640 | 45773 | 22167 | 23606 | 14206 | 25217 | 2082521 | 2007300 | 40003 | 14801 | 28933 | 29038 | 55843 | 22474 | 20000 | 10010 |
30024 | 113777 | 46213 | 22141 | 24072 | 14241 | 24587 | 2088592 | 2012049 | 38841 | 14269 | 27969 | 28571 | 54989 | 22120 | 20000 | 10010 |
30024 | 113751 | 46192 | 22311 | 23881 | 13962 | 24550 | 2078771 | 2003041 | 38653 | 14119 | 27719 | 27730 | 54233 | 22289 | 20000 | 10010 |
30024 | 114253 | 46439 | 22281 | 24158 | 14414 | 24832 | 2080107 | 2004240 | 39032 | 14213 | 28031 | 27620 | 53880 | 22526 | 20000 | 10010 |
30024 | 114631 | 47048 | 22839 | 24209 | 14035 | 25238 | 2089793 | 2012997 | 39877 | 14651 | 28632 | 27734 | 54077 | 22588 | 20000 | 10010 |
30025 | 114665 | 46648 | 22406 | 24242 | 14363 | 24740 | 2081494 | 2005603 | 38956 | 14232 | 27911 | 26935 | 52968 | 22660 | 20000 | 10010 |
30024 | 113926 | 45933 | 22176 | 23757 | 14015 | 24623 | 2082827 | 2007394 | 38775 | 14165 | 27710 | 28210 | 55002 | 22471 | 20000 | 10010 |
30024 | 113959 | 46356 | 22245 | 24111 | 14449 | 24703 | 2088475 | 2011551 | 39092 | 14399 | 28092 | 28088 | 54424 | 22413 | 20000 | 10010 |