Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
staddl w0, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 3.000
Issues: 3.001
Integer unit issues: 1.002
Load/store unit issues: 2.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
73005 | 35460 | 3018 | 1014 | 2004 | 1002 | 2000 | 7845 | 10597 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34539 | 3003 | 1003 | 2000 | 1000 | 2000 | 7760 | 10511 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34485 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34208 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34253 | 3002 | 1002 | 2000 | 1000 | 2000 | 7762 | 10515 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34230 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34223 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34203 | 3002 | 1002 | 2000 | 1000 | 2000 | 7761 | 10512 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34222 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34236 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
Code:
staddl w0, [x6]
add x6, x6, 4
(fused SUBS/B.cc loop)
Result (median cycles for code): 6.0065
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40207 | 60391 | 40335 | 20280 | 20055 | 20169 | 20005 | 115678 | 95960 | 40110 | 20205 | 20005 | 30208 | 40009 | 20008 | 20000 | 20100 |
40204 | 60065 | 40111 | 20108 | 20003 | 20105 | 20002 | 115765 | 95665 | 40104 | 20202 | 20002 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60065 | 40106 | 20106 | 20000 | 20102 | 20002 | 115767 | 95667 | 40104 | 20202 | 20002 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60065 | 40106 | 20106 | 20000 | 20102 | 20002 | 115765 | 95664 | 40104 | 20202 | 20002 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60065 | 40106 | 20106 | 20000 | 20102 | 20002 | 115769 | 95675 | 40104 | 20202 | 20002 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60065 | 40106 | 20106 | 20000 | 20102 | 20002 | 115765 | 95663 | 40104 | 20202 | 20002 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60065 | 40106 | 20106 | 20000 | 20102 | 20002 | 115773 | 95677 | 40104 | 20202 | 20002 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60065 | 40106 | 20106 | 20000 | 20102 | 20002 | 115775 | 95684 | 40104 | 20202 | 20002 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60065 | 40106 | 20106 | 20000 | 20102 | 20002 | 115763 | 95660 | 40104 | 20202 | 20002 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60065 | 40106 | 20106 | 20000 | 20102 | 20002 | 115769 | 95671 | 40104 | 20202 | 20002 | 30203 | 40004 | 20006 | 20000 | 20100 |
Result (median cycles for code): 6.0062
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40027 | 60396 | 40211 | 20157 | 20054 | 20074 | 20002 | 115476 | 95528 | 40014 | 20022 | 20002 | 30073 | 40069 | 20040 | 20000 | 20010 |
40024 | 60067 | 40017 | 20017 | 20000 | 20010 | 20000 | 115466 | 95517 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115479 | 95544 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115497 | 95573 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115476 | 95533 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115483 | 95547 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115493 | 95569 | 40010 | 20020 | 20000 | 30074 | 40070 | 20044 | 20000 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115493 | 95566 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115495 | 95570 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115487 | 95557 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 20010 |
Code:
staddl w0, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 10.7769
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
30205 | 114236 | 46420 | 22368 | 24052 | 14095 | 23057 | 2038712 | 1972134 | 35788 | 12870 | 25055 | 24118 | 47085 | 20843 | 20000 | 10100 |
30204 | 109764 | 42726 | 20813 | 21913 | 12096 | 20393 | 1960991 | 1910696 | 30829 | 10542 | 20665 | 20804 | 41108 | 19316 | 20000 | 10100 |
30204 | 107085 | 39838 | 19490 | 20348 | 10237 | 20725 | 1971745 | 1914032 | 31423 | 10800 | 21181 | 22044 | 43387 | 20221 | 20000 | 10100 |
30204 | 107484 | 40364 | 19668 | 20696 | 10736 | 21563 | 1946746 | 1908271 | 33043 | 11580 | 22642 | 20896 | 41259 | 19629 | 20000 | 10100 |
30204 | 108059 | 40041 | 19710 | 20331 | 10332 | 21190 | 1973282 | 1922522 | 32309 | 11221 | 22029 | 22078 | 43531 | 20133 | 20000 | 10100 |
30204 | 108186 | 41383 | 20235 | 21148 | 11213 | 20941 | 1993964 | 1931932 | 31870 | 11035 | 21621 | 20424 | 40435 | 19430 | 20000 | 10100 |
30204 | 109466 | 42504 | 20749 | 21755 | 11704 | 21965 | 1997251 | 1936513 | 33752 | 11889 | 23192 | 22134 | 43607 | 20054 | 20000 | 10100 |
30204 | 107260 | 40317 | 19716 | 20601 | 10561 | 20758 | 1960449 | 1905580 | 31515 | 10857 | 21316 | 21518 | 42518 | 19867 | 20000 | 10100 |
30204 | 106944 | 40106 | 19508 | 20598 | 10566 | 20988 | 1941276 | 1902957 | 31891 | 11004 | 21599 | 22192 | 43700 | 20117 | 20000 | 10100 |
30204 | 106293 | 39898 | 19522 | 20376 | 10455 | 20519 | 1947573 | 1908154 | 31023 | 10605 | 20805 | 21892 | 43161 | 20010 | 20000 | 10100 |
Result (median cycles for code): 11.4091
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
30025 | 114961 | 47483 | 22590 | 24893 | 14980 | 24813 | 2083617 | 2007152 | 39168 | 14367 | 28169 | 29098 | 56520 | 22417 | 20000 | 10010 |
30024 | 114777 | 47141 | 22669 | 24472 | 14332 | 24377 | 2107468 | 2028202 | 38171 | 13810 | 27123 | 28836 | 55253 | 22432 | 20000 | 10010 |
30024 | 114068 | 46387 | 22236 | 24151 | 13938 | 24595 | 2087786 | 2011546 | 38681 | 14102 | 27708 | 28004 | 54572 | 22636 | 20000 | 10010 |
30024 | 113587 | 45989 | 22376 | 23613 | 13622 | 24754 | 2084461 | 2008371 | 38917 | 14176 | 28005 | 27244 | 53507 | 22365 | 20000 | 10010 |
30027 | 114631 | 46493 | 22242 | 24251 | 14214 | 24632 | 2069769 | 1996063 | 38767 | 14150 | 27867 | 27300 | 53394 | 22461 | 20000 | 10010 |
30024 | 113815 | 46254 | 22218 | 24036 | 13968 | 24770 | 2071143 | 1997765 | 39077 | 14320 | 27996 | 27976 | 54395 | 22371 | 20000 | 10010 |
30024 | 113705 | 45978 | 22260 | 23718 | 14001 | 25035 | 2070728 | 1997519 | 39714 | 14694 | 28678 | 27312 | 52867 | 22324 | 20000 | 10010 |
30024 | 114298 | 46807 | 22467 | 24340 | 14146 | 24566 | 2081733 | 2006987 | 38685 | 14133 | 27785 | 27320 | 53464 | 22348 | 20000 | 10010 |
30024 | 113745 | 46318 | 22354 | 23964 | 14093 | 24697 | 2070323 | 1995548 | 38808 | 14124 | 28023 | 27697 | 53629 | 22532 | 20000 | 10010 |
30024 | 113627 | 46034 | 22173 | 23861 | 14286 | 24863 | 2063738 | 1990011 | 39225 | 14379 | 28275 | 27112 | 52890 | 22253 | 20000 | 10010 |