Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
caspa w0, w1, w2, w3, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 6.000
Issues: 3.000
Integer unit issues: 0.001
Load/store unit issues: 3.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
76007 | 34882 | 3007 | 1 | 3006 | 3000 | 0 | 11001 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
76004 | 34512 | 3001 | 1 | 3000 | 3000 | 0 | 11001 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
76004 | 34498 | 3001 | 1 | 3000 | 3000 | 0 | 11001 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
76004 | 34501 | 3001 | 1 | 3000 | 3000 | 0 | 11001 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
76004 | 34441 | 3001 | 1 | 3000 | 3000 | 0 | 11001 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
76004 | 34583 | 3001 | 1 | 3000 | 3000 | 0 | 11001 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
76005 | 34622 | 3004 | 1 | 3003 | 3000 | 0 | 11007 | 3000 | 2000 | 4000 | 2002 | 7007 | 1 | 3000 | 3000 |
76004 | 34444 | 3001 | 1 | 3000 | 3000 | 0 | 11001 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
76004 | 34535 | 3001 | 1 | 3000 | 3000 | 0 | 11001 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
76004 | 34496 | 3001 | 1 | 3000 | 3000 | 0 | 11001 | 3000 | 2000 | 4000 | 2000 | 7000 | 1 | 3000 | 3000 |
Code:
caspa w0, w1, w2, w3, [x6]
add x6, x6, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 17.0055
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
70220 | 170603 | 51195 | 21104 | 30091 | 21051 | 30135 | 69302 | 646954 | 53056 | 30336 | 40180 | 30303 | 70238 | 0 | 22802 | 30000 | 0 | 40100 |
70204 | 170253 | 52935 | 22876 | 30059 | 22871 | 30069 | 68942 | 645632 | 52941 | 30268 | 40092 | 30268 | 70161 | 0 | 22780 | 30000 | 0 | 40100 |
70204 | 170492 | 53042 | 22925 | 30117 | 22920 | 30003 | 68665 | 644671 | 52830 | 30202 | 40004 | 30268 | 70161 | 0 | 22771 | 30000 | 0 | 40100 |
70204 | 173441 | 54226 | 23442 | 30784 | 23437 | 30102 | 68345 | 646032 | 52751 | 30301 | 40136 | 30202 | 70007 | 0 | 22732 | 30000 | 0 | 40100 |
70204 | 170052 | 52833 | 22832 | 30001 | 22827 | 30003 | 68665 | 644671 | 52830 | 30202 | 40004 | 30202 | 70007 | 0 | 22732 | 30000 | 0 | 40100 |
70204 | 170052 | 52833 | 22832 | 30001 | 22827 | 30003 | 68665 | 644671 | 52830 | 30202 | 40004 | 30202 | 70007 | 0 | 22732 | 30000 | 0 | 40100 |
70204 | 170052 | 52833 | 22832 | 30001 | 22827 | 30003 | 68665 | 644671 | 52830 | 30202 | 40004 | 30202 | 70007 | 0 | 22732 | 30000 | 0 | 40100 |
70205 | 170141 | 51816 | 21786 | 30030 | 21781 | 30036 | 64303 | 645134 | 51374 | 30235 | 40048 | 30202 | 70007 | 0 | 22732 | 30000 | 0 | 40100 |
70204 | 170052 | 52833 | 22832 | 30001 | 22827 | 30003 | 68665 | 644685 | 52830 | 30202 | 40004 | 30202 | 70007 | 0 | 22732 | 30000 | 0 | 40100 |
70205 | 170113 | 52246 | 22216 | 30030 | 22211 | 30003 | 69929 | 645827 | 52830 | 30202 | 40004 | 30202 | 70007 | 0 | 22732 | 30000 | 0 | 40100 |
Result (median cycles for code): 17.0058
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
70039 | 170591 | 51053 | 20991 | 30062 | 20938 | 30003 | 68439 | 644791 | 52740 | 30022 | 40004 | 30022 | 70007 | 22734 | 30000 | 0 | 40010 |
70025 | 170100 | 52782 | 22752 | 30030 | 22745 | 30003 | 68439 | 644754 | 52740 | 30022 | 40004 | 30022 | 70007 | 22734 | 30000 | 0 | 40010 |
70024 | 170058 | 52745 | 22744 | 30001 | 22737 | 30003 | 68439 | 644768 | 52740 | 30022 | 40004 | 30022 | 70007 | 22734 | 30000 | 0 | 40010 |
70024 | 170058 | 52745 | 22744 | 30001 | 22737 | 30003 | 68439 | 644754 | 52740 | 30022 | 40004 | 30022 | 70007 | 22734 | 30000 | 0 | 40010 |
70024 | 170058 | 52745 | 22744 | 30001 | 22737 | 30003 | 68439 | 644754 | 52740 | 30022 | 40004 | 30055 | 70084 | 21844 | 30000 | 0 | 40010 |
70024 | 170058 | 52745 | 22744 | 30001 | 22737 | 30003 | 68439 | 644754 | 52740 | 30022 | 40004 | 30022 | 70007 | 22734 | 30000 | 0 | 40010 |
70024 | 170058 | 52745 | 22744 | 30001 | 22737 | 30003 | 68439 | 644754 | 52740 | 30022 | 40004 | 30022 | 70007 | 22734 | 30000 | 0 | 40010 |
70024 | 170058 | 52745 | 22744 | 30001 | 22737 | 30003 | 68439 | 644754 | 52740 | 30022 | 40004 | 30022 | 70007 | 22734 | 30000 | 0 | 40010 |
70024 | 170058 | 52745 | 22744 | 30001 | 22737 | 30003 | 68439 | 644754 | 52740 | 30022 | 40004 | 30022 | 70007 | 22734 | 30000 | 0 | 40010 |
70024 | 170058 | 52745 | 22744 | 30001 | 22737 | 30003 | 68439 | 644754 | 52740 | 30022 | 40004 | 30022 | 70007 | 22734 | 30000 | 0 | 40010 |
Code:
caspa w0, w1, w2, w3, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 15.0251
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
60209 | 150162 | 51997 | 21959 | 30038 | 11032 | 30030 | 1781151 | 1694318 | 42868 | 20220 | 40040 | 20220 | 70070 | 25454 | 30000 | 30104 |
60211 | 150204 | 53811 | 23761 | 30050 | 11965 | 30078 | 1574724 | 1697387 | 41441 | 20252 | 40104 | 20220 | 70070 | 25322 | 30000 | 30104 |
60206 | 150340 | 55510 | 25493 | 30017 | 12836 | 30030 | 1785604 | 1699988 | 42867 | 20220 | 40040 | 20220 | 70070 | 25385 | 30000 | 30104 |
60208 | 150628 | 55441 | 25422 | 30019 | 12837 | 30024 | 1788658 | 1704272 | 42860 | 20216 | 40032 | 20216 | 70056 | 25322 | 30000 | 30102 |
60206 | 150628 | 55439 | 25422 | 30017 | 12836 | 30024 | 1784905 | 1699326 | 42860 | 20216 | 40032 | 20216 | 70056 | 25385 | 30000 | 30102 |
60209 | 150327 | 52846 | 22798 | 30048 | 11495 | 30024 | 1784905 | 1699326 | 42860 | 20216 | 40032 | 20216 | 70056 | 25385 | 30000 | 30102 |
60208 | 150052 | 55575 | 25556 | 30019 | 12837 | 30069 | 1630034 | 1698530 | 41814 | 20248 | 40092 | 20220 | 70070 | 25456 | 30000 | 30104 |
60208 | 150055 | 55575 | 25556 | 30019 | 12837 | 30030 | 1781157 | 1694270 | 42867 | 20220 | 40040 | 20220 | 70070 | 25456 | 30000 | 30104 |
60208 | 150052 | 55575 | 25556 | 30019 | 12837 | 30021 | 1782349 | 1692078 | 42858 | 20214 | 40028 | 20220 | 70070 | 25458 | 30000 | 30104 |
60208 | 150054 | 55575 | 25556 | 30019 | 12838 | 30030 | 1785015 | 1699237 | 42868 | 20220 | 40040 | 20220 | 70070 | 25458 | 30000 | 30104 |
Result (median cycles for code): 15.0054
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
60029 | 150164 | 51901 | 21863 | 30038 | 10942 | 30027 | 1780901 | 1694462 | 42773 | 20038 | 40036 | 20038 | 70063 | 25452 | 30000 | 30014 |
60028 | 150084 | 55479 | 25472 | 30007 | 12747 | 30003 | 1781360 | 1694551 | 42740 | 20022 | 40004 | 20022 | 70007 | 25453 | 30000 | 30010 |
60024 | 150048 | 55464 | 25463 | 30001 | 12737 | 30003 | 1781348 | 1694537 | 42740 | 20022 | 40004 | 20020 | 70000 | 25455 | 30000 | 30010 |
60024 | 150048 | 55465 | 25465 | 30000 | 12737 | 30000 | 1781348 | 1694523 | 42737 | 20020 | 40000 | 20020 | 70000 | 25455 | 30000 | 30010 |
60024 | 150048 | 55465 | 25465 | 30000 | 12737 | 30042 | 1729564 | 1694443 | 42418 | 20048 | 40056 | 20038 | 70063 | 25458 | 30000 | 30014 |
60024 | 150048 | 55465 | 25465 | 30000 | 12737 | 30000 | 1781348 | 1694523 | 42737 | 20020 | 40000 | 20020 | 70000 | 25455 | 30000 | 30010 |
60024 | 150048 | 55465 | 25465 | 30000 | 12737 | 30000 | 1781348 | 1694525 | 42737 | 20020 | 40000 | 20048 | 70098 | 22713 | 30000 | 30012 |
60028 | 150088 | 55478 | 25460 | 30018 | 12747 | 30000 | 1781779 | 1695069 | 42737 | 20020 | 40000 | 20020 | 70000 | 25445 | 30000 | 30010 |
60024 | 150080 | 55455 | 25455 | 30000 | 12737 | 30033 | 1710395 | 1694755 | 42270 | 20042 | 40044 | 20038 | 70063 | 25454 | 30000 | 30014 |
60028 | 150052 | 55484 | 25466 | 30018 | 12746 | 30027 | 1780865 | 1694424 | 42773 | 20038 | 40036 | 20038 | 70063 | 25456 | 30000 | 30014 |