Apple Microarchitecture Research by Dougall Johnson M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
stclrl x0, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 3.000
Issues: 3.001
Integer unit issues: 1.002
Load/store unit issues: 2.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
73005 | 34592 | 3019 | 1015 | 2004 | 1002 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34229 | 3003 | 1003 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34187 | 3002 | 1002 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34189 | 3002 | 1002 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 35099 | 3002 | 1002 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34282 | 3002 | 1002 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34417 | 3002 | 1002 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34295 | 3002 | 1002 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34339 | 3002 | 1002 | 2000 | 1000 | 2000 | 7769 | 10527 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 35031 | 3002 | 1002 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
Code:
stclrl x0, [x6]
add x6, x6, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 6.0058
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40209 | 60622 | 40463 | 20372 | 20091 | 20201 | 20005 | 115941 | 96463 | 40110 | 20205 | 20005 | 30203 | 40004 | 20007 | 20000 | 20100 |
40205 | 60110 | 40172 | 20143 | 20029 | 20134 | 20002 | 115941 | 96134 | 40104 | 20202 | 20002 | 30254 | 40070 | 20043 | 20000 | 20100 |
40204 | 60078 | 40107 | 20107 | 20000 | 20102 | 20002 | 115899 | 96067 | 40104 | 20202 | 20002 | 30203 | 40004 | 20003 | 20000 | 20100 |
40204 | 60055 | 40103 | 20103 | 20000 | 20102 | 20002 | 115907 | 96091 | 40104 | 20202 | 20002 | 30203 | 40004 | 20003 | 20000 | 20100 |
40204 | 60055 | 40103 | 20103 | 20000 | 20102 | 20002 | 115917 | 96109 | 40104 | 20202 | 20002 | 30203 | 40004 | 20003 | 20000 | 20100 |
40204 | 60055 | 40103 | 20103 | 20000 | 20102 | 20002 | 115913 | 96105 | 40104 | 20202 | 20002 | 30203 | 40004 | 20003 | 20000 | 20100 |
40204 | 60055 | 40103 | 20103 | 20000 | 20102 | 20036 | 116176 | 96662 | 40172 | 20236 | 20036 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60087 | 40117 | 20117 | 20000 | 20102 | 20005 | 116016 | 96525 | 40110 | 20205 | 20005 | 30208 | 40009 | 20019 | 20000 | 20100 |
40204 | 60058 | 40117 | 20117 | 20000 | 20102 | 20002 | 116151 | 96318 | 40104 | 20202 | 20002 | 30203 | 40004 | 20017 | 20000 | 20100 |
40204 | 60058 | 40117 | 20117 | 20000 | 20102 | 20002 | 116151 | 96316 | 40104 | 20202 | 20002 | 30203 | 40004 | 20016 | 20000 | 20100 |
Result (median cycles for code): 6.0055
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40029 | 60624 | 40364 | 20277 | 20087 | 20107 | 20002 | 115848 | 96152 | 40014 | 20022 | 20002 | 30023 | 40004 | 20006 | 20000 | 20010 |
40024 | 60055 | 40013 | 20013 | 20000 | 20010 | 20000 | 115717 | 96051 | 40010 | 20020 | 20000 | 30020 | 40000 | 20003 | 20000 | 20010 |
40024 | 60055 | 40013 | 20013 | 20000 | 20010 | 20000 | 115711 | 96034 | 40010 | 20020 | 20000 | 30020 | 40000 | 20003 | 20000 | 20010 |
40025 | 60103 | 40083 | 20051 | 20032 | 20044 | 20000 | 115739 | 96090 | 40010 | 20020 | 20000 | 30020 | 40000 | 20003 | 20000 | 20010 |
40024 | 60055 | 40013 | 20013 | 20000 | 20010 | 20000 | 115713 | 96040 | 40010 | 20020 | 20000 | 30020 | 40000 | 20003 | 20000 | 20010 |
40024 | 60055 | 40013 | 20013 | 20000 | 20010 | 20000 | 115714 | 96045 | 40010 | 20020 | 20000 | 30020 | 40000 | 20003 | 20000 | 20010 |
40024 | 60055 | 40013 | 20013 | 20000 | 20010 | 20000 | 115727 | 96069 | 40010 | 20020 | 20000 | 30020 | 40000 | 20003 | 20000 | 20010 |
40024 | 60055 | 40013 | 20013 | 20000 | 20010 | 20000 | 115711 | 96039 | 40010 | 20020 | 20000 | 30020 | 40000 | 20003 | 20000 | 20010 |
40024 | 60055 | 40013 | 20013 | 20000 | 20010 | 20000 | 115727 | 96070 | 40010 | 20020 | 20000 | 30020 | 40000 | 20003 | 20000 | 20010 |
40025 | 60103 | 40086 | 20052 | 20034 | 20046 | 20000 | 115676 | 95978 | 40010 | 20020 | 20000 | 30020 | 40000 | 20008 | 20000 | 20010 |
Code:
stclrl x0, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 10.7704
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
30205 | 111788 | 44200 | 21250 | 22950 | 13160 | 22341 | 2022267 | 1957274 | 34504 | 12318 | 23877 | 22882 | 44812 | 0 | 20438 | 20000 | 0 | 10100 |
30204 | 109567 | 42126 | 20671 | 21455 | 11522 | 20767 | 1970663 | 1922152 | 31516 | 10850 | 21290 | 21726 | 42800 | 0 | 19711 | 20000 | 0 | 10100 |
30204 | 106879 | 39595 | 19315 | 20280 | 10149 | 20608 | 1962260 | 1912382 | 31241 | 10733 | 21042 | 21052 | 41546 | 0 | 19836 | 20000 | 0 | 10100 |
30204 | 107222 | 39580 | 19352 | 20228 | 10178 | 20024 | 1927782 | 1901230 | 30143 | 10219 | 20036 | 20940 | 41404 | 0 | 19551 | 20000 | 0 | 10100 |
30204 | 107704 | 40934 | 19971 | 20963 | 10881 | 20403 | 1949419 | 1903631 | 30848 | 10545 | 20670 | 20600 | 40742 | 0 | 19334 | 20000 | 0 | 10100 |
30204 | 106986 | 40084 | 19511 | 20573 | 10621 | 20342 | 1933437 | 1893900 | 30723 | 10481 | 20567 | 21584 | 42618 | 0 | 19672 | 20000 | 0 | 10100 |
30204 | 106382 | 39806 | 19340 | 20466 | 10301 | 20504 | 1943233 | 1898973 | 30985 | 10581 | 20743 | 20714 | 40956 | 0 | 19407 | 20000 | 0 | 10100 |
30204 | 107553 | 40501 | 19878 | 20623 | 10656 | 20455 | 1937184 | 1898715 | 30929 | 10574 | 20698 | 21720 | 42870 | 0 | 19824 | 20000 | 0 | 10100 |
30204 | 108106 | 40452 | 19922 | 20530 | 10536 | 20350 | 1973419 | 1917407 | 30760 | 10544 | 20590 | 20856 | 40957 | 0 | 19523 | 20000 | 0 | 10100 |
30204 | 105867 | 39649 | 19208 | 20441 | 10326 | 20672 | 1942539 | 1895899 | 31413 | 10845 | 21265 | 21150 | 41841 | 0 | 19593 | 20000 | 0 | 10100 |
Result (median cycles for code): 11.4154
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
30025 | 114608 | 46303 | 22649 | 23654 | 14190 | 24791 | 2081601 | 2004669 | 38793 | 14015 | 27714 | 28954 | 55824 | 22233 | 20000 | 10010 |
30024 | 113939 | 46134 | 22379 | 23755 | 13710 | 24366 | 2079185 | 2004148 | 38210 | 13860 | 27419 | 27073 | 53005 | 22424 | 20000 | 10010 |
30024 | 113600 | 46117 | 22179 | 23938 | 14054 | 24544 | 2076427 | 2001013 | 38535 | 14004 | 27588 | 28240 | 54878 | 22241 | 20000 | 10010 |
30025 | 114778 | 46524 | 22348 | 24176 | 14186 | 24820 | 2086045 | 2010071 | 39221 | 14414 | 28115 | 27780 | 54067 | 22152 | 20000 | 10010 |
30024 | 113989 | 45996 | 22063 | 23933 | 13961 | 24553 | 2068930 | 1995137 | 38572 | 14033 | 27671 | 28141 | 54286 | 22396 | 20000 | 10010 |
30024 | 114634 | 46707 | 22392 | 24315 | 14278 | 24550 | 2080418 | 2004368 | 38611 | 14075 | 27601 | 27682 | 53517 | 22258 | 20000 | 10010 |
30024 | 113953 | 46114 | 22351 | 23763 | 13856 | 24892 | 2076478 | 2000910 | 39346 | 14468 | 28270 | 27819 | 53825 | 22179 | 20000 | 10010 |
30024 | 113993 | 46476 | 22291 | 24185 | 14233 | 24782 | 2074616 | 2000057 | 39068 | 14300 | 28061 | 29426 | 56698 | 22123 | 20000 | 10010 |
30024 | 114526 | 46944 | 22783 | 24161 | 13915 | 23270 | 2101133 | 2022562 | 36047 | 12787 | 25291 | 28283 | 55116 | 22299 | 20000 | 10010 |
30024 | 113934 | 46469 | 22557 | 23912 | 13917 | 24734 | 2078303 | 2002465 | 38976 | 14257 | 27993 | 27538 | 53330 | 22390 | 20000 | 10010 |