Apple Microarchitecture Research by Dougall Johnson M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
stclrlh w0, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 3.000
Issues: 3.002
Integer unit issues: 1.003
Load/store unit issues: 2.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
73005 | 34545 | 3018 | 1014 | 2004 | 1002 | 2000 | 7767 | 10518 | 3000 | 1000 | 2000 | 2000 | 4000 | 0 | 1002 | 2000 | 0 | 1000 |
73004 | 34213 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 3000 | 1000 | 2000 | 2000 | 4000 | 0 | 1002 | 2000 | 0 | 1000 |
73004 | 34169 | 3002 | 1002 | 2000 | 1000 | 2000 | 7764 | 10515 | 3000 | 1000 | 2000 | 2000 | 4000 | 0 | 1002 | 2000 | 0 | 1000 |
73004 | 34165 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 3000 | 1000 | 2000 | 2000 | 4000 | 0 | 1002 | 2000 | 0 | 1000 |
73004 | 34126 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 3000 | 1000 | 2000 | 2000 | 4000 | 0 | 1002 | 2000 | 0 | 1000 |
73004 | 34162 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 3000 | 1000 | 2000 | 2000 | 4000 | 0 | 1002 | 2000 | 0 | 1000 |
73004 | 34162 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 3000 | 1000 | 2000 | 2000 | 4000 | 0 | 1002 | 2000 | 0 | 1000 |
73004 | 34203 | 3003 | 1003 | 2000 | 1000 | 2000 | 7760 | 10511 | 3000 | 1000 | 2000 | 2000 | 4000 | 0 | 1002 | 2000 | 0 | 1000 |
73004 | 34165 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 3000 | 1000 | 2000 | 2000 | 4000 | 0 | 1002 | 2000 | 0 | 1000 |
73004 | 34168 | 3002 | 1002 | 2000 | 1000 | 2000 | 7810 | 10561 | 3000 | 1000 | 2000 | 2000 | 4000 | 0 | 1006 | 2000 | 0 | 1000 |
Code:
stclrlh w0, [x6]
add x6, x6, 2
(fused SUBS/B.cc loop)
Result (median cycles for code): 6.0058
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40206 | 60420 | 40258 | 20210 | 20048 | 20150 | 20005 | 115569 | 95838 | 40110 | 20205 | 20005 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60062 | 40112 | 20109 | 20003 | 20105 | 20005 | 115592 | 95858 | 40110 | 20205 | 20005 | 30203 | 40004 | 20007 | 20000 | 20100 |
40204 | 60055 | 40107 | 20107 | 20000 | 20102 | 20002 | 115674 | 95548 | 40104 | 20202 | 20002 | 30208 | 40009 | 20009 | 20000 | 20100 |
40204 | 60055 | 40107 | 20107 | 20000 | 20102 | 20002 | 115680 | 95565 | 40104 | 20202 | 20002 | 30203 | 40004 | 20007 | 20000 | 20100 |
40204 | 60055 | 40105 | 20105 | 20000 | 20102 | 20002 | 115682 | 95567 | 40104 | 20202 | 20002 | 30203 | 40004 | 20007 | 20000 | 20100 |
40204 | 60055 | 40105 | 20105 | 20000 | 20102 | 20002 | 115670 | 95548 | 40104 | 20202 | 20002 | 30254 | 40070 | 20044 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20002 | 115688 | 95578 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40204 | 60055 | 40105 | 20105 | 20000 | 20102 | 20005 | 115567 | 95833 | 40110 | 20205 | 20005 | 30203 | 40004 | 20007 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20002 | 115649 | 95526 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40204 | 60055 | 40105 | 20105 | 20000 | 20102 | 20002 | 115690 | 95572 | 40104 | 20202 | 20002 | 30203 | 40004 | 20007 | 20000 | 20100 |
Result (median cycles for code): 6.0062
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
40027 | 60424 | 40205 | 20138 | 20067 | 20088 | 20002 | 115420 | 95430 | 40014 | 20022 | 20002 | 30020 | 40000 | 20005 | 20000 | 0 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115453 | 95475 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 0 | 20010 |
40024 | 60072 | 40015 | 20015 | 20000 | 20010 | 20000 | 115416 | 95425 | 40010 | 20020 | 20000 | 30020 | 40000 | 20006 | 20000 | 0 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115419 | 95430 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 0 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115416 | 95426 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 0 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115448 | 95466 | 40010 | 20020 | 20000 | 30020 | 40000 | 20006 | 20000 | 0 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115408 | 95412 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 0 | 20010 |
40024 | 60062 | 40015 | 20015 | 20000 | 20010 | 20000 | 115414 | 95422 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 0 | 20010 |
40024 | 60062 | 40017 | 20017 | 20000 | 20010 | 20036 | 95825 | 104291 | 40082 | 20056 | 20036 | 30020 | 40000 | 20005 | 20000 | 0 | 20010 |
40024 | 60065 | 40016 | 20016 | 20000 | 20010 | 20000 | 115421 | 95435 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 0 | 20010 |
Code:
stclrlh w0, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 10.7550
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
30205 | 113798 | 46334 | 22053 | 24281 | 14322 | 24133 | 2054782 | 1985304 | 37900 | 13935 | 26668 | 23988 | 46940 | 0 | 20595 | 20000 | 0 | 10100 |
30204 | 107500 | 40296 | 19773 | 20523 | 10536 | 21012 | 1981568 | 1927369 | 32022 | 11111 | 21762 | 20616 | 40780 | 0 | 19267 | 20000 | 0 | 10100 |
30204 | 105849 | 39122 | 18892 | 20230 | 10203 | 21429 | 1956050 | 1924828 | 32715 | 11387 | 22292 | 21812 | 42919 | 0 | 19814 | 20000 | 0 | 10100 |
30204 | 106374 | 39820 | 19535 | 20285 | 10341 | 20115 | 1912632 | 1875194 | 30297 | 10282 | 20169 | 20596 | 40710 | 0 | 19237 | 20000 | 0 | 10100 |
30204 | 110445 | 43433 | 21106 | 22327 | 12319 | 21428 | 1969846 | 1918240 | 32680 | 11354 | 22102 | 22433 | 43721 | 0 | 20050 | 20000 | 0 | 10100 |
30204 | 106960 | 39541 | 19395 | 20146 | 10202 | 21244 | 1978928 | 1927883 | 32409 | 11267 | 22026 | 21486 | 42167 | 0 | 19709 | 20000 | 0 | 10100 |
30204 | 107550 | 40714 | 19884 | 20830 | 10832 | 22373 | 2009318 | 1949461 | 34454 | 12216 | 23734 | 22946 | 44419 | 0 | 20253 | 20285 | 1 | 10502 |
30204 | 106612 | 39937 | 19478 | 20459 | 10342 | 20748 | 1931091 | 1912244 | 31523 | 10876 | 21306 | 21255 | 41876 | 0 | 19360 | 20000 | 0 | 10100 |
30204 | 106978 | 40116 | 19647 | 20469 | 10543 | 20575 | 1954156 | 1918591 | 31165 | 10692 | 20953 | 22007 | 43315 | 0 | 20244 | 20000 | 0 | 10100 |
30204 | 109213 | 41593 | 20390 | 21203 | 11083 | 20203 | 1936398 | 1908237 | 30441 | 10339 | 20272 | 20382 | 40347 | 0 | 19282 | 20000 | 0 | 10100 |
Result (median cycles for code): 11.4179
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
57396 | 170997 | 73054 | 38738 | 108 | 34208 | 30461 | 89 | 25148 | 2091432 | 2014469 | 39743 | 14606 | 28693 | 28950 | 55674 | 22548 | 20000 | 10010 |
30024 | 114632 | 46847 | 22620 | 0 | 24227 | 14020 | 0 | 24642 | 2069357 | 1994606 | 38818 | 14188 | 27813 | 29116 | 55956 | 22337 | 20000 | 10010 |
30024 | 114390 | 46488 | 22474 | 0 | 24014 | 13710 | 0 | 24107 | 2088257 | 2012105 | 37776 | 13682 | 26839 | 28284 | 54730 | 22478 | 20000 | 10010 |
30024 | 114863 | 47296 | 22494 | 0 | 24802 | 14638 | 0 | 24473 | 2091292 | 2014011 | 38389 | 13928 | 27453 | 28396 | 54720 | 22525 | 20000 | 10010 |
30024 | 114261 | 46338 | 22681 | 0 | 23657 | 13507 | 0 | 24758 | 2084838 | 2008923 | 38960 | 14213 | 27850 | 28292 | 54916 | 22504 | 20000 | 10010 |
30025 | 114427 | 46405 | 22450 | 0 | 23955 | 13736 | 0 | 24638 | 2088938 | 2011983 | 38809 | 14183 | 27728 | 27830 | 54137 | 22439 | 20000 | 10010 |
30024 | 114610 | 47047 | 22523 | 0 | 24524 | 14192 | 0 | 25166 | 2077762 | 2002787 | 39848 | 14695 | 28820 | 28496 | 55243 | 22456 | 20000 | 10010 |
30024 | 114353 | 46849 | 22357 | 0 | 24492 | 14377 | 0 | 24702 | 2087324 | 2010782 | 38777 | 14088 | 27842 | 26790 | 52403 | 22509 | 20000 | 10010 |
30024 | 114359 | 46632 | 22548 | 0 | 24084 | 14149 | 0 | 24641 | 2077530 | 2002766 | 38660 | 14030 | 27839 | 27656 | 53864 | 22319 | 20000 | 10010 |
30024 | 113712 | 46115 | 22075 | 0 | 24040 | 14306 | 0 | 23984 | 2076019 | 2001044 | 37457 | 13485 | 26909 | 27860 | 53578 | 22357 | 20000 | 10010 |