Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
staddlh w0, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 3.000
Issues: 3.001
Integer unit issues: 1.002
Load/store unit issues: 2.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
73005 | 34487 | 3019 | 1015 | 2004 | 1002 | 2000 | 7770 | 10521 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34211 | 3003 | 1003 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34204 | 3002 | 1002 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34224 | 3002 | 1002 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2002 | 4004 | 1003 | 2000 | 1000 |
73004 | 34351 | 3002 | 1002 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34206 | 3002 | 1002 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34206 | 3002 | 1002 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34222 | 3002 | 1002 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34203 | 3002 | 1002 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34468 | 3002 | 1002 | 2000 | 1000 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
Code:
staddlh w0, [x6]
add x6, x6, 2
(fused SUBS/B.cc loop)
Result (median cycles for code): 6.0058
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40206 | 60435 | 40259 | 20210 | 20049 | 20152 | 20005 | 115564 | 95824 | 40110 | 20205 | 20005 | 30208 | 40009 | 20008 | 20000 | 20100 |
40204 | 60062 | 40105 | 20105 | 20000 | 20102 | 20005 | 115560 | 95817 | 40110 | 20205 | 20005 | 30251 | 40065 | 20042 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20002 | 115646 | 95519 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20002 | 115639 | 95507 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20002 | 115645 | 95515 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20002 | 115638 | 95505 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20002 | 115647 | 95519 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20002 | 115649 | 95524 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20002 | 115645 | 95516 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
40204 | 60055 | 40104 | 20104 | 20000 | 20102 | 20002 | 115649 | 95527 | 40104 | 20202 | 20002 | 30203 | 40004 | 20005 | 20000 | 20100 |
Result (median cycles for code): 6.0055
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40026 | 60304 | 40147 | 20111 | 20036 | 20058 | 20002 | 115504 | 95500 | 40014 | 20022 | 20002 | 30020 | 40000 | 20006 | 20000 | 20010 |
40024 | 60065 | 40016 | 20016 | 20000 | 20010 | 20000 | 115381 | 95382 | 40010 | 20020 | 20000 | 30076 | 40070 | 20045 | 20000 | 20010 |
40024 | 60058 | 40015 | 20015 | 20000 | 20010 | 20002 | 115535 | 95531 | 40014 | 20022 | 20002 | 30023 | 40004 | 20014 | 20000 | 20010 |
40024 | 60055 | 40025 | 20025 | 20000 | 20010 | 20000 | 115396 | 95405 | 40010 | 20020 | 20000 | 30020 | 40000 | 20014 | 20000 | 20010 |
40024 | 60055 | 40024 | 20024 | 20000 | 20010 | 20000 | 115386 | 95389 | 40010 | 20020 | 20000 | 30020 | 40000 | 20014 | 20000 | 20010 |
40024 | 60055 | 40024 | 20024 | 20000 | 20010 | 20000 | 115342 | 95346 | 40010 | 20020 | 20000 | 30020 | 40000 | 20014 | 20000 | 20010 |
40024 | 60055 | 40024 | 20024 | 20000 | 20010 | 20000 | 115367 | 95373 | 40010 | 20020 | 20000 | 30020 | 40000 | 20014 | 20000 | 20010 |
40024 | 60055 | 40024 | 20024 | 20000 | 20010 | 20000 | 115375 | 95387 | 40010 | 20020 | 20000 | 30020 | 40000 | 20014 | 20000 | 20010 |
40024 | 60055 | 40024 | 20024 | 20000 | 20010 | 20000 | 115373 | 95384 | 40010 | 20020 | 20000 | 30020 | 40000 | 20014 | 20000 | 20010 |
40024 | 60055 | 40020 | 20020 | 20000 | 20010 | 20000 | 115396 | 95406 | 40010 | 20020 | 20000 | 30020 | 40000 | 20014 | 20000 | 20010 |
Code:
staddlh w0, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 10.7434
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
30684 | 120336 | 46407 | 22144 | 24263 | 14526 | 21586 | 1998060 | 1936846 | 33089 | 11607 | 22697 | 22474 | 44218 | 20254 | 20000 | 10100 |
30204 | 107578 | 40497 | 19924 | 20573 | 10572 | 20189 | 1948223 | 1922077 | 30441 | 10353 | 20300 | 20514 | 40540 | 19421 | 20000 | 10100 |
30204 | 108764 | 40629 | 19922 | 20707 | 10514 | 20876 | 1969784 | 1932401 | 31744 | 10970 | 21507 | 22478 | 44247 | 20088 | 20000 | 10100 |
30205 | 110930 | 43612 | 21141 | 22471 | 12581 | 21024 | 1980263 | 1922692 | 32007 | 11086 | 21715 | 21898 | 43136 | 19971 | 20000 | 10100 |
30204 | 109088 | 42094 | 20515 | 21579 | 11682 | 20542 | 1952193 | 1902126 | 31077 | 10636 | 20824 | 20950 | 41457 | 19705 | 20000 | 10100 |
30204 | 109396 | 41939 | 20478 | 21461 | 11431 | 20665 | 1930585 | 1906923 | 31234 | 10673 | 20931 | 21989 | 43347 | 19831 | 20000 | 10100 |
30204 | 108137 | 40357 | 19716 | 20641 | 10571 | 20103 | 1922880 | 1904108 | 30281 | 10283 | 20155 | 20762 | 41013 | 19764 | 20000 | 10100 |
30204 | 107057 | 39532 | 19287 | 20245 | 10130 | 20252 | 1952508 | 1915788 | 30541 | 10390 | 20377 | 21300 | 42017 | 19608 | 20000 | 10100 |
30204 | 108490 | 39997 | 19483 | 20514 | 10527 | 20311 | 1874509 | 1871321 | 30606 | 10397 | 20379 | 21384 | 42268 | 19663 | 20000 | 10100 |
30205 | 107579 | 40751 | 20007 | 20744 | 10716 | 21089 | 1954133 | 1910543 | 32138 | 11151 | 21863 | 21620 | 42714 | 19754 | 20000 | 10100 |
Result (median cycles for code): 11.4140
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
30025 | 115277 | 46669 | 22412 | 24257 | 14129 | 24942 | 2094155 | 2016557 | 39308 | 14379 | 28217 | 28326 | 54846 | 22557 | 20000 | 0 | 10010 |
30024 | 114773 | 47124 | 22397 | 24727 | 14515 | 24748 | 2080656 | 2005775 | 38975 | 14242 | 28159 | 27910 | 53913 | 22426 | 20000 | 0 | 10010 |
30025 | 114305 | 46338 | 22370 | 23968 | 14056 | 24727 | 2081168 | 2005741 | 38917 | 14206 | 28070 | 28016 | 54288 | 22272 | 20000 | 0 | 10010 |
30026 | 114109 | 46188 | 22190 | 23998 | 14049 | 24410 | 2080086 | 2005556 | 38281 | 13885 | 27420 | 28072 | 54500 | 21956 | 20000 | 0 | 10010 |
30024 | 114417 | 46358 | 22172 | 24186 | 14407 | 24312 | 2084800 | 2008486 | 38075 | 13777 | 27181 | 27284 | 52920 | 22313 | 20000 | 0 | 10010 |
30024 | 114089 | 46249 | 22480 | 23769 | 13610 | 24563 | 2079659 | 2004383 | 38599 | 14050 | 27606 | 29026 | 55595 | 22262 | 20000 | 0 | 10010 |
30024 | 114265 | 46424 | 22571 | 23853 | 13777 | 24247 | 2074322 | 1999412 | 37945 | 13710 | 27038 | 27888 | 54080 | 22405 | 20000 | 0 | 10010 |
30024 | 114625 | 46401 | 22617 | 23784 | 13654 | 24956 | 2081647 | 2005789 | 39305 | 14361 | 28204 | 27906 | 54079 | 22486 | 20000 | 0 | 10010 |
30024 | 114294 | 46811 | 22342 | 24469 | 14462 | 24882 | 2084460 | 2008970 | 39196 | 14328 | 27908 | 28588 | 55318 | 22399 | 20000 | 0 | 10010 |
30024 | 114292 | 46359 | 22374 | 23985 | 14061 | 24188 | 2086687 | 2011198 | 37872 | 13697 | 27038 | 28534 | 55174 | 22283 | 20000 | 0 | 10010 |