Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
steorl x0, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 3.000
Issues: 3.001
Integer unit issues: 1.002
Load/store unit issues: 2.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
73005 | 34496 | 3018 | 1014 | 2004 | 1002 | 2000 | 7767 | 10518 | 0 | 3000 | 1000 | 2000 | 0 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 34217 | 3003 | 1003 | 2000 | 1000 | 2000 | 7760 | 10511 | 0 | 3000 | 1000 | 2000 | 0 | 2002 | 4004 | 1004 | 2000 | 1000 |
73004 | 34226 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 0 | 3000 | 1000 | 2000 | 0 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34348 | 3002 | 1002 | 2000 | 1000 | 2000 | 7767 | 10518 | 0 | 3000 | 1000 | 2000 | 0 | 2000 | 4000 | 1002 | 2000 | 1000 |
73005 | 34361 | 3005 | 1003 | 2002 | 1001 | 2000 | 7765 | 10521 | 0 | 3000 | 1000 | 2000 | 0 | 862 | 1724 | 435 | 862 | 431 |
73004 | 34948 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 0 | 3000 | 1000 | 2000 | 0 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34353 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 0 | 3000 | 1000 | 2000 | 0 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34481 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 0 | 3000 | 1000 | 2000 | 0 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34534 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 0 | 3000 | 1000 | 2000 | 0 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34404 | 3002 | 1002 | 2000 | 1000 | 2000 | 7760 | 10511 | 0 | 3000 | 1000 | 2000 | 0 | 2000 | 4000 | 1002 | 2000 | 1000 |
Code:
steorl x0, [x6]
add x6, x6, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 6.0058
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40209 | 60599 | 40498 | 20411 | 20087 | 20201 | 20005 | 116038 | 96554 | 40110 | 20205 | 20005 | 30208 | 40009 | 20008 | 20000 | 20100 |
40204 | 60065 | 40106 | 20106 | 20000 | 20102 | 20002 | 116058 | 96245 | 40104 | 20202 | 20002 | 30254 | 40070 | 20043 | 20000 | 20100 |
40204 | 60087 | 40107 | 20107 | 20000 | 20102 | 20005 | 116029 | 96534 | 40110 | 20205 | 20005 | 30254 | 40070 | 20063 | 20000 | 20100 |
40204 | 60058 | 40122 | 20119 | 20003 | 20105 | 20002 | 116127 | 96271 | 40104 | 20202 | 20002 | 30203 | 40004 | 20018 | 20000 | 20100 |
40204 | 60058 | 40117 | 20117 | 20000 | 20102 | 20002 | 116114 | 96245 | 40104 | 20202 | 20002 | 30203 | 40004 | 20016 | 20000 | 20100 |
40204 | 60058 | 40117 | 20117 | 20000 | 20102 | 20002 | 116107 | 96223 | 40104 | 20202 | 20002 | 30203 | 40004 | 20017 | 20000 | 20100 |
40204 | 60058 | 40117 | 20117 | 20000 | 20102 | 20002 | 116121 | 96260 | 40104 | 20202 | 20002 | 30203 | 40004 | 20017 | 20000 | 20100 |
40204 | 60058 | 40117 | 20117 | 20000 | 20102 | 20002 | 116139 | 96290 | 40104 | 20202 | 20002 | 30203 | 40004 | 20017 | 20000 | 20100 |
40204 | 60058 | 40117 | 20117 | 20000 | 20102 | 20002 | 116137 | 96289 | 40104 | 20202 | 20002 | 30203 | 40004 | 20017 | 20000 | 20100 |
40204 | 60058 | 40117 | 20117 | 20000 | 20102 | 20002 | 116139 | 96293 | 40104 | 20202 | 20002 | 30252 | 40065 | 20065 | 20000 | 20100 |
Result (median cycles for code): 6.0062
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40029 | 60582 | 40354 | 20271 | 20083 | 20106 | 20002 | 115926 | 96197 | 40014 | 20022 | 20002 | 30028 | 40009 | 20008 | 20000 | 20010 |
40024 | 60065 | 40016 | 20016 | 20000 | 20010 | 20000 | 115926 | 96203 | 40010 | 20020 | 20000 | 30020 | 40000 | 20006 | 20000 | 20010 |
40024 | 60065 | 40016 | 20016 | 20000 | 20010 | 20000 | 115932 | 96213 | 40010 | 20020 | 20000 | 30020 | 40000 | 20006 | 20000 | 20010 |
40024 | 60065 | 40016 | 20016 | 20000 | 20010 | 20000 | 115930 | 96210 | 40010 | 20020 | 20000 | 30020 | 40000 | 20006 | 20000 | 20010 |
40024 | 60065 | 40016 | 20016 | 20000 | 20010 | 20034 | 106177 | 100313 | 40078 | 20054 | 20034 | 30020 | 40000 | 20006 | 20000 | 20010 |
40024 | 60065 | 40016 | 20016 | 20000 | 20010 | 20000 | 115942 | 96229 | 40010 | 20020 | 20000 | 30020 | 40000 | 20006 | 20000 | 20010 |
40024 | 60065 | 40016 | 20016 | 20000 | 20010 | 20000 | 115946 | 96242 | 40010 | 20020 | 20000 | 30020 | 40000 | 20006 | 20000 | 20010 |
40024 | 60065 | 40016 | 20016 | 20000 | 20010 | 20000 | 115916 | 96183 | 40010 | 20020 | 20000 | 30020 | 40000 | 20006 | 20000 | 20010 |
40024 | 60065 | 40016 | 20016 | 20000 | 20010 | 20000 | 115938 | 96223 | 40010 | 20020 | 20000 | 30020 | 40000 | 20006 | 20000 | 20010 |
40024 | 60065 | 40016 | 20016 | 20000 | 20010 | 20030 | 90481 | 104854 | 40071 | 20051 | 20030 | 30020 | 40000 | 20007 | 20000 | 20010 |
Code:
steorl x0, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 10.7598
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
30205 | 110685 | 43151 | 21004 | 0 | 22147 | 12250 | 0 | 21821 | 1998606 | 1937621 | 33554 | 11838 | 23116 | 21694 | 42847 | 19668 | 20000 | 10100 |
30204 | 107492 | 40665 | 19823 | 0 | 20842 | 10824 | 0 | 20285 | 1947580 | 1902099 | 30634 | 10449 | 20484 | 20276 | 40135 | 19183 | 20000 | 10100 |
30204 | 107124 | 39918 | 19596 | 0 | 20322 | 10142 | 0 | 20424 | 1935953 | 1899693 | 30894 | 10573 | 20737 | 20856 | 41244 | 19354 | 20000 | 10100 |
30204 | 106794 | 40097 | 19511 | 0 | 20586 | 10485 | 0 | 20905 | 1938059 | 1914515 | 31807 | 11002 | 21567 | 22412 | 44061 | 20228 | 20000 | 10100 |
30204 | 107895 | 40425 | 19838 | 0 | 20587 | 10537 | 0 | 20136 | 1952467 | 1920625 | 30361 | 10326 | 20245 | 20822 | 41245 | 19494 | 20000 | 10100 |
30204 | 106527 | 40134 | 19458 | 0 | 20676 | 10631 | 0 | 20802 | 1894172 | 1878602 | 31532 | 10830 | 21246 | 20828 | 41212 | 19098 | 20000 | 10100 |
30204 | 106463 | 39672 | 19336 | 0 | 20336 | 10348 | 0 | 20226 | 1880857 | 1866179 | 30514 | 10389 | 20361 | 20948 | 41285 | 19294 | 20000 | 10100 |
30204 | 107598 | 39625 | 19480 | 0 | 20145 | 10110 | 0 | 20343 | 1958745 | 1911277 | 30733 | 10491 | 20584 | 21352 | 42222 | 19583 | 20000 | 10100 |
30204 | 106453 | 40112 | 19578 | 0 | 20534 | 10440 | 0 | 20669 | 1910497 | 1887597 | 31350 | 10785 | 21161 | 21526 | 42567 | 19712 | 20000 | 10100 |
30204 | 107514 | 40067 | 19615 | 0 | 20452 | 10491 | 0 | 20076 | 1904348 | 1881511 | 30245 | 10269 | 20131 | 21650 | 42581 | 19411 | 20000 | 10100 |
Result (median cycles for code): 11.4102
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
30026 | 114833 | 47451 | 22526 | 24925 | 14615 | 24940 | 2083378 | 2008092 | 39289 | 14364 | 28252 | 28844 | 55493 | 22445 | 20000 | 10010 |
30024 | 114495 | 47110 | 22429 | 24681 | 14586 | 24624 | 2081234 | 2006178 | 38688 | 14077 | 27886 | 27614 | 53505 | 22433 | 20000 | 10010 |
30024 | 114027 | 45943 | 22258 | 23685 | 13757 | 24043 | 2081912 | 2005808 | 37629 | 13597 | 26923 | 28668 | 55356 | 22103 | 20000 | 10010 |
30024 | 113728 | 45931 | 22033 | 23898 | 14037 | 24440 | 2089576 | 2012885 | 38333 | 13905 | 27442 | 28100 | 54023 | 22392 | 20000 | 10010 |
30024 | 114464 | 46686 | 22565 | 24121 | 14037 | 24583 | 2082051 | 2005736 | 38818 | 14247 | 27836 | 28380 | 54869 | 22200 | 20000 | 10010 |
30024 | 114165 | 46388 | 22500 | 23888 | 13978 | 25007 | 2083299 | 2008196 | 39465 | 14471 | 28391 | 27328 | 53241 | 22350 | 20000 | 10010 |
30024 | 114161 | 46300 | 22488 | 23812 | 13819 | 24161 | 2077111 | 2001486 | 37863 | 13717 | 27223 | 27770 | 53997 | 22306 | 20000 | 10010 |
30024 | 114390 | 46569 | 22461 | 24108 | 13977 | 25046 | 2081108 | 2004309 | 39554 | 14523 | 28619 | 27978 | 54519 | 22259 | 20000 | 10010 |
30024 | 114102 | 46226 | 22426 | 23800 | 13895 | 24572 | 2092737 | 2015872 | 38562 | 14000 | 27509 | 28667 | 55643 | 22136 | 20000 | 10010 |
30024 | 113909 | 46280 | 22221 | 24059 | 14214 | 24614 | 2071939 | 1997625 | 38700 | 14100 | 27845 | 28376 | 54924 | 22326 | 20000 | 10010 |