Apple Microarchitecture Research by Dougall Johnson M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
cash w0, w1, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 4.001
Issues: 3.003
Integer unit issues: 0.001
Load/store unit issues: 3.003
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
74007 | 34518 | 3013 | 1 | 3012 | 3003 | 15032 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34234 | 3004 | 1 | 3003 | 3003 | 15032 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34233 | 3004 | 1 | 3003 | 3003 | 15032 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34234 | 3004 | 1 | 3003 | 3003 | 15032 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34248 | 3004 | 1 | 3003 | 3003 | 15032 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34234 | 3004 | 1 | 3003 | 3003 | 15032 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34234 | 3004 | 1 | 3003 | 3003 | 15032 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34324 | 3004 | 1 | 3003 | 3003 | 15032 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34437 | 3004 | 1 | 3003 | 3000 | 15029 | 3000 | 1000 | 3000 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34375 | 3004 | 1 | 3003 | 3003 | 15032 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
Code:
cash w0, w1, [x6]
add x6, x6, 2
(fused SUBS/B.cc loop)
Result (median cycles for code): 7.0054
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50210 | 70373 | 43977 | 13895 | 30082 | 13896 | 30003 | 47266 | 790922 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70058 | 45727 | 15726 | 30001 | 15725 | 30003 | 47236 | 790843 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70051 | 45727 | 15726 | 30001 | 15725 | 30003 | 47236 | 790843 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70051 | 45727 | 15726 | 30001 | 15725 | 30003 | 47236 | 790841 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70051 | 45727 | 15726 | 30001 | 15725 | 30039 | 46672 | 790811 | 45565 | 20225 | 30039 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70051 | 45727 | 15726 | 30001 | 15725 | 30003 | 47236 | 790843 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70051 | 45727 | 15726 | 30001 | 15725 | 30003 | 47236 | 790845 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70051 | 45727 | 15726 | 30001 | 15725 | 30003 | 47236 | 790843 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70051 | 45727 | 15726 | 30001 | 15725 | 30003 | 47236 | 790847 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70051 | 45727 | 15726 | 30001 | 15725 | 30003 | 47236 | 790843 | 45728 | 20201 | 30003 | 20201 | 60006 | 15626 | 30000 | 20100 |
Result (median cycles for code): 7.0051
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50030 | 70394 | 43883 | 13803 | 30080 | 13805 | 30003 | 46973 | 791135 | 45638 | 20021 | 30003 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70060 | 45636 | 15636 | 30000 | 15635 | 30000 | 46978 | 791159 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70054 | 45636 | 15636 | 30000 | 15635 | 30000 | 46973 | 791159 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70054 | 45636 | 15636 | 30000 | 15635 | 30000 | 46973 | 791151 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70054 | 45636 | 15636 | 30000 | 15635 | 30000 | 46973 | 791147 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70054 | 45636 | 15636 | 30000 | 15635 | 30000 | 46973 | 791153 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70054 | 45636 | 15636 | 30000 | 15635 | 30000 | 46978 | 791159 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70054 | 45636 | 15636 | 30000 | 15635 | 30000 | 46973 | 791159 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70054 | 45636 | 15636 | 30000 | 15635 | 30000 | 46973 | 791151 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70054 | 45636 | 15636 | 30000 | 15635 | 30000 | 46973 | 791149 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
Code:
cash w0, w1, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 10.6179
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
42714 | 100464 | 61416 | 13193 | 48223 | 11798 | 79218 | 295801 | 1221815 | 96854 | 27144 | 79912 | 27124 | 159668 | 19523 | 30000 | 13075 |
43178 | 107591 | 68997 | 19704 | 49293 | 17731 | 79036 | 294812 | 1221350 | 96767 | 27155 | 79765 | 26799 | 158059 | 19384 | 30000 | 13040 |
43541 | 107394 | 62040 | 13886 | 48154 | 13103 | 76064 | 303464 | 1185839 | 92586 | 26077 | 76503 | 26329 | 154821 | 16055 | 30000 | 12909 |
43103 | 106948 | 67919 | 19488 | 48431 | 17127 | 77373 | 296502 | 1201236 | 94573 | 26542 | 78122 | 26067 | 153586 | 17933 | 30000 | 12928 |
43202 | 105794 | 68276 | 19189 | 49087 | 17689 | 77914 | 305008 | 1200100 | 94922 | 26583 | 78509 | 27173 | 160060 | 19266 | 30000 | 13098 |
43082 | 105376 | 68110 | 19277 | 48833 | 17250 | 79036 | 295188 | 1221823 | 96767 | 27155 | 79765 | 27155 | 159505 | 19419 | 30000 | 13074 |
43178 | 107596 | 68995 | 19694 | 49301 | 17731 | 78553 | 225260 | 1218616 | 92637 | 26908 | 79392 | 26580 | 157403 | 18882 | 30000 | 13017 |
42880 | 103360 | 66752 | 18588 | 48164 | 15991 | 75935 | 284342 | 1187348 | 92985 | 26037 | 76687 | 25905 | 152420 | 18876 | 30000 | 12850 |
43015 | 103543 | 66236 | 18512 | 47724 | 16837 | 77262 | 277256 | 1161088 | 94125 | 26366 | 77632 | 26421 | 155565 | 18841 | 30000 | 12903 |
42971 | 104083 | 66317 | 18475 | 47842 | 16359 | 78330 | 303434 | 1192944 | 95668 | 27053 | 79889 | 25965 | 153356 | 17732 | 30000 | 12899 |
Result (median cycles for code): 10.6500
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
42885 | 105611 | 62861 | 13551 | 49310 | 12713 | 78244 | 278458 | 1185921 | 95307 | 26307 | 78775 | 26392 | 158040 | 19488 | 30000 | 12902 |
42910 | 105848 | 68811 | 19518 | 49293 | 17174 | 78682 | 280384 | 1196176 | 95972 | 26469 | 79251 | 26040 | 155923 | 19250 | 30000 | 12829 |
42954 | 106539 | 69059 | 19663 | 49396 | 17467 | 78683 | 280878 | 1199628 | 96013 | 26474 | 79268 | 26273 | 157357 | 16065 | 30000 | 12873 |
42931 | 106026 | 68860 | 19609 | 49251 | 17449 | 77273 | 277720 | 1165892 | 93861 | 25953 | 77697 | 25953 | 155391 | 19182 | 30000 | 12814 |
42826 | 104563 | 68199 | 19202 | 48997 | 16585 | 78910 | 280630 | 1203437 | 96356 | 26557 | 79515 | 26557 | 159027 | 19636 | 30000 | 12935 |
42876 | 105375 | 68566 | 19406 | 49160 | 16930 | 78508 | 248659 | 1193566 | 94112 | 26412 | 79092 | 26194 | 156866 | 19383 | 30000 | 12862 |
42742 | 103350 | 67627 | 18883 | 48744 | 15987 | 79292 | 279624 | 1211785 | 96945 | 26683 | 79927 | 26430 | 158335 | 19568 | 30000 | 12911 |
42954 | 106500 | 69108 | 19690 | 49418 | 17485 | 79376 | 280306 | 1210598 | 97041 | 26720 | 80014 | 26977 | 161526 | 19930 | 30000 | 13017 |
43021 | 107547 | 69495 | 19917 | 49578 | 17943 | 79032 | 279878 | 1205481 | 96556 | 26591 | 79639 | 26449 | 158394 | 19571 | 30000 | 12912 |
42926 | 106125 | 68909 | 19596 | 49313 | 17304 | 78606 | 279424 | 1196832 | 95909 | 26441 | 79177 | 26626 | 159451 | 19686 | 30000 | 12948 |