Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
cas x0, x1, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 4.001
Issues: 3.003
Integer unit issues: 0.001
Load/store unit issues: 3.003
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
74007 | 34849 | 3013 | 1 | 3012 | 3003 | 15032 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74006 | 65579 | 3007 | 1 | 3006 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34366 | 3004 | 1 | 3003 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34451 | 3004 | 1 | 3003 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34330 | 3004 | 1 | 3003 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34365 | 3004 | 1 | 3003 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34358 | 3004 | 1 | 3003 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34386 | 3004 | 1 | 3003 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34385 | 3004 | 1 | 3003 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34272 | 3004 | 1 | 3003 | 3003 | 15036 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
Code:
cas x0, x1, [x6] ; add x6, x6, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 7.0051
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50214 | 70548 | 44034 | 13919 | 30115 | 13897 | 30003 | 47363 | 790986 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15627 | 30000 | 20100 |
50204 | 70051 | 45728 | 15727 | 30001 | 15725 | 30003 | 47337 | 790915 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15627 | 30000 | 20100 |
50204 | 70051 | 45728 | 15727 | 30001 | 15725 | 30003 | 47336 | 790927 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15627 | 30000 | 20100 |
50204 | 70051 | 45728 | 15727 | 30001 | 15725 | 30003 | 47334 | 790930 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15627 | 30000 | 20100 |
50204 | 70051 | 45728 | 15727 | 30001 | 15725 | 30003 | 47337 | 790919 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15627 | 30000 | 20100 |
50204 | 70051 | 45728 | 15727 | 30001 | 15725 | 30003 | 47349 | 790918 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15627 | 30000 | 20100 |
50204 | 70051 | 45728 | 15727 | 30001 | 15725 | 30039 | 47570 | 791404 | 0 | 45775 | 20225 | 30039 | 0 | 20201 | 60006 | 15627 | 30000 | 20100 |
50204 | 70051 | 45728 | 15727 | 30001 | 15725 | 30003 | 47337 | 790920 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15627 | 30000 | 20100 |
50204 | 70051 | 45728 | 15727 | 30001 | 15725 | 30003 | 47337 | 790916 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15627 | 30000 | 20100 |
50204 | 70132 | 45776 | 15745 | 30031 | 15743 | 30003 | 47335 | 790965 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15627 | 30000 | 20100 |
Result (median cycles for code): 7.0060
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50034 | 70533 | 43932 | 13821 | 30111 | 13804 | 30003 | 47085 | 791291 | 45638 | 20021 | 30003 | 20020 | 60000 | 15629 | 30000 | 20010 |
50024 | 70058 | 45639 | 15639 | 30000 | 15635 | 30000 | 47084 | 791273 | 45635 | 20020 | 30000 | 20020 | 60000 | 15629 | 30000 | 20010 |
50024 | 70058 | 45639 | 15639 | 30000 | 15635 | 30000 | 47079 | 791273 | 45635 | 20020 | 30000 | 20020 | 60000 | 15629 | 30000 | 20010 |
50024 | 70058 | 45639 | 15639 | 30000 | 15635 | 30000 | 47084 | 791273 | 45635 | 20020 | 30000 | 20020 | 60000 | 15629 | 30000 | 20010 |
50024 | 70058 | 45639 | 15639 | 30000 | 15635 | 30000 | 47084 | 791273 | 45635 | 20020 | 30000 | 20020 | 60000 | 15629 | 30000 | 20010 |
50024 | 70058 | 45639 | 15639 | 30000 | 15635 | 30000 | 47084 | 791273 | 45635 | 20020 | 30000 | 20020 | 60000 | 15629 | 30000 | 20010 |
50024 | 70058 | 45639 | 15639 | 30000 | 15635 | 30000 | 47084 | 791273 | 45635 | 20020 | 30000 | 20020 | 60000 | 15629 | 30000 | 20010 |
50024 | 70058 | 45639 | 15639 | 30000 | 15635 | 30000 | 47084 | 791273 | 45635 | 20020 | 30000 | 20020 | 60000 | 15629 | 30000 | 20010 |
50024 | 70058 | 45639 | 15639 | 30000 | 15635 | 30000 | 47084 | 791273 | 45635 | 20020 | 30000 | 20020 | 60000 | 15629 | 30000 | 20010 |
50024 | 70058 | 45639 | 15639 | 30000 | 15635 | 30000 | 47084 | 791273 | 45635 | 20020 | 30000 | 20020 | 60000 | 15629 | 30000 | 20010 |
Code:
cas x0, x1, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 10.5944
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
43095 | 106300 | 62425 | 14008 | 48417 | 13064 | 77473 | 307060 | 1216873 | 0 | 94734 | 26605 | 78311 | 0 | 27014 | 159016 | 19295 | 30000 | 13065 |
43035 | 105272 | 68040 | 19332 | 48708 | 17071 | 78006 | 293283 | 1192797 | 0 | 95221 | 26664 | 78588 | 0 | 26963 | 158618 | 19341 | 30000 | 13041 |
42909 | 104129 | 67307 | 19062 | 48245 | 16227 | 79556 | 309157 | 1214793 | 0 | 97213 | 27195 | 80197 | 0 | 26730 | 157443 | 19003 | 30000 | 13018 |
43063 | 106181 | 67541 | 19265 | 48276 | 17292 | 74682 | 267404 | 1101969 | 0 | 90690 | 25441 | 75087 | 0 | 26064 | 153659 | 18344 | 30000 | 12885 |
43138 | 106520 | 62455 | 13873 | 48582 | 13168 | 77401 | 286033 | 1182130 | 0 | 94594 | 26603 | 78181 | 0 | 26129 | 153745 | 18249 | 30000 | 12874 |
43067 | 104855 | 66633 | 18910 | 47723 | 16997 | 76807 | 299721 | 1210890 | 0 | 93751 | 26229 | 77597 | 0 | 26362 | 155664 | 18843 | 30000 | 12970 |
43074 | 105863 | 67106 | 19042 | 48064 | 17032 | 77721 | 284114 | 1194767 | 0 | 94925 | 26591 | 78271 | 0 | 26465 | 156109 | 19019 | 30000 | 12964 |
43184 | 107267 | 67344 | 18892 | 48452 | 17680 | 77128 | 300538 | 1221774 | 0 | 94237 | 26455 | 77921 | 0 | 25866 | 151684 | 18217 | 30000 | 12806 |
43003 | 104307 | 66434 | 18609 | 47825 | 16212 | 79406 | 294852 | 1159572 | 0 | 97133 | 27243 | 80131 | 0 | 26586 | 155165 | 18364 | 30000 | 12993 |
43100 | 106491 | 66426 | 18564 | 47862 | 16602 | 78266 | 258345 | 1216675 | 0 | 93732 | 26849 | 79083 | 0 | 26095 | 153687 | 19019 | 30000 | 12893 |
Result (median cycles for code): 10.6102
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
43027 | 107639 | 63459 | 13774 | 49685 | 13167 | 78053 | 278434 | 1182163 | 95028 | 26230 | 78558 | 26196 | 156859 | 19386 | 30000 | 12860 |
42874 | 105424 | 68580 | 19409 | 49171 | 16972 | 74519 | 275253 | 1107415 | 89689 | 24939 | 74665 | 25780 | 154401 | 19108 | 30000 | 12783 |
43024 | 107527 | 69544 | 19961 | 49583 | 17971 | 78845 | 279009 | 1199825 | 96253 | 26519 | 79419 | 26711 | 160002 | 19769 | 30000 | 12966 |
42984 | 106947 | 67004 | 17461 | 49543 | 15973 | 78652 | 280333 | 1197038 | 95970 | 26459 | 79221 | 26200 | 156902 | 19379 | 30000 | 12863 |
42874 | 105220 | 68566 | 19388 | 49178 | 16926 | 78653 | 280452 | 1195010 | 95910 | 26447 | 79213 | 26177 | 156798 | 19340 | 30000 | 12861 |
42993 | 106978 | 69318 | 19818 | 49500 | 17839 | 78321 | 276929 | 1186481 | 95438 | 26333 | 78859 | 26509 | 158751 | 19594 | 30000 | 12925 |
42967 | 106594 | 69064 | 19650 | 49414 | 17517 | 79938 | 280395 | 1223849 | 97900 | 26929 | 80651 | 26941 | 161386 | 19879 | 30000 | 13012 |
42963 | 106666 | 69081 | 19699 | 49382 | 17566 | 79577 | 283381 | 1217313 | 97371 | 26801 | 80243 | 26925 | 161259 | 19873 | 30000 | 13010 |
43024 | 107530 | 69482 | 19891 | 49591 | 17929 | 78708 | 278911 | 1196347 | 96036 | 26473 | 79275 | 26973 | 161545 | 19923 | 30000 | 13018 |
42816 | 104467 | 68146 | 19184 | 48962 | 16530 | 80036 | 285472 | 1224070 | 97991 | 26956 | 80732 | 26162 | 156691 | 19354 | 30000 | 12857 |