Apple Microarchitecture Research by Dougall Johnson M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
casb w0, w1, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 4.000
Issues: 3.000
Integer unit issues: 0.001
Load/store unit issues: 3.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
74004 | 34658 | 3001 | 1 | 3000 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34860 | 3004 | 1 | 3003 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34491 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34835 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34460 | 3001 | 1 | 3000 | 3000 | 15018 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34297 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34300 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34286 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34289 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74005 | 34256 | 3004 | 1 | 3003 | 3000 | 15026 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
Code:
casb w0, w1, [x6] add x6, x6, 2
(fused SUBS/B.cc loop)
Result (median cycles for code): 7.0060
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50208 | 70248 | 43924 | 13876 | 30048 | 13876 | 30003 | 47289 | 790932 | 0 | 45728 | 20201 | 30003 | 0 | 20225 | 60078 | 14651 | 30000 | 20100 |
50204 | 70060 | 45727 | 15726 | 30001 | 15725 | 30003 | 47252 | 790933 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70060 | 45727 | 15726 | 30001 | 15725 | 30003 | 47243 | 790897 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70060 | 45727 | 15726 | 30001 | 15725 | 31611 | 121663 | 809541 | 4439 | 49318 | 23481 | 31956 | 35 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70060 | 45727 | 15726 | 30001 | 15725 | 30003 | 47243 | 790893 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70060 | 45727 | 15726 | 30001 | 15725 | 30003 | 47243 | 790899 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70060 | 45727 | 15726 | 30001 | 15725 | 30003 | 47244 | 790900 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70060 | 45727 | 15726 | 30001 | 15725 | 30003 | 47244 | 790900 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70060 | 45727 | 15726 | 30001 | 15725 | 30003 | 47243 | 790899 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70060 | 45727 | 15726 | 30001 | 15725 | 30003 | 47243 | 790899 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15626 | 30000 | 20100 |
Result (median cycles for code): 7.0051
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50024 | 70088 | 45641 | 15640 | 30001 | 15635 | 30003 | 47003 | 791257 | 45638 | 20021 | 30003 | 20021 | 60006 | 15626 | 30000 | 20010 |
50024 | 70058 | 45636 | 15636 | 30000 | 15635 | 30000 | 46999 | 791223 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70051 | 45636 | 15636 | 30000 | 15635 | 30000 | 46967 | 791102 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70051 | 45636 | 15636 | 30000 | 15635 | 30000 | 46997 | 791218 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70051 | 45636 | 15636 | 30000 | 15635 | 30000 | 46967 | 791102 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70051 | 45636 | 15636 | 30000 | 15635 | 30000 | 46967 | 791102 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70051 | 45636 | 15636 | 30000 | 15635 | 30000 | 46967 | 791102 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50025 | 70102 | 43985 | 13955 | 30030 | 13954 | 30000 | 46998 | 791229 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70051 | 45636 | 15636 | 30000 | 15635 | 30000 | 46996 | 791215 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70051 | 45636 | 15636 | 30000 | 15635 | 30000 | 46967 | 791102 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
Code:
casb w0, w1, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 10.6080
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
43163 | 107022 | 63058 | 13780 | 49278 | 12958 | 76757 | 295387 | 1173500 | 93527 | 26235 | 77347 | 26730 | 157682 | 19199 | 30000 | 13032 |
43086 | 105547 | 67221 | 18997 | 48224 | 17104 | 76346 | 268655 | 1163683 | 92965 | 26068 | 76814 | 26700 | 157313 | 18958 | 30000 | 13013 |
43104 | 106428 | 67768 | 18988 | 48780 | 17190 | 77781 | 275668 | 1189375 | 94975 | 26598 | 78390 | 26007 | 153330 | 13134 | 30000 | 12897 |
42942 | 104892 | 66246 | 18768 | 47478 | 16598 | 79393 | 289074 | 1212630 | 97294 | 27205 | 80149 | 27206 | 160270 | 19474 | 30000 | 13097 |
43201 | 107246 | 68954 | 19718 | 49236 | 17902 | 79426 | 288635 | 1211801 | 97328 | 27206 | 80152 | 26537 | 156673 | 19316 | 30000 | 12979 |
42968 | 104160 | 64119 | 15569 | 48550 | 14058 | 76516 | 276938 | 1154987 | 93289 | 26174 | 77106 | 26288 | 154735 | 18780 | 30000 | 12945 |
43037 | 105404 | 66721 | 18746 | 47975 | 16790 | 77799 | 296356 | 1214691 | 95029 | 26711 | 78403 | 26614 | 156559 | 19227 | 30000 | 12971 |
43086 | 105402 | 68122 | 19221 | 48901 | 17052 | 77285 | 341003 | 1235876 | 94697 | 26533 | 78113 | 26655 | 157134 | 19002 | 30000 | 12989 |
43089 | 106122 | 67657 | 19145 | 48512 | 17121 | 73518 | 294763 | 1127620 | 88596 | 25031 | 73703 | 26074 | 153400 | 18527 | 30000 | 12872 |
43049 | 105570 | 67936 | 19328 | 48608 | 16994 | 77590 | 297575 | 1191655 | 94409 | 26485 | 78153 | 27102 | 159779 | 19492 | 30000 | 13069 |
Result (median cycles for code): 10.6133
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
43000 | 107796 | 63439 | 13719 | 49720 | 13079 | 79391 | 295052 | 1211998 | 0 | 96973 | 26718 | 80020 | 0 | 26473 | 158550 | 19566 | 30000 | 12918 |
42932 | 106148 | 68940 | 19590 | 49350 | 17328 | 79344 | 280709 | 1210667 | 0 | 97002 | 26704 | 79980 | 0 | 26529 | 158890 | 19641 | 30000 | 12928 |
42805 | 104288 | 68119 | 19142 | 48977 | 16410 | 76988 | 277758 | 1159297 | 0 | 93398 | 25843 | 77383 | 0 | 26417 | 158211 | 19531 | 30000 | 12909 |
42922 | 106080 | 67826 | 18512 | 49314 | 16567 | 74511 | 275048 | 1106281 | 0 | 89646 | 24926 | 74652 | 0 | 25843 | 154762 | 19119 | 30000 | 12791 |
42805 | 104288 | 68119 | 19142 | 48977 | 16410 | 77954 | 279607 | 1181786 | 0 | 94864 | 26198 | 78456 | 0 | 26949 | 161454 | 19985 | 30000 | 13018 |
43032 | 107684 | 69587 | 20005 | 49582 | 18001 | 79934 | 287099 | 1226799 | 0 | 97935 | 26949 | 80727 | 0 | 26632 | 159535 | 19661 | 30000 | 12951 |
42805 | 104288 | 68115 | 19142 | 48973 | 16410 | 76987 | 277790 | 1159324 | 0 | 93397 | 25843 | 77383 | 0 | 25843 | 154762 | 19119 | 30000 | 12791 |
42930 | 106133 | 68939 | 19594 | 49345 | 17272 | 78568 | 279626 | 1192933 | 0 | 95805 | 26424 | 79126 | 0 | 26373 | 157952 | 19503 | 30000 | 12899 |
42950 | 106445 | 68955 | 19631 | 49324 | 17448 | 78892 | 280151 | 1203767 | 0 | 96340 | 26551 | 79511 | 0 | 26783 | 160407 | 15472 | 30000 | 12976 |
42878 | 105371 | 68529 | 19388 | 49141 | 16927 | 78375 | 279068 | 1190713 | 0 | 95552 | 26350 | 78918 | 0 | 26167 | 156716 | 19330 | 30000 | 12858 |