Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
caspl w0, w1, w2, w3, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 6.002
Issues: 3.003
Integer unit issues: 0.001
Load/store unit issues: 3.003
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
76011 | 36483 | 3031 | 1 | 3030 | 1 | 3003 | 0 | 11250 | 0 | 3003 | 2002 | 4004 | 0 | 2000 | 7000 | 1 | 3000 | 3000 |
76006 | 34964 | 3004 | 1 | 3003 | 0 | 10229 | 155653 | 76985 | 532 | 21480 | 17396 | 12662 | 36 | 2000 | 7000 | 1 | 3000 | 3000 |
76007 | 34748 | 3007 | 1 | 3006 | 0 | 3006 | 0 | 11027 | 0 | 3006 | 2004 | 4008 | 0 | 2002 | 7007 | 1 | 3000 | 3002 |
76006 | 35118 | 3004 | 1 | 3003 | 0 | 3003 | 0 | 11011 | 0 | 3003 | 2002 | 4004 | 0 | 2002 | 7007 | 1 | 3000 | 3002 |
76006 | 35316 | 3004 | 1 | 3003 | 0 | 3003 | 0 | 11011 | 0 | 3003 | 2002 | 4004 | 0 | 2000 | 7000 | 1 | 3000 | 3000 |
76006 | 34910 | 3004 | 1 | 3003 | 0 | 3003 | 0 | 11011 | 0 | 3003 | 2002 | 4004 | 0 | 2002 | 7007 | 1 | 3000 | 3002 |
76004 | 34653 | 3001 | 1 | 3000 | 0 | 3003 | 0 | 11011 | 0 | 3003 | 2002 | 4004 | 0 | 2002 | 7007 | 1 | 3000 | 3002 |
76006 | 35102 | 3004 | 1 | 3003 | 0 | 3003 | 0 | 11011 | 0 | 3003 | 2002 | 4004 | 0 | 2002 | 7007 | 1 | 3000 | 3002 |
76006 | 34418 | 3004 | 1 | 3003 | 0 | 3003 | 0 | 11011 | 0 | 3003 | 2002 | 4004 | 0 | 2002 | 7007 | 1 | 3000 | 3002 |
76006 | 34884 | 3004 | 1 | 3003 | 0 | 3003 | 0 | 11011 | 0 | 3003 | 2002 | 4004 | 0 | 2002 | 7007 | 1 | 3000 | 3002 |
Code:
caspl w0, w1, w2, w3, [x6]
add x6, x6, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 19.0052
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
70204 | 190052 | 55060 | 25060 | 30000 | 25054 | 30002 | 75532 | 2245234 | 0 | 55056 | 30202 | 40003 | 0 | 30202 | 70004 | 24960 | 30000 | 40100 |
70204 | 190052 | 55060 | 25060 | 30000 | 25054 | 30038 | 75663 | 2244614 | 0 | 55096 | 30238 | 40051 | 0 | 30202 | 70004 | 24960 | 30000 | 40100 |
70204 | 190052 | 55060 | 25060 | 30000 | 25054 | 30002 | 75532 | 2245262 | 0 | 55056 | 30202 | 40003 | 0 | 30240 | 70082 | 24955 | 30000 | 40100 |
70204 | 190053 | 55060 | 25060 | 30000 | 25054 | 30038 | 72115 | 2244686 | 0 | 53922 | 30238 | 40051 | 0 | 30202 | 70004 | 24960 | 30000 | 40100 |
70204 | 190052 | 55060 | 25060 | 30000 | 25054 | 30002 | 75532 | 2245262 | 0 | 55056 | 30202 | 40003 | 0 | 30238 | 70088 | 24202 | 30000 | 40100 |
70204 | 190057 | 55060 | 25060 | 30000 | 25054 | 30002 | 75532 | 2245262 | 0 | 55056 | 30202 | 40003 | 0 | 30202 | 70004 | 24960 | 30000 | 40100 |
70204 | 190052 | 55060 | 25060 | 30000 | 25054 | 30002 | 75532 | 2245262 | 0 | 55056 | 30202 | 40003 | 0 | 30202 | 70004 | 24960 | 30000 | 40100 |
70204 | 190052 | 55060 | 25060 | 30000 | 25054 | 30002 | 75532 | 2245262 | 0 | 55056 | 30202 | 40003 | 0 | 30238 | 70088 | 24897 | 30000 | 40100 |
70204 | 190052 | 55060 | 25060 | 30000 | 25054 | 30002 | 75532 | 2245262 | 0 | 55056 | 30202 | 40003 | 0 | 30202 | 70004 | 24960 | 30000 | 40100 |
70204 | 190052 | 55060 | 25060 | 30000 | 25054 | 30002 | 75532 | 2245262 | 0 | 55056 | 30202 | 40003 | 0 | 30202 | 70004 | 24960 | 30000 | 40100 |
Result (median cycles for code): 19.0052
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
70040 | 190588 | 53983 | 23892 | 30091 | 23818 | 30002 | 75450 | 2249099 | 0 | 55006 | 30022 | 40003 | 0 | 30020 | 70000 | 24999 | 30000 | 40010 |
70024 | 190050 | 55009 | 25009 | 30000 | 25002 | 30000 | 75366 | 2248978 | 0 | 55002 | 30020 | 40000 | 0 | 30058 | 70088 | 24692 | 30000 | 40010 |
76540 | 222302 | 59340 | 27440 | 31900 | 27159 | 30000 | 75444 | 2249110 | 0 | 55002 | 30020 | 40000 | 0 | 30020 | 70000 | 24999 | 30000 | 40010 |
70024 | 190050 | 55009 | 25009 | 30000 | 25002 | 30000 | 75366 | 2248975 | 0 | 55002 | 30020 | 40000 | 0 | 30020 | 70000 | 24999 | 30000 | 40010 |
70024 | 190050 | 55009 | 25009 | 30000 | 25002 | 30000 | 75366 | 2248975 | 0 | 55002 | 30020 | 40000 | 0 | 30020 | 70000 | 24999 | 30000 | 40010 |
70025 | 190095 | 54712 | 24682 | 30030 | 24676 | 30000 | 75444 | 2249094 | 0 | 55002 | 30020 | 40000 | 0 | 30020 | 70000 | 24999 | 30000 | 40010 |
70024 | 190053 | 55009 | 25009 | 30000 | 25002 | 30000 | 75380 | 2249087 | 0 | 55002 | 30020 | 40000 | 0 | 30020 | 70000 | 24999 | 30000 | 40010 |
70024 | 190053 | 55009 | 25009 | 30000 | 25002 | 30000 | 75366 | 2249089 | 0 | 55002 | 30020 | 40000 | 0 | 30058 | 70088 | 24552 | 30000 | 40010 |
70024 | 190050 | 55009 | 25009 | 30000 | 25002 | 30038 | 72407 | 2248359 | 0 | 54012 | 30058 | 40051 | 0 | 30020 | 70000 | 25001 | 30000 | 40010 |
70024 | 190053 | 55009 | 25009 | 30000 | 25002 | 30000 | 75366 | 2248975 | 0 | 55002 | 30020 | 40000 | 0 | 30020 | 70000 | 24999 | 30000 | 40010 |
Code:
caspl w0, w1, w2, w3, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 19.0094
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
70199 | 190266 | 112500 | 57731 | 54769 | 57924 | 104682 | 338924 | 1320350 | 165570 | 70392 | 139578 | 70402 | 244295 | 60397 | 30000 | 40092 |
70196 | 190101 | 115486 | 60745 | 54741 | 60960 | 104968 | 337955 | 1317309 | 166242 | 70582 | 139960 | 70582 | 244925 | 60774 | 30000 | 40090 |
70194 | 190094 | 115881 | 61074 | 54807 | 61274 | 104671 | 342905 | 1325031 | 165747 | 70384 | 139564 | 70384 | 244232 | 60378 | 30000 | 40090 |
70194 | 190095 | 115586 | 60876 | 54710 | 61076 | 104758 | 333987 | 1321316 | 164592 | 70444 | 139680 | 70388 | 244252 | 60565 | 30000 | 40092 |
70194 | 190095 | 115300 | 60682 | 54618 | 61080 | 104976 | 340424 | 1319631 | 166056 | 70592 | 139972 | 70592 | 244936 | 60382 | 30000 | 40090 |
70187 | 190158 | 114589 | 59936 | 54653 | 60340 | 104976 | 340440 | 1319652 | 166056 | 70592 | 139972 | 70384 | 244232 | 60576 | 30000 | 40090 |
70194 | 190094 | 116277 | 61272 | 55005 | 61274 | 105018 | 334448 | 1313128 | 166298 | 70618 | 140028 | 70386 | 244245 | 60561 | 30000 | 40092 |
70194 | 190094 | 114884 | 60476 | 54408 | 60876 | 104668 | 339391 | 1321250 | 165742 | 70382 | 139560 | 70582 | 244925 | 60972 | 30000 | 40090 |
70194 | 190094 | 115584 | 60876 | 54708 | 61076 | 104671 | 339375 | 1320632 | 165747 | 70384 | 139564 | 70572 | 244896 | 60949 | 30000 | 40090 |
70194 | 190092 | 116088 | 61084 | 55004 | 61092 | 104968 | 334384 | 1312811 | 166242 | 70582 | 139960 | 70384 | 244232 | 60572 | 30000 | 40090 |
Result (median cycles for code): 19.0101
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
70017 | 190228 | 114948 | 59905 | 55043 | 59912 | 104951 | 336348 | 1314573 | 0 | 166839 | 70030 | 139936 | 0 | 70032 | 244896 | 61851 | 30000 | 0 | 40002 |
70016 | 190101 | 116894 | 61881 | 55013 | 61892 | 104973 | 336026 | 1313996 | 0 | 166877 | 70044 | 139966 | 0 | 70030 | 244889 | 61849 | 30000 | 0 | 40002 |
70016 | 190098 | 116889 | 61879 | 55010 | 61890 | 104953 | 335952 | 1313900 | 0 | 166843 | 70030 | 139938 | 0 | 70030 | 244889 | 61849 | 30000 | 0 | 40002 |
70016 | 190098 | 116889 | 61879 | 55010 | 61890 | 104953 | 335952 | 1313900 | 0 | 166843 | 70030 | 139938 | 0 | 70080 | 245058 | 61813 | 30000 | 0 | 39992 |
70016 | 190117 | 116889 | 61879 | 55010 | 61890 | 104953 | 335952 | 1313900 | 0 | 166843 | 70030 | 139938 | 0 | 70030 | 244889 | 61849 | 30000 | 0 | 40002 |
70016 | 190098 | 116889 | 61879 | 55010 | 61890 | 104953 | 335952 | 1313900 | 0 | 166843 | 70030 | 139938 | 0 | 70030 | 244889 | 61849 | 30000 | 0 | 40002 |
70016 | 190098 | 116889 | 61879 | 55010 | 61890 | 104953 | 335952 | 1313900 | 0 | 166843 | 70030 | 139938 | 0 | 70030 | 244889 | 61849 | 30000 | 0 | 40002 |
70009 | 190140 | 115950 | 60899 | 55051 | 60918 | 104953 | 335952 | 1313900 | 0 | 166843 | 70030 | 139938 | 0 | 70030 | 244889 | 61849 | 30000 | 0 | 40002 |
70016 | 190098 | 116889 | 61879 | 55010 | 61890 | 104953 | 335952 | 1313900 | 0 | 166843 | 70030 | 139938 | 0 | 70030 | 244889 | 61849 | 30000 | 0 | 40002 |
70016 | 190098 | 116889 | 61879 | 55010 | 61890 | 105025 | 334796 | 1314282 | 0 | 166507 | 70080 | 140036 | 0 | 70826 | 245639 | 60203 | 30285 | 1 | 40394 |