Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
cas w0, w1, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 4.000
Issues: 3.000
Integer unit issues: 0.001
Load/store unit issues: 3.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
74007 | 34633 | 3013 | 1 | 3012 | 3003 | 15034 | 3003 | 1001 | 3003 | 1001 | 6006 | 1 | 3000 | 1001 |
74005 | 34295 | 3004 | 1 | 3003 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34266 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34256 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34276 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34258 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34269 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34256 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34271 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34271 | 3001 | 1 | 3000 | 3000 | 15022 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
Code:
cas w0, w1, [x6]
add x6, x6, 4
(fused SUBS/B.cc loop)
Result (median cycles for code): 7.0054
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50210 | 70342 | 43953 | 13884 | 30069 | 13883 | 30003 | 47284 | 790906 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70058 | 45727 | 15726 | 30001 | 15725 | 30003 | 47237 | 790863 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15626 | 30000 | 20100 |
50205 | 70171 | 44573 | 14512 | 30061 | 14511 | 30003 | 47238 | 790869 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70051 | 45727 | 15726 | 30001 | 15725 | 30003 | 47238 | 790894 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70051 | 45727 | 15726 | 30001 | 15725 | 30039 | 47433 | 791293 | 0 | 45777 | 20225 | 30039 | 0 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70051 | 45727 | 15726 | 30001 | 15725 | 30003 | 47237 | 790863 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70051 | 45727 | 15726 | 30001 | 15725 | 30003 | 47243 | 790894 | 0 | 45728 | 20201 | 30003 | 0 | 20225 | 60078 | 15638 | 30000 | 20100 |
50204 | 70058 | 45727 | 15726 | 30001 | 15725 | 30003 | 47236 | 790863 | 0 | 45728 | 20201 | 30003 | 0 | 20323 | 60363 | 15704 | 30000 | 20100 |
50204 | 70051 | 45727 | 15726 | 30001 | 15725 | 30039 | 46152 | 790859 | 0 | 45390 | 20225 | 30039 | 0 | 20201 | 60006 | 15626 | 30000 | 20100 |
50204 | 70508 | 46004 | 15823 | 30181 | 15822 | 30003 | 47237 | 790844 | 0 | 45728 | 20201 | 30003 | 0 | 20201 | 60006 | 15626 | 30000 | 20100 |
Result (median cycles for code): 7.0058
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50034 | 70583 | 43961 | 13828 | 30133 | 13830 | 30003 | 46996 | 791245 | 45638 | 20021 | 30003 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70058 | 45636 | 15636 | 30000 | 15635 | 30000 | 46996 | 791227 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70058 | 45636 | 15636 | 30000 | 15635 | 30000 | 46996 | 791229 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70058 | 45636 | 15636 | 30000 | 15635 | 30000 | 46996 | 791241 | 45635 | 20020 | 30000 | 20045 | 60078 | 13953 | 30000 | 20010 |
50024 | 70058 | 45636 | 15636 | 30000 | 15635 | 30000 | 46996 | 791229 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70058 | 45636 | 15636 | 30000 | 15635 | 30000 | 46996 | 791225 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70058 | 45636 | 15636 | 30000 | 15635 | 30000 | 46996 | 791229 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70058 | 45636 | 15636 | 30000 | 15635 | 30000 | 46997 | 791233 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
50024 | 70058 | 45636 | 15636 | 30000 | 15635 | 30000 | 46996 | 791236 | 45635 | 20020 | 30000 | 20045 | 60078 | 15368 | 30000 | 20010 |
50024 | 70058 | 45636 | 15636 | 30000 | 15635 | 30000 | 46996 | 791238 | 45635 | 20020 | 30000 | 20020 | 60000 | 15626 | 30000 | 20010 |
Code:
cas w0, w1, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 10.5320
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
43029 | 105037 | 62001 | 13588 | 48413 | 12746 | 75766 | 272950 | 1154355 | 92299 | 25884 | 76274 | 26148 | 154003 | 15344 | 30000 | 12931 |
43179 | 107665 | 69195 | 19765 | 49430 | 17586 | 77942 | 290984 | 1185296 | 95344 | 26624 | 78596 | 27150 | 159475 | 19423 | 30000 | 13074 |
43007 | 105178 | 67064 | 18867 | 48197 | 16487 | 77972 | 299480 | 1198863 | 95106 | 26626 | 78604 | 26215 | 153229 | 18696 | 30000 | 12902 |
43102 | 105783 | 67224 | 19167 | 48057 | 17392 | 77931 | 308139 | 1210804 | 95322 | 26757 | 78711 | 26148 | 154488 | 18338 | 30000 | 12946 |
42991 | 103888 | 66740 | 18691 | 48049 | 16763 | 76231 | 279464 | 1174987 | 92840 | 26237 | 76939 | 26292 | 154453 | 18216 | 30000 | 12960 |
43030 | 104350 | 66180 | 18375 | 47805 | 16668 | 76337 | 301080 | 1197941 | 93304 | 26154 | 77258 | 26940 | 157966 | 15821 | 30000 | 13062 |
43188 | 107398 | 67646 | 19440 | 48206 | 17668 | 75835 | 282555 | 1224474 | 92541 | 26207 | 76631 | 26395 | 155946 | 18869 | 30000 | 12996 |
43067 | 106348 | 66352 | 18546 | 47806 | 16648 | 77280 | 247944 | 1132661 | 93713 | 26279 | 77607 | 26332 | 155599 | 18650 | 30000 | 12970 |
43091 | 106198 | 67311 | 18871 | 48440 | 17235 | 79030 | 302883 | 1187109 | 96441 | 26989 | 79743 | 26775 | 157702 | 18507 | 30000 | 13015 |
42864 | 104036 | 65421 | 18246 | 47175 | 16101 | 78105 | 313053 | 1224162 | 95620 | 26663 | 78595 | 27013 | 158952 | 19152 | 30000 | 13048 |
Result (median cycles for code): 10.6100
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
42892 | 105701 | 62908 | 13603 | 49305 | 12757 | 79941 | 282423 | 1222183 | 97864 | 26934 | 80640 | 52527 | 172641 | 142 | 27678 | 37272 | 47 | 26511 |
42682 | 102469 | 67321 | 18715 | 48606 | 15543 | 80181 | 280948 | 1225626 | 98210 | 27015 | 80913 | 26200 | 156919 | 0 | 19380 | 30000 | 0 | 12856 |
42815 | 104358 | 68134 | 19168 | 48966 | 16477 | 77698 | 281023 | 1176824 | 94498 | 26098 | 78170 | 26645 | 159596 | 0 | 19688 | 30000 | 0 | 12954 |
43030 | 107610 | 69584 | 19949 | 49635 | 18015 | 78865 | 279806 | 1199735 | 96256 | 26534 | 79456 | 26297 | 157465 | 0 | 19433 | 30000 | 0 | 12882 |
42861 | 105111 | 68402 | 19319 | 49083 | 16858 | 79496 | 279831 | 1216376 | 97268 | 26775 | 80173 | 26506 | 158709 | 0 | 16426 | 30000 | 0 | 12917 |
42799 | 104108 | 68018 | 19082 | 48936 | 16412 | 76874 | 275473 | 1156680 | 93286 | 25795 | 77265 | 26841 | 160782 | 0 | 19860 | 30000 | 0 | 12993 |
42895 | 105630 | 68694 | 19479 | 49215 | 17113 | 78268 | 279064 | 1188643 | 95366 | 26311 | 78801 | 25900 | 155121 | 0 | 19133 | 30000 | 0 | 12804 |
42909 | 105790 | 68755 | 19498 | 49257 | 17180 | 78826 | 278031 | 1199368 | 96246 | 26509 | 79403 | 26048 | 156024 | 0 | 19230 | 30000 | 0 | 12835 |
42829 | 104661 | 68241 | 19211 | 49030 | 16601 | 78140 | 277914 | 1184265 | 95182 | 26270 | 78662 | 24725 | 148042 | 0 | 18331 | 30000 | 0 | 12569 |
42945 | 106291 | 68962 | 19624 | 49338 | 17418 | 77860 | 283121 | 1177576 | 94722 | 26165 | 78349 | 26228 | 157111 | 0 | 19424 | 30000 | 0 | 12871 |