Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
casa x0, x1, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 4.000
Issues: 3.000
Integer unit issues: 0.001
Load/store unit issues: 3.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
74006 | 34562 | 3007 | 1 | 3006 | 3000 | 15031 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34246 | 3001 | 1 | 3000 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34225 | 3001 | 1 | 3000 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 36776 | 3001 | 1 | 3000 | 3000 | 15012 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34244 | 3001 | 1 | 3000 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34224 | 3001 | 1 | 3000 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34225 | 3001 | 1 | 3000 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34224 | 3001 | 1 | 3000 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34244 | 3001 | 1 | 3000 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34225 | 3001 | 1 | 3000 | 3000 | 15020 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
Code:
casa x0, x1, [x6]
add x6, x6, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 7.0051
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50204 | 70083 | 44270 | 14269 | 30001 | 14265 | 30003 | 42907 | 279933 | 44269 | 20201 | 30003 | 20201 | 60006 | 14168 | 30000 | 20100 |
50204 | 70058 | 44269 | 14268 | 30001 | 14265 | 30003 | 42869 | 279836 | 44268 | 20201 | 30003 | 20201 | 60006 | 14167 | 30000 | 20100 |
50204 | 70051 | 44268 | 14267 | 30001 | 14265 | 30003 | 42869 | 279836 | 44268 | 20201 | 30003 | 20201 | 60006 | 14167 | 30000 | 20100 |
50204 | 70051 | 44268 | 14267 | 30001 | 14265 | 30003 | 42869 | 279836 | 44268 | 20201 | 30003 | 20201 | 60006 | 14167 | 30000 | 20100 |
50204 | 70051 | 44268 | 14267 | 30001 | 14265 | 30003 | 42869 | 279836 | 44268 | 20201 | 30003 | 20201 | 60006 | 14167 | 30000 | 20100 |
50204 | 70051 | 44268 | 14267 | 30001 | 14265 | 30003 | 42869 | 279836 | 44268 | 20201 | 30003 | 20201 | 60006 | 14167 | 30000 | 20100 |
50204 | 70051 | 44268 | 14267 | 30001 | 14265 | 30003 | 42869 | 279836 | 44268 | 20201 | 30003 | 20201 | 60006 | 14167 | 30000 | 20100 |
50204 | 70051 | 44268 | 14267 | 30001 | 14265 | 30003 | 42870 | 279838 | 44268 | 20201 | 30003 | 20201 | 60006 | 14167 | 30000 | 20100 |
50204 | 70051 | 44268 | 14267 | 30001 | 14265 | 30003 | 42869 | 279836 | 44268 | 20201 | 30003 | 20201 | 60006 | 14167 | 30000 | 20100 |
50204 | 70051 | 44268 | 14267 | 30001 | 14265 | 30003 | 42869 | 279836 | 44268 | 20201 | 30003 | 20201 | 60006 | 14167 | 30000 | 20100 |
Result (median cycles for code): 7.0051
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50034 | 70680 | 41833 | 11719 | 30114 | 11716 | 30003 | 42625 | 280257 | 44178 | 20021 | 30003 | 20020 | 60000 | 14168 | 30000 | 20010 |
50025 | 70109 | 43704 | 13674 | 30030 | 13671 | 30000 | 42618 | 280099 | 44176 | 20020 | 30000 | 20020 | 60000 | 14167 | 30000 | 20010 |
50024 | 70058 | 44178 | 14178 | 30000 | 14175 | 30000 | 42623 | 280195 | 44175 | 20020 | 30000 | 20020 | 60000 | 14167 | 30000 | 20010 |
50024 | 70051 | 44177 | 14177 | 30000 | 14175 | 30000 | 42600 | 280090 | 44175 | 20020 | 30000 | 20020 | 60000 | 14167 | 30000 | 20010 |
50024 | 70051 | 44177 | 14177 | 30000 | 14175 | 30000 | 42600 | 280090 | 44175 | 20020 | 30000 | 20043 | 60072 | 12711 | 30000 | 20010 |
50024 | 70058 | 44178 | 14178 | 30000 | 14175 | 30000 | 42652 | 280271 | 44175 | 20020 | 30000 | 20020 | 60000 | 14167 | 30000 | 20010 |
50024 | 70058 | 44178 | 14178 | 30000 | 14175 | 30000 | 42598 | 280166 | 44175 | 20020 | 30000 | 20020 | 60000 | 14167 | 30000 | 20010 |
50024 | 70051 | 44177 | 14177 | 30000 | 14175 | 30000 | 42600 | 280090 | 44175 | 20020 | 30000 | 20020 | 60000 | 14167 | 30000 | 20010 |
50024 | 70051 | 44177 | 14177 | 30000 | 14175 | 30000 | 42600 | 280090 | 44175 | 20020 | 30000 | 20043 | 60072 | 13048 | 30000 | 20010 |
50024 | 70058 | 44178 | 14178 | 30000 | 14175 | 30000 | 42600 | 280090 | 44175 | 20020 | 30000 | 20020 | 60000 | 14167 | 30000 | 20010 |
Code:
casa x0, x1, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 22.0044
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
40208 | 220179 | 33496 | 3431 | 30065 | 1772 | 30003 | 845692 | 2698560 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 0 | 8330 | 30000 | 0 | 10100 |
40204 | 220044 | 38431 | 8430 | 30001 | 4265 | 30003 | 845692 | 2698604 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 0 | 8330 | 30000 | 0 | 10100 |
40204 | 220044 | 38431 | 8430 | 30001 | 4265 | 30003 | 845692 | 2698604 | 0 | 34268 | 10201 | 30003 | 0 | 10212 | 60072 | 0 | 7428 | 30000 | 0 | 10100 |
40204 | 220044 | 38431 | 8430 | 30001 | 4265 | 30003 | 845692 | 2698604 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 0 | 8330 | 30000 | 0 | 10100 |
40204 | 220044 | 38431 | 8430 | 30001 | 4265 | 30003 | 845692 | 2698604 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 0 | 8330 | 30000 | 0 | 10100 |
40204 | 220044 | 38431 | 8430 | 30001 | 4265 | 30003 | 845692 | 2698604 | 0 | 34268 | 10201 | 30003 | 0 | 10212 | 60072 | 0 | 4015 | 30000 | 0 | 10100 |
40204 | 220044 | 38431 | 8430 | 30001 | 4265 | 30003 | 845692 | 2698604 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 0 | 8330 | 30000 | 0 | 10100 |
40204 | 220044 | 38431 | 8430 | 30001 | 4265 | 30003 | 845692 | 2698604 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 0 | 8330 | 30000 | 0 | 10100 |
40204 | 220044 | 38431 | 8430 | 30001 | 4265 | 30036 | 603930 | 2699544 | 0 | 33110 | 10212 | 30036 | 0 | 10201 | 60006 | 0 | 8298 | 30000 | 0 | 10100 |
40204 | 220044 | 38431 | 8430 | 30001 | 4265 | 30003 | 845692 | 2698604 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 0 | 8330 | 30000 | 0 | 10100 |
Result (median cycles for code): 22.0039
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
40027 | 220277 | 33387 | 3346 | 30041 | 1681 | 30003 | 845418 | 2698697 | 34178 | 10021 | 30003 | 10020 | 60000 | 8329 | 30000 | 0 | 10010 |
40024 | 220039 | 38339 | 8339 | 30000 | 4175 | 30000 | 845430 | 2698702 | 34175 | 10020 | 30000 | 10020 | 60000 | 8329 | 30000 | 0 | 10010 |
40024 | 220039 | 38339 | 8339 | 30000 | 4175 | 30036 | 634885 | 2698922 | 33177 | 10032 | 30036 | 10020 | 60000 | 8329 | 30000 | 0 | 10010 |
40024 | 220039 | 38339 | 8339 | 30000 | 4175 | 30036 | 845635 | 2699474 | 34215 | 10032 | 30036 | 10031 | 60066 | 8328 | 30000 | 0 | 10010 |
40024 | 220124 | 38366 | 8337 | 30029 | 4180 | 30033 | 845541 | 2699662 | 34210 | 10031 | 30033 | 10108 | 60528 | 8318 | 30000 | 0 | 10010 |
40024 | 221296 | 38697 | 8320 | 30377 | 4214 | 30036 | 833731 | 2699024 | 34156 | 10032 | 30036 | 10020 | 60000 | 8330 | 30000 | 0 | 10010 |
40024 | 220036 | 38338 | 8338 | 30000 | 4175 | 30000 | 845422 | 2698677 | 34175 | 10020 | 30000 | 10020 | 60000 | 8330 | 30000 | 0 | 10010 |
40024 | 220036 | 38338 | 8338 | 30000 | 4175 | 30000 | 845422 | 2698677 | 34175 | 10020 | 30000 | 10020 | 60000 | 8330 | 30000 | 0 | 10010 |
40024 | 220036 | 38338 | 8338 | 30000 | 4175 | 30036 | 486381 | 2698897 | 32446 | 10032 | 30036 | 10020 | 60000 | 8330 | 30000 | 0 | 10010 |
40024 | 220036 | 38338 | 8338 | 30000 | 4175 | 30000 | 845422 | 2698677 | 34175 | 10020 | 30000 | 10020 | 60000 | 8330 | 30000 | 0 | 10010 |