Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
casab w0, w1, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 4.000
Issues: 3.000
Integer unit issues: 0.001
Load/store unit issues: 3.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
74004 | 35991 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34908 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 35135 | 3001 | 1 | 3000 | 3000 | 15087 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34455 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34362 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34677 | 3001 | 1 | 3000 | 3000 | 15036 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34342 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34233 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34372 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34288 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
Code:
casab w0, w1, [x6]
add x6, x6, 2
(fused SUBS/B.cc loop)
Result (median cycles for code): 7.0060
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50208 | 70262 | 41836 | 11786 | 30050 | 11786 | 30003 | 42913 | 279888 | 44269 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50205 | 70102 | 42008 | 11978 | 30030 | 11977 | 30003 | 42900 | 279895 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 70058 | 44267 | 14266 | 30001 | 14265 | 30003 | 42890 | 279913 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 70058 | 44267 | 14266 | 30001 | 14265 | 30003 | 42890 | 279911 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 70058 | 44267 | 14266 | 30001 | 14265 | 30003 | 42890 | 279913 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 70058 | 44267 | 14266 | 30001 | 14265 | 30003 | 42889 | 279912 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 70058 | 44267 | 14266 | 30001 | 14265 | 30003 | 42890 | 279911 | 44268 | 20201 | 30003 | 20223 | 60072 | 11874 | 30000 | 20100 |
50204 | 70058 | 44267 | 14266 | 30001 | 14265 | 30003 | 42890 | 279911 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 70058 | 44267 | 14266 | 30001 | 14265 | 30003 | 42890 | 279913 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 70058 | 44267 | 14266 | 30001 | 14265 | 30003 | 42891 | 279929 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
Result (median cycles for code): 7.0060
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
50028 | 70257 | 41744 | 11696 | 30048 | 11696 | 30003 | 42671 | 280333 | 0 | 44178 | 20021 | 30003 | 0 | 20020 | 60000 | 0 | 14166 | 30000 | 0 | 20010 |
50025 | 70104 | 41808 | 11778 | 30030 | 11777 | 30000 | 42621 | 280201 | 0 | 44175 | 20020 | 30000 | 0 | 20020 | 60000 | 0 | 14166 | 30000 | 0 | 20010 |
50024 | 70058 | 44176 | 14176 | 30000 | 14175 | 30066 | 42944 | 281706 | 0 | 44268 | 20064 | 30066 | 0 | 20042 | 60066 | 0 | 14180 | 30000 | 0 | 20010 |
50024 | 70137 | 44219 | 14190 | 30029 | 14187 | 30033 | 42703 | 281505 | 0 | 44224 | 20042 | 30033 | 0 | 20020 | 60000 | 0 | 14167 | 30000 | 0 | 20010 |
50024 | 70160 | 44224 | 14194 | 30030 | 14191 | 30066 | 42871 | 281898 | 0 | 44272 | 20064 | 30066 | 0 | 20064 | 60132 | 0 | 14196 | 30000 | 0 | 20010 |
50024 | 70349 | 44352 | 14236 | 30116 | 14235 | 30267 | 46589 | 291704 | 0 | 42689 | 20337 | 30305 | 0 | 20021 | 60006 | 0 | 14167 | 30000 | 0 | 20010 |
50024 | 70060 | 44177 | 14177 | 30000 | 14175 | 30000 | 42661 | 280352 | 0 | 44175 | 20020 | 30000 | 0 | 20020 | 60000 | 0 | 14166 | 30000 | 0 | 20010 |
50024 | 70210 | 44262 | 14204 | 30058 | 14203 | 30132 | 43201 | 283430 | 0 | 44364 | 20108 | 30132 | 0 | 20042 | 60066 | 0 | 14183 | 30000 | 0 | 20010 |
50024 | 70208 | 44265 | 14207 | 30058 | 14206 | 30165 | 43251 | 284153 | 0 | 44411 | 20130 | 30165 | 0 | 20152 | 60396 | 0 | 11833 | 30000 | 0 | 20010 |
50024 | 70064 | 44178 | 14178 | 30000 | 14175 | 30000 | 42635 | 280240 | 0 | 44175 | 20020 | 30000 | 0 | 20020 | 60000 | 0 | 14166 | 30000 | 0 | 20010 |
Code:
casab w0, w1, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 22.0046
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40205 | 220104 | 36341 | 6311 | 30030 | 3211 | 30003 | 845688 | 2698613 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 8330 | 30000 | 10100 |
40204 | 220044 | 38431 | 8430 | 30001 | 4265 | 30003 | 845692 | 2698604 | 0 | 34268 | 10201 | 30003 | 0 | 10212 | 60072 | 8125 | 30000 | 10100 |
40204 | 220044 | 38432 | 8431 | 30001 | 4265 | 30003 | 845692 | 2698604 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 8331 | 30000 | 10100 |
40204 | 220044 | 38431 | 8430 | 30001 | 4265 | 30003 | 845692 | 2698598 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 8330 | 30000 | 10100 |
40204 | 220044 | 38431 | 8430 | 30001 | 4265 | 30036 | 527518 | 2699490 | 0 | 32737 | 10212 | 30036 | 0 | 10212 | 60072 | 4344 | 30000 | 10100 |
40204 | 220044 | 38432 | 8431 | 30001 | 4265 | 30003 | 845692 | 2698604 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 8330 | 30000 | 10100 |
40204 | 220044 | 38431 | 8430 | 30001 | 4265 | 30003 | 845700 | 2698607 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 8323 | 30000 | 10100 |
40204 | 220046 | 38412 | 8411 | 30001 | 4265 | 30003 | 845700 | 2698629 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 8297 | 30000 | 10100 |
40204 | 220046 | 38398 | 8397 | 30001 | 4265 | 30003 | 845700 | 2698629 | 0 | 34268 | 10201 | 30003 | 0 | 10212 | 60072 | 6341 | 30000 | 10100 |
40204 | 220046 | 38398 | 8397 | 30001 | 4265 | 30003 | 845700 | 2698629 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 8297 | 30000 | 10100 |
Result (median cycles for code): 22.0044
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
40027 | 220152 | 33380 | 3344 | 30036 | 1681 | 30003 | 845430 | 2698697 | 34178 | 10021 | 30003 | 10032 | 60072 | 0 | 6721 | 30000 | 0 | 10010 |
40024 | 220046 | 38339 | 8339 | 30000 | 4175 | 30000 | 845409 | 2698578 | 34175 | 10020 | 30000 | 10020 | 60000 | 0 | 8329 | 30000 | 0 | 10010 |
40025 | 220078 | 35176 | 5146 | 30030 | 2580 | 30000 | 845409 | 2698578 | 34175 | 10020 | 30000 | 10020 | 60000 | 0 | 8329 | 30000 | 0 | 10010 |
40024 | 220046 | 38339 | 8339 | 30000 | 4175 | 30000 | 845409 | 2698578 | 34175 | 10020 | 30000 | 10020 | 60000 | 0 | 8329 | 30000 | 0 | 10010 |
40024 | 220046 | 38339 | 8339 | 30000 | 4175 | 30000 | 845409 | 2698578 | 34175 | 10020 | 30000 | 10020 | 60000 | 0 | 8329 | 30000 | 0 | 10010 |
40024 | 220046 | 38339 | 8339 | 30000 | 4175 | 30000 | 845409 | 2698578 | 34175 | 10020 | 30000 | 10032 | 60072 | 0 | 6474 | 30000 | 0 | 10010 |
40024 | 220048 | 38339 | 8339 | 30000 | 4175 | 30000 | 845409 | 2698578 | 34175 | 10020 | 30000 | 10020 | 60000 | 0 | 8329 | 30000 | 0 | 10010 |
40024 | 220046 | 38339 | 8339 | 30000 | 4175 | 30000 | 845409 | 2698578 | 34175 | 10020 | 30000 | 10020 | 60000 | 0 | 8329 | 30000 | 0 | 10010 |
40024 | 220046 | 38339 | 8339 | 30000 | 4175 | 30000 | 845409 | 2698578 | 34175 | 10020 | 30000 | 10020 | 60000 | 0 | 8329 | 30000 | 0 | 10010 |
40024 | 220046 | 38339 | 8339 | 30000 | 4175 | 30003 | 845431 | 2698739 | 34178 | 10021 | 30003 | 10020 | 60000 | 0 | 8328 | 30000 | 0 | 10010 |