Apple Microarchitecture Research by Dougall Johnson M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
casa w0, w1, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 4.000
Issues: 3.000
Integer unit issues: 0.001
Load/store unit issues: 3.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
74006 | 34555 | 3007 | 1 | 3006 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34232 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34228 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 35289 | 3001 | 1 | 3000 | 3000 | 15031 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34301 | 3001 | 1 | 3000 | 3000 | 15031 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34244 | 3001 | 1 | 3000 | 3000 | 15031 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34249 | 3001 | 1 | 3000 | 3000 | 15031 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34253 | 3001 | 1 | 3000 | 3000 | 15031 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34288 | 3001 | 1 | 3000 | 3000 | 15031 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
74004 | 34251 | 3001 | 1 | 3000 | 3000 | 15031 | 3000 | 1000 | 3000 | 1000 | 6000 | 1 | 3000 | 1000 |
Code:
casa w0, w1, [x6]
add x6, x6, 4
(fused SUBS/B.cc loop)
Result (median cycles for code): 7.0051
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50210 | 70360 | 41861 | 11792 | 30069 | 11793 | 30036 | 35894 | 280491 | 41961 | 20223 | 30036 | 20201 | 60006 | 14166 | 30000 | 20100 |
50237 | 70357 | 42018 | 11955 | 30063 | 11953 | 30003 | 42858 | 279845 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 70078 | 44269 | 14268 | 30001 | 14265 | 30003 | 42921 | 279916 | 44269 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 70051 | 44267 | 14266 | 30001 | 14265 | 30003 | 42903 | 279927 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 70051 | 44267 | 14266 | 30001 | 14265 | 30003 | 42902 | 279925 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 70051 | 44267 | 14266 | 30001 | 14265 | 30003 | 42903 | 279927 | 44268 | 20201 | 30003 | 20223 | 60072 | 12385 | 30000 | 20100 |
50204 | 70051 | 44267 | 14266 | 30001 | 14265 | 30003 | 42901 | 279937 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 70051 | 44267 | 14266 | 30001 | 14265 | 30036 | 41650 | 280351 | 43866 | 20223 | 30036 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 70051 | 44267 | 14266 | 30001 | 14265 | 30003 | 42902 | 279923 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 70051 | 44267 | 14266 | 30001 | 14265 | 30003 | 42903 | 279927 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
Result (median cycles for code): 7.0058
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50030 | 70489 | 41771 | 11702 | 30069 | 11703 | 30003 | 42625 | 280263 | 44179 | 20021 | 30003 | 20020 | 60000 | 14167 | 30000 | 20010 |
50024 | 70060 | 44176 | 14176 | 30000 | 14175 | 30003 | 42683 | 280337 | 44178 | 20021 | 30003 | 20020 | 60000 | 14166 | 30000 | 20010 |
50024 | 70064 | 44178 | 14178 | 30000 | 14175 | 30000 | 42655 | 280278 | 44175 | 20020 | 30000 | 20020 | 60000 | 14166 | 30000 | 20010 |
50024 | 70058 | 44176 | 14176 | 30000 | 14175 | 30000 | 42742 | 280487 | 44175 | 20020 | 30000 | 20020 | 60000 | 14166 | 30000 | 20010 |
50024 | 70062 | 44177 | 14177 | 30000 | 14175 | 30000 | 42661 | 280336 | 44175 | 20020 | 30000 | 20020 | 60000 | 14166 | 30000 | 20010 |
50024 | 70058 | 44176 | 14176 | 30000 | 14175 | 30000 | 42659 | 280302 | 44175 | 20020 | 30000 | 20020 | 60000 | 14166 | 30000 | 20010 |
50024 | 70058 | 44176 | 14176 | 30000 | 14175 | 30036 | 39514 | 280733 | 43146 | 20043 | 30036 | 20020 | 60000 | 14166 | 30000 | 20010 |
50024 | 70058 | 44176 | 14176 | 30000 | 14175 | 30000 | 42592 | 280192 | 44175 | 20020 | 30000 | 20020 | 60000 | 14166 | 30000 | 20010 |
50024 | 70058 | 44176 | 14176 | 30000 | 14175 | 30000 | 42599 | 280220 | 44175 | 20020 | 30000 | 20020 | 60000 | 14167 | 30000 | 20010 |
50024 | 70058 | 44176 | 14176 | 30000 | 14175 | 30000 | 42596 | 280187 | 44175 | 20020 | 30000 | 20020 | 60000 | 14166 | 30000 | 20010 |
Code:
casa w0, w1, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 22.0039
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
40207 | 220298 | 33477 | 3436 | 0 | 30041 | 1771 | 0 | 30003 | 845700 | 2698563 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 0 | 8297 | 30000 | 0 | 10100 |
40204 | 220046 | 38398 | 8397 | 0 | 30001 | 4265 | 0 | 30003 | 845725 | 2698698 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 0 | 8297 | 30000 | 0 | 10100 |
40204 | 220039 | 38398 | 8397 | 0 | 30001 | 4265 | 0 | 30036 | 566773 | 2698847 | 0 | 32930 | 10214 | 30036 | 0 | 10201 | 60006 | 0 | 8297 | 30000 | 0 | 10100 |
40204 | 220039 | 38398 | 8397 | 0 | 30001 | 4265 | 0 | 30003 | 845703 | 2698642 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 0 | 8297 | 30000 | 0 | 10100 |
40204 | 220039 | 38398 | 8397 | 0 | 30001 | 4265 | 0 | 30003 | 845707 | 2698661 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 0 | 8297 | 30000 | 0 | 10100 |
40205 | 220067 | 37603 | 7573 | 0 | 30030 | 3856 | 0 | 30003 | 845722 | 2698710 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 0 | 8297 | 30000 | 0 | 10100 |
40204 | 220045 | 38400 | 8399 | 0 | 30001 | 4265 | 0 | 30003 | 845700 | 2698629 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 0 | 8297 | 30000 | 0 | 10100 |
40204 | 220039 | 38398 | 8397 | 0 | 30001 | 4265 | 0 | 30003 | 845700 | 2698629 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 0 | 8297 | 30000 | 0 | 10100 |
40204 | 220039 | 38398 | 8397 | 0 | 30001 | 4265 | 0 | 30003 | 845700 | 2698629 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 0 | 8297 | 30000 | 0 | 10100 |
40205 | 220078 | 33639 | 3609 | 0 | 30030 | 1857 | 0 | 30003 | 845735 | 2698727 | 0 | 34268 | 10201 | 30003 | 0 | 10201 | 60006 | 0 | 8297 | 30000 | 0 | 10100 |
Result (median cycles for code): 22.0039
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
40028 | 220182 | 33411 | 3344 | 0 | 30067 | 1683 | 0 | 30003 | 845409 | 2698595 | 34178 | 10021 | 30003 | 10020 | 60000 | 0 | 8329 | 30000 | 0 | 10010 |
40024 | 220039 | 38339 | 8339 | 0 | 30000 | 4175 | 0 | 30000 | 845409 | 2698578 | 34175 | 10020 | 30000 | 10031 | 60066 | 0 | 8016 | 30000 | 0 | 10010 |
40024 | 220056 | 38339 | 8339 | 0 | 30000 | 4175 | 0 | 30000 | 845409 | 2698578 | 34175 | 10020 | 30000 | 10020 | 60000 | 0 | 8329 | 30000 | 0 | 10010 |
124143 | 523808 | 110640 | 57383 | 1044 | 52213 | 53385 | 986 | 30003 | 845449 | 2698903 | 34178 | 10021 | 30003 | 10020 | 60000 | 0 | 8329 | 30000 | 0 | 10010 |
40024 | 220051 | 38339 | 8339 | 0 | 30000 | 4175 | 0 | 30000 | 845449 | 2698910 | 34175 | 10020 | 30000 | 10032 | 60072 | 0 | 7350 | 30000 | 0 | 10010 |
40024 | 220050 | 38339 | 8339 | 0 | 30000 | 4175 | 0 | 30000 | 845449 | 2698908 | 34175 | 10020 | 30000 | 10020 | 60000 | 0 | 8329 | 30000 | 0 | 10010 |
40024 | 220039 | 38339 | 8339 | 0 | 30000 | 4175 | 0 | 30000 | 845409 | 2698578 | 34175 | 10020 | 30000 | 10020 | 60000 | 0 | 8329 | 30000 | 0 | 10010 |
40024 | 220040 | 38339 | 8339 | 0 | 30000 | 4175 | 0 | 30003 | 845397 | 2698566 | 34178 | 10021 | 30003 | 10021 | 60006 | 0 | 8329 | 30000 | 0 | 10010 |
40024 | 220037 | 38338 | 8338 | 0 | 30000 | 4175 | 0 | 30000 | 845397 | 2698549 | 34175 | 10020 | 30000 | 10020 | 60000 | 0 | 8328 | 30000 | 0 | 10010 |
40025 | 220095 | 35738 | 5708 | 0 | 30030 | 2866 | 0 | 30000 | 845397 | 2698549 | 34175 | 10020 | 30000 | 10020 | 60000 | 0 | 8328 | 30000 | 0 | 10010 |