Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
casalb w0, w1, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 4.000
Issues: 3.000
Integer unit issues: 0.001
Load/store unit issues: 3.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
74007 | 34561 | 3010 | 1 | 3009 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 0 | 1 | 3000 | 0 | 1000 |
74004 | 34586 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 0 | 1 | 3000 | 0 | 1000 |
74004 | 34233 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 0 | 1 | 3000 | 0 | 1000 |
74004 | 34229 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 0 | 1 | 3000 | 0 | 1000 |
74004 | 34230 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 0 | 1 | 3000 | 0 | 1000 |
74004 | 34235 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 0 | 1 | 3000 | 0 | 1000 |
74004 | 34234 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 0 | 1 | 3000 | 0 | 1000 |
74004 | 34266 | 3001 | 1 | 3000 | 3000 | 15035 | 3000 | 1000 | 3000 | 1000 | 6000 | 0 | 1 | 3000 | 0 | 1000 |
74004 | 34372 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 0 | 1 | 3000 | 0 | 1000 |
74004 | 34444 | 3001 | 1 | 3000 | 3000 | 15033 | 3000 | 1000 | 3000 | 1000 | 6000 | 0 | 1 | 3000 | 0 | 1000 |
Code:
casalb w0, w1, [x6]
add x6, x6, 2
(fused SUBS/B.cc loop)
Result (median cycles for code): 9.0054
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50208 | 90254 | 41830 | 11785 | 30045 | 11785 | 30003 | 42898 | 354987 | 44268 | 20201 | 30003 | 20223 | 60072 | 12870 | 30000 | 20100 |
50204 | 90060 | 44267 | 14266 | 30001 | 14265 | 30003 | 42893 | 354972 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 90060 | 44267 | 14266 | 30001 | 14265 | 30003 | 42893 | 354999 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 90060 | 44267 | 14266 | 30001 | 14265 | 30003 | 42893 | 355006 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 90060 | 44267 | 14266 | 30001 | 14265 | 30036 | 38595 | 355443 | 42860 | 20223 | 30036 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 90060 | 44267 | 14266 | 30001 | 14265 | 30003 | 42884 | 354964 | 44268 | 20201 | 30003 | 20201 | 60006 | 14168 | 30000 | 20100 |
50204 | 90060 | 44267 | 14266 | 30001 | 14265 | 30003 | 42893 | 354977 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 90060 | 44267 | 14266 | 30001 | 14265 | 30003 | 42893 | 354997 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 90060 | 44267 | 14266 | 30001 | 14265 | 30003 | 42893 | 355004 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
50204 | 90060 | 44267 | 14266 | 30001 | 14265 | 30003 | 42893 | 355000 | 44268 | 20201 | 30003 | 20201 | 60006 | 14166 | 30000 | 20100 |
Result (median cycles for code): 9.0051
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
50028 | 90924 | 41745 | 11697 | 30048 | 11695 | 30003 | 42611 | 355243 | 44178 | 20021 | 30003 | 20020 | 60000 | 14166 | 30000 | 20010 |
50024 | 90058 | 44176 | 14176 | 30000 | 14175 | 30000 | 42613 | 355223 | 44175 | 20020 | 30000 | 20020 | 60000 | 14166 | 30000 | 20010 |
50024 | 90051 | 44176 | 14176 | 30000 | 14175 | 30000 | 42613 | 355218 | 44175 | 20020 | 30000 | 20020 | 60000 | 14166 | 30000 | 20010 |
50024 | 90051 | 44176 | 14176 | 30000 | 14175 | 30000 | 42613 | 355213 | 44175 | 20020 | 30000 | 20043 | 60072 | 11891 | 30000 | 20010 |
50024 | 90051 | 44176 | 14176 | 30000 | 14175 | 30000 | 42613 | 355223 | 44175 | 20020 | 30000 | 20020 | 60000 | 14166 | 30000 | 20010 |
50024 | 90051 | 44176 | 14176 | 30000 | 14175 | 30000 | 42613 | 355212 | 44175 | 20020 | 30000 | 20020 | 60000 | 14166 | 30000 | 20010 |
50024 | 90051 | 44176 | 14176 | 30000 | 14175 | 30000 | 42613 | 355218 | 44175 | 20020 | 30000 | 20020 | 60000 | 14166 | 30000 | 20010 |
50024 | 90051 | 44176 | 14176 | 30000 | 14175 | 30000 | 42611 | 355221 | 44175 | 20020 | 30000 | 20020 | 60000 | 14166 | 30000 | 20010 |
50024 | 90051 | 44176 | 14176 | 30000 | 14175 | 30000 | 42613 | 355218 | 44175 | 20020 | 30000 | 20020 | 60000 | 14166 | 30000 | 20010 |
50024 | 90051 | 44176 | 14176 | 30000 | 14175 | 30000 | 42613 | 355222 | 44175 | 20020 | 30000 | 20020 | 60000 | 14166 | 30000 | 20010 |
Code:
casalb w0, w1, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 24.0039
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40207 | 240155 | 33473 | 3435 | 30038 | 1771 | 30003 | 920613 | 2938282 | 34268 | 10201 | 30003 | 10201 | 60006 | 8328 | 30000 | 10100 |
40204 | 240036 | 38429 | 8428 | 30001 | 4265 | 30003 | 920625 | 2938305 | 34268 | 10201 | 30003 | 10201 | 60006 | 8328 | 30000 | 10100 |
40205 | 240065 | 37514 | 7484 | 30030 | 3798 | 30003 | 920625 | 2938305 | 34268 | 10201 | 30003 | 10201 | 60006 | 8328 | 30000 | 10100 |
40204 | 240036 | 38429 | 8428 | 30001 | 4265 | 30003 | 920625 | 2938328 | 34268 | 10201 | 30003 | 10201 | 60006 | 8328 | 30000 | 10100 |
40204 | 240046 | 38431 | 8430 | 30001 | 4265 | 30003 | 920637 | 2938334 | 34268 | 10201 | 30003 | 10201 | 60006 | 8297 | 30000 | 10100 |
40204 | 240042 | 38398 | 8397 | 30001 | 4265 | 30003 | 920637 | 2938361 | 34268 | 10201 | 30003 | 10212 | 60072 | 7090 | 30000 | 10100 |
40204 | 240042 | 38398 | 8397 | 30001 | 4265 | 30003 | 920641 | 2938370 | 34268 | 10201 | 30003 | 10201 | 60006 | 8297 | 30000 | 10100 |
40204 | 240039 | 38398 | 8397 | 30001 | 4265 | 30003 | 920658 | 2938442 | 34268 | 10201 | 30003 | 10201 | 60006 | 8297 | 30000 | 10100 |
40204 | 240039 | 38398 | 8397 | 30001 | 4265 | 30036 | 600995 | 2939052 | 32855 | 10212 | 30036 | 10201 | 60006 | 8297 | 30000 | 10100 |
40204 | 240039 | 38398 | 8397 | 30001 | 4265 | 30003 | 920637 | 2938357 | 34268 | 10201 | 30003 | 10212 | 60072 | 6056 | 30000 | 10100 |
Result (median cycles for code): 24.0046
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40027 | 240155 | 33383 | 3345 | 30038 | 1681 | 30003 | 920368 | 2938523 | 34178 | 10021 | 30003 | 10032 | 60072 | 5281 | 30000 | 10010 |
40024 | 240044 | 38341 | 8340 | 30001 | 4175 | 30000 | 920380 | 2938529 | 34175 | 10020 | 30000 | 10032 | 60072 | 4135 | 30000 | 10010 |
40024 | 240044 | 38340 | 8340 | 30000 | 4175 | 30000 | 920380 | 2938529 | 34175 | 10020 | 30000 | 10020 | 60000 | 8330 | 30000 | 10010 |
40025 | 240073 | 38234 | 8204 | 30030 | 4109 | 30000 | 920380 | 2938529 | 34175 | 10020 | 30000 | 10020 | 60000 | 8330 | 30000 | 10010 |
40024 | 240044 | 38340 | 8340 | 30000 | 4175 | 30000 | 920380 | 2938529 | 34175 | 10020 | 30000 | 10020 | 60000 | 8330 | 30000 | 10010 |
40024 | 240048 | 38340 | 8340 | 30000 | 4175 | 30000 | 920380 | 2938529 | 34175 | 10020 | 30000 | 10032 | 60072 | 6607 | 30000 | 10010 |
40024 | 240044 | 38340 | 8340 | 30000 | 4175 | 30000 | 920380 | 2938529 | 34175 | 10020 | 30000 | 10020 | 60000 | 8330 | 30000 | 10010 |
40024 | 240044 | 38340 | 8340 | 30000 | 4175 | 30000 | 920380 | 2938529 | 34175 | 10020 | 30000 | 10020 | 60000 | 8330 | 30000 | 10010 |
40024 | 240044 | 38340 | 8340 | 30000 | 4175 | 30036 | 559076 | 2939187 | 32577 | 10032 | 30036 | 10020 | 60000 | 8330 | 30000 | 10010 |
40024 | 240045 | 38340 | 8340 | 30000 | 4175 | 30000 | 920380 | 2938529 | 34175 | 10020 | 30000 | 10020 | 60000 | 8330 | 30000 | 10010 |