Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
ldp x0, x1, [x6], #8

mov x0, 1
mov x1, 2
mov x8, 0
(no loop instructions)
Retires: 3.000
Issues: 2.000
Integer unit issues: 1.001
Load/store unit issues: 1.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
3005 | 1578 | 2057 | 1029 | 1028 | 1028 | 1000 | 13208 | 14565 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1112 | 2001 | 1001 | 1000 | 1000 | 1000 | 13371 | 14569 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1102 | 2001 | 1001 | 1000 | 1000 | 1000 | 13330 | 14565 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1124 | 2001 | 1001 | 1000 | 1000 | 1000 | 13141 | 14594 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1157 | 2001 | 1001 | 1000 | 1000 | 1000 | 13243 | 14871 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1104 | 2001 | 1001 | 1000 | 1000 | 1000 | 13556 | 15110 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1143 | 2001 | 1001 | 1000 | 1000 | 1000 | 13296 | 14622 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1111 | 2001 | 1001 | 1000 | 1000 | 1000 | 13543 | 14772 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1156 | 2001 | 1001 | 1000 | 1000 | 1000 | 13253 | 14525 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1080 | 2001 | 1001 | 1000 | 1000 | 1000 | 13470 | 14885 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
Chain cycles: 3
Code:
ldp x0, x1, [x6], #8
eor x8, x8, x0
eor x8, x8, x0
add x6, x6, x8

mov x0, 1
mov x1, 2
mov x8, 0
(fused SUBS/B.cc loop)
Result (median cycles for code, minus 3 chain cycles): 5.9003
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
60209 | 126516 | 51504 | 41499 | 10005 | 40348 | 10003 | 2365307 | 701466 | 50209 | 40212 | 20006 | 70221 | 20008 | 41369 | 10000 | 50100 |
60204 | 89010 | 51469 | 41469 | 10000 | 40206 | 10003 | 2359655 | 699824 | 50209 | 40212 | 20008 | 70221 | 20008 | 41368 | 10000 | 50100 |
60204 | 88989 | 51469 | 41469 | 10000 | 40206 | 10003 | 2359735 | 699881 | 50209 | 40212 | 20008 | 70221 | 20008 | 41368 | 10000 | 50100 |
60204 | 88989 | 51469 | 41469 | 10000 | 40206 | 10003 | 2359735 | 699881 | 50209 | 40212 | 20008 | 70221 | 20008 | 41368 | 10000 | 50100 |
60204 | 88989 | 51469 | 41469 | 10000 | 40206 | 10013 | 2360032 | 699964 | 50253 | 40252 | 20028 | 70221 | 20008 | 41368 | 10000 | 50100 |
60204 | 88989 | 51469 | 41469 | 10000 | 40206 | 10003 | 2359735 | 699881 | 50209 | 40212 | 20008 | 70221 | 20008 | 41369 | 10000 | 50100 |
60204 | 88989 | 51469 | 41469 | 10000 | 40206 | 10003 | 2359735 | 699881 | 50209 | 40212 | 20008 | 70221 | 20008 | 41369 | 10000 | 50100 |
60204 | 88989 | 51469 | 41469 | 10000 | 40206 | 10003 | 2359303 | 699753 | 50209 | 40212 | 20008 | 70221 | 20008 | 41369 | 10000 | 50100 |
60204 | 88989 | 51469 | 41469 | 10000 | 40206 | 10003 | 2359735 | 699881 | 50209 | 40212 | 20008 | 70291 | 20028 | 41378 | 10000 | 50100 |
60204 | 89317 | 51470 | 41470 | 10000 | 40206 | 10003 | 2359735 | 699881 | 50209 | 40212 | 20008 | 70221 | 20008 | 41369 | 10000 | 50100 |
Result (median cycles for code, minus 3 chain cycles): 5.9018
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) |
60029 | 124177 | 51324 | 41319 | 10005 | 40166 | 10003 | 2359591 | 700595 | 50029 | 40032 | 20008 | 70020 | 20000 | 0 | 41277 | 10000 | 0 | 50010 |
60024 | 88986 | 51287 | 41287 | 10000 | 40020 | 10000 | 2359318 | 700492 | 50020 | 40020 | 20000 | 70020 | 20000 | 0 | 41276 | 10000 | 0 | 50010 |
60024 | 88969 | 51286 | 41286 | 10000 | 40020 | 10000 | 2360749 | 700916 | 50020 | 40020 | 20000 | 70020 | 20000 | 0 | 41276 | 10000 | 0 | 50010 |
60024 | 89190 | 51288 | 41288 | 10000 | 40020 | 10000 | 2362261 | 701370 | 50020 | 40020 | 20000 | 70300 | 20088 | 0 | 41318 | 10000 | 0 | 50010 |
60024 | 89132 | 51287 | 41287 | 10000 | 40020 | 10000 | 2359318 | 700492 | 50020 | 40020 | 20000 | 70111 | 20028 | 0 | 41284 | 10000 | 0 | 50010 |
60024 | 88984 | 51287 | 41287 | 10000 | 40020 | 10000 | 2359642 | 700588 | 50020 | 40020 | 20000 | 70020 | 20000 | 0 | 41276 | 10000 | 0 | 50010 |
60024 | 88984 | 51287 | 41287 | 10000 | 40020 | 10000 | 2358913 | 700372 | 50020 | 40020 | 20000 | 70020 | 20000 | 0 | 41275 | 10000 | 0 | 50010 |
60024 | 88977 | 51287 | 41287 | 10000 | 40020 | 10000 | 2358724 | 700316 | 50020 | 40020 | 20000 | 70020 | 20000 | 0 | 41276 | 10000 | 0 | 50010 |
60024 | 88977 | 51287 | 41287 | 10000 | 40020 | 10000 | 2358319 | 700196 | 50020 | 40020 | 20000 | 70020 | 20000 | 0 | 41275 | 10000 | 0 | 50010 |
60024 | 89278 | 51286 | 41286 | 10000 | 40020 | 10000 | 2358373 | 700212 | 50020 | 40020 | 20000 | 70020 | 20000 | 0 | 41275 | 10000 | 0 | 50010 |
Chain cycles: 3
Code:
ldp x0, x1, [x6], #8
eor x8, x8, x1
eor x8, x8, x1
add x6, x6, x8

mov x0, 1
mov x1, 2
mov x8, 0
(fused SUBS/B.cc loop)
Result (median cycles for code, minus 3 chain cycles): 5.9016
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
60209 | 123915 | 51505 | 41500 | 10005 | 40348 | 10003 | 2359574 | 699800 | 50209 | 40212 | 20008 | 70221 | 20008 | 41370 | 10000 | 50100 |
60205 | 89032 | 51480 | 41478 | 10002 | 40240 | 10003 | 2360086 | 699985 | 50209 | 40212 | 20008 | 70221 | 20008 | 41370 | 10000 | 50100 |
60204 | 89002 | 51470 | 41470 | 10000 | 40206 | 10003 | 2360086 | 699985 | 50209 | 40212 | 20008 | 70221 | 20008 | 41370 | 10000 | 50100 |
60204 | 89002 | 51470 | 41470 | 10000 | 40206 | 10003 | 2360086 | 699985 | 50209 | 40212 | 20008 | 70221 | 20008 | 41370 | 10000 | 50100 |
60204 | 89002 | 51470 | 41470 | 10000 | 40206 | 10003 | 2360086 | 699985 | 50209 | 40212 | 20008 | 70221 | 20008 | 41370 | 10000 | 50100 |
60204 | 89002 | 51470 | 41470 | 10000 | 40206 | 10003 | 2361868 | 700513 | 50209 | 40212 | 20008 | 70221 | 20008 | 41370 | 10000 | 50100 |
60204 | 89002 | 51470 | 41470 | 10000 | 40206 | 10003 | 2362813 | 700798 | 50209 | 40212 | 20008 | 70221 | 20008 | 41370 | 10000 | 50100 |
60205 | 89169 | 51482 | 41480 | 10002 | 40240 | 10003 | 2360383 | 700073 | 50209 | 40212 | 20008 | 70221 | 20008 | 41370 | 10000 | 50100 |
60204 | 89002 | 51470 | 41470 | 10000 | 40206 | 10003 | 2360086 | 699985 | 50209 | 40212 | 20008 | 70221 | 20008 | 41370 | 10000 | 50100 |
60204 | 89002 | 51470 | 41470 | 10000 | 40206 | 10003 | 2360086 | 699985 | 50209 | 40212 | 20008 | 70221 | 20008 | 41370 | 10000 | 50100 |
Result (median cycles for code, minus 3 chain cycles): 5.8977
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
60030 | 123908 | 51332 | 41325 | 10007 | 40200 | 10003 | 2365288 | 702284 | 50029 | 40032 | 20008 | 70020 | 20000 | 41278 | 10000 | 50010 |
60025 | 89559 | 51301 | 41299 | 10002 | 40059 | 10000 | 2365421 | 702268 | 50020 | 40020 | 20000 | 70020 | 20000 | 41275 | 10000 | 50010 |
60024 | 88956 | 51286 | 41286 | 10000 | 40020 | 10000 | 2358994 | 700396 | 50020 | 40020 | 20000 | 70020 | 20000 | 41275 | 10000 | 50010 |
60024 | 88956 | 51286 | 41286 | 10000 | 40020 | 10000 | 2358994 | 700396 | 50020 | 40020 | 20000 | 70020 | 20000 | 41275 | 10000 | 50010 |
60024 | 88956 | 51286 | 41286 | 10000 | 40020 | 10000 | 2358562 | 700268 | 50020 | 40020 | 20000 | 70020 | 20000 | 41275 | 10000 | 50010 |
60024 | 88956 | 51286 | 41286 | 10000 | 40020 | 10000 | 2358562 | 700268 | 50020 | 40020 | 20000 | 70020 | 20000 | 41275 | 10000 | 50010 |
60025 | 88986 | 51296 | 41294 | 10002 | 40060 | 10000 | 2358994 | 700396 | 50020 | 40020 | 20000 | 70020 | 20000 | 41276 | 10000 | 50010 |
60024 | 88956 | 51286 | 41286 | 10000 | 40020 | 10000 | 2358562 | 700268 | 50020 | 40020 | 20000 | 70020 | 20000 | 41275 | 10000 | 50010 |
60024 | 88956 | 51286 | 41286 | 10000 | 40020 | 10000 | 2358562 | 700268 | 50020 | 40020 | 20000 | 70020 | 20000 | 41275 | 10000 | 50010 |
60024 | 88956 | 51286 | 41286 | 10000 | 40020 | 10000 | 2358562 | 700268 | 50020 | 40020 | 20000 | 70020 | 20000 | 41276 | 10000 | 50010 |
Count: 8
Code:
ldp x0, x1, [x6], #8
ldp x0, x1, [x7], #8
ldp x0, x1, [x8], #8
ldp x0, x1, [x9], #8
ldp x0, x1, [x10], #8
ldp x0, x1, [x11], #8
ldp x0, x1, [x12], #8
ldp x0, x1, [x13], #8

mov x7, x6
mov x8, x6
mov x9, x6
mov x10, x6
mov x11, x6
mov x12, x6
mov x13, x6
(fused SUBS/B.cc loop)
Result (median cycles for code divided by count): 0.7632
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
240209 | 62184 | 160368 | 80240 | 80128 | 80241 | 80008 | 240788 | 252069 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61075 | 160111 | 80106 | 80005 | 80108 | 80008 | 240790 | 252012 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61055 | 160111 | 80106 | 80005 | 80108 | 80008 | 240722 | 251869 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61053 | 160111 | 80106 | 80005 | 80108 | 80008 | 240722 | 251877 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61056 | 160111 | 80106 | 80005 | 80108 | 80008 | 240722 | 251885 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61060 | 160111 | 80106 | 80005 | 80108 | 80008 | 240722 | 251885 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61053 | 160111 | 80106 | 80005 | 80108 | 80008 | 240722 | 251884 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61054 | 160111 | 80106 | 80005 | 80108 | 80008 | 240722 | 251872 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61056 | 160111 | 80106 | 80005 | 80108 | 80008 | 240722 | 251854 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61057 | 160111 | 80106 | 80005 | 80108 | 80008 | 240722 | 251877 | 160116 | 80208 | 160016 | 80235 | 160073 | 80033 | 80000 | 160100 |
Result (median cycles for code divided by count): 0.7627
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
240029 | 62140 | 160288 | 80149 | 80139 | 80151 | 80009 | 240526 | 252197 | 160027 | 80028 | 160018 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61017 | 160011 | 80011 | 80000 | 80010 | 80000 | 240495 | 252004 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61021 | 160011 | 80011 | 80000 | 80010 | 80000 | 240494 | 252024 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61017 | 160011 | 80011 | 80000 | 80010 | 80000 | 240500 | 252019 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61020 | 160011 | 80011 | 80000 | 80010 | 80000 | 240494 | 252090 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61018 | 160011 | 80011 | 80000 | 80010 | 80000 | 240494 | 252024 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61017 | 160011 | 80011 | 80000 | 80010 | 80000 | 240493 | 252026 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61018 | 160011 | 80011 | 80000 | 80010 | 80000 | 240490 | 252027 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61018 | 160011 | 80011 | 80000 | 80010 | 80000 | 240493 | 252022 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61016 | 160011 | 80011 | 80000 | 80010 | 80000 | 240493 | 252017 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |