Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
ldp x0, x1, [x6, #8]!

mov x0, 1
mov x1, 2
mov x8, 0
(no loop instructions)
Retires: 3.000
Issues: 2.000
Integer unit issues: 1.001
Load/store unit issues: 1.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
3005 | 1515 | 2057 | 1029 | 1028 | 1028 | 1000 | 13182 | 14528 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1069 | 2001 | 1001 | 1000 | 1000 | 1000 | 13265 | 15121 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1084 | 2001 | 1001 | 1000 | 1000 | 1000 | 13183 | 14145 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1105 | 2001 | 1001 | 1000 | 1000 | 1000 | 13528 | 14844 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1094 | 2001 | 1001 | 1000 | 1000 | 1000 | 13352 | 14536 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1066 | 2001 | 1001 | 1000 | 1000 | 1028 | 13782 | 14703 | 2056 | 1028 | 2057 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1062 | 2001 | 1001 | 1000 | 1000 | 1000 | 13441 | 14678 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1084 | 2001 | 1001 | 1000 | 1000 | 1000 | 13398 | 14797 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1083 | 2001 | 1001 | 1000 | 1000 | 1000 | 13763 | 15374 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
3004 | 1112 | 2001 | 1001 | 1000 | 1000 | 1000 | 13737 | 14848 | 2000 | 1000 | 2000 | 1000 | 2000 | 1001 | 1000 | 2000 |
Chain cycles: 3
Code:
ldp x0, x1, [x6, #8]!
eor x8, x8, x0
eor x8, x8, x0
add x6, x6, x8

mov x0, 1
mov x1, 2
mov x8, 0
(fused SUBS/B.cc loop)
Result (median cycles for code, minus 3 chain cycles): 5.9003
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
60209 | 126479 | 51504 | 41499 | 10005 | 40348 | 10003 | 2362869 | 700739 | 50209 | 40212 | 20006 | 70221 | 20008 | 41373 | 10000 | 50100 |
60204 | 89376 | 51474 | 41474 | 10000 | 40206 | 10003 | 2367230 | 702098 | 50209 | 40212 | 20008 | 70221 | 20008 | 41373 | 10000 | 50100 |
60204 | 89003 | 51473 | 41473 | 10000 | 40206 | 10003 | 2360129 | 699993 | 50209 | 40212 | 20008 | 70221 | 20008 | 41373 | 10000 | 50100 |
60204 | 89003 | 51473 | 41473 | 10000 | 40206 | 10003 | 2360129 | 699993 | 50209 | 40212 | 20008 | 70221 | 20008 | 41373 | 10000 | 50100 |
60204 | 89003 | 51473 | 41473 | 10000 | 40206 | 10003 | 2360129 | 699993 | 50209 | 40212 | 20008 | 70221 | 20008 | 41373 | 10000 | 50100 |
60204 | 89003 | 51473 | 41473 | 10000 | 40206 | 10003 | 2360129 | 699993 | 50209 | 40212 | 20008 | 70221 | 20008 | 41373 | 10000 | 50100 |
60204 | 89003 | 51473 | 41473 | 10000 | 40206 | 10003 | 2360129 | 699993 | 50209 | 40212 | 20008 | 70221 | 20008 | 41373 | 10000 | 50100 |
60204 | 89003 | 51473 | 41473 | 10000 | 40206 | 10003 | 2360129 | 699993 | 50209 | 40212 | 20008 | 70221 | 20008 | 41373 | 10000 | 50100 |
60204 | 89003 | 51473 | 41473 | 10000 | 40206 | 10003 | 2360129 | 699993 | 50209 | 40212 | 20008 | 70221 | 20008 | 41373 | 10000 | 50100 |
60204 | 89003 | 51473 | 41473 | 10000 | 40206 | 10003 | 2360129 | 699993 | 50209 | 40212 | 20008 | 70221 | 20008 | 41373 | 10000 | 50100 |
Result (median cycles for code, minus 3 chain cycles): 5.9016
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
60030 | 123857 | 51334 | 41327 | 10007 | 40199 | 10003 | 2360282 | 700795 | 50029 | 40032 | 20008 | 70041 | 20008 | 41284 | 10000 | 50010 |
60024 | 89016 | 51294 | 41294 | 10000 | 40020 | 10000 | 2360630 | 700876 | 50020 | 40020 | 20000 | 70020 | 20000 | 41284 | 10000 | 50010 |
60024 | 89016 | 51294 | 41294 | 10000 | 40020 | 10000 | 2365355 | 702279 | 50020 | 40020 | 20000 | 70020 | 20000 | 41284 | 10000 | 50010 |
60024 | 89171 | 51294 | 41294 | 10000 | 40020 | 10000 | 2366516 | 702620 | 50020 | 40020 | 20000 | 70020 | 20000 | 41284 | 10000 | 50010 |
60024 | 89151 | 51294 | 41294 | 10000 | 40020 | 10000 | 2364275 | 701958 | 50020 | 40020 | 20000 | 70020 | 20000 | 41284 | 10000 | 50010 |
60024 | 89016 | 51294 | 41294 | 10000 | 40020 | 10000 | 2360630 | 700876 | 50020 | 40020 | 20000 | 70020 | 20000 | 41284 | 10000 | 50010 |
60024 | 89016 | 51294 | 41294 | 10000 | 40020 | 10000 | 2360630 | 700876 | 50020 | 40020 | 20000 | 70020 | 20000 | 41284 | 10000 | 50010 |
60024 | 89016 | 51294 | 41294 | 10000 | 40020 | 10000 | 2360630 | 700876 | 50020 | 40020 | 20000 | 70020 | 20000 | 41284 | 10000 | 50010 |
60024 | 89016 | 51294 | 41294 | 10000 | 40020 | 10000 | 2360630 | 700876 | 50020 | 40020 | 20000 | 70020 | 20000 | 41284 | 10000 | 50010 |
60024 | 89016 | 51294 | 41294 | 10000 | 40020 | 10000 | 2360630 | 700876 | 50020 | 40020 | 20000 | 70020 | 20000 | 41284 | 10000 | 50010 |
Chain cycles: 3
Code:
ldp x0, x1, [x6, #8]!
eor x8, x8, x1
eor x8, x8, x1
add x6, x6, x8

mov x0, 1
mov x1, 2
mov x8, 0
(fused SUBS/B.cc loop)
Result (median cycles for code, minus 3 chain cycles): 5.8990
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
62287 | 140495 | 53303 | 42596 | 10707 | 41408 | 10003 | 2374170 | 704122 | 50209 | 40212 | 20008 | 70221 | 20008 | 41371 | 10000 | 50100 |
60204 | 88974 | 51471 | 41471 | 10000 | 40206 | 10012 | 2370130 | 703037 | 50246 | 40248 | 20024 | 70221 | 20008 | 41373 | 10000 | 50100 |
60204 | 89180 | 51472 | 41472 | 10000 | 40206 | 10003 | 2367473 | 702169 | 50209 | 40212 | 20008 | 70221 | 20008 | 41372 | 10000 | 50100 |
60204 | 89134 | 51472 | 41472 | 10000 | 40206 | 10003 | 2359778 | 699889 | 50209 | 40212 | 20008 | 70221 | 20008 | 41372 | 10000 | 50100 |
60204 | 88990 | 51472 | 41472 | 10000 | 40206 | 10003 | 2359778 | 699889 | 50209 | 40212 | 20008 | 70221 | 20008 | 41374 | 10000 | 50100 |
60204 | 89482 | 51472 | 41472 | 10000 | 40206 | 10013 | 2367334 | 702124 | 50253 | 40252 | 20028 | 70221 | 20008 | 41373 | 10000 | 50100 |
60204 | 88990 | 51472 | 41472 | 10000 | 40206 | 10003 | 2359778 | 699889 | 50209 | 40212 | 20008 | 70221 | 20008 | 41372 | 10000 | 50100 |
60204 | 88990 | 51472 | 41472 | 10000 | 40206 | 10003 | 2359778 | 699889 | 50209 | 40212 | 20008 | 70221 | 20008 | 41372 | 10000 | 50100 |
60204 | 88990 | 51472 | 41472 | 10000 | 40206 | 10003 | 2359778 | 699889 | 50209 | 40212 | 20008 | 70221 | 20008 | 41372 | 10000 | 50100 |
60204 | 88990 | 51472 | 41472 | 10000 | 40206 | 10003 | 2359778 | 699889 | 50209 | 40212 | 20008 | 70221 | 20008 | 41372 | 10000 | 50100 |
Result (median cycles for code, minus 3 chain cycles): 5.8990
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
60029 | 123902 | 51322 | 41317 | 10005 | 40166 | 10003 | 2359459 | 700520 | 50029 | 40032 | 20008 | 70020 | 20000 | 41279 | 10000 | 50010 |
60024 | 89112 | 51294 | 41294 | 10000 | 40026 | 10000 | 2360279 | 700772 | 50020 | 40020 | 20000 | 70020 | 20000 | 41282 | 10000 | 50010 |
60024 | 88990 | 51292 | 41292 | 10000 | 40020 | 10000 | 2359928 | 700668 | 50020 | 40020 | 20000 | 70020 | 20000 | 41282 | 10000 | 50010 |
60024 | 88990 | 51292 | 41292 | 10000 | 40020 | 10000 | 2359928 | 700668 | 50020 | 40020 | 20000 | 70020 | 20000 | 41282 | 10000 | 50010 |
60024 | 88990 | 51292 | 41292 | 10000 | 40020 | 10000 | 2359928 | 700668 | 50020 | 40020 | 20000 | 70020 | 20000 | 41282 | 10000 | 50010 |
60024 | 88990 | 51292 | 41292 | 10000 | 40020 | 10000 | 2359928 | 700668 | 50020 | 40020 | 20000 | 70020 | 20000 | 41282 | 10000 | 50010 |
60024 | 88990 | 51292 | 41292 | 10000 | 40020 | 10013 | 2360609 | 700902 | 50073 | 40072 | 20028 | 70020 | 20000 | 41283 | 10000 | 50010 |
60024 | 88990 | 51292 | 41292 | 10000 | 40020 | 10000 | 2359928 | 700668 | 50020 | 40020 | 20000 | 70020 | 20000 | 41282 | 10000 | 50010 |
60024 | 88990 | 51292 | 41292 | 10000 | 40020 | 10000 | 2359928 | 700668 | 50020 | 40020 | 20000 | 70020 | 20000 | 41282 | 10000 | 50010 |
60024 | 88990 | 51292 | 41292 | 10000 | 40020 | 10000 | 2359928 | 700668 | 50020 | 40020 | 20000 | 70020 | 20000 | 41282 | 10000 | 50010 |
Count: 8
Code:
ldp x0, x1, [x6, #8]!
ldp x0, x1, [x7, #8]!
ldp x0, x1, [x8, #8]!
ldp x0, x1, [x9, #8]!
ldp x0, x1, [x10, #8]!
ldp x0, x1, [x11, #8]!
ldp x0, x1, [x12, #8]!
ldp x0, x1, [x13, #8]!

mov x7, x6
mov x8, x6
mov x9, x6
mov x10, x6
mov x11, x6
mov x12, x6
mov x13, x6
(fused SUBS/B.cc loop)
Result (median cycles for code divided by count): 0.7633
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
240209 | 62328 | 160369 | 80240 | 80129 | 80241 | 80008 | 240762 | 251863 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61064 | 160111 | 80106 | 80005 | 80108 | 80008 | 240760 | 251861 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61062 | 160111 | 80106 | 80005 | 80108 | 80008 | 240760 | 251873 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61059 | 160111 | 80106 | 80005 | 80108 | 80008 | 240762 | 251868 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61063 | 160111 | 80106 | 80005 | 80108 | 80008 | 240760 | 251903 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61444 | 160319 | 80210 | 80109 | 80212 | 80008 | 240770 | 251681 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61056 | 160111 | 80106 | 80005 | 80108 | 80062 | 240910 | 253508 | 160224 | 80262 | 160126 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61153 | 160167 | 80134 | 80033 | 80135 | 80008 | 240762 | 251858 | 160116 | 80208 | 160016 | 80236 | 160072 | 80034 | 80000 | 160100 |
240204 | 61059 | 160111 | 80106 | 80005 | 80108 | 80008 | 240762 | 251831 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
240204 | 61155 | 160167 | 80134 | 80033 | 80135 | 80008 | 240762 | 251840 | 160116 | 80208 | 160016 | 80208 | 160016 | 80006 | 80000 | 160100 |
Result (median cycles for code divided by count): 0.7627
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
240029 | 62123 | 160275 | 80149 | 80126 | 80151 | 80008 | 240501 | 251921 | 160026 | 80028 | 160016 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61020 | 160011 | 80011 | 80000 | 80010 | 80000 | 240477 | 251879 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61017 | 160011 | 80011 | 80000 | 80010 | 80000 | 240481 | 251899 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61022 | 160011 | 80011 | 80000 | 80010 | 80000 | 240475 | 251899 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |
240025 | 61116 | 160077 | 80044 | 80033 | 80046 | 80000 | 240476 | 251885 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61013 | 160011 | 80011 | 80000 | 80010 | 80000 | 240481 | 251898 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61015 | 160011 | 80011 | 80000 | 80010 | 80000 | 240473 | 251898 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61015 | 160011 | 80011 | 80000 | 80010 | 80000 | 240479 | 251905 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61015 | 160011 | 80011 | 80000 | 80010 | 80000 | 240480 | 251874 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |
240024 | 61014 | 160011 | 80011 | 80000 | 80010 | 80000 | 240478 | 251889 | 160010 | 80020 | 160000 | 80020 | 160000 | 80001 | 80000 | 160010 |