Apple Microarchitecture Research by Dougall Johnson
M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm): Overview | Base Instructions | SIMD and FP Instructions
Code:
stclrlb w0, [x6] nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
mov x0, 0
(no loop instructions)
Retires (minus 70 nops): 3.000
Issues: 3.001
Integer unit issues: 1.002
Load/store unit issues: 2.000
SIMD/FP unit issues: 0.000
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
73006 | 34795 | 3040 | 1026 | 0 | 2014 | 1007 | 0 | 2002 | 7823 | 10604 | 3003 | 1001 | 2002 | 2000 | 4000 | 1004 | 2000 | 1000 |
73004 | 34850 | 3003 | 1003 | 0 | 2000 | 1000 | 0 | 2000 | 7789 | 10562 | 3000 | 1000 | 2000 | 2000 | 4000 | 1003 | 2000 | 1000 |
73004 | 35847 | 3003 | 1003 | 0 | 2000 | 1000 | 0 | 2000 | 8150 | 10901 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 35351 | 3005 | 1003 | 0 | 2002 | 1001 | 0 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 35113 | 3005 | 1003 | 0 | 2002 | 1001 | 0 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
31611 | 33584 | 9854 | 5313 | 37 | 4504 | 4955 | 32 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34371 | 3005 | 1003 | 0 | 2002 | 1001 | 0 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34382 | 3005 | 1003 | 0 | 2002 | 1001 | 0 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34270 | 3002 | 1002 | 0 | 2000 | 1000 | 0 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2000 | 4000 | 1002 | 2000 | 1000 |
73004 | 34258 | 3002 | 1002 | 0 | 2000 | 1000 | 0 | 2000 | 7762 | 10513 | 3000 | 1000 | 2000 | 2006 | 4012 | 1006 | 2000 | 1000 |
Code:
stclrlb w0, [x6]
add x6, x6, 2
(fused SUBS/B.cc loop)
Result (median cycles for code): 6.0062
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40206 | 60312 | 40250 | 20206 | 20044 | 20152 | 20005 | 115576 | 95845 | 40110 | 20205 | 20005 | 30208 | 40009 | 20007 | 20000 | 20100 |
40204 | 60066 | 40107 | 20107 | 20000 | 20102 | 20005 | 115610 | 95878 | 40110 | 20205 | 20005 | 30203 | 40004 | 20008 | 20000 | 20100 |
40205 | 60110 | 40175 | 20143 | 20032 | 20134 | 20002 | 115670 | 95555 | 40104 | 20202 | 20002 | 30203 | 40004 | 20008 | 20000 | 20100 |
40204 | 60062 | 40108 | 20108 | 20000 | 20102 | 20034 | 102231 | 101542 | 40168 | 20234 | 20034 | 30203 | 40004 | 20006 | 20000 | 20100 |
40204 | 60062 | 40108 | 20108 | 20000 | 20102 | 20002 | 115687 | 95564 | 40104 | 20202 | 20002 | 30203 | 40004 | 20008 | 20000 | 20100 |
40204 | 60062 | 40108 | 20108 | 20000 | 20102 | 20002 | 115689 | 95567 | 40104 | 20202 | 20002 | 30203 | 40004 | 20008 | 20000 | 20100 |
40204 | 60062 | 40108 | 20108 | 20000 | 20102 | 20002 | 115689 | 95569 | 40104 | 20202 | 20002 | 30203 | 40004 | 20008 | 20000 | 20100 |
40204 | 60062 | 40108 | 20108 | 20000 | 20102 | 20033 | 102236 | 101426 | 40168 | 20235 | 20033 | 30203 | 40004 | 20006 | 20000 | 20100 |
40205 | 60120 | 40179 | 20145 | 20034 | 20136 | 20002 | 115692 | 95591 | 40104 | 20202 | 20002 | 30254 | 40070 | 20045 | 20000 | 20100 |
40204 | 60062 | 40110 | 20107 | 20003 | 20105 | 20005 | 115436 | 95713 | 40110 | 20205 | 20005 | 30203 | 40004 | 20006 | 20000 | 20100 |
Result (median cycles for code): 6.0065
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
40026 | 60284 | 40144 | 20106 | 20038 | 20060 | 20002 | 115368 | 95377 | 40014 | 20022 | 20002 | 30020 | 40000 | 20005 | 20000 | 20010 |
40024 | 60058 | 40015 | 20015 | 20000 | 20010 | 20000 | 115356 | 95358 | 40010 | 20020 | 20000 | 30020 | 40000 | 20005 | 20000 | 20010 |
40024 | 60058 | 40015 | 20015 | 20000 | 20010 | 20000 | 115381 | 95381 | 40010 | 20020 | 20000 | 30020 | 40000 | 20007 | 20000 | 20010 |
40024 | 60058 | 40017 | 20017 | 20000 | 20010 | 20000 | 115354 | 95355 | 40010 | 20020 | 20000 | 30020 | 40000 | 20007 | 20000 | 20010 |
40024 | 60058 | 40015 | 20015 | 20000 | 20010 | 20000 | 115375 | 95371 | 40010 | 20020 | 20000 | 30020 | 40000 | 20007 | 20000 | 20010 |
40024 | 60058 | 40017 | 20017 | 20000 | 20010 | 20000 | 115372 | 95366 | 40010 | 20020 | 20000 | 30074 | 40070 | 20045 | 20000 | 20010 |
40024 | 60058 | 40015 | 20015 | 20000 | 20010 | 20000 | 115375 | 95372 | 40010 | 20020 | 20000 | 30020 | 40000 | 20007 | 20000 | 20010 |
40024 | 60058 | 40018 | 20018 | 20000 | 20010 | 20000 | 115374 | 95368 | 40010 | 20020 | 20000 | 30020 | 40000 | 20007 | 20000 | 20010 |
40024 | 60058 | 40015 | 20015 | 20000 | 20010 | 20000 | 115379 | 95378 | 40010 | 20020 | 20000 | 30020 | 40000 | 20007 | 20000 | 20010 |
40024 | 60058 | 40017 | 20017 | 20000 | 20010 | 20000 | 115377 | 95373 | 40010 | 20020 | 20000 | 30020 | 40000 | 20007 | 20000 | 20010 |
Code:
stclrlb w0, [x6]
mov x7, 8
(fused SUBS/B.cc loop)
Result (median cycles for code): 10.7508
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
30205 | 114270 | 46423 | 22386 | 24037 | 14080 | 22446 | 2042102 | 1973347 | 34719 | 12377 | 24159 | 24488 | 47773 | 21361 | 20000 | 10100 |
30204 | 109255 | 42104 | 20442 | 21662 | 11755 | 21537 | 1978432 | 1921896 | 33007 | 11572 | 22633 | 21424 | 42298 | 19754 | 20000 | 10100 |
30204 | 106503 | 39958 | 19499 | 20459 | 10413 | 21105 | 1913879 | 1890054 | 31998 | 10996 | 21510 | 20836 | 41152 | 19073 | 20000 | 10100 |
30204 | 104871 | 38961 | 18816 | 20145 | 10199 | 20207 | 1895701 | 1878609 | 30474 | 10367 | 20333 | 20282 | 40163 | 19160 | 20000 | 10100 |
30204 | 107995 | 40645 | 19881 | 20764 | 10566 | 20471 | 1939431 | 1894252 | 30961 | 10590 | 20813 | 21328 | 41971 | 19472 | 20000 | 10100 |
30204 | 107961 | 41477 | 20086 | 21391 | 11489 | 22253 | 1982917 | 1931704 | 34290 | 12166 | 23559 | 20886 | 41323 | 19542 | 20000 | 10100 |
30204 | 106443 | 39450 | 19298 | 20152 | 10186 | 20148 | 1931781 | 1906015 | 30387 | 10339 | 20274 | 20356 | 40296 | 19003 | 20000 | 10100 |
30204 | 106093 | 39200 | 19174 | 20026 | 10106 | 20405 | 1978865 | 1942944 | 30765 | 10461 | 20545 | 22411 | 43980 | 20059 | 20000 | 10100 |
30204 | 106865 | 40514 | 19722 | 20792 | 10778 | 20678 | 1944663 | 1897763 | 31447 | 10869 | 21320 | 20550 | 40670 | 19199 | 20000 | 10100 |
30204 | 107606 | 39276 | 19086 | 20190 | 10219 | 20723 | 1961557 | 1908563 | 31439 | 10839 | 21184 | 21992 | 43260 | 19708 | 20000 | 10100 |
Result (median cycles for code): 11.4199
retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) |
30025 | 114199 | 46551 | 22245 | 0 | 24306 | 14683 | 0 | 25070 | 2080677 | 2006480 | 39471 | 14415 | 28342 | 27599 | 53764 | 22502 | 20000 | 10010 |
30024 | 114408 | 46385 | 22462 | 0 | 23923 | 14208 | 0 | 24125 | 2078414 | 2002913 | 37740 | 13629 | 27004 | 27936 | 53662 | 22126 | 20000 | 10010 |
30024 | 114375 | 46627 | 22535 | 0 | 24092 | 13873 | 0 | 24895 | 2079323 | 2004657 | 39105 | 14223 | 28166 | 28006 | 54603 | 22342 | 20000 | 10010 |
30025 | 114241 | 46525 | 22365 | 0 | 24160 | 14353 | 0 | 24115 | 2081738 | 2005607 | 37655 | 13555 | 26670 | 28390 | 54976 | 22164 | 20000 | 10010 |
30024 | 113979 | 46371 | 22220 | 0 | 24151 | 14122 | 0 | 24709 | 2080058 | 2004827 | 38745 | 14051 | 27817 | 29318 | 56676 | 22245 | 20000 | 10010 |
30024 | 114116 | 46469 | 22235 | 0 | 24234 | 14144 | 0 | 24416 | 2081842 | 2007257 | 38087 | 13682 | 27409 | 29332 | 56652 | 22067 | 20000 | 10010 |
30024 | 113580 | 45923 | 22124 | 0 | 23799 | 13825 | 0 | 24444 | 2067988 | 1994643 | 38521 | 14089 | 27675 | 27872 | 53913 | 21968 | 20000 | 10010 |
30024 | 114375 | 45895 | 22334 | 0 | 23561 | 13453 | 0 | 24677 | 2076485 | 2001398 | 38902 | 14239 | 27907 | 27914 | 54075 | 22376 | 20000 | 10010 |
30024 | 113684 | 45823 | 22050 | 0 | 23773 | 13921 | 0 | 24298 | 2083837 | 2007331 | 37992 | 13708 | 27178 | 28288 | 54349 | 22151 | 20000 | 10010 |
30024 | 114335 | 46195 | 22464 | 0 | 23731 | 13735 | 0 | 24296 | 2075953 | 2000421 | 38106 | 13820 | 27075 | 27508 | 53418 | 22078 | 20000 | 10010 |