Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CMN (uxtw, 32-bit)

Test 1: uops

Code:

  cmn w0, w1, uxtw
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int alu (97) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | f5 | f6 | f7 | f8 | fd
100436930362510001000100050001369369206322510001000200036966111001100073118113661000370370370370370
100436930362510001000100050001369369206322510001000200036966111001100073118113661000370370370370370
100436930362510001000100050001369369206322510001000200036966111001100073118113661000370370370370370
100436930362510001000100050001369369206322510001000200036966111001100073118113661000370370370370370
100436933362510001000100050001369369206322510001000200036966111001100073118113661000370370370370370
100436920362510001000100050000369369206322510001000200036966111001100073118113661000370370370370370
100436930362510001000100050001369369206322510001000200036966111001100073118113661000370370370370370
1004369342362510001000100050001369369206322510001000200036966111001100073118113661000370370370370370
100436930362510001000100050001369369206322510001000200036966111001100073118113661000370370370370370
100436930362510001000100050001369369206322510001000200036966111001100073118113661000370370370370370

Test 2: Latency 3->1

Chain cycles: 1

Code:

  cmn w0, w1, uxtw
  cset x0, cc
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20204200351500611992625201002010020100129715049169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
20204200351500611992625201002010020100129715049169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
20204200351500611992625201002010020100129715049169552003520035174063174812010020200302002003510411202011009910020100101001013101228221999220000101002003620036200362003620036
2020420035149053619926252010020100201001297150491695520035200351740631748120100202003020020035104112020110099100201001010001213101228221999220000101002003620036200362003620036
20204200351503611992625201002010020100129715049169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
20204200351500611992625201002010020100129715049169552003520035174063174812010020200302002003510411202011009910020100101001313101228221999220000101002003620036200362003620036
20204200351500611992625201002010020100129715049169552003520035174063174812010020200302002003510411202011009910020100101001913101228221999220000101002003620036200362003620036
20204200351500611992625201002010020100129715049169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
202042003515007261992625201002010020100129715049169552003520035174063174812010020200302002003510411202011009910020100101005013101228221999220000101002003620036200362003620036
20204200351500611992625201002010020100129715049169552003520035174063174812010020200302002003510411202011009910020100101001313101228221999220000101002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 61 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | ac | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002420035150361199182520010200102001012972471049169552003520035174283175042001020020300202003510411200211091020010100103127000127111999520000100102003620036200362003620036
2002420035150061199184620010200342001012972471049169552003520035174283175042001020020300202003510411200211091020010100100127000127111999520000100102006820036200362003620036
2002420035150061199182520010200102001012972471049169552003520035174283175042001020020300202003510411200211091020010100100127000127111999520000100102003620036200362003620036
2002420035150061199182520010200102001012972471049169552003520035174283175042001020020300202003510411200211091020010100100127000127111999520000100102003620036200362003620036
2002420035150061199182520010200102001012972471049169552003520035174283175042001020020300202003510411200211091020010100100127000127111999520000100102003620036200362003620036
20024200351500441199182520010200102001012972471049169552003520035174283175042001020020300202003510411200211091020010100100127000127111999520000100102003620036200362003620036
2002420035150061199182520010200102001012972471049169552003520035174283175042001020020300202003510411200211091020010100100127000127111999520000100102003620036200362003620036
2002420035150061199182520010200102001012972471049169552003520035174283175042001020020300202003510411200211091020010100100127000127111999520015100102003620036200362003620036
2002420035150061199182520010200102001012972471049169552003520035174283175042001020020300202003510411200211091020010100100127000127121999520000100102003620036200362003620036
2002420035150061199182520010200102001012972471049169552003520035174283175042001020020300202003510411200211091020010100100127000127111999520000100102003620036200362003620036

Test 3: Latency 3->2

Chain cycles: 1

Code:

  cmn w0, w1, uxtw
  cset x1, cc
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202042003515006119926252010020100201001297150491695520035200351740631748120100202003020020035104112020110099100201001010001513101228221999220000101002003620036200362003620036
202042003515007261992625201002010020100129715049169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
20204200351500611992625201002010020100129715049169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
202042003515006119926252010020100201001297150491695520035200351740631748120100202003020020035104112020110099100201001010001213101228221999220000101002003620036200362003620036
20204200351500611992625201002010020100129715049169552003520035174063174812010020200302002003510411202011009910020100101001313101228221999220000101002003620036200362003620036
20204200351500611992625201002010020100129715049169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
20204200351500611992625201002010020100129715049169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
20204200351500611992625201002010020100129715049169552003520035174063174812010020200302002003510411202011009910020100101001313101228221999220000101002003620036200362003620036
20204200351500611992625201002010020100129715049169552003520035174063174812010020200302002003510411202011009910020100101001013101228221999220000101002003620036200362003620036
202042003515006119926252010020100201001297150491695520035200351740631748120100202003020020035104112020110099100201001010031313101228221999220000101002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20024200351500000000900611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010000000001270527431999520000100102003620036200362003620036
20024200351500000000000611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010000000001270727431999520000100102003620036200362003620036
20024200351500000000000611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010000000001270327341999520000100102003620036200362003620036
20024200351500000000000611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010000000001270427431999520000100102003620036200362003620036
20024200351500000000000611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010000000001270427441999520000100102003620036200362003620036
2002420035150000000022200611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010000000001270427341999520000100102003620036200362003620036
20024200351500000000000611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010000000001270527431999520000100102003620036200362003620036
200242003515000000001200611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010000000001270427441999520000100102003620036200362003620036
20024200351500000000000611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010000000001270427441999520000100102003620036200362003620036
20024200351500000000000611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010000000001270427451999520000100102003620036200362003620036

Test 4: throughput

Count: 8

Code:

  cmn w0, w1, uxtw
  cmn w0, w1, uxtw
  cmn w0, w1, uxtw
  cmn w0, w1, uxtw
  cmn w0, w1, uxtw
  cmn w0, w1, uxtw
  cmn w0, w1, uxtw
  cmn w0, w1, uxtw
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802042677120100000098258010080100801004005001492365526735267861667271672180100802001602002673566118020110099100801001000000009480051103191126731800001002673626736267862679126736
8020426735201000000352580100801008010040050014923655267942673516672316690801008020016020026735661180201100991008010010000001000051101191126731800001002673626736267362673626736
8020426735200000000352580100801008010040050014923655267352673516672316690801008020016020026735661180201100991008010010000000000051101191126731800001002673626736267362673626736
80204267352000000120772580100801008010040050014923655267352673516672316690801008020016020026735661180201100991008010010000000000051101191126769800001002673626736267362673626736
802042673520000009013982580100801008010040050014923655267352673516672316690801008020016020026735661180201100991008010010000000000051101191126731800001002673626736267362673626736
80204267352000000120352580100801008010040050014923655267352673516672316690801008020016020026735661180201100991008010010000000000051101191126731800001002673626736267362673626736
8020426735200000000352580100801008010040050014923655267352673516672316690801008020016020026735661180201100991008010010000000000051101191126731800001002673626736267362673626736
8020426735200000000352580100801008010040050014923655267352673516672316690801008020016020026735661180201100991008010010000000000051101191126731800001002673626736267362673626736
8020426735200000000352580100801008010040050014923655267352673516672316690801008020016020026735661180201100991008010010000000000051101191126731800001002673626736267362673626736
8020426735200000000352580100801008010040050014923655267352673516672316690801008020016020026735661180201100991008010010000000000051101191126731800001002673626736267362673626736

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3338

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | c2 | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002426722200000352580010800108001040005014923625267052670516665316683800108002016002026705661180021109108001010000502024180252826701800000102670626706267062670626706
8002426705200000352580010800108001040005014923625267052670516665316683800108002016002026705661180021109108001010000502025180142426701800000102670626706267062670626706
8002426705200000352580010800108001040005014923625267052670516665316683800108002016002026705661180021109108001010000502027180132726701800000102670626706267062670626706
8002426705200000352580010800108001040005014923625267052670516665316683800108002016002026705661180021109108001010000502027180132526701800000102670626706267062670626706
8002426705200000352580010800108001040005014923625267052670516684316683800108002016002026705661180021109108001010000502027180162726701800000102670626706267062670626706
8002426705200009352580010800108001040005014923625267052670516665316683800108002016002026705661180021109108001010000502026180282626701800000102670626706267062670626706
8002426705200000352580010800108001040005004923625267052670516665316683800108002016002026705661180021109108001010000502027180262226701800000102670626706267062670626706
8002426705199000352580010800108001040005014923625267052670516665316683800108002016002026705661180021109108001010000502027180152726701800000102670626706267062670626706
8002426705200000352580010800108001040005014923625267052670516665316683800108002016002026705661180021109108001010000502027180272726701800000102670626706267062670626706
8002426705200000352580010800108001040005004923625267052670516665316683800108002016002026705661180021109108001010000502026180262626701800000102670626706267062670626706