Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CMN (register, 32-bit)

Test 1: uops

Code:

  cmn w0, w1
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int alu (97) | l1d cache writeback (a8) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | f5 | f6 | f7 | f8 | fd
100436923625100010001000500013693692063225100010002000369661110011000073118113661000370370370370370
100436933625100010001000500013693692063225100010002000369661110011000073118113661000370370370370370
100436933625100010001000500003693692063225100010002000369661110011000073118113661000370370370370370
100436923625100010001000500013693692063225100010002000369661110011000073118113661000370370370370370
100436923625100010001000500003693692063225100010002000369661110011000073118113661000370370370370370
100436923625100010001000500003693692063225100010002000369661110011000073118113661000370370370370370
100436923625100010001000500013693692063225100010002000369661110011000073118113661000370370370370370
100436923625100010001000500013693692063225100010002000369661110011000073118113661000370370370370370
100436933625100010001000500003693692063225100010002000369661110011000073118113661000370370370370370
100436923625100010001000500003693692063225100010002000369661110011000073118113661000370370370370370

Test 2: Latency 3->1

Chain cycles: 1

Code:

  cmn w0, w1
  cset x0, cc
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | ac | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2020420035150061199262520100201002010012971504916955200352003517406317481201002020030200200351041120201100991002010010100013101328331999220000101002003620036200362003620036
20204200351508161199262520100201002010012971504916955200352003517406317481201002020030200200351041120201100991002010010100013101428331999220000101002003620036200362003620036
202042003515025861199262520100201002010012971504916955200352003517406317481204532020030200200351041120201100991002010010100013101328331999220000101002008120172200812008120082
2020420035150061199262520100201002010012971504916955200352003517406317481201002020030200200351041120201100991002010010100013101328331999220000101002003620036200362003620036
2020420035150061199262520100201002010012971504916955200352003517406317481201002020030200200351041120201100991002010010100013101328431999220000101002003620036200362003620036
2020420035150061199262520100201002010012971504916955200352003517406317481201002020030200200351041120201100991002010010100013101328331999220000101002003620036200362003620036
2020420035150061199262520100201002010012971504916955200352003517406317481201002020030200200351041120201100991002010010100013101328331999220000101002003620036200362003620036
2020420035150061199262520100201002010012971504916955200352003517406317481201002020030200200351041120201100991002010010100013101328341999220000101002003620036200362003620036
2020420035150061199262520100201002010012971504916955200352003517406317481201002020030200200351041120201100991002010010100013101328331999220000101002003620036200362003620036
20204200351500105199262520100201002010012971504916955200352003517406317481201002020030200200351041120201100991002010010100013101328331999220000101002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 18 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200242003515000611991825200102001020010129724714916955200352003517428317504200102002030020201271041120021109102001010010000301272427931999520000100102003620036200362003620036
200242003515000611991825200102001020010129724714916955200352003517428317504200102002030020200351041120021109102001010010000001270227121999520000100102003620036200362003620036
200242003515000611991825200102001020010129724714916955200352003517428317504200102002030020200351041120021109102001010010000001270327121999520000100102003620036200362003620036
200242003515000611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010000001270227241999520000100102003620036200362003620036
200242003515000611991825200102001020010129724714916955200352003517428317504200102002030020200351041120021109102001010010000001270327341999520000100102003620036200362003620036
200242003515000611991825200102001020010129724714916955200352003517428317504200102002030020200351041120021109102001010010000001270227221999520000100102003620036200362003620036
200242003515000611991825200102001020010129724714916955200352003517428317504200102002030020200351041120021109102001010010000001270227131999520000100102017220067200362003620036
200242003515000611991825200102001020010129724714916955200352003517428317504200102002030020200351041120021109102001010010000001270327221999520000100102003620036200362003620036
2002420035150001031991825200102001020010129724714916955200352003517428317504200102002030020200351041120021109102001010010000001270327231999520000100102003620036200362003620036
200242003515000611991825200102001020010129724714916955200352003517428317504200102002030020200351041120021109102001010010000001270227111999520000100102003620036200362003620036

Test 3: Latency 3->2

Chain cycles: 1

Code:

  cmn w0, w1
  cset x1, cc
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 18 | 19 | 1e | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | flags prf full (73) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | a9 | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202042003515000001611993025201002010020112129723349169552003520035174250617487201122022430236200351041120201100991002010010100000011113180216222001120000101002003620036200362003620036
202042003515000001611993025201002010020112129715049169552003520067174060317481201002020030200200351041120201100991002010010100000000013101228221999220000101002003620036200362003620036
202042003515000000611992625201002010020100129715049169552003520035174060317481201002020030200200351041120201100991002010010100003000013101228231999220000101002003620036200362003620036
202042003515000060661992625201002010020100129715049169552003520035174060317481201002020030200200351041120201100991002010010100000000013101328231999220000101002003620036200362003620036
2020420035150000005361992625201002010020100129715049169552003520035174060317481201002020030200200351041120201100991002010010100000000013101228331999220000101002003620036200362003620036
202042003515500000611992625201002010020100129715049169552003520035174060317481201002020030200200351041120201100991002010010100000000013101228221999220000101002003620036200362006720036
202042003515000060611992625201002010020100129715049169552003520035174060317481201002020030200200351041120201100991002010010100000000013101328321999220000101002003620036200362003620036
202042003515000000611992625201002010020100129715049169552003520035174060317481201002020030200200351041120201100991002010010100000000013101328331999220000101002003620036200362003620036
202042003515000000611992625201002010020202129715049169552003520035174060317481201002020030200200351041120201100991002010010100000000013101228331999220000101002003620036200362003620036
2020420035160000001031992625201002010020100129715049169552003520035174060317481201002020030200200351041120201100991002010010100000000013101228321999220000101002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
200242003515000000000611991825200102001020010129724749169552003520035174283175042001020020300202003510411200211091020010100100000001270227111999520000100102003620036200362003620036
2002420035150000000001241991825200102001020010129724749169552003520035174283175042001020020300202003510411200211091020010100100000001270127111999520000100102003620036200362003620036
200242003515010000000611991825200102001020010129724749169552003520035174283175042001020020300202003510411200211091020010100100000001270127111999520000100102003620036200362003620036
200242003515000000000821991825200102001020010129724749169552003520035174283175042001020020300202003510411200211091020010100100000001270127111999520000100102003620036200362003620036
2002420035150000000901241991825200102001020010129724749169552003520035174283175042001020020300202003510411200211091020010100100000001270127111999520000100102003620036200362003620036
200242003515010000000611991825200102001020010129724749169552003520035174283175042001020020300202003510411200211091020010100100000001270127121999520000100102003620036200362003620036
200242003515000000090611991825200102001020010129724749169552003520035174283175042001020020300202003510411200211091020010100100000001270127111999520000100102003620036200362003620036
2002420035150000000120611991825200102001020010129724749169552003520035174283175042001020020300202003510411200211091020010100100000001270127111999520000100102003620036200362003620036
200242003515000000000611991825200102001020010129724749169552003520035174283175042001020020300202003510411200211091020010100100000001270127111999520000100102003620036200362003620036
2002420035150000000001241991825200102001020010129724749169552003520035174283175042001020020300202003510411200211091020010100100000001270127111999520000100102003620036200362003620036

Test 4: throughput

Count: 8

Code:

  cmn w0, w1
  cmn w0, w1
  cmn w0, w1
  cmn w0, w1
  cmn w0, w1
  cmn w0, w1
  cmn w0, w1
  cmn w0, w1
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 18 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80204267722000000352580100801008010040050049236552673526735166723166908010080200160330267356611802011009910080100100000051103191126731800001002673626736267362673626736
80204267352000000352580100801008010040050049236552682926735166723166908010080200160200267356611802011009910080100100000051101261126731800001002673626736267362673626736
80204267352000000352580100801008010040050049236552673526735166723166908010080200160200267356611802011009910080100100000051101191126731800001002673626736267362673626925
80204267352000000352580370801008010040050049236552673526735166723166908010080200160200267356611802011009910080100100000051101191126731800001002673626736267362673626736
80204267352000000352580100801008010040050049236552673526735166723166908010080200160200267356611802011009910080100100200051101191126731800001002673626736267362673626736
80204267352000000352580100801008010040050049236552673526735166723166908010080200160200267356611802011009910080100100000051101191126731800001002673626736267362673626736
80204267352000000352580100801008010040050049236552673526735166723166908010080200160200267356611802011009910080100100000051101191126731800001002673626736267362673626736
80204267352010000352580100801008010040050049236552673526735166723166908010080200160200267356611802011009910080100100000051101191126731800001002673626736267362673626736
80204267352000000352580100801008010040050049236552673526735166723166908010080200160200267356611802011009910080100100000051101191126731800001002673626736267362673626736
80204267352020000352580100803158010040050049236552673526735166723166908026180200160200267356611802011009910080100100000051101191126731800001002673626736267362673626736

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3338

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d0 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80024267102000352580010800108001040005004923625267052670516665316683800108002016002026705661180021109108001010000050200081800242670180000102670626706267062670626706
80024267052000352580010800108001040005004923625267052670516665316683800108002016002026705661180021109108001010000050200041801252670180000102670626706267062670626706
80024267052000352580010800108001040005004923625267052670516665316683800108002016002026705661180021109108001010000050200041820242670180000102670626706267062670626706
800242670520021352580010800108001040005004923625267052670516665316683800108002016002026705661180021109108001010000050200021800422670180000102670626706267062670626706
80024267052000352580010800108001040005004923625267052670516665316683800108002016002026705661180021109108001010000050200041800422670180000102670626706267062670626706
80024267052000352580010800108001040005004923625267052670516665316683800108002016002026705661180021109108001010000050200041800422670180000102670626706267062670626706
800242670520042352580010800108001040005004923625267052670516665316683800108002016002026705661180021109108001010000050200041800422670180000102670626706267062670626706
8002426705200012262580010800108001040005004923625267052670516665316683800108002016002026705661180021109108001010000050430041800422670180000102670626706267062670626706
80024267052000352580010800108001040005004923625267052670516665316683800108002016002026705661180021109108001010000050200041801242670180000102670626706267062670626706
80024267052000352580010800108001040005014923625267052670516665316683800108002016002026705661180021109108001010030050200041800642670180000102670626706267062670626706