Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CMP (sxtx, 64-bit)

Test 1: uops

Code:

  cmp x0, x1, sxtx
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int alu (97) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | f5 | f6 | f7 | f8 | fd
10043692036251000100010005000036936920632251000100020003696611100110000073118113661000370370370370370
100436920362510001000100050000369369206322510001000200036966111001100001873118113661000370370370370370
10043693036251000100010005000036936920632251000100020003696611100110000073118113661000370370370370370
10043693036251000100010005000036936920632251000100020003696611100110000073118113661000370370370370370
100436931536251000100010005000036936920632251000100020003696611100110000073118113661000370370370370370
10043693036251000100010005000036936920632251000100020003696611100110000073118113661000370370370370370
10043693036251000100010005000036936920632251000100020003696611100110000073118113661000370370370370370
10043693036251000100010005000036936920632251000100020003696611100110000073118113661000370370370370370
10043692036251000100010005000036936920632251000100020003696611100110000073118113661000370370370370370
10043693036251000100010005000136936920632251000100020003696611100110000073118113661000370370370370370

Test 2: Latency 3->1

Chain cycles: 1

Code:

  cmp x0, x1, sxtx
  cset x0, cc
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2020420035150010319926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
202042003515006119926252010020100204541297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101228231999220000101002003620036200362003620036
202042003515006119926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101001013101228221999220000101002003620036200362003620036
202042003515008419926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
2020420035150061199262520100201002010012971501491695520035200351740631748120100202003020020035104112020110099100201001010045313101228221999220000101002003620036200362003620036
2020420035150016619926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
2020420035150012819926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101228222001320000101002003620036200362003620036
2020420035150012619926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
2020420035150049419926252010020100201001297150149169552003520035174063174812010020200302002003510411202011009910020100101000013101228221999220000101002003620036200362003620036
2020420035150061199262520100201002010012971501491695520035200351740631748120100202003020020035104112020110099100201001010068313101228221999220000101002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
20024200351500000611991825200102001020010129724714916955200352003517428317504200102002030020200351041120021109102001010010001270145111999520000100102003620036200362003620036
20024200351500000611991825200102001020010129724714916955200352003517428317504200102002030020200351041120021109102001010010001270227111999520000100102003620036200362003620036
20024200351500000611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010001270127111999520000100102003620036200362003620036
20024200351500000611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010001270127111999520000100102003620036200362003620036
2002420035149000363611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010001270127111999520000100102003620036200362003620036
20024200351500000611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010001270127121999520000100102003620036200362003620036
20024200351490000611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010001270127111999520000100102003620036200362003620036
20024200351500000821991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010001270127111999520000100102003620036200362003620036
20024200351500000611991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010001270127111999520000100102003620036200362003620036
200242003515000001491991825200102001020010129724704916955200352003517428317504200102002030020200351041120021109102001010010001270127111999520000100102003620036200362003620036

Test 3: Latency 3->2

Chain cycles: 1

Code:

  cmp x0, x1, sxtx
  cset x1, cc
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
202042003515000061199262520100201002010012971501491695520035200351740631748120100202003020020035104112020110099100201001010000013101228221999220000101002003620036200362003620036
20204200351500001261992625201002010020100129715014916955200352003517406317481201002020030200200351041120201100991002010010100493013101328331999220000101002003620036200362003620036
202042003515000061199262520100201002036012971501491695520035200351740631748120100202003020020035104112020110099100201001010000013101228232001020000101002003620036200362003620036
202042003515000061199262520100201002010012971501491695520035200351740631748120100202003020020035104112020110099100201001010000013101228221999220000101002003620036200362003620036
2020420035150000300199262520100201002010012971501491695520035200351740631748120100202003020020035104112020110099100201001010000013101328331999220000101002003620036200362003620036
20204200351500004229199262520100201002010012971501491695520035200351740631748120100202003020020035104112020110099100201001010000013101228231999220000101002003620036200362003620036
202042003515000061199262520100201002010012971501491695520035200351740631748120100202003020020035104112020110099100201001010000013101328231999220000101002003620036200362003620036
202042003515000061199262520100201002010012971501491695520035200351740631748120100202003020020035104112020110099100201001010000213101228321999220000101002003620036200362003620036
202042003515000099199262520100201002010012971501491695520035200351740631748120100202003020020035104112020110099100201001010000013101228221999220000101002003620036200362003620036
2020420035150000602199262520100201002010012971501491695520035200351740631748120100202003020020035104112020110099100201001010000013101228231999220000101002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2002420035150000000000061199182520010200102001012972470491695520035200351742831750420010200203002020035104112002110910200101001000000001270427441999520000100102003620036200362003620036
200242003515000000003870061199182520010200102001012972470491695520035200351742831750420010200203002020035104112002110910200101001000000001270427431999520000100102003620036200362003620036
2002420035150000000000061199182520010200102001012972470491695520035200351742831750420010200203002020035104112002110910200101001000000001270427441999520000100102003620036200362003620036
200242003515000000004080061199182520010200102001012972470491695520035200351742831750420010200203002020035104112002110910200101001000000001270427441999520000100102003620036200362003620036
2002420035150000000000061199182520010200102001012972470491695520035200351742831750420010200203002020035104112002110910200101001000000001335667872013020091100102021720256202192026120263
200242026215210000555284400178819914110201232010020092130037204917182202602026017484211760520421203943073820035104512002110910200101001022010405521343766752019120022100102025120263201272021820218
200242026115201010656604400119519917127201212012320341130038404917181202642026217477251763420423204913073720260104612002110910200101001000000405341303869662016220111100102026220263202642026120264
200242026215201101656604400132119915126201212012220423130044504916999202622026217484251765720010204903073320261104612002110910200101001004000468001339668762013720111100102026220036203092025920218
20024203001520101156105044002029199121242014320122204221300384049171812021720260174741617657205052057631006203061048120021109102001010010022044898213541068672006120089100102026120080202632026320261
20024202611500102125132440185219917109200782012620423129975604917000200812026217446271752820423201173030020172104612002110910200101001040210406041270427341999520000100102003620036200362003620036

Test 4: throughput

Count: 8

Code:

  cmp x0, x1, sxtx
  cmp x0, x1, sxtx
  cmp x0, x1, sxtx
  cmp x0, x1, sxtx
  cmp x0, x1, sxtx
  cmp x0, x1, sxtx
  cmp x0, x1, sxtx
  cmp x0, x1, sxtx
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802042673920118094325801008010080100400500492365526735267351667231669080100802001602002673566118020110099100801001000003051102191126731800001002673626736267362673626736
802042673520103525801008010080100400500492365526735267351667231669080100802001602002673566118020110099100801001000100051101191126731800001002673626736267362673626736
8020426735200014625801008010080100400500492365526735267351667231669080100802001602002673566118020110099100801001000006051101191126731800001002673626736267362673626736
802042673520103525801008010080100400500492365526735267351667231669080183802001602002679866218020110099100801001000100051101191126731800001002673626736267362673626736
802042673520009825801008010080100400500492365526735267351667231669080100802001602002673566118020110099100801001000000151101192326731800001002673626736267362673626736
802042673520009825801008010080100400500492365526735267351667231669080100802001602002673566118020110099100801001000000051101191126731800001002673626736267362673626736
8020426735200010025801008010080100400500492365526735267351667231669080100802001602002673566118020110099100801001000000051101191126731800001002673626736267362673626736
8020526735200056025801008010080100400500492365526735267351667231669080100802001602002673566118020110099100801001000000051101191126731800001002673626736267362673626736
802042673520103525801008010080100400500492365526735267351667231669080100802001602002673566118020110099100801001000200051101191126731800001002673626736267362673626736
802042673520003525801008010080100400500492365526777267351667231669080100802001602002673566118020110099100801001000000051101192126731800001002673626736267362673626736

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3338

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002426720200030352580010800108001040005014923625026705267051666531668380010800201600202670566118002110910800101005020518442670180000102670626706267062670626706
8002426705200000352580010800108001040005014923625326705267051666531668380010800201600202670566118002110910800101005020418342670180000102670626706267062670626706
8002426705200000352580010800108001040005004923625026705267051666531668380010800201600202670566118002110910800101005020318432670180000102670626706267062670626706
8002426705199000352580010800108001040005004923625026705267051666531668380010800201600202670566118002110910800101005020318442670180000102670626706267062670626706
8002426705199000352580010800108001040005004923625026705267051666531668380010800201600202670566118002110910800101005020418432670180000102670626706267062670626706
800242670520006360352580010800108001040005004923625026705267051666531668380010800201600202670566118002110910800101005020418342670180000102670626706267062670626706
80024267052000120352580010800108001040005004923625026705267051666531668380010800201600202670566118002110910800101005020318442670180000102670626706267062670626706
8002426705200000352580010800108001040005004923625026705267051666531668380010800201600202670566118002110910800101005020418432670180000102670626706267062670626706
8002426705200000352580010800108001040005004923625026705267051666531668380010800201600202670566118002110910800101005020418442670180000102670626706267062670626706
80024267052000001492580010800108001040005014923625026705267051666531668380010800201600202670566118002110910800101005020418342670180000102670626706267062670626706