Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

CBNZ (taken)

Test 1: uops

Code:

  cbnz x0, .+4

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 1e | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | f5 | f6 | f7 | f8 | fd
100431201822005025100010001000500002056194231810001000100019621890111001100010000019326591036464417197118471989198320851965
10042074151100622510001000100050001198819763181000100010002060195011100110001000001958473992496449196719591977192520692081
10041902150100352510001000100050001198619663181000100010002042199611100110001000001948517940478418194120151907199519772015
100421461500003325100010001000500011992197231810001000100019661864111001100010000121980471822431525186120591987184520171993
10042104140100412510001000100050001203819803181000100010001958210011100110001000001976476960507465204320551965195920552019
10042090150000352510001000100050000196620003181000100010001976211611100110001000002004475816438523192920811979186720092081
10041850150100412510001000100050001199822023181000100010001958192611100110001000201914486954525512193320091977187320672015
100420561401003525100010001000500012020197431810001000100019601930111001100010000019664301004501484215319652095199919991949
10041914140000352510001000100050001197618823181000100010001968199411100110001000632022458988475476191319592071195920431949
10042004150000412510001000100050001187421743181000100010001962196411100110001000031884504916443458189319591971190120111993

Test 2: throughput

Count: 8

Code:

  cbnz x0, .+4
  cbnz x0, .+4
  cbnz x0, .+4
  cbnz x0, .+4
  cbnz x0, .+4
  cbnz x0, .+4
  cbnz x0, .+4
  cbnz x0, .+4
  mov x0, 1

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0102

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020481035606200000000018139278010580105801074005301497773680812808186108010780207802078091664776118020180100800991001001000000000111808014571081315335808091008080980811808058080980811
8020480806605200000000018139278010580105801074005301497773080816808126108010780207802078090264776218020180100800991001001000000000111807853421041312338808231008081580811808078081580811
8020480814755200000000018145278010580105801074005301497773880822808166108010780207802078090464778118020180100800991001001000000000111807953381053311338808151008081180817808178081580813
8020480812624200000000018139278010580105801074005301497773680820808126108010780207802078091664778518020180100800991001001000000000111807973411107313338808151008081980815808218081780813
8020480814605200000000018424278010580105801074005301497773480824808146108010780207802078090864780118020180100800991001001000000000111807953301047312342808151008081580809808078081780817
8020480808605200000000018145278010580105801074005301497772680816808146108010780207802078090864788118020180100800991001001000000000111807993361073311337808151008082380823808178081180823
80204808126052000000000181392780105801058010740053014977730808088080861080107802078020780904647801180201801008009910010010000000480111807933391111312336808111008081580811808078080780809
8020480806605200000000019145278010580105801074005301497773880816808146108010780207802078092664776118020180100800991001001000000000111808093441071317346808231008081780809808178081780809
8020480810605200000000018145278010580105801074005301497773480812808146108010780207802078092064788118020180100800991001001000000000111807953381095314338808071008081580817808178081580811
8020480808605200000003018147278010580105801074005301497774080812808106108010780207802078090464784118020180100800991001001000000000111807873361051312342808131008081380829808118081380823

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 3.0005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d0 | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ea | eb | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800242401131798100100000134288001180011800124000581492369680240044240046610800128002280022240044240048118002180010800091010100000000111240021079823160014079951800012400390010240045240054240045240045240045
80024240040179810010000013428800118001180012400058149236958024004424004261080012800228002224004224004211800218001080009101010000108380111240019080001160014079991800022400440010240045240045240045240041240045
80024240040179810110331201217288001180011800124000580492369620240301240042610800608002280028240044240040118002180010800091010100000030111240021080001160014079992800022400410010240043240043240043240300240045
8002424004417981011000120134288001180011800364000580492369620240042240042661800128002280046240034240042118002180010800091010102000000111240021080001160017079992800022400390010240041240043240043240043240043
800242400401798100100000134288001180011800124000580492369620240044240042610800128002280022240042240042118002180010800091010100000000111240021080001160014079993800012400410010240045240043240041240045240045
80024240044180010010000011432880035800118001240005814923696402403012400441810800128004680022240044240042518002180010800091010100000000111240056080002160016079992800032401830010240713240399240298240045240045
80024240044179800010081288176288001180011800124000581492369640240040240044610800128002280022240044240044118002180010800091010100002060111240027080000160140079993800022402610010240045240045240039240045240171
800242400441798300100000176288001180011800304000581492369640240044240042610800128002880022240044240040118002180010800091010100000030111240021080002160016079992800032400410010240045240043240043240043240045
80024240040179831010000016042880017800118001240005814923696202400442400423510800128002880022240042240042118002180010800091010100030000111240021080002160016079993800032400370010240045240094240045240043240045
8002424004417981001000001699288001180011800124000581492369620240044240042610801038002280022240044240044118002180010800091010100000002111240021080001160016079993800032400410010240045240045240045240045240043