Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

TBZ (taken)

Test 1: uops

Code:

  tbz x0, #1, .+4

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 18 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | f5 | f6 | f7 | f8 | fd
100431171800003525100010001000500011956193231610001000100020421952111001100010000000001818638968518479206520851983200120352007
1004200215000035251000100010005000118981990318100010001000188219861110011000100000000019364451048465522198919972023186321652011
100419761500003525100010001000500012062196631810001000100018642036111001100010000000001930463988464532196321391989190920592025
100419961500003525100010001000500012058206431610001000100019642094111001100010000000001952502944512443198520751997186520672023
100420101500003525100010001000500012052199431810001000100020541964111001100010000000001946513932531466206919492011201318451981
100419701400003525100010001000500011970186231810001000100019741986111001100010000000001924477968499467192519512021198518551987
100419481500003525100010001000500011884196231810001000100019621962111001100010000000001918463900424470192920651965201120132071
1004197415000035251000100010005000119462008318100010001000195220201110011000100000000019224521048466499187719352007200720352061
100419661510903525100010001000500011970203431810001000100020342010111001100010000000001920426942475510205719452041206119592051
100419721600003525100010001000500011954203631810001000100021581980111001100010000000001846523958472490205519632013194520271843

Test 2: throughput

Count: 8

Code:

  tbz x0, #1, .+4
  tbz x0, #1, .+4
  tbz x0, #1, .+4
  tbz x0, #1, .+4
  tbz x0, #1, .+4
  tbz x0, #1, .+4
  tbz x0, #1, .+4
  tbz x0, #1, .+4
  mov x0, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0095

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 18 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020481071605000282780105801058010740053014977684807668076661080107802078020780774647261180201801008009910010010000011180753437685322322807691008075780765807658077580765
80204807826040002827801058010580107400530049776868076880768610801078020780207807666472611802018010080099100100100018011180737310635314311807591008076780769807738078380763
80204807586040006932780105801058010740053004977702807708078061080107802078020780792647481180201801008009910010010000011180757320655319319807671008077780777807738077980775
8020480768605000282780105801058010740053004977676807648076461080107802078020780764647201180201801008009910010010000011180747313645315315807671008076980767807698075980765
8020480762605000282780105801058010740053014977678807668076861080107802078020780756647951180201801008009910010010000011180743314639314311807611008077380765807658076780759
8020480758604000282780105801058010740053004977684807608076461080107802078020780772647201180201801008009910010010000011180747316637311313808511008083980771807658077380763
80204807586050006932780105801058010740053004977694807728076861080107802078020780772647301180201801008009910010010000011180739311643313311807571008076180763807618077180757
8020480764605000282780105801058010740053004977688807628076861080107802078020780760647181180201801008009910010010010011180747315647315414807611008076980765807658075580771
8020480766605000282780105801058010740053004977692807768077861080107802078020780790647401180201801008009910010010000011180745314643318314807671008077780777807738077380775
80204807746050012282780105801058010740053004977686807628076061080107802078020780766647201180201801008009910010010000011180753320653315315807591008077980763807818077180775

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 3.0006

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 61 | 69 | 6a | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb miss (a1) | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800242401491798100100000228288001180011800124000580049236964240044240044610800128002280022240044240044118002180010800091010100101112400220079836160016800028000424004110240045240048240045240045240045
800242400441798100100000228288001180011800124000580049236964240044240044610800128002280022240044240044118002180010800091010100001112400280080005160018799998000524004710240047240047240047240045240045
8002424004417981001000002282880011800118001240005800492369642400442400442910800128002280022240044240044118002180010800091010100001112400220080003160012800028000424004110240045240081240045240041240045
8002424004417981001000002714288001180011800124000580049236964240044240044610800698002280022240040240044118002180010800091010100001112400220080003160016800028000324004110240043240045240045240045240045
8002424004417981001000002693288001180011800124000581049236964240044240042610800128002280022240044240044118002180010800091010100001112400220080003160016800028000424003910240045240045240045240045240122
8002424004417981001000002693288001180011800124000581049236964240044240044610800128002280056240044240044118002180010800091010100001112400220080003160016800028000424003910240045240041240045240045240045
8002524004417981001000002693288001180011800124000580049236964240044240044610800128002280022240065240044118002180010800091010100001112400300080006160024800058000724004310240049240051240045240043240045
8002424004417981001000002693288001180011800124000581049236964240044240044610800128002280022240044240044118002180010800091010100001112400220080003160016800028000424004110240045240045240045240045240045
8002424004417985001000002693288001180011800124000580049236964240044240044610800128002280022240044240044118002180010800091010100001112400220080003160016800028000424004110240045240045240045240045240045
800242400441798100100000228288001180011800124000580049236964240044240044610800128002280022240044240044118002180010800091010100001112400220080003160016800028000324004110240047240110240106240047240111