Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

BFC (32-bit)

Test 1: uops

Code:

  bfc w0, #3, #7
  mov x0, 1

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int alu (97) | l1d tlb miss (a1) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100410368047251000100010005999110361036864389410001000100010361641110011000000731161110321000100010371037103710371037
100410367047251000100010005999110361036864389410001000100010361641110011000003731161110321000100010371037103710371037
100410367047251000100010005999110361036864389410001000100010361641110011000000731161110321000100010371037103710371037
100410368047251000100010005999110361036864389410001000100010361641110011000000731161110321000100010371037103710371037
100410837047251000100010005999110361036864389410001000100010361641110011000000731161110321000100010371037103710371037
100410368047251000100010005999110361036864389410001000100010361641110011000000731161110321000100010371037103710371037
100410368047251000100010005999110361036864389410001000100010361641110011000000731161110321000100010371037103710371037
100410368047251000100010005999110361036864389410001000100010361641110011000000731161110321000100010371037103710371037
100410368047251000100010005999110361036864389410001000100010361641110011000000731161110321000100010371037103710371037
100410368047251000100010005999110361036864389410001000100010361641110011000000731161110321000100010371037103710371037

Test 2: Latency 1->1

Code:

  bfc w0, #3, #7
  mov x0, 1

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0036

retire uop (01) | cycle (02) | 03 | 1e | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204100367500246251010010100101006049904969561003610036872178740101001020810208100361621110201100991001010010000111718016001003310000101001003710037100371003710037
1020410036750073251010010100101006049904969561003610036872168739101001020810208100361621110201100991001010010000111718016001003310000101001003710037100371003710037
10204100367500362510100101001010060499049695610036100368721687401010010208102081003616211102011009910010100100310111717016001003310000101001003710037100371003710037
1020410036750036251010010100101006049914969561003610036872168739101001020810208100361621110201100991001010010000111718016001003310000101001003710037100371003710037
10204100367500329251010010100101006049914969561003610036872178740101001020810208100361621110201100991001010010000111718016001003310045101001003710037100371003710037
10204100367500256251010010100101006049914969561003610036872168739101001020810208100361621110201100991001010010000111718016001003310000101001003710037100371003710037
1020410036750036251010010100101006049914969561003610036872168740101001020810208100361621110201100991001010010000111717016001003310000101001003710037100371003710037
10204100367500362510100101001010060499149695610036100368721687401010010208102081003616211102011009910010100100190111717016001007010000101001003710037100371003710037
1020410036750036251010010100101006049914969561003610036872178739101001020810208100361621110201100991001010010000111717016001003310000101001003710037100371003710037
1020410036760035251010010100101006049914969561003610036872178740101001020810208100361621110201100991001010010003111717016001003310000101001003710037100371003710037

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0036

retire uop (01) | cycle (02) | 03 | 19 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002410037750201472510010100101001060049149695610036100368736387661001010020100201003616411100211091010010100640216221003210000100101003710037100371003710037
10024100367500472510010100101001060049149695610036100368736387661001010020100201003616411100211091010010100640216221003210000100101003710037100371003710037
10024100367500472510010100101001060049149695610036100368736387661001010020100201003616411100211091010010100640216221003210000100101003710037100371003710037
1002410036750141472510010100101001060049149695610036100368736387661001010020100201003616411100211091010010101640216221003210000100101003710037100371003710037
10024100367500892510010100101001060049149695610036100368736387661001010020100201003616411100211091010010100640216221003210000100101003710037100371003710037
100241003675009062510032100101001060049149695610036100368736387661001010020100201003616411100211091010010100640216221003210000100101003710037100371003710037
1002410036750216472510010100101001060049149695610036100368736387661001010020100201003633511100211091010010100640216221003210000100101003710037100371003710037
1002410036750318472510010100101001060049149695610036100368736387661001010020100201003616411100211091010010100640216221003210000100101003710037100371003710037
1002410036760303472510010100101001060049149695610036100368736387661001010020100201003616411100211091010010100640216221003210000100101003710037100371003710037
100241003675024472510010100101001060049149695610036100368736387661001010020100201003616411100211091010010100640216221003210000100101003710037100371003710037

Test 3: throughput

Count: 8

Code:

  bfc w0, #3, #7
  bfc w1, #3, #7
  bfc w2, #3, #7
  bfc w3, #3, #7
  bfc w4, #3, #7
  bfc w5, #3, #7
  bfc w6, #3, #7
  bfc w7, #3, #7

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0004

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020480035599000462580100801008010040050014976955800358003569964369993801008020080200800351641180201100991008010010000005110216228003180000801008003680036800368003680036
8020480035599000462580100801008010040050014976955800358003569964369993801008020080200800351641180201100991008010010000005130216228003180045801008003680036800368003680036
802048003559900016532580100801008010040050004976955800358003569964369993801008036780200800351641180201100991008010010000005110216228003180000801008003680036800368003680036
8020480035599000462580100801008010040050004976955800358003569964369993801008020080200800351641180201100991008010010000005110216228003180000801008003680036800368003680036
80204800355990004625801008010080100400500049769558003580035699642069993801008020080200800351641180201100991008010010000005110216228003180034801008003680036800368003680036
8020480035600000622580100801008010040050004976955800358003569964369993801008020080200800351641180201100991008010010000005110216228003180000801008003680036800368003680036
8020480035599000462580100801008010040050004976955800358003569964369993801008020080200800351641180201100991008010010000005110216228003180000801008003680036800368003680036
80204800356000210462580100801008010040050004976955800358003569964869993801008020080200800351641180201100991008010010000005110216228003180000801008003680036800368003680036
8020480035600000462580100801008010040050004976955800358003569964369993801008020080200800351641180201100991008010010000005110216228003180000801008003680036800368003680036
80204800355990003112580100801008010040050004976955800358003569964369993801008020080200800351641180201100991008010010000005110216228003180000801008003680036800368003680036

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0004

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800248003559900000231046258001080010800104000504976955800358003569986370015800108002080020800351641180021109108001010000000000502011601180032800000800108003680036800368003680071
80024800355990000027046258001080010800104000504976955800358003569986370015800108002080020800351641180021109108001010000000000502011601180032800001800108003680036800368003680036
80024800355990000015046258001080010800104000504976955800358003569986370015800108002080020800351641180021109108001010000000000502011601180032800000800108003680036800368003680036
80024800355990000045046258001080010800104000504976955800358003569986370015800108002080020800351641180021109108001010000000000502011651180032800000800108003680036800368003680073
800248003559900000549046258001080010800104000504976955800358003569986370015800108002080020800351641180021109108001010000000000502011601180032800000800108003680036800368003680036
80024800355990000036046258001080010800104000504976955800358003569986370015800108002080020800351641180021109108001010000000000502011601180032800000800108003680036800368003680036
80024800356000000039046258001080010800104000504976955800358003569986370015800108002080020800351641180021109108001010000000000502011601180032800000800108003680036800368003680036
80024800355990000024046258001080010800104000504976955800358003569986370015800108002080020800351641180021109108001010000000000502011601180032800000800108003680036800368003680036
80024800355990000033046258001080010800104000504976955800358003569986370015800108002080020800351641180021109108001010000000000502011601180032800000800108003680036800368003680036
8002480035599000007235246258001080010800104000504976955800358003569986370015800108002080020800351641180021109108001010000000000502011601180032800000800108003680036800368003680036