Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

BFXIL (32-bit)

Test 1: uops

Code:

  bfxil w0, w1, #3, #7
  mov x0, 1

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int alu (97) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100410368047251000100010005999110361036864389410001000200010361641110011000732162210321000100010371037103710371037
100410367047251000100010005999110361036864389410001000200010361641110011000732162210321000100010371037103710371037
100410368062251000100010005999110361036864389410001000200010361641110011000732162210321000100010371037103710371037
100410368047251000100010005999110361036864389410001000200010361641110011000732162210321000100010371037103710371037
100410367047251000100010005999110361036864389410001000200010361641110011000732162210321000100010371037103710371037
100410367047251000100010005999110361036864389410001000200010361641110011000732162210321000100010371037103710371037
100410368047251000100010005999110361036864389410001000200010361641110011000732162210321000100010371037103710371037
100410367047251000100010005999110361036864389410001000200010361641110011000732162210321000100010371037103710371037
100410368047251000100010005999110361036864389410001000200010361641110011000732162210321000100010371037103710371037
100410367047251000100010005999110361036864389410001000200010361641110011000732162210321000100010371037103710371037

Test 2: Latency 1->1

Code:

  bfxil w0, w1, #3, #7
  mov x0, 1

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0036

retire uop (01) | cycle (02) | 03 | 1e | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102041003676003582510100101001010060499049695610036100368703687401010010208202161003616211102011009910010100100000111718016001003310000101001003710037100371003710037
102041003675003612510100101001010060499049695610036100368721687401010010208202161003616211102011009910010100100000111717016001003310000101001003710037100371003710037
102041003675004022510100101001010060499049695610036100368721687391010010208202161003616211102011009910010100100000111717016001003310000101001003710037100371003710037
102041003675002382510100101001010060499149695610036100368721787401010010208202161003616211102011009910010100100100111717016001003310000101001003710037100371003710037
10204100367500362510100101001010060499149695610036100368721787401010010208202161003616211102011009910010100100030111718016001003310000101001003710037100371003710037
10204100367500362510100101001010060499149695610036100368721687401010010208202161003616211102011009910010100100030111717016001003310022101001003710037100371003710037
10204100367500362510100101001010060499049695610036100368721687391010010208202161003616211102011009910010100100030111718016001003310000101001003710037100371003710037
102041003675004352510100101001010060499149695610036100368721687401010010208202161003616211102011009910010100100100111717016001003310000101001003710037100371003710037
10204100367500352510100101001010060499049695610036100368721687401010010208202161003616211102011009910010100100060111717016001003310000101001003710037100371003710037
10204100367500352510100101001010060499149695610036100368721787401010010208202161003616211102011009910010100100002111717016001003310000101001003710037100371003710037

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0036

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002410036750682510010100101001060049496956100361003687363876610010100202002010036164111002110910100101000640216221003210000100101003710037100371003710037
1002410036750472510010100101001060049496956100361003687363876610010100202002010036164111002110910100101000640216221003210000100101003710037100371003710037
1002410036750472510010100101001060049496956100361003687363876610010100202002010036164111002110910100101000640216221003210000100101003710037100371003710037
10024100367501962510010100101001060049496956100361003687363876610010100202002010036164111002110910100101000640216221003210000100101003710037100371003710037
1002410036750472510010100101001060049496956100361003687363876610010100202002010036164111002110910100101000640216221003210000100101003710037100371003710037
1002410036750472510010100101001060049496956100361003687363876610010100202002010036164111002110910100101000640216221003210000100101003710037100371003710037
1002410036750472510010100101001060049496956100361003687363876610010100202002010036164111002110910100101000640216221003210000100101003710037100371003710037
1002410036750472510010100101001060049496956100361003687363876610010100202002010036164111002110910100101016640216221003210000100101003710037100371003710037
1002410036750472510010100101001060049496956100361003687363876610010100202002010036164111002110910100101010640216221003210000100101003710037100371003710037
1002410036750472510010100101001060049496956100361003687363876610010100202002010036164111002110910100101010640216221003210000100101003710037100371003710037

Test 3: Latency 1->2

Chain cycles: 1

Code:

  add x1, x0, x0
  mov x0, 0
  bfxil w0, w1, #3, #7
  mov x0, 1

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
302042003515006199512520100201002010612886571491695520035200351616071623620106202114022220035571130201100991003010010001111919116112000720000301002003620036200362003620036
302042003515006199512520100201002010612886571491695520035200351616071623620106202114022220035571130201100991003010010001111919116112000720000301002003620036200362003620036
302042003515006199512520100201002010612886571491695520035200351616071623620106202114022220035571130201100991003010010001111919116112000720000301002003620036200362003620036
302042003515006199512520100201002010612886571491695520035200351616071623620106202114022220035571130201100991003010010001111919116112000720000301002003620036200362003620036
302042003515006199512520100201002010612886571491695520035200351616071623620106202114022220035571130201100991003010010001111919116112000720000301002003620036200362003620036
3020420035150053699512520100201002010612886571491695520035200351616071623620106202114022220035571130201100991003010010031111919116122000720000301002003620036200362003620036
302042003515006199512520100201002010612886571491695520035200351616071623620106202114022220035571130201100991003010010001111919116112000720000301002003620036200362003620036
302042003515006199512520100201002010612886571491695520035200351616071623620106202114022220035571130201100991003010010001111919116112000720000301002003620036200362003620036
3020420035150079999512520100201002010612886571491695520035200351616061623620106202114022220035571130201100991003010010001111919116112000720000301002003620036200362003620036
302042003515006199512520100201002010612886571491695520035200351616071623720106202114022220035571130201100991003010010001111919116112000720000301002003620036200362003620036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 1 chain cycle): 1.0035

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
300242003515000000000774995125200102001020010128786649169552003520035161793162652001020020400202003557113002110910300101000000001890216222000320000300102003620036200362003620036
300242003515000000000126995125200102001020010128786649169552003520035161793162652001020020400202003557113002110910300101000000001890216222000320000300102003620036200362003620036
300242003515000000000126995125200102001020010128786649169552003520035161793162652001020020400202003557113002110910300101001000301890216222000320000300102003620036200362003620036
300242003515000000000424995125200102001020010128786649169552003520035161793162652001020020400202003557113002110910300101000000001890216222000320000300102003620036200362003620036
30024200351500000000061995125200102001020010128786649169552003520035161793162652001020020400202003557113002110910300101000000301890316222000320000300102003620036200362003620036
300242003515000000000536995125200102001020010128786649169552003520035161793162652001020020400202003557113002110910300101000000301890216222000320000300102003620036200362003620036
300242003515000000000147995125200102001020010128786649169552003520035161793162652001020020400202003557113002110910300101000000001890216222000320000300102003620036200362003620036
30024200351500000000061995125200102001020010128786649169552003520035161793162652001020020400202003557113002110910300101000000001890216232000320000300102003620036200362003620036
300242003515000000000126995125200102001020010128786649169552003520035161793162652001020020400202003557113002110910300101000000301890216222000320000300102003620036200362003620036
3002420035150000000001200995125200102001020010128786649169552003520035161793162652001020020400202003557113002110910300101000000001890216222000320000300102003620036200362003620036

Test 4: throughput

Count: 8

Code:

  bfxil w0, w8, #3, #7
  bfxil w1, w8, #3, #7
  bfxil w2, w8, #3, #7
  bfxil w3, w8, #3, #7
  bfxil w4, w8, #3, #7
  bfxil w5, w8, #3, #7
  bfxil w6, w8, #3, #7
  bfxil w7, w8, #3, #7

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0004

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80204800355990352580100801008010040050014976955080035800356997166998980100802081602168003516411802011009910080100100000301115117016008003280000801008003680036800368003680036
80204800356000352580100801008010040050014976955080035800356997166998980100802081602168003516411802011009910080100100000001115117016008003280000801008003680036800368003680036
802048003559912352580100801008010040050014976955080035800356997166998980100802081602168003516411802011009910080100100000001115117016008003280000801008003680036800368003680036
80204800356000352580100801008010040050014976955080035800356997166998980100802081602168003516411802011009910080100100000001115117016008003280000801008003680036800368003680036
80204800355990352580100801008010040050014976955080035800356997166998980100802001602008003516411802011009910080100100000000005110216228003180000801008003680036800368003680036
80204800356000462580100801008010040050014976955080035800356996436999380100802001602008003516411802011009910080100100000000005110216228003180000801008003680036800368003680036
80204800355990462580100801008010040050014976955080035800356996436999380100802001602008003516411802011009910080100100000000005110216328003180000801008003680036800368003680036
80204800355990462580144801008010040050004976955080035800356996436999380100802001602008003516411802011009910080100100000000005110216228003180000801008003680036800368003680036
80204800356000462580100801008010040070504976955080035800356996436999380100802001602008003516411802011009910080100100000000005110216228003180000801008003680036800368003680036
80204800355990462580100801008010040050004976955080035800356996436999380100802001602008003516411802011009910080100100000000005110216228003180000801008003680036800368003680036

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0004

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 0e | 19 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 61 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb miss (a1) | l1d cache writeback (a8) | ac | c2 | branch mispred nonspec (cb) | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80024800355990200711258001080010800104000500049769558003580035699863700158001080020160020800351641180021109108001010000005020000416758003280000800108003680036800368003680036
80024800356000000711258001080010800104000500049769558003580035699863700158001080020160020800351641180021109108001010000005020010216248003280000800108003680036800368003680036
8002480035599000088408001080010800104000500049769558003580035699863700158001080020160020800351641180021109108001010000005020000216538003280000800108003680036800368003680036
80024800356000006646258001080010800104000500049769558003580035699863700158001080020160020800351641180021109108001010000005020000416248003280000800108003680036800368003680036
80024800355990000936258001080010800104000500049769558003580035699863700158001080020160020800351641180021109108001010000005020520216248003280000800108003680036800368003680036
800248003559900004625800108001080010400050004976955800358003569986370015800108002016002080035164118002110910800101000566005020000216448003280000800108003680036800368003680036
8002480035599000046258001080010800104000500049769558003580035699863700158001080020160020800351641180021109108001010000005020000216428003280000800108003680036800368003680036
8002480035599000052258001080010800104000500049769558003580035699863700158001080020160020800351641180021109108001010000005020000416428003280000800108003680036800368003680036
8002480035599000046258001080010800104000500049769558003580035699863700158001080020160020800351641180021109108001010000005020000416428003280000800108003680036800368003680036
8002480035599000046258001080010800104000500049769558003580035699863700158001080020160020800351641180021109108001010000005020000416438003280000800108003680036800368003680036