Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ORR (immediate, 32-bit)

Test 1: uops

Code:

  orr w0, w0, #3
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int alu (97) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100410357012486225100010001000169161103510357283868100010001000103541111001100073141119371000100010361036103610361036
10041035796186225100010001000169161103510357283868100010001000103541111001100073141119371000100010361036103610361036
10041035806186225100010001000169161103510357283868100010001000103541111001100073141119371000100010361036103610361036
10041035806186225100010001000169161103510357283868100010001000103541111001100073141119371000100010361036103610361036
10041035706186225100010001000169161103510357283868100010001000103541111001100073141119371000100010361036103610361036
10041035706186225100010001000169161103510357283868100010001000103541111001100073141119371000100010361036103610361036
10041035706186225100010001000169161103510357283868100010001000103541111001100073141119371000100010361036103610361036
10041035706186225100010001000169161103510357283868100010001000103541111001100073141119371000100010361036103610361036
10041035706186225100010001000169161103510357283868100010001000103541111001100073141119371000100010361036103610361036
10041035706186225100010001000169161103510357283868100010001000103541111001100073141119371000100010361036103610361036

Test 2: Latency 1->2

Code:

  orr w0, w0, #3
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 1.0035

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204100357500000006198772510100101001010088664049695510035100358580387221010010200102001003541111020110099100101001000000000071023722994110000101001003610036100361003610036
10204100357500000006198772510100101001010088664049695510035100358580387221010010200102001003541111020110099100101001000000000071023722994110000101001003610036100361003610036
10204100357500000006198772510100101001010088664049695510035100358580387221010010200102001003541111020110099100101001000000000071023722994110000101001003610036100361003610036
10204100357500000006198772510100101001010088664049695510035100358580387221010010200102001003541111020110099100101001000000000071023722994110000101001003610036100361003610036
10204100357500000006198772510100101001010088664049695510035100358580387221010010200102001003541111020110099100101001000000000071023722994110000101001003610036100361003610036
10204100357500000006198772510100101001010088664049695510035100358580387221010010200102001003541111020110099100101001000000000071023722994110000101001003610036100361003610036
10204100357500000006198772510100101001010088664049695510035100358580387221010010200102001003541111020110099100101001000000000071023722994110000101001003610036100361003610036
10204100357500000006198772510100101001010088664049695510035100358580387221010010200102001003541111020110099100101001000000000071023722994110000101001003610036100361003610036
10204100357500000006198772510100101001010088664049695510035100358580387221010010200102001003541111020110099100101001000000000071023722994110000101001003610036100361003610036
10204100357500000006198772510100101001010088664049695510035100358580387221010010200102001003541111020110099100101001000000000071023722994110000101001003610036100361003610036

1000 unrolls and 10 iterations

Result (median cycles for code): 1.0035

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d tlb access (a0) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100241003575010398632510010100101001088784049695510035100358602387421001210020100201003541111002110910100101001064024122994010000100101003610036100361003610036
10024100357506198632510010100101001088784049695510035100358602387401001010020100201003541111002110910100101000064024122994010000100101003610036100361003610036
100241003575075998632510010100101001088784049695510035100358602387421001210020100201003541111002110910100101000064024122994010000100101003610036100361003610036
10024100357506198632510010100101001088784049695510035100358602387401001010020100201003541111002110910100101000064023922994010000100101003610036100361003610036
10024100357506198633510010100101001088784049695510035100358602387401001010020100201003541111002110910100101000064024133994010000100101003610036100361003610036
10024100357506198632510012100101001088784049695510035100358602387401001010020100201003541111002110910100101000064024122994010000100101003610036100361003610036
100241003575068898632510010100101001088784149695510035100358602387401001010020100201003541111002110910100101000064064122994010000100101003610036100361003610036
100241003575068198632510010100101001088784149695510035100358602387401001010020100201003541111002110910100101000064024122994010000100101003610036100361003610036
100241003575092098632510010100101001088784049695510035100358602387401001010020100201003541111002110910100101000064063922994110000100101003610036100361003610036
1002410035760193986325100101001010010887841496955100351003586023874010012100201002010035411110021109101001010015064024122994010000100101003610036100361003610036

Test 3: throughput

Count: 8

Code:

  orr w0, w8, #3
  orr w1, w8, #3
  orr w2, w8, #3
  orr w3, w8, #3
  orr w4, w8, #3
  orr w5, w8, #3
  orr w6, w8, #3
  orr w7, w8, #3
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.1675

retire uop (01) | cycle (02) | 03 | 18 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80204133901000276072278013680136801484007100491031001339013390332663336801488026480264133903911802011009910080100100000001115119216221338780036801001339113391133911339113391
802041339010000028278013680136801484007100491031001339013390332663336801488026480264133903911802011009910080100100000031115119216221338780036801001339113391133911339113391
802041339010000028278013680136801484007101491031001339013390332663336801488026480264133903911802011009910080100100000001115119216231338780036801001339113391133911339113391
802041339010003028278013680136801484007100491031001339013390332663336801488026480264133903911802011009910080100100000001115120216221338780036801001339113391133911339113391
802041339010000028278013680136801484007101491031001339013390332663336801488026480264133903911802011009910080100100000001115120116221338780036801001339113391133911339113391
80204133901000345028278013680136801484007100491031001339013390332663336801488026480264133903911802011009910080100100000001115120216221338780036801001339113391133911339113391
802041339010000028278013680136801484007100491031001339013390332663336801488026480264133903911802011009910080100100000001115120216221338780036801001339113391133911339113391
8020413390100000137278013680136801484007101491031001339013390332663336801488026480264133903911802011009910080100100000001115120216121338780036801001339113391133911339113391
802041339010003028278013680136801484007101491031001339013390332663336801488026480264133903911802011009910080100100000001115120116221338780036801001339113391133911339113391
802041339010000028278013680136801484007101491031001339013390332663361801458026280262134013911802011009910080100100000002225131225231339980033801001340213403134031340213403

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.1671

retire uop (01) | cycle (02) | 03 | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80024133761003525800108001080010400050149102911337113371333033348800108002080020133713911800211091080010101805023161917171336880000800101337213372133721337213372
800241337110050125800108001080010400050149102911337113371333033348800108002080020133713911800211091080010100050285195161336880000800101337213372133721337213372
8002413371100352580010800108001040005014910291133711337133303334880010800208002013371391180021109108001010015502871916161336880000800101337213372133721337213372
800241337110035258001080010800104000501491029113371133713330333488001080020800201337139118002110910800101010502816195161336880000800101337213372133721337213372
8002413371100605258001080010800104000501491029113371133713330333488001080020800201337139118002110910800101000502816197161336880000800101337213372133721337213372
8002413371100402580010800108001040005014910291133711337133303334880010800208002013371391180021109108001010340502816191671336880000800101337213372133721337213372
800241337110035258001080010800104000501491029113371133713330333488001080020800201337139118002110910800101000502816191651336880000800101337213372133721337213372
8002413371100562580010800108001040005004910291133711337133303334880010800208002013371391180021109108001010024502816195161336880000800101337213372133721337213372
80024133711003525800108001080010400050149102911337113371333033348800108002080020133713911800211091080010100050237197161336880000800101337213372133721337213372
80024133711007925800108001080010400050149102911337113371333033348800108002080020133713911800211091080010100050285197161336880000800101337213372133721337213372