Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, 3 regs, 8B)

Test 1: uops

Code:

  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.000

Integer unit issues: 0.001

Load/store unit issues: 3.000

SIMD/FP unit issues: 0.000

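retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed)
63005 | 30009 | 3019 | 1 | 3018 | 3000 | 9000 | 3000 | 3000 | 3000 | 1 | 3000
63004 | 29337 | 3001 | 1 | 3000 | 3000 | 9000 | 3000 | 3000 | 3000 | 1 | 3000
63004 | 29412 | 3001 | 1 | 3000 | 3000 | 9002 | 3000 | 3000 | 3000 | 1 | 3000
63004 | 29341 | 3001 | 1 | 3000 | 3000 | 9000 | 3000 | 3000 | 3000 | 1 | 3000
63004 | 29367 | 3001 | 1 | 3000 | 3000 | 9000 | 3000 | 3000 | 3000 | 1 | 3000
63004 | 29337 | 3001 | 1 | 3000 | 3000 | 9000 | 3000 | 3000 | 3000 | 1 | 3000
63004 | 29338 | 3001 | 1 | 3000 | 3000 | 9000 | 3000 | 3000 | 3000 | 1 | 3000
63004 | 29338 | 3001 | 1 | 3000 | 3000 | 9000 | 3000 | 3000 | 3000 | 1 | 3000
63004 | 29336 | 3001 | 1 | 3000 | 3000 | 9006 | 3000 | 3000 | 3003 | 1 | 3000
63004 | 29770 | 3001 | 1 | 3000 | 3000 | 9000 | 3000 | 3000 | 3000 | 1 | 3000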

Test 2: throughput

Count: 8

Code:

  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  ld1 { v0.8b, v1.8b, v2.8b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.5007

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
240205 | 120176 | 240135 | 101 | 0 | 240034 | 100 | 0 | 240008 | 300 | 744242 | 240108 | 200 | 240012 | 200 | 240012 | 1 | 240000 | 100
240204 | 120056 | 240101 | 101 | 0 | 240000 | 100 | 0 | 240056 | 300 | 1422758 | 240156 | 200 | 240068 | 200 | 240012 | 1 | 240000 | 100
240204 | 120056 | 240101 | 101 | 0 | 240000 | 100 | 0 | 240008 | 300 | 1920248 | 240108 | 200 | 240012 | 200 | 240012 | 1 | 240000 | 100
240204 | 120109 | 240101 | 101 | 0 | 240000 | 100 | 0 | 240008 | 300 | 1920338 | 240108 | 200 | 240012 | 200 | 240012 | 1 | 240000 | 100
240204 | 120056 | 240101 | 101 | 0 | 240000 | 100 | 0 | 240056 | 300 | 1925494 | 240156 | 200 | 240068 | 200 | 240012 | 1 | 240000 | 100
240204 | 120066 | 240101 | 101 | 0 | 240000 | 100 | 0 | 240008 | 300 | 1920248 | 240108 | 200 | 240012 | 200 | 240012 | 1 | 240000 | 100
240204 | 120059 | 240101 | 101 | 0 | 240000 | 100 | 0 | 240059 | 300 | 1924874 | 240159 | 200 | 240071 | 200 | 240012 | 1 | 240000 | 100
240204 | 120056 | 240101 | 101 | 0 | 240000 | 100 | 0 | 240008 | 300 | 1920248 | 240108 | 200 | 240012 | 200 | 240012 | 1 | 240000 | 100
240204 | 120056 | 240101 | 101 | 0 | 240000 | 100 | 0 | 240008 | 300 | 1920464 | 240108 | 200 | 240012 | 200 | 240012 | 1 | 240000 | 100
240205 | 120380 | 240131 | 101 | 0 | 240030 | 100 | 0 | 240008 | 300 | 1920284 | 240108 | 200 | 240012 | 200 | 240012 | 1 | 240000 | 100

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.5006

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
240026 | 120588 | 240071 | 11 | 240060 | 10 | 240059 | 30 | 960674 | 240069 | 20 | 240072 | 20 | 240012 | 1 | 240000 | 10
240025 | 120400 | 240041 | 11 | 240030 | 10 | 240000 | 30 | 1920326 | 240010 | 20 | 240000 | 20 | 240000 | 1 | 240000 | 10
240024 | 120054 | 240011 | 11 | 240000 | 10 | 240000 | 30 | 1920218 | 240010 | 20 | 240000 | 20 | 240057 | 1 | 240000 | 10
240024 | 120060 | 240011 | 11 | 240000 | 10 | 240000 | 30 | 1920218 | 240010 | 20 | 240000 | 20 | 240072 | 1 | 240000 | 10
240024 | 120060 | 240011 | 11 | 240000 | 10 | 240000 | 30 | 1920218 | 240010 | 20 | 240000 | 20 | 240068 | 1 | 240000 | 10
240025 | 120123 | 240048 | 11 | 240037 | 10 | 240056 | 30 | 1514558 | 240066 | 20 | 240068 | 20 | 240000 | 1 | 240000 | 10
240024 | 120054 | 240011 | 11 | 240000 | 10 | 240056 | 30 | 1728698 | 240066 | 20 | 240068 | 20 | 240068 | 1 | 240000 | 10
240024 | 120057 | 240011 | 11 | 240000 | 10 | 240000 | 30 | 1920218 | 240010 | 20 | 240000 | 20 | 240000 | 1 | 240000 | 10
240025 | 120121 | 240047 | 11 | 240036 | 10 | 240000 | 30 | 1924646 | 240010 | 20 | 240000 | 20 | 240000 | 1 | 240000 | 10
240024 | 120054 | 240011 | 11 | 240000 | 10 | 240000 | 30 | 1920218 | 240010 | 20 | 240000 | 20 | 240000 | 1 | 240000 | 10