This is inspired by and based on the x86/x64 SIMD Instruction List by Daytime.
This is not an official reference and may contain mistakes. It is intended to make instructions easier to find and to provide an alternative perspective. While writing SVE code, please refer to the Arm® Exploration Tools, the Arm® Architecture Reference Manual (Arm® ARM), or the Arm® Intrinsics Reference.
Merging and zeroing predication are typically omitted from the diagrams, but they are shown for operations like BRKN and LD1RQB, which use the /Z syntax but have unusual semantics.
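For a concrete picture of what the predication suffixes mean, here is a minimal C sketch (assuming a compiler with SVE enabled and the ACLE header `arm_sve.h`; the names are illustrative, not from this page):

```c
#include <arm_sve.h>
#include <stdio.h>

int main(void) {
    // Predicate that is true only in even-numbered lanes.
    svbool_t even = svcmpeq_n_s32(
        svptrue_b32(),
        svand_n_s32_x(svptrue_b32(), svindex_s32(0, 1), 1),
        0);

    svint32_t a = svdup_n_s32(10);  // every lane = 10
    svint32_t b = svdup_n_s32(1);   // every lane = 1

    // _m: inactive lanes keep the corresponding lane of a.
    svint32_t m = svadd_s32_m(even, a, b);
    // _z: inactive lanes are zeroed.
    svint32_t z = svadd_s32_z(even, a, b);
    // _x: inactive lanes hold an unspecified value, which lets the
    //     compiler pick the cheapest encoding.
    svint32_t x = svadd_s32_x(even, a, b);
    (void)x;  // never read the inactive lanes of an _x result

    // With a 128-bit vector length: m sums 11+10+11+10 = 42,
    // z sums 11+0+11+0 = 22.
    printf("%lld %lld\n",
           (long long)svaddv_s32(svptrue_b32(), m),
           (long long)svaddv_s32(svptrue_b32(), z));
    return 0;
}
```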
This is an ongoing project: dark-red links are missing full descriptions, and bright-red links are also missing diagrams and instead link to the documentation in the exploration tools.
Report mistakes or send feedback.
Note: this does not support filtering by vector length, so some unavailable operations may appear available even after selecting a preset.
Warning: this allows contradictory and invalid configurations.
(Interactive filter controls on the original page: SVE Version, SME Version, Mode, Extensions, Presets.)
| | 128-bit | 64-bit | 32-bit | 16-bit | 8-bit |
|---|---|---|---|---|---|
| zip | | svzip1[_{s,u,f}64] svzip2[_{s,u,f}64] | svzip1[_{s,u,f}32] svzip2[_{s,u,f}32] | svzip1[_{s,u,f,bf}16] svzip2[_{s,u,f,bf}16] | svzip1[_{s,u}8] svzip2[_{s,u}8] |
| unzip | | svuzp1[_{s,u,f}64] svuzp2[_{s,u,f}64] | svuzp1[_{s,u,f}32] svuzp2[_{s,u,f}32] | svuzp1[_{s,u,f,bf}16] svuzp2[_{s,u,f,bf}16] | svuzp1[_{s,u}8] svuzp2[_{s,u}8] |
| transpose | | svtrn1[_{s,u,f}64] svtrn2[_{s,u,f}64] | svtrn1[_{s,u,f}32] svtrn2[_{s,u,f}32] | svtrn1[_{s,u,f,bf}16] svtrn2[_{s,u,f,bf}16] | svtrn1[_{s,u}8] svtrn2[_{s,u}8] |
| broadcast | svdupq_lane | svdup[_n]_f64_m svdup[_n]_{s,u}64_m svdup[_n]_{s,u}64_z svdup[_n]_f64 svdup_lane[_{s,u,f}64] svdup[_n]_{s,u}64 | svdup[_n]_f32_m svdup[_n]_{s,u}32_m svdup[_n]_{s,u}32_z svdup[_n]_f32 svdup_lane[_{s,u,f}32] svdup[_n]_{s,u}32 | svdup[_n]_f16_m svdup[_n]_{s,u}16_m svdup[_n]_{s,u}16_z svdup[_n]_f16 svdup_lane[_{s,u,f,bf}16] svdup[_n]_{s,u}16 | svdup[_n]_{s,u}8_m svdup[_n]_{s,u}8_z svdup_lane[_{s,u}8] |
| reverse vector | | svrev[_{s,u,f}64] | svrev[_{s,u,f}32] | svrev[_{s,u,f,bf}16] | svrev[_{s,u}8] |
| reverse within elements | | svrevb[_{s,u}64]_m svrevh[_{s,u}64]_m svrevw[_{s,u}64]_m | svrevb[_{s,u}32]_m svrevh[_{s,u}32]_m | svrevb[_{s,u}16]_m | |
| extract | | | | | svext[_{s,u}8] |
| compact | | svcompact[_{s,u,f}64] | svcompact[_{s,u,f}32] | | |
| table lookup (shuffle) | | svtbl[_{s,u,f}64] svtbl2[_{s,u,f}64] svtbx[_{s,u,f}64] | svtbl[_{s,u,f}32] svtbl2[_{s,u,f}32] svtbx[_{s,u,f}32] | svtbl[_{s,u,f,bf}16] svtbl2[_{s,u,f,bf}16] svtbx[_{s,u,f,bf}16] | svtbl[_{s,u}8] svtbl2[_{s,u}8] svtbx[_{s,u}8] |
| splice | | svsplice[_{s,u,f}64] | svsplice[_{s,u,f}32] | svsplice[_{s,u,f,bf}16] | svsplice[_{s,u}8] |
| extract last active element | | svclastb[_n_{s,u,f}64] | svclastb[_n_{s,u,f}32] | svclastb[_n_{s,u,f,bf}16] | |
| broadcast last active element | | svclastb[_{s,u,f}64] | svclastb[_{s,u,f}32] | svclastb[_{s,u,f,bf}16] | svclastb[_{s,u}8] |
| extract element after last active | | svclasta[_n_{s,u,f}64] | svclasta[_n_{s,u,f}32] | svclasta[_n_{s,u,f,bf}16] | |
| broadcast element after last active | | svclasta[_{s,u,f}64] | svclasta[_{s,u,f}32] | svclasta[_{s,u,f,bf}16] | svclasta[_{s,u}8] |
| insert | | | | | |
| make linear sequence | | | | | |
| select | | svsel[_{s,u,f}64] | svsel[_{s,u,f}32] | svsel[_{s,u,f,bf}16] | svsel[_{s,u}8] |
| move to predicate | | | | | |
| move from predicate | | | | | |
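As a usage sketch for the permute operations above (assuming `arm_sve.h` and SVE support; the function name is illustrative):

```c
#include <arm_sve.h>

// Interleave the lanes of a and b, then pack the active lanes of the
// low result to the front of the vector with COMPACT.
void zip_then_compact(svfloat32_t a, svfloat32_t b, svbool_t mask,
                      svfloat32_t *lo, svfloat32_t *hi,
                      svfloat32_t *packed) {
    *lo = svzip1_f32(a, b);             // a0 b0 a1 b1 ... (lower halves)
    *hi = svzip2_f32(a, b);             // same, from the upper halves
    *packed = svcompact_f32(mask, *lo); // active lanes moved to the front
}
```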
| from \ to | int 64-bit | int 32-bit | int 16-bit | int 8-bit | double | single | half | BFloat16 |
|---|---|---|---|---|---|---|---|---|
| int 64-bit | | svqxtnb[_s64] svqxtnt[_s64] svqxtunb[_s64] svqxtunt[_s64] svqxtnb[_u64] svqxtnt[_u64] | | | svcvt_f64[_s64]_m svcvt_f64[_u64]_m | svcvt_f32[_s64]_m svcvt_f32[_u64]_m | svcvt_f16[_s64]_m svcvt_f16[_u64]_m | |
| int 32-bit | svextw[_s64]_m svextw[_u64]_m svunpkhi[_u64] svunpklo[_u64] svunpkhi[_s64] svunpklo[_s64] | | svqxtnb[_s32] svqxtnt[_s32] svqxtunb[_s32] svqxtunt[_s32] svqxtnb[_u32] svqxtnt[_u32] | | svcvt_f64[_s32]_m svcvt_f64[_u32]_m | svcvt_f32[_s32]_m svcvt_f32[_u32]_m | svcvt_f16[_s32]_m svcvt_f16[_u32]_m | |
| int 16-bit | svexth[_s64]_m svexth[_u64]_m | svexth[_s32]_m svexth[_u32]_m svunpkhi[_u32] svunpklo[_u32] svunpkhi[_s32] svunpklo[_s32] | | svqxtnb[_s16] svqxtnt[_s16] svqxtunb[_s16] svqxtunt[_s16] svqxtnb[_u16] svqxtnt[_u16] | | | svcvt_f16[_s16]_m svcvt_f16[_u16]_m | |
| int 8-bit | svextb[_s64]_m svextb[_u64]_m | svextb[_s32]_m svextb[_u32]_m | svextb[_s16]_m svextb[_u16]_m svunpkhi[_u16] svunpklo[_u16] svunpkhi[_s16] svunpklo[_s16] | | | | | |
| double | svcvt_s64[_f64]_m svcvt_u64[_f64]_m | svcvt_s32[_f64]_m svcvt_u32[_f64]_m | | | | svcvt_f32[_f64]_m svcvtnt_f32[_f64]_m svcvtx_f32[_f64]_m svcvtxnt_f32[_f64]_m | svcvt_f16[_f64]_m | |
| single | svcvt_s64[_f32]_m svcvt_u64[_f32]_m | svcvt_s32[_f32]_m svcvt_u32[_f32]_m | | | svcvt_f64[_f32]_m svcvtlt_f64[_f32]_m | | svcvt_f16[_f32]_m svcvtnt_f16[_f32]_m | svcvt_bf16[_f32]_m svcvtnt_bf16[_f32]_m |
| half | svcvt_s64[_f16]_m svcvt_u64[_f16]_m | svcvt_s32[_f16]_m svcvt_u32[_f16]_m | svcvt_s16[_f16]_m svcvt_u16[_f16]_m | | svcvt_f64[_f16]_m | svcvt_f32[_f16]_m svcvtlt_f32[_f16]_m | | |
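A small sketch of a typical widening chain from the table above, combining unpack with a convert (assumptions as before; names illustrative):

```c
#include <arm_sve.h>

// Widen a vector of 16-bit ints to two vectors of floats:
// unpack to 32-bit halves, then convert each half.
void widen_to_float(svint16_t v, svfloat32_t *lo, svfloat32_t *hi) {
    svint32_t l = svunpklo_s32(v);   // sign-extend low half to 32 bits
    svint32_t h = svunpkhi_s32(v);   // sign-extend high half
    svbool_t pg = svptrue_b32();
    *lo = svcvt_f32_s32_x(pg, l);    // int32 -> float32
    *hi = svcvt_f32_s32_x(pg, h);
}
```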
| | int 64-bit | int 32-bit | int 16-bit | int 8-bit | double | single | half | BFloat16 |
|---|---|---|---|---|---|---|---|---|
| add | svadd[_{s,u}64]_x svadd[_{s,u}64]_m svadd[_n_{s,u}64]_x svqadd[_s64] svqadd[_u64] svqadd[_n_s64] svqadd[_n_u64] svqadd[_s64]_m svqadd[_u64]_m svuqadd[_s64]_m svsqadd[_u64]_m | svadd[_{s,u}32]_x svadd[_{s,u}32]_m svadd[_n_{s,u}32]_x svqadd[_s32] svqadd[_u32] svqadd[_n_s32] svqadd[_n_u32] svqadd[_s32]_m svqadd[_u32]_m svuqadd[_s32]_m svsqadd[_u32]_m | svadd[_{s,u}16]_x svadd[_{s,u}16]_m svadd[_n_{s,u}16]_x svqadd[_s16] svqadd[_u16] svqadd[_n_s16] svqadd[_n_u16] svqadd[_s16]_m svqadd[_u16]_m svuqadd[_s16]_m svsqadd[_u16]_m | svadd[_{s,u}8]_x svadd[_{s,u}8]_m svadd[_n_{s,u}8]_x svqadd[_s8] svqadd[_u8] svqadd[_n_s8] svqadd[_n_u8] svqadd[_s8]_m svqadd[_u8]_m svuqadd[_s8]_m svsqadd[_u8]_m | svadd[_f64]_x svadd[_f64]_m | svadd[_f32]_x svadd[_f32]_m | svadd[_f16]_x svadd[_f16]_m | |
| add (half-width element) | svaddwb[_s64] svaddwt[_s64] svaddwb[_u64] svaddwt[_u64] | svaddwb[_s32] svaddwt[_s32] svaddwb[_u32] svaddwt[_u32] | svaddwb[_s16] svaddwt[_s16] svaddwb[_u16] svaddwt[_u16] | | | | | |
| add (double-width result) | svaddlb[_s64] svaddlt[_s64] svaddlb[_u64] svaddlt[_u64] svaddlbt[_s64] | svaddlb[_s32] svaddlt[_s32] svaddlb[_u32] svaddlt[_u32] svaddlbt[_s32] | svaddlb[_s16] svaddlt[_s16] svaddlb[_u16] svaddlt[_u16] svaddlbt[_s16] | | | | | |
| add (narrowing, high part) | svaddhnb[_{s,u}64] svaddhnt[_{s,u}64] svraddhnb[_{s,u}64] svraddhnt[_{s,u}64] | svaddhnb[_{s,u}32] svaddhnt[_{s,u}32] svraddhnb[_{s,u}32] svraddhnt[_{s,u}32] | svaddhnb[_{s,u}16] svaddhnt[_{s,u}16] svraddhnb[_{s,u}16] svraddhnt[_{s,u}16] | | | | | |
| average (halving add) | svhadd[_s64]_m svhadd[_u64]_m svrhadd[_s64]_m svrhadd[_u64]_m | svhadd[_s32]_m svhadd[_u32]_m svrhadd[_s32]_m svrhadd[_u32]_m | svhadd[_s16]_m svhadd[_u16]_m svrhadd[_s16]_m svrhadd[_u16]_m | svhadd[_s8]_m svhadd[_u8]_m svrhadd[_s8]_m svrhadd[_u8]_m | | | | |
| add reduction | svaddv[_{s,u}64] | svaddv[_u32] svaddv[_s32] | svaddv[_u16] svaddv[_s16] | svaddv[_u8] svaddv[_s8] | svaddv[_f64] svadda[_f64] | svaddv[_f32] svadda[_f32] | svaddv[_f16] svadda[_f16] | |
| pairwise add | svaddp[_{s,u}64]_m | svaddp[_{s,u}32]_m | svaddp[_{s,u}16]_m | svaddp[_{s,u}8]_m | svaddp[_f64]_m | svaddp[_f32]_m | svaddp[_f16]_m | |
| add pairs to double-width accumulator | svadalp[_s64]_m svadalp[_u64]_m | svadalp[_s32]_m svadalp[_u32]_m | svadalp[_s16]_m svadalp[_u16]_m | | | | | |
| add with carry | svadclb[_u64] svadclt[_u64] | svadclb[_u32] svadclt[_u32] | | | | | | |
| add with left shift / extend | svadrb[_u64base]_[{s,u}64]offset svadrh[_u64base]_[{s,u}64]index svadrw[_u64base]_[{s,u}64]index svadrd[_u64base]_[{s,u}64]index | svadrb[_u32base]_[{s,u}32]offset svadrh[_u32base]_[{s,u}32]index svadrw[_u32base]_[{s,u}32]index svadrd[_u32base]_[{s,u}32]index | | | | | | |
| sub | svsub[_{s,u}64]_x svsub[_{s,u}64]_m svsub[_n_{s,u}64]_x svsubr[_{s,u}64]_m svsubr[_n_{s,u}64]_x svqsub[_s64] svqsub[_u64] svqsub[_n_s64] svqsub[_n_u64] svqsub[_s64]_m svqsub[_u64]_m svqsubr[_s64]_m svqsubr[_u64]_m | svsub[_{s,u}32]_x svsub[_{s,u}32]_m svsub[_n_{s,u}32]_x svsubr[_{s,u}32]_m svsubr[_n_{s,u}32]_x svqsub[_s32] svqsub[_u32] svqsub[_n_s32] svqsub[_n_u32] svqsub[_s32]_m svqsub[_u32]_m svqsubr[_s32]_m svqsubr[_u32]_m | svsub[_{s,u}16]_x svsub[_{s,u}16]_m svsub[_n_{s,u}16]_x svsubr[_{s,u}16]_m svsubr[_n_{s,u}16]_x svqsub[_s16] svqsub[_u16] svqsub[_n_s16] svqsub[_n_u16] svqsub[_s16]_m svqsub[_u16]_m svqsubr[_s16]_m svqsubr[_u16]_m | svsub[_{s,u}8]_x svsub[_{s,u}8]_m svsub[_n_{s,u}8]_x svsubr[_{s,u}8]_m svsubr[_n_{s,u}8]_x svqsub[_s8] svqsub[_u8] svqsub[_n_s8] svqsub[_n_u8] svqsub[_s8]_m svqsub[_u8]_m svqsubr[_s8]_m svqsubr[_u8]_m | svsub[_f64]_x svsub[_f64]_m svsub[_n_f64]_m svsubr[_f64]_m | svsub[_f32]_x svsub[_f32]_m svsub[_n_f32]_m svsubr[_f32]_m | svsub[_f16]_x svsub[_f16]_m svsub[_n_f16]_m svsubr[_f16]_m | |
| sub (half-width element) | svsubwb[_s64] svsubwt[_s64] svsubwb[_u64] svsubwt[_u64] | svsubwb[_s32] svsubwt[_s32] svsubwb[_u32] svsubwt[_u32] | svsubwb[_s16] svsubwt[_s16] svsubwb[_u16] svsubwt[_u16] | | | | | |
| sub (double-width result) | svsublb[_s64] svsublt[_s64] svsublb[_u64] svsublt[_u64] svsublbt[_s64] svsubltb[_s64] | svsublb[_s32] svsublt[_s32] svsublb[_u32] svsublt[_u32] svsublbt[_s32] svsubltb[_s32] | svsublb[_s16] svsublt[_s16] svsublb[_u16] svsublt[_u16] svsublbt[_s16] svsubltb[_s16] | | | | | |
| halving sub | svhsub[_s64]_m svhsubr[_s64]_m svhsub[_u64]_m svhsubr[_u64]_m | svhsub[_s32]_m svhsubr[_s32]_m svhsub[_u32]_m svhsubr[_u32]_m | svhsub[_s16]_m svhsubr[_s16]_m svhsub[_u16]_m svhsubr[_u16]_m | svhsub[_s8]_m svhsubr[_s8]_m svhsub[_u8]_m svhsubr[_u8]_m | | | | |
| sub (narrowing, high part) | svsubhnb[_{s,u}64] svsubhnt[_{s,u}64] svrsubhnb[_{s,u}64] svrsubhnt[_{s,u}64] | svsubhnb[_{s,u}32] svsubhnt[_{s,u}32] svrsubhnb[_{s,u}32] svrsubhnt[_{s,u}32] | svsubhnb[_{s,u}16] svsubhnt[_{s,u}16] svrsubhnb[_{s,u}16] svrsubhnt[_{s,u}16] | | | | | |
| sub with carry | svsbclb[_u64] svsbclt[_u64] | svsbclb[_u32] svsbclt[_u32] | | | | | | |
| mul | svmul[_n_{s,u}64]_x svmul_lane[_{s,u}64] svmul[_{s,u}64]_m | svmul[_n_{s,u}32]_x svmul_lane[_{s,u}32] svmul[_{s,u}32]_m | svmul[_n_{s,u}16]_x svmul_lane[_{s,u}16] svmul[_{s,u}16]_m | svmul[_n_{s,u}8]_x svmul[_{s,u}8]_m | svmul[_f64]_x svmul_lane[_f64] svmul[_f64]_m | svmul[_f32]_x svmul_lane[_f32] svmul[_f32]_m | svmul[_f16]_x svmul_lane[_f16] svmul[_f16]_m | |
| mul (high) | svmulh[_s64]_m svmulh[_u64]_m | svmulh[_s32]_m svmulh[_u32]_m | svmulh[_s16]_m svmulh[_u16]_m | svmulh[_s8]_m svmulh[_u8]_m | | | | |
| mul (double-width result) | svmullb[_s64] svmullb_lane[_s64] svmullt[_s64] svmullt_lane[_s64] svmullb[_u64] svmullb_lane[_u64] svmullt[_u64] svmullt_lane[_u64] | svmullb[_s32] svmullb_lane[_s32] svmullt[_s32] svmullt_lane[_s32] svmullb[_u32] svmullb_lane[_u32] svmullt[_u32] svmullt_lane[_u32] | svmullb[_s16] svmullt[_s16] svmullb[_u16] svmullt[_u16] | | | | | |
| div | svdiv[_s64]_m svdiv[_u64]_m svdivr[_s64]_m svdivr[_u64]_m | svdiv[_s32]_m svdiv[_u32]_m svdivr[_s32]_m svdivr[_u32]_m | | | svdiv[_f64]_m svdivr[_f64]_m | svdiv[_f32]_m svdivr[_f32]_m | svdiv[_f16]_m svdivr[_f16]_m | |
| neg | svneg[_s64]_m svqneg[_s64]_m | svneg[_s32]_m svqneg[_s32]_m | svneg[_s16]_m svqneg[_s16]_m | svneg[_s8]_m svqneg[_s8]_m | svneg[_f64]_m | svneg[_f32]_m | svneg[_f16]_m | |
| abs | svabs[_s64]_m svqabs[_s64]_m | svabs[_s32]_m svqabs[_s32]_m | svabs[_s16]_m svqabs[_s16]_m | svabs[_s8]_m svqabs[_s8]_m | svabs[_f64]_m | svabs[_f32]_m | svabs[_f16]_m | |
| clamp | | | | | | | | |
| min | svmin[_s64]_m svmin[_u64]_m svmin[_n_s64]_x svmin[_n_u64]_x | svmin[_s32]_m svmin[_u32]_m svmin[_n_s32]_x svmin[_n_u32]_x | svmin[_s16]_m svmin[_u16]_m svmin[_n_s16]_x svmin[_n_u16]_x | svmin[_n_s8]_x svmin[_n_u8]_x | svmin[_f64]_m svmin[_n_f64]_m svminnm[_f64]_m svminnm[_n_f64]_m | svmin[_f32]_m svmin[_n_f32]_m svminnm[_f32]_m svminnm[_n_f32]_m | svmin[_f16]_m svmin[_n_f16]_m svminnm[_f16]_m svminnm[_n_f16]_m | |
| pairwise min | svminp[_s64]_m svminp[_u64]_m | svminp[_s32]_m svminp[_u32]_m | svminp[_s16]_m svminp[_u16]_m | svminp[_s8]_m svminp[_u8]_m | svminp[_f64]_m svminnmp[_f64]_m | svminp[_f32]_m svminnmp[_f32]_m | svminp[_f16]_m svminnmp[_f16]_m | |
| min reduction | svminv[_s64] svminv[_u64] | svminv[_s32] svminv[_u32] | svminv[_s16] svminv[_u16] | svminv[_s8] svminv[_u8] | svminv[_f64] svminnmv[_f64] | svminv[_f32] svminnmv[_f32] | svminv[_f16] svminnmv[_f16] | |
| max | svmax[_s64]_m svmax[_u64]_m svmax[_n_s64]_x svmax[_n_u64]_x | svmax[_s32]_m svmax[_u32]_m svmax[_n_s32]_x svmax[_n_u32]_x | svmax[_s16]_m svmax[_u16]_m svmax[_n_s16]_x svmax[_n_u16]_x | svmax[_n_s8]_x svmax[_n_u8]_x | svmax[_f64]_m svmax[_n_f64]_m svmaxnm[_f64]_m svmaxnm[_n_f64]_m | svmax[_f32]_m svmax[_n_f32]_m svmaxnm[_f32]_m svmaxnm[_n_f32]_m | svmax[_f16]_m svmax[_n_f16]_m svmaxnm[_f16]_m svmaxnm[_n_f16]_m | |
| pairwise max | svmaxp[_s64]_m svmaxp[_u64]_m | svmaxp[_s32]_m svmaxp[_u32]_m | svmaxp[_s16]_m svmaxp[_u16]_m | svmaxp[_s8]_m svmaxp[_u8]_m | svmaxp[_f64]_m svmaxnmp[_f64]_m | svmaxp[_f32]_m svmaxnmp[_f32]_m | svmaxp[_f16]_m svmaxnmp[_f16]_m | |
| max reduction | svmaxv[_s64] svmaxv[_u64] | svmaxv[_s32] svmaxv[_u32] | svmaxv[_s16] svmaxv[_u16] | svmaxv[_s8] svmaxv[_u8] | svmaxv[_f64] svmaxnmv[_f64] | svmaxv[_f32] svmaxnmv[_f32] | svmaxv[_f16] svmaxnmv[_f16] | |
| fused multiply add / sub | svmla[_{s,u}64]_m svmla_lane[_{s,u}64] svmad[_{s,u}64]_m svmls[_{s,u}64]_m svmls_lane[_{s,u}64] svmsb[_{s,u}64]_m | svmla[_{s,u}32]_m svmla_lane[_{s,u}32] svmad[_{s,u}32]_m svmls[_{s,u}32]_m svmls_lane[_{s,u}32] svmsb[_{s,u}32]_m | svmla[_{s,u}16]_m svmla_lane[_{s,u}16] svmad[_{s,u}16]_m svmls[_{s,u}16]_m svmls_lane[_{s,u}16] svmsb[_{s,u}16]_m | svmla[_{s,u}8]_m svmad[_{s,u}8]_m svmls[_{s,u}8]_m svmsb[_{s,u}8]_m | svmla[_f64]_m svmla_lane[_f64] svmad[_f64]_m svmls[_f64]_m svmls_lane[_f64] svmsb[_f64]_m | svmla[_f32]_m svmla_lane[_f32] svmad[_f32]_m svmls[_f32]_m svmls_lane[_f32] svmsb[_f32]_m | svmla[_f16]_m svmla_lane[_f16] svmad[_f16]_m svmls[_f16]_m svmls_lane[_f16] svmsb[_f16]_m | |
| negated fused multiply add / sub | | | | | svnmla[_f64]_m svnmad[_f64]_m svnmls[_f64]_m svnmsb[_f64]_m | svnmla[_f32]_m svnmad[_f32]_m svnmls[_f32]_m svnmsb[_f32]_m | svnmla[_f16]_m svnmad[_f16]_m svnmls[_f16]_m svnmsb[_f16]_m | |
| fused multiply add / sub (double-width result) | svmlalb[_u64] svmlalb_lane[_u64] svmlalt[_u64] svmlalt_lane[_u64] svmlslb[_u64] svmlslb_lane[_u64] svmlslt[_u64] svmlslt_lane[_u64] svmlalb[_s64] svmlalb_lane[_s64] svmlalt[_s64] svmlalt_lane[_s64] svmlslb[_s64] svmlslb_lane[_s64] svmlslt[_s64] svmlslt_lane[_s64] | svmlalb[_u32] svmlalb_lane[_u32] svmlalt[_u32] svmlalt_lane[_u32] svmlslb[_u32] svmlslb_lane[_u32] svmlslt[_u32] svmlslt_lane[_u32] svmlalb[_s32] svmlalb_lane[_s32] svmlalt[_s32] svmlalt_lane[_s32] svmlslb[_s32] svmlslb_lane[_s32] svmlslt[_s32] svmlslt_lane[_s32] | svmlalb[_u16] svmlalt[_u16] svmlslb[_u16] svmlslt[_u16] svmlalb[_s16] svmlalt[_s16] svmlslb[_s16] svmlslt[_s16] | | | svmlalb[_f32] svmlalb_lane[_f32] svmlalt[_f32] svmlalt_lane[_f32] svmlslb[_f32] svmlslb_lane[_f32] svmlslt[_f32] svmlslt_lane[_f32] | | svbfmlalb[_f32] svbfmlalb_lane[_f32] svbfmlalt[_f32] svbfmlalt_lane[_f32] |
| matrix multiply-add | | svmmla[_s32] svmmla[_u32] svusmmla[_s32] | | | svmmla[_f64] | svmmla[_f32] | | svbfmmla[_f32] |
| dot product (multiply-add) | svdot[_s64] svdot_lane[_s64] svdot[_u64] svdot_lane[_u64] | svdot[_s32] svdot_lane[_s32] svdot[_u32] svdot_lane[_u32] svsudot[_s32] svusdot[_s32] svusdot_lane[_s32] svsudot_lane[_s32] | | | | | | svbfdot[_f32] svbfdot_lane[_f32] |
| absolute difference | svabd[_s64]_m svabd[_u64]_m | svabd[_s32]_m svabd[_u32]_m | svabd[_s16]_m svabd[_u16]_m | | svabd[_f64]_m | svabd[_f32]_m | svabd[_f16]_m | |
| absolute difference (double-width result) | svabdlb[_s64] svabdlt[_s64] svabdlb[_u64] svabdlt[_u64] | svabdlb[_s32] svabdlt[_s32] svabdlb[_u32] svabdlt[_u32] | svabdlb[_s16] svabdlt[_s16] svabdlb[_u16] svabdlt[_u16] | | | | | |
| add absolute difference | svaba[_s64] svaba[_u64] | svaba[_s32] svaba[_u32] | svaba[_s16] svaba[_u16] | svaba[_s8] svaba[_u8] | | | | |
| add absolute difference (double-width accumulator) | svabalb[_s64] svabalt[_s64] svabalb[_u64] svabalt[_u64] | svabalb[_s32] svabalt[_s32] svabalb[_u32] svabalt[_u32] | svabalb[_s16] svabalt[_s16] svabalb[_u16] svabalt[_u16] | | | | | |
| multiply by nth power of 2 | | | | | svscale[_f64]_m | svscale[_f32]_m | svscale[_f16]_m | |
| round | | | | | svrinta[_f64]_m svrinti[_f64]_m svrintm[_f64]_m svrintn[_f64]_m svrintp[_f64]_m svrintx[_f64]_m svrintz[_f64]_m | svrinta[_f32]_m svrinti[_f32]_m svrintm[_f32]_m svrintn[_f32]_m svrintp[_f32]_m svrintx[_f32]_m svrintz[_f32]_m | svrinta[_f16]_m svrinti[_f16]_m svrintm[_f16]_m svrintn[_f16]_m svrintp[_f16]_m svrintx[_f16]_m svrintz[_f16]_m | |
| square root | | | | | svsqrt[_f64]_m | svsqrt[_f32]_m | svsqrt[_f16]_m | |
| reciprocal square root | | svrsqrte[_u32]_m | | | svrsqrte[_f64] svrsqrts[_f64] | svrsqrte[_f32] svrsqrts[_f32] | svrsqrte[_f16] svrsqrts[_f16] | |
| reciprocal | | svrecpe[_u32]_m | | | svrecpe[_f64] svrecps[_f64] | svrecpe[_f32] svrecps[_f32] | svrecpe[_f16] svrecps[_f16] | |
| trigonometric acceleration | | | | | svtsmul[_f64] svtssel[_f64] | svtsmul[_f32] svtssel[_f32] | svtsmul[_f16] svtssel[_f16] | |
| exponential acceleration | | | | | svexpa[_f64] | svexpa[_f32] | svexpa[_f16] | |
| normalization | | | | | svrecpx[_f64]_m svmulx[_f64]_m | svrecpx[_f32]_m svmulx[_f32]_m | svrecpx[_f16]_m svmulx[_f16]_m | |
| log base 2 (integer) | | | | | svlogb[_f64]_m | svlogb[_f32]_m | svlogb[_f16]_m | |
| doubling mul high | svqdmulh[_s64] svqdmulh_lane[_s64] svqrdmulh[_s64] svqrdmulh_lane[_s64] | svqdmulh[_s32] svqdmulh_lane[_s32] svqrdmulh[_s32] svqrdmulh_lane[_s32] | svqdmulh[_s16] svqdmulh_lane[_s16] svqrdmulh[_s16] svqrdmulh_lane[_s16] | svqdmulh[_s8] svqrdmulh[_s8] | | | | |
| doubling mul and add high | svqrdmlah[_s64] svqrdmlah_lane[_s64] | svqrdmlah[_s32] svqrdmlah_lane[_s32] | svqrdmlah[_s16] svqrdmlah_lane[_s16] | svqrdmlah[_s8] | | | | |
| doubling mul and sub high | svqrdmlsh[_s64] svqrdmlsh_lane[_s64] | svqrdmlsh[_s32] svqrdmlsh_lane[_s32] | svqrdmlsh[_s16] svqrdmlsh_lane[_s16] | svqrdmlsh[_s8] | | | | |
| doubling mul (double-width result) | svqdmullb[_s64] svqdmullb_lane[_s64] svqdmullt[_s64] svqdmullt_lane[_s64] | svqdmullb[_s32] svqdmullb_lane[_s32] svqdmullt[_s32] svqdmullt_lane[_s32] | svqdmullb[_s16] svqdmullt[_s16] | | | | | |
| doubling mul and add (double-width result) | svqdmlalb[_s64] svqdmlalb_lane[_s64] svqdmlalt[_s64] svqdmlalt_lane[_s64] svqdmlalbt[_s64] | svqdmlalb[_s32] svqdmlalb_lane[_s32] svqdmlalt[_s32] svqdmlalt_lane[_s32] svqdmlalbt[_s32] | svqdmlalb[_s16] svqdmlalt[_s16] svqdmlalbt[_s16] | | | | | |
| doubling mul and sub (double-width result) | svqdmlslb[_s64] svqdmlslt[_s64] svqdmlslbt[_s64] svqdmlslb_lane[_s64] svqdmlslt_lane[_s64] | svqdmlslb[_s32] svqdmlslt[_s32] svqdmlslbt[_s32] svqdmlslb_lane[_s32] svqdmlslt_lane[_s32] | svqdmlslb[_s16] svqdmlslt[_s16] svqdmlslbt[_s16] | | | | | |
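As one worked example of the arithmetic forms above, here is a single vector-length step of y = a*x + y using a fused multiply-add plus an add reduction (a sketch assuming `arm_sve.h`; the function name is illustrative):

```c
#include <arm_sve.h>

// One full-vector step of y = a*x + y (the caller guarantees at least
// one vector's worth of elements), returning the sum of the new lanes.
float axpy_step(float a, const float *x, float *y) {
    svbool_t pg = svptrue_b32();
    svfloat32_t vx = svld1_f32(pg, x);
    svfloat32_t vy = svld1_f32(pg, y);
    vy = svmla_n_f32_x(pg, vy, vx, a);  // vy + vx*a, lane by lane
    svst1_f32(pg, y, vy);
    return svaddv_f32(pg, vy);          // add reduction across lanes
}
```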
| | int 64-bit | int 32-bit | int 16-bit | int 8-bit | double | single | half |
|---|---|---|---|---|---|---|---|
| complex add | svcadd[_{s,u}64] | svcadd[_{s,u}32] | svcadd[_{s,u}16] | svcadd[_{s,u}8] | | | |
| complex multiply-add | | svcmla[_{s,u}32] | svcmla[_{s,u}16] | | | svcmla[_f32]_m | svcmla[_f16]_m |
| complex doubling mul and add high | | svqrdcmlah[_s32] | svqrdcmlah[_s16] | | | | |
| complex dot-product | svcdot[_s64] | svcdot[_s32] | | | | | |
| | 64-bit | 32-bit | 16-bit | 8-bit |
|---|---|---|---|---|
| and | svand[_{s,u}64]_x svand[_{s,u}64]_m svand[_n_{s,u}64]_x | svand[_{s,u}32]_x svand[_{s,u}32]_m svand[_n_{s,u}32]_x | svand[_{s,u}16]_x svand[_{s,u}16]_m svand[_n_{s,u}16]_x | svand[_{s,u}8]_x svand[_{s,u}8]_m svand[_n_{s,u}8]_x |
| or | svorr[_{s,u}64]_x svorr[_{s,u}64]_m svorr[_n_{s,u}64]_x | svorr[_{s,u}32]_x svorr[_{s,u}32]_m svorr[_n_{s,u}32]_x | svorr[_{s,u}16]_x svorr[_{s,u}16]_m svorr[_n_{s,u}16]_x | svorr[_{s,u}8]_x svorr[_{s,u}8]_m svorr[_n_{s,u}8]_x |
| xor | sveor[_{s,u}64]_x sveor[_{s,u}64]_m sveor[_n_{s,u}64]_x sveorbt[_{s,u}64] sveortb[_{s,u}64] | sveor[_{s,u}32]_x sveor[_{s,u}32]_m sveor[_n_{s,u}32]_x sveorbt[_{s,u}32] sveortb[_{s,u}32] | sveor[_{s,u}16]_x sveor[_{s,u}16]_m sveor[_n_{s,u}16]_x sveorbt[_{s,u}16] sveortb[_{s,u}16] | sveor[_{s,u}8]_x sveor[_{s,u}8]_m sveor[_n_{s,u}8]_x sveorbt[_{s,u}8] sveortb[_{s,u}8] |
| not | svnot[_{s,u}64]_m | svnot[_{s,u}32]_m | svnot[_{s,u}16]_m | svnot[_{s,u}8]_m |
| and not | svbic[_{s,u}64]_x svbic[_{s,u}64]_m | svbic[_{s,u}32]_x svbic[_{s,u}32]_m | svbic[_{s,u}16]_x svbic[_{s,u}16]_m | svbic[_{s,u}8]_x svbic[_{s,u}8]_m |
| 3-way xor | sveor3[_{s,u}64] | sveor3[_{s,u}32] | sveor3[_{s,u}16] | sveor3[_{s,u}8] |
| and not, xor | svbcax[_{s,u}64] | svbcax[_{s,u}32] | svbcax[_{s,u}16] | svbcax[_{s,u}8] |
| bitwise select | svbsl[_{s,u}64] svbsl1n[_{s,u}64] svbsl2n[_{s,u}64] svnbsl[_{s,u}64] | svbsl[_{s,u}32] svbsl1n[_{s,u}32] svbsl2n[_{s,u}32] svnbsl[_{s,u}32] | svbsl[_{s,u}16] svbsl1n[_{s,u}16] svbsl2n[_{s,u}16] svnbsl[_{s,u}16] | svbsl[_{s,u}8] svbsl1n[_{s,u}8] svbsl2n[_{s,u}8] svnbsl[_{s,u}8] |
| and reduction | svandv[_{s,u}64] | svandv[_{s,u}32] | svandv[_{s,u}16] | svandv[_{s,u}8] |
| or reduction | svorv[_{s,u}64] | svorv[_{s,u}32] | svorv[_{s,u}16] | svorv[_{s,u}8] |
| xor reduction | sveorv[_{s,u}64] | sveorv[_{s,u}32] | sveorv[_{s,u}16] | sveorv[_{s,u}8] |
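A small sketch combining a bitwise operation with a reduction from the table above (assumes `arm_sve.h`; names illustrative):

```c
#include <arm_sve.h>

// XOR-fold one vector of words into a running checksum,
// masking off unwanted bits first.
uint32_t xor_fold(svuint32_t v, uint32_t keep_mask, uint32_t acc) {
    svbool_t pg = svptrue_b32();
    svuint32_t masked = svand_n_u32_x(pg, v, keep_mask); // bitwise and
    return acc ^ sveorv_u32(pg, masked);                 // xor reduction
}
```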
| | 64-bit | 32-bit | 16-bit | 8-bit |
|---|---|---|---|---|
| shift left | svlsl[_n_{s,u}64]_m svlsl[_n_{s,u}64]_x svlsl[_{s,u}64]_m svqshl[_n_s64]_m svqshl[_n_u64]_m svqshlu[_n_s64]_m | svlsl[_n_{s,u}32]_m svlsl[_n_{s,u}32]_x svlsl[_{s,u}32]_m svqshl[_n_s32]_m svqshl[_n_u32]_m svqshlu[_n_s32]_m svlsl_wide[_{s,u}32]_m svlsl_wide[_{s,u}32]_x | svlsl[_n_{s,u}16]_m svlsl[_n_{s,u}16]_x svlsl[_{s,u}16]_m svqshl[_n_s16]_m svqshl[_n_u16]_m svqshlu[_n_s16]_m svlsl_wide[_{s,u}16]_m svlsl_wide[_{s,u}16]_x | svlsl[_n_{s,u}8]_m svlsl[_n_{s,u}8]_x svlsl[_{s,u}8]_m svqshl[_n_s8]_m svqshl[_n_u8]_m svqshlu[_n_s8]_m svlsl_wide[_{s,u}8]_m svlsl_wide[_{s,u}8]_x |
| shift right logical | svlsr[_n_u64]_m svlsr[_n_u64]_x svlsr[_u64]_m svrshr[_n_u64]_m | svlsr[_n_u32]_m svlsr[_n_u32]_x svlsr[_u32]_m svrshr[_n_u32]_m svlsr_wide[_u32]_m svlsr_wide[_u32]_x | svlsr[_n_u16]_m svlsr[_n_u16]_x svlsr[_u16]_m svrshr[_n_u16]_m svlsr_wide[_u16]_m svlsr_wide[_u16]_x | svlsr[_n_u8]_m svlsr[_n_u8]_x svlsr[_u8]_m svrshr[_n_u8]_m svlsr_wide[_u8]_m svlsr_wide[_u8]_x |
| shift right arithmetic | svasr[_n_s64]_m svasr[_n_s64]_x svasr[_s64]_m svrshr[_n_s64]_m | svasr[_n_s32]_m svasr[_n_s32]_x svasr[_s32]_m svrshr[_n_s32]_m svasr_wide[_s32]_m svasr_wide[_s32]_x | svasr[_n_s16]_m svasr[_n_s16]_x svasr[_s16]_m svrshr[_n_s16]_m svasr_wide[_s16]_m svasr_wide[_s16]_x | svasr[_n_s8]_m svasr[_n_s8]_x svasr[_s8]_m svrshr[_n_s8]_m svasr_wide[_s8]_m svasr_wide[_s8]_x |
| shift right arithmetic, rounding towards zero | svasrd[_n_s64]_m | svasrd[_n_s32]_m | svasrd[_n_s16]_m | svasrd[_n_s8]_m |
| bidirectional shifts | svqshl[_s64]_m svqshl[_u64]_m svrshl[_s64]_m svrshl[_u64]_m svqrshl[_s64]_m svqrshl[_u64]_m | svqshl[_s32]_m svqshl[_u32]_m svrshl[_s32]_m svrshl[_u32]_m svqrshl[_s32]_m svqrshl[_u32]_m | svqshl[_s16]_m svqshl[_u16]_m svrshl[_s16]_m svrshl[_u16]_m svqrshl[_s16]_m svqrshl[_u16]_m | svqshl[_s8]_m svqshl[_u8]_m svrshl[_s8]_m svrshl[_u8]_m svqrshl[_s8]_m svqrshl[_u8]_m |
| narrowing right shift | svshrnb[_n_{s,u}64] svshrnt[_n_{s,u}64] svrshrnb[_n_{s,u}64] svrshrnt[_n_{s,u}64] svqrshrnb[_n_s64] svqrshrnt[_n_s64] svqrshrnb[_n_u64] svqrshrnt[_n_u64] svqshrnb[_n_s64] svqshrnt[_n_s64] svqshrnb[_n_u64] svqshrnt[_n_u64] svqrshrunb[_n_s64] svqrshrunt[_n_s64] svqshrunb[_n_s64] svqshrunt[_n_s64] | svshrnb[_n_{s,u}32] svshrnt[_n_{s,u}32] svrshrnb[_n_{s,u}32] svrshrnt[_n_{s,u}32] svqrshrnb[_n_s32] svqrshrnt[_n_s32] svqrshrnb[_n_u32] svqrshrnt[_n_u32] svqshrnb[_n_s32] svqshrnt[_n_s32] svqshrnb[_n_u32] svqshrnt[_n_u32] svqrshrunb[_n_s32] svqrshrunt[_n_s32] svqshrunb[_n_s32] svqshrunt[_n_s32] | svshrnb[_n_{s,u}16] svshrnt[_n_{s,u}16] svrshrnb[_n_{s,u}16] svrshrnt[_n_{s,u}16] svqrshrnb[_n_s16] svqrshrnt[_n_s16] svqrshrnb[_n_u16] svqrshrnt[_n_u16] svqshrnb[_n_s16] svqshrnt[_n_s16] svqshrnb[_n_u16] svqshrnt[_n_u16] svqrshrunb[_n_s16] svqrshrunt[_n_s16] svqshrunb[_n_s16] svqshrunt[_n_s16] | |
| interleaving narrowing right shift | | | | |
| widening shift left | svshllb[_n_s64] svshllb[_n_u64] svshllt[_n_s64] svshllt[_n_u64] | svshllb[_n_s32] svshllb[_n_u32] svshllt[_n_s32] svshllt[_n_u32] | svshllb[_n_s16] svshllb[_n_u16] svshllt[_n_s16] svshllt[_n_u16] | |
| shift right and add | svsra[_n_s64] svsra[_n_u64] svrsra[_n_s64] svrsra[_n_u64] | svsra[_n_s32] svsra[_n_u32] svrsra[_n_s32] svrsra[_n_u32] | svsra[_n_s16] svsra[_n_u16] svrsra[_n_s16] svrsra[_n_u16] | svsra[_n_s8] svsra[_n_u8] svrsra[_n_s8] svrsra[_n_u8] |
| shift right and insert | svsri[_n_{s,u}64] | svsri[_n_{s,u}32] | svsri[_n_{s,u}16] | svsri[_n_{s,u}8] |
| shift left and insert | svsli[_n_{s,u}64] | svsli[_n_{s,u}32] | svsli[_n_{s,u}16] | svsli[_n_{s,u}8] |
| xor and rotate | svxar[_n_{s,u}64] | svxar[_n_{s,u}32] | svxar[_n_{s,u}16] | svxar[_n_{s,u}8] |
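For instance, svasrd implements signed division by a power of two with round-toward-zero semantics, unlike a plain arithmetic shift (a sketch assuming `arm_sve.h`; the function name is illustrative):

```c
#include <arm_sve.h>

// Divide each signed lane by 8 (= 2^3), rounding toward zero like C's
// `/` operator; a plain ASR would round toward negative infinity.
svint32_t div_by_8(svint32_t v) {
    return svasrd_n_s32_m(svptrue_b32(), v, 3);
}
```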
| | 64-bit | 32-bit | 16-bit | 8-bit |
|---|---|---|---|---|
| logical not | svcnot[_{s,u}64]_m | svcnot[_{s,u}32]_m | svcnot[_{s,u}16]_m | svcnot[_{s,u}8]_m |
| count leading zeros | svclz[_{s,u}64]_m | svclz[_{s,u}32]_m | svclz[_{s,u}16]_m | svclz[_{s,u}8]_m |
| count leading sign bits | svcls[_s64]_m | svcls[_s32]_m | svcls[_s16]_m | svcls[_s8]_m |
| count non-zero bits | svcnt[_{s,u,f}64]_m | svcnt[_{s,u,f}32]_m | svcnt[_{s,u,f,bf}16]_m | svcnt[_{s,u}8]_m |
| reverse bits | svrbit[_{s,u}64]_m | svrbit[_{s,u}32]_m | svrbit[_{s,u}16]_m | svrbit[_{s,u}8]_m |
| bit deposit | svbdep[_u64] | svbdep[_u32] | svbdep[_u16] | svbdep[_u8] |
| bit extract | svbext[_u64] | svbext[_u32] | svbext[_u16] | svbext[_u8] |
| bit group | svbgrp[_u64] | svbgrp[_u32] | svbgrp[_u16] | svbgrp[_u8] |
| polynomial multiply | | | | svpmul[_u8] |
| widening polynomial multiply | svpmullb_pair[_u64] svpmullt_pair[_u64] | svpmullb[_u64] svpmullb_pair[_u32] svpmullt[_u64] svpmullt_pair[_u32] | | svpmullb[_u16] svpmullb_pair[_u8] svpmullt[_u16] svpmullt_pair[_u8] |
| | 64-bit | 32-bit | 16-bit | 8-bit |
|---|---|---|---|---|
| find matching elements | | | svmatch[_{s,u}16] | svmatch[_{s,u}8] |
| find non-matching elements | | | svnmatch[_{s,u}16] | svnmatch[_{s,u}8] |
| count matching elements (within 128-bit segments) | | | | svhistseg[_{s,u}8] |
| count matching elements (prefix-inclusive) | svhistcnt[_{s,u}64]_z | svhistcnt[_{s,u}32]_z | | |
| | int 64-bit | int 32-bit | int 16-bit | int 8-bit | double | single | half |
|---|---|---|---|---|---|---|---|
| compare for == | svcmpeq[_n_{s,u}64] svcmpeq[_{s,u}64] | svcmpeq[_n_{s,u}32] svcmpeq[_{s,u}32] svcmpeq_wide[_s32] | svcmpeq[_n_{s,u}16] svcmpeq[_{s,u}16] svcmpeq_wide[_s16] | svcmpeq[_n_{s,u}8] svcmpeq[_{s,u}8] svcmpeq_wide[_s8] | svcmpeq[_f64] svcmpeq[_n_f64] | svcmpeq[_f32] svcmpeq[_n_f32] | svcmpeq[_f16] svcmpeq[_n_f16] |
| compare for != | svcmpne[_n_{s,u}64] svcmpne[_{s,u}64] | svcmpne[_n_{s,u}32] svcmpne[_{s,u}32] svcmpne_wide[_s32] | svcmpne[_n_{s,u}16] svcmpne[_{s,u}16] svcmpne_wide[_s16] | svcmpne[_n_{s,u}8] svcmpne[_{s,u}8] svcmpne_wide[_s8] | svcmpne[_f64] svcmpne[_n_f64] | svcmpne[_f32] svcmpne[_n_f32] | svcmpne[_f16] svcmpne[_n_f16] |
| compare for < | svcmplt[_n_s64] svcmplt[_n_u64] | svcmplt[_n_s32] svcmplt_wide[_s32] svcmplt[_n_u32] svcmplt_wide[_u32] | svcmplt[_n_s16] svcmplt_wide[_s16] svcmplt[_n_u16] svcmplt_wide[_u16] | svcmplt[_n_s8] svcmplt_wide[_s8] svcmplt[_n_u8] svcmplt_wide[_u8] | svcmplt[_n_f64] | svcmplt[_n_f32] | svcmplt[_n_f16] |
| compare for > | svcmpgt[_n_s64] svcmpgt[_s64] svcmpgt[_n_u64] svcmpgt[_u64] | svcmpgt[_n_s32] svcmpgt[_s32] svcmpgt_wide[_s32] svcmpgt[_n_u32] svcmpgt[_u32] svcmpgt_wide[_u32] | svcmpgt[_n_s16] svcmpgt[_s16] svcmpgt_wide[_s16] svcmpgt[_n_u16] svcmpgt[_u16] svcmpgt_wide[_u16] | svcmpgt[_n_s8] svcmpgt_wide[_s8] svcmpgt[_n_u8] svcmpgt_wide[_u8] | svcmpgt[_f64] svcmpgt[_n_f64] | svcmpgt[_f32] svcmpgt[_n_f32] | svcmpgt[_f16] svcmpgt[_n_f16] |
| compare for ≤ | svcmple[_n_s64] svcmple[_n_u64] | svcmple[_n_s32] svcmple_wide[_s32] svcmple[_n_u32] svcmple_wide[_u32] | svcmple[_n_s16] svcmple_wide[_s16] svcmple[_n_u16] svcmple_wide[_u16] | svcmple[_n_s8] svcmple_wide[_s8] svcmple[_n_u8] svcmple_wide[_u8] | svcmple[_n_f64] | svcmple[_n_f32] | svcmple[_n_f16] |
| compare for ≥ | svcmpge[_n_s64] svcmpge[_s64] svcmpge[_n_u64] svcmpge[_u64] | svcmpge[_n_s32] svcmpge[_s32] svcmpge_wide[_s32] svcmpge[_n_u32] svcmpge[_u32] svcmpge_wide[_u32] | svcmpge[_n_s16] svcmpge[_s16] svcmpge_wide[_s16] svcmpge[_n_u16] svcmpge[_u16] svcmpge_wide[_u16] | svcmpge[_n_s8] svcmpge_wide[_s8] svcmpge[_n_u8] svcmpge_wide[_u8] | svcmpge[_f64] svcmpge[_n_f64] | svcmpge[_f32] svcmpge[_n_f32] | svcmpge[_f16] svcmpge[_n_f16] |
| compare for unordered | | | | | svcmpuo[_f64] | svcmpuo[_f32] | svcmpuo[_f16] |
| compare absolute value for > | | | | | | | |
| compare absolute value for ≥ | | | | | | | |
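Compares produce predicates rather than data vectors, so they combine naturally with the predicate-count operations further down. A sketch (assumes `arm_sve.h`; names illustrative):

```c
#include <arm_sve.h>

// Count how many active lanes of `data` are strictly below `limit`.
uint64_t count_below(svbool_t pg, svint32_t data, int32_t limit) {
    svbool_t lt = svcmplt_n_s32(pg, data, limit); // per-lane data < limit
    return svcntp_b32(pg, lt);                    // count the true lanes
}
```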
| | 64-bit | 32-bit | 16-bit | 8-bit |
|---|---|---|---|---|
| set predicate register | svptrue_pat_b64 | svptrue_pat_b32 | svptrue_pat_b16 | svptrue_pat_b8 |
| zero predicate register | svpfalse[_b] | | | |
| zip | svzip1_b64 svzip2_b64 | svzip1_b32 svzip2_b32 | svzip1_b16 svzip2_b16 | |
| unzip | svuzp1_b64 svuzp2_b64 | svuzp1_b32 svuzp2_b32 | svuzp1_b16 svuzp2_b16 | |
| transpose | svtrn1_b64 svtrn2_b64 | svtrn1_b32 svtrn2_b32 | svtrn1_b16 svtrn2_b16 | |
| reverse | svrev_b64 | svrev_b32 | svrev_b16 | svrev_b8 |
| unpack | svunpkhi[_b] svunpklo[_b] | | | |
| extract predicate from predicate-as-counter | | | | |
| | 64-bit | 32-bit | 16-bit | 8-bit |
|---|---|---|---|---|
| test | svptest_any svptest_first svptest_last | | | |
| and | | | | |
| or | | | | |
| xor | | | | |
| and not | | | | |
| or not | | | | |
| not and | svnand[_b]_z | | | |
| not or | | | | |
| select | | | | |
| | 64-bit | 32-bit | 16-bit | 8-bit |
|---|---|---|---|---|
| while < (signed) | svwhilelt_b64[_s32] svwhilelt_b64[_s64] | svwhilelt_b32[_s32] svwhilelt_b32[_s64] | svwhilelt_b16[_s32] svwhilelt_b16[_s64] | svwhilelt_b8[_s32] svwhilelt_b8[_s64] |
| while > (signed) | svwhilegt_b64[_s32] svwhilegt_b64[_s64] | svwhilegt_b32[_s32] svwhilegt_b32[_s64] | svwhilegt_b16[_s32] svwhilegt_b16[_s64] | svwhilegt_b8[_s32] svwhilegt_b8[_s64] |
| while ≤ (signed) | svwhilele_b64[_s32] svwhilele_b64[_s64] | svwhilele_b32[_s32] svwhilele_b32[_s64] | svwhilele_b16[_s32] svwhilele_b16[_s64] | svwhilele_b8[_s32] svwhilele_b8[_s64] |
| while ≥ (signed) | svwhilege_b64[_s32] svwhilege_b64[_s64] | svwhilege_b32[_s32] svwhilege_b32[_s64] | svwhilege_b16[_s32] svwhilege_b16[_s64] | svwhilege_b8[_s32] svwhilege_b8[_s64] |
| while < (unsigned) | svwhilelt_b64[_u32] svwhilelt_b64[_u64] | svwhilelt_b32[_u32] svwhilelt_b32[_u64] | svwhilelt_b16[_u32] svwhilelt_b16[_u64] | svwhilelt_b8[_u32] svwhilelt_b8[_u64] |
| while > (unsigned) | svwhilegt_b64[_u32] svwhilegt_b64[_u64] | svwhilegt_b32[_u32] svwhilegt_b32[_u64] | svwhilegt_b16[_u32] svwhilegt_b16[_u64] | svwhilegt_b8[_u32] svwhilegt_b8[_u64] |
| while ≤ (unsigned) | svwhilele_b64[_u32] svwhilele_b64[_u64] | svwhilele_b32[_u32] svwhilele_b32[_u64] | svwhilele_b16[_u32] svwhilele_b16[_u64] | svwhilele_b8[_u32] svwhilele_b8[_u64] |
| while ≥ (unsigned) | svwhilege_b64[_u32] svwhilege_b64[_u64] | svwhilege_b32[_u32] svwhilege_b32[_u64] | svwhilege_b16[_u32] svwhilege_b16[_u64] | svwhilege_b8[_u32] svwhilege_b8[_u64] |
| while no read-after-write conflict | svwhilerw[_{s,u,f}64] | svwhilerw[_{s,u,f}32] | svwhilerw[_{s,u,f,bf}16] | svwhilerw[_{s,u}8] |
| while no write-after-read/write conflict | svwhilewr[_{s,u,f}64] | svwhilewr[_{s,u,f}32] | svwhilewr[_{s,u,f,bf}16] | svwhilewr[_{s,u}8] |
| break after | svbrka[_b]_m svbrka[_b]_z | | | |
| break after (propagating) | svbrkpa[_b]_z | | | |
| break before | svbrkb[_b]_m svbrkb[_b]_z | | | |
| break before (propagating) | svbrkpb[_b]_z | | | |
| propagate break | | | | |
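The while family is what makes tail-loop-free strip-mining work; a canonical sketch (assuming `arm_sve.h`; the function name is illustrative):

```c
#include <arm_sve.h>
#include <stdint.h>

// Classic SVE strip-mining: svwhilelt builds the loop predicate each
// iteration, so the final partial vector needs no scalar tail loop.
void scale(float *dst, const float *src, int64_t n, float k) {
    for (int64_t i = 0; i < n; i += (int64_t)svcntw()) {
        svbool_t pg = svwhilelt_b32_s64(i, n);  // lanes where i+lane < n
        svfloat32_t v = svld1_f32(pg, src + i);
        svst1_f32(pg, dst + i, svmul_n_f32_x(pg, v, k));
    }
}
```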
| | 64-bit | 32-bit | 16-bit | 8-bit |
|---|---|---|---|---|
| find first active element | svpfirst[_b] | | | |
| find next active element | svpnext_b64 | svpnext_b32 | svpnext_b16 | svpnext_b8 |
| | 64-bit | 32-bit | 16-bit | 8-bit |
|---|---|---|---|---|
| and with indexed bit | | | | |
| count predicate | svcntp_b64 | svcntp_b32 | svcntp_b16 | svcntp_b8 |
| increment by predicate count | svqincp[_n_s32]_b64 svqincp[_n_s64]_b64 svqincp[_n_u32]_b64 svqincp[_n_u64]_b64 | svqincp[_n_s32]_b32 svqincp[_n_s64]_b32 svqincp[_n_u32]_b32 svqincp[_n_u64]_b32 | svqincp[_n_s32]_b16 svqincp[_n_s64]_b16 svqincp[_n_u32]_b16 svqincp[_n_u64]_b16 | svqincp[_n_s32]_b8 svqincp[_n_s64]_b8 svqincp[_n_u32]_b8 svqincp[_n_u64]_b8 |
| decrement by predicate count | svqdecp[_n_s32]_b64 svqdecp[_n_s64]_b64 svqdecp[_n_u32]_b64 svqdecp[_n_u64]_b64 | svqdecp[_n_s32]_b32 svqdecp[_n_s64]_b32 svqdecp[_n_u32]_b32 svqdecp[_n_u64]_b32 | svqdecp[_n_s32]_b16 svqdecp[_n_s64]_b16 svqdecp[_n_u32]_b16 svqdecp[_n_u64]_b16 | svqdecp[_n_s32]_b8 svqdecp[_n_s64]_b8 svqdecp[_n_u32]_b8 svqdecp[_n_u64]_b8 |
| | 64-bit | 32-bit | 16-bit | 8-bit |
|---|---|---|---|---|
| count predicate constraint | | | | |
| increment by predicate constraint count | svqincd_pat[_s64] svqincd_pat[_u64] svqincd_pat[_n_s32] svqincd_pat[_n_s64] svqincd_pat[_n_u32] svqincd_pat[_n_u64] | svqincw_pat[_s32] svqincw_pat[_u32] svqincw_pat[_n_s32] svqincw_pat[_n_s64] svqincw_pat[_n_u32] svqincw_pat[_n_u64] | svqinch_pat[_s16] svqinch_pat[_u16] svqinch_pat[_n_s32] svqinch_pat[_n_s64] svqinch_pat[_n_u32] svqinch_pat[_n_u64] | svqincb_pat[_n_s32] svqincb_pat[_n_s64] svqincb_pat[_n_u32] svqincb_pat[_n_u64] |
| decrement by predicate constraint count | svqdecd_pat[_s64] svqdecd_pat[_u64] svqdecd_pat[_n_s32] svqdecd_pat[_n_s64] svqdecd_pat[_n_u32] svqdecd_pat[_n_u64] | svqdecw_pat[_s32] svqdecw_pat[_u32] svqdecw_pat[_n_s32] svqdecw_pat[_n_s64] svqdecw_pat[_n_u32] svqdecw_pat[_n_u64] | svqdech_pat[_s16] svqdech_pat[_u16] svqdech_pat[_n_s32] svqdech_pat[_n_s64] svqdech_pat[_n_u32] svqdech_pat[_n_u64] | svqdecb_pat[_n_s32] svqdecb_pat[_n_s64] svqdecb_pat[_n_u32] svqdecb_pat[_n_u64] |
| | 128-bit | 64-bit | 32-bit | 16-bit | 8-bit |
|---|---|---|---|---|---|
| load (unpredicated) | | | | | |
| load (predicated) | | svld1ub_vnum_{s,u}64 svld1ub_{s,u}64 svld1sb_vnum_{s,u}64 svld1sb_{s,u}64 svld1uh_vnum_{s,u}64 svld1sh_vnum_{s,u}64 svld1uw_vnum_{s,u}64 svld1sw_vnum_{s,u}64 svld1_vnum[_{s,u,f}64] svld1[_{s,u,f}64] | svld1ub_vnum_{s,u}32 svld1ub_{s,u}32 svld1sb_vnum_{s,u}32 svld1sb_{s,u}32 svld1uh_vnum_{s,u}32 svld1sh_vnum_{s,u}32 svld1_vnum[_{s,u,f}32] svld1[_{s,u,f}32] | svld1ub_vnum_{s,u}16 svld1ub_{s,u}16 svld1sb_vnum_{s,u}16 svld1sb_{s,u}16 svld1_vnum[_{s,u,f,bf}16] svld1[_{s,u,f,bf}16] | svld1_vnum[_{s,u}8] svld1[_{s,u}8] |
| load (predicated by counter) | | | | | |
| load and deinterleave | | svld2_vnum[_{s,u,f}64] svld3_vnum[_{s,u,f}64] svld4_vnum[_{s,u,f}64] | svld2_vnum[_{s,u,f}32] svld3_vnum[_{s,u,f}32] svld4_vnum[_{s,u,f}32] | svld2_vnum[_{s,u,f,bf}16] svld2[_{s,u,f,bf}16] svld3_vnum[_{s,u,f,bf}16] svld3[_{s,u,f,bf}16] svld4_vnum[_{s,u,f,bf}16] | svld3_vnum[_{s,u}8] svld4_vnum[_{s,u}8] |
| load and broadcast | | | | | |
| load and replicate 128-bit segment | | | | | |
| load and replicate 256-bit segment | | | | | |
| memory element size \ register lane size | 128-bit | 64-bit | 32-bit |
|---|---|---|---|
| 128-bit | | | |
| 64-bit | | svld1_gather[_u64base]_offset_{s,u,f}64 svld1_gather_[{s,u}64]index[_{s,u,f}64] svld1_gather_[{s,u}64]offset[_{s,u,f}64] | |
| 32-bit | | svld1uw_gather[_u64base]_offset_{s,u}64 svld1uw_gather_[{s,u}64]index_{s,u}64 svld1uw_gather_[{s,u}64]offset_{s,u}64 svld1sw_gather[_u64base]_offset_{s,u}64 svld1sw_gather_[{s,u}64]index_{s,u}64 svld1sw_gather_[{s,u}64]offset_{s,u}64 | svld1_gather[_u32base]_offset_{s,u,f}32 svld1_gather_[{s,u}32]index[_{s,u,f}32] svld1_gather_[{s,u}32]offset[_{s,u,f}32] |
| 16-bit | | svld1uh_gather[_u64base]_offset_{s,u}64 svld1uh_gather_[{s,u}64]index_{s,u}64 svld1uh_gather_[{s,u}64]offset_{s,u}64 svld1sh_gather[_u64base]_offset_{s,u}64 svld1sh_gather_[{s,u}64]index_{s,u}64 svld1sh_gather_[{s,u}64]offset_{s,u}64 | svld1uh_gather[_u32base]_offset_{s,u}32 svld1uh_gather_[{s,u}32]index_{s,u}32 svld1uh_gather_[{s,u}32]offset_{s,u}32 svld1sh_gather[_u32base]_offset_{s,u}32 svld1sh_gather_[{s,u}32]index_{s,u}32 svld1sh_gather_[{s,u}32]offset_{s,u}32 |
| 8-bit | | svld1ub_gather[_u64base]_offset_{s,u}64 svld1ub_gather_[{s,u}64]offset_{s,u}64 svld1sb_gather[_u64base]_offset_{s,u}64 svld1sb_gather_[{s,u}64]offset_{s,u}64 | svld1ub_gather[_u32base]_offset_{s,u}32 svld1ub_gather_[{s,u}32]offset_{s,u}32 svld1sb_gather[_u32base]_offset_{s,u}32 svld1sb_gather_[{s,u}32]offset_{s,u}32 |
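A gather sketch matching the 64-bit row above; the index form scales each index by the element size automatically (assumes `arm_sve.h`; names illustrative):

```c
#include <arm_sve.h>

// Gather doubles through a vector of element indices:
// result[lane] = table[idx[lane]] for each active lane.
svfloat64_t gather_f64(svbool_t pg, const double *table, svint64_t idx) {
    return svld1_gather_s64index_f64(pg, table, idx);
}
```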
| | 128-bit | 64-bit | 32-bit | 16-bit | 8-bit |
|---|---|---|---|---|---|
| store (unpredicated) | | | | | |
| store (predicated) | | svst1b_vnum[_{s,u}64] svst1b[_{s,u}64] svst1h_vnum[_{s,u}64] svst1h[_{s,u}64] svst1w_vnum[_{s,u}64] svst1w[_{s,u}64] svst1_vnum[_{s,u,f}64] svst1[_{s,u,f}64] | svst1b_vnum[_{s,u}32] svst1b[_{s,u}32] svst1h_vnum[_{s,u}32] svst1h[_{s,u}32] svst1_vnum[_{s,u,f}32] svst1[_{s,u,f}32] | svst1b_vnum[_{s,u}16] svst1b[_{s,u}16] svst1_vnum[_{s,u,f,bf}16] svst1[_{s,u,f,bf}16] | svst1_vnum[_{s,u}8] svst1[_{s,u}8] |
| store (predicated by counter) | | | | | |
| store and interleave | | svst2_vnum[_{s,u,f}64] svst3_vnum[_{s,u,f}64] svst4_vnum[_{s,u,f}64] | svst2_vnum[_{s,u,f}32] svst3_vnum[_{s,u,f}32] svst4_vnum[_{s,u,f}32] | svst2_vnum[_{s,u,f,bf}16] svst2[_{s,u,f,bf}16] svst3_vnum[_{s,u,f,bf}16] svst3[_{s,u,f,bf}16] svst4_vnum[_{s,u,f,bf}16] svst4[_{s,u,f,bf}16] | svst2_vnum[_{s,u}8] svst2[_{s,u}8] svst4_vnum[_{s,u}8] |
| memory element size \ register lane size | 128-bit | 64-bit | 32-bit |
|---|---|---|---|
| 128-bit | | | |
| 64-bit | | svst1_scatter[_u64base]_offset[_{s,u,f}64] svst1_scatter_[{s,u}64]index[_{s,u,f}64] svst1_scatter_[{s,u}64]offset[_{s,u,f}64] | |
| 32-bit | | svst1w_scatter[_u64base]_offset[_{s,u}64] svst1w_scatter_[{s,u}64]index[_{s,u}64] svst1w_scatter_[{s,u}64]offset[_{s,u}64] | svst1_scatter[_u32base]_offset[_{s,u,f}32] svst1_scatter_[{s,u}32]index[_{s,u,f}32] svst1_scatter_[{s,u}32]offset[_{s,u,f}32] |
| 16-bit | | svst1h_scatter[_u64base]_offset[_{s,u}64] svst1h_scatter_[{s,u}64]index[_{s,u}64] svst1h_scatter_[{s,u}64]offset[_{s,u}64] | svst1h_scatter[_u32base]_offset[_{s,u}32] svst1h_scatter_[{s,u}32]index[_{s,u}32] svst1h_scatter_[{s,u}32]offset[_{s,u}32] |
| 8-bit | | svst1b_scatter[_u64base]_offset[_{s,u}64] svst1b_scatter_[{s,u}64]offset[_{s,u}64] | svst1b_scatter[_u32base]_offset[_{s,u}32] svst1b_scatter_[{s,u}32]offset[_{s,u}32] |
| | 64-bit | 32-bit | 16-bit | 8-bit |
|---|---|---|---|---|
| load first-fault | svldff1ub_{s,u}64 svldff1sb_{s,u}64 svldff1[_{s,u,f}64] | svldff1ub_{s,u}32 svldff1sb_{s,u}32 svldff1[_{s,u,f}32] | svldff1ub_{s,u}16 svldff1sb_{s,u}16 svldff1[_{s,u,f,bf}16] | svldff1[_{s,u}8] |
| load non-fault | svldnf1ub_vnum_{s,u}64 svldnf1sb_vnum_{s,u}64 svldnf1uh_vnum_{s,u}64 svldnf1sh_vnum_{s,u}64 svldnf1uw_vnum_{s,u}64 svldnf1sw_vnum_{s,u}64 svldnf1_vnum[_{s,u,f}64] | svldnf1ub_vnum_{s,u}32 svldnf1sb_vnum_{s,u}32 svldnf1uh_vnum_{s,u}32 svldnf1sh_vnum_{s,u}32 svldnf1_vnum[_{s,u,f}32] | svldnf1ub_vnum_{s,u}16 svldnf1sb_vnum_{s,u}16 svldnf1_vnum[_{s,u,f,bf}16] | svldnf1_vnum[_{s,u}8] |
| gather first-fault | svldff1ub_gather[_u64base]_offset_{s,u}64 svldff1sb_gather[_u64base]_offset_{s,u}64 svldff1uh_gather[_u64base]_offset_{s,u}64 svldff1sh_gather[_u64base]_offset_{s,u}64 svldff1uw_gather[_u64base]_offset_{s,u}64 svldff1sw_gather[_u64base]_offset_{s,u}64 svldff1_gather[_u64base]_offset_{s,u,f}64 svldff1ub_gather_[{s,u}64]offset_{s,u}64 svldff1sb_gather_[{s,u}64]offset_{s,u}64 svldff1uh_gather_[{s,u}64]index_{s,u}64 svldff1uh_gather_[{s,u}64]offset_{s,u}64 svldff1sh_gather_[{s,u}64]index_{s,u}64 svldff1sh_gather_[{s,u}64]offset_{s,u}64 svldff1uw_gather_[{s,u}64]index_{s,u}64 svldff1uw_gather_[{s,u}64]offset_{s,u}64 svldff1sw_gather_[{s,u}64]index_{s,u}64 svldff1sw_gather_[{s,u}64]offset_{s,u}64 svldff1_gather_[{s,u}64]index[_{s,u,f}64] svldff1_gather_[{s,u}64]offset[_{s,u,f}64] | svldff1ub_gather[_u32base]_offset_{s,u}32 svldff1sb_gather[_u32base]_offset_{s,u}32 svldff1uh_gather[_u32base]_offset_{s,u}32 svldff1sh_gather[_u32base]_offset_{s,u}32 svldff1_gather[_u32base]_offset_{s,u,f}32 svldff1ub_gather_[{s,u}32]offset_{s,u}32 svldff1sb_gather_[{s,u}32]offset_{s,u}32 svldff1uh_gather_[{s,u}32]index_{s,u}32 svldff1uh_gather_[{s,u}32]offset_{s,u}32 svldff1sh_gather_[{s,u}32]index_{s,u}32 svldff1sh_gather_[{s,u}32]offset_{s,u}32 svldff1_gather_[{s,u}32]index[_{s,u,f}32] svldff1_gather_[{s,u}32]offset[_{s,u,f}32] | | |
| read first-fault register | svrdffr svrdffr_z | | | |
| write first-fault register | svwrffr | | | |
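First-fault loads allow speculative reads past an unknown buffer end; a strlen-style sketch using the operations above (assumes `arm_sve.h`; this is illustrative, not the page's code):

```c
#include <arm_sve.h>
#include <stddef.h>

// svldff1 may load fewer lanes than requested instead of faulting;
// svrdffr_z reports which lanes were actually loaded, and the loop
// only trusts those.
size_t sve_strlen(const char *s) {
    size_t i = 0;
    svbool_t all = svptrue_b8();
    for (;;) {
        svsetffr();                                  // reset the FFR
        svuint8_t v = svldff1_u8(all, (const uint8_t *)s + i);
        svbool_t loaded = svrdffr_z(all);            // lanes that loaded
        svbool_t nul = svcmpeq_n_u8(loaded, v, 0);   // NUL bytes seen
        if (svptest_any(loaded, nul)) {
            // count loaded lanes before the first NUL
            return i + svcntp_b8(loaded, svbrkb_b_z(loaded, nul));
        }
        i += svcntp_b8(all, loaded);                 // advance by lanes read
    }
}
```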
| | 64-bit | 32-bit | 16-bit | 8-bit |
|---|---|---|---|---|
| load non-temporal | svldnt1_vnum[_{s,u,f}64] svldnt1[_{s,u,f}64] | svldnt1_vnum[_{s,u,f}32] svldnt1[_{s,u,f}32] | svldnt1_vnum[_{s,u,f,bf}16] svldnt1[_{s,u,f,bf}16] | svldnt1_vnum[_{s,u}8] svldnt1[_{s,u}8] |
| store non-temporal | svstnt1_vnum[_{s,u,f}64] svstnt1[_{s,u,f}64] | svstnt1_vnum[_{s,u,f}32] svstnt1[_{s,u,f}32] | svstnt1_vnum[_{s,u,f,bf}16] svstnt1[_{s,u,f,bf}16] | svstnt1_vnum[_{s,u}8] svstnt1[_{s,u}8] |
| load non-temporal (predicated by counter) | | | | |
| store non-temporal (predicated by counter) | | | | |
| gather non-temporal | svldnt1ub_gather_[{s,u}64]offset_{s,u}64 svldnt1ub_gather[_u64base]_offset_{s,u}64 svldnt1sb_gather_[{s,u}64]offset_{s,u}64 svldnt1sb_gather[_u64base]_offset_{s,u}64 svldnt1uh_gather_[{s,u}64]offset_{s,u}64 svldnt1uh_gather[_u64base]_offset_{s,u}64 svldnt1sh_gather_[{s,u}64]offset_{s,u}64 svldnt1sh_gather[_u64base]_offset_{s,u}64 svldnt1uw_gather_[{s,u}64]offset_{s,u}64 svldnt1uw_gather[_u64base]_offset_{s,u}64 svldnt1sw_gather_[{s,u}64]offset_{s,u}64 svldnt1sw_gather[_u64base]_offset_{s,u}64 svldnt1_gather_[{s,u}64]offset[_{s,u,f}64] svldnt1_gather[_u64base]_offset_{s,u,f}64 | svldnt1ub_gather_[u32]offset_{s,u}32 svldnt1ub_gather[_u32base]_offset_{s,u}32 svldnt1sb_gather_[u32]offset_{s,u}32 svldnt1sb_gather[_u32base]_offset_{s,u}32 svldnt1uh_gather_[u32]offset_{s,u}32 svldnt1uh_gather[_u32base]_offset_{s,u}32 svldnt1sh_gather_[u32]offset_{s,u}32 svldnt1sh_gather[_u32base]_offset_{s,u}32 svldnt1_gather_[u32]offset[_{s,u,f}32] svldnt1_gather[_u32base]_offset_{s,u,f}32 | | |
| scatter non-temporal | svstnt1b_scatter_[{s,u}64]offset[_{s,u}64] svstnt1b_scatter[_u64base]_offset[_{s,u}64] svstnt1h_scatter_[{s,u}64]offset[_{s,u}64] svstnt1h_scatter[_u64base]_offset[_{s,u}64] svstnt1w_scatter_[{s,u}64]offset[_{s,u}64] svstnt1w_scatter[_u64base]_offset[_{s,u}64] svstnt1_scatter_[{s,u}64]offset[_{s,u,f}64] svstnt1_scatter[_u64base]_offset[_{s,u,f}64] | svstnt1b_scatter_[u32]offset[_{s,u}32] svstnt1b_scatter[_u32base]_offset[_{s,u}32] svstnt1h_scatter_[u32]offset[_{s,u}32] svstnt1h_scatter[_u32base]_offset[_{s,u}32] svstnt1_scatter_[u32]offset[_{s,u,f}32] svstnt1_scatter[_u32base]_offset[_{s,u,f}32] | | |
| prefetch (gather) | svprfb_gather_[{s,u}64]offset svprfb_gather[_u64base]_offset svprfh_gather_[{s,u}64]index svprfw_gather_[{s,u}64]index svprfd_gather_[{s,u}64]index | svprfb_gather_[s32]offset svprfb_gather_[u32]offset svprfb_gather[_u32base]_offset svprfh_gather_[s32]index svprfh_gather_[u32]index svprfw_gather_[s32]index svprfw_gather_[u32]index svprfd_gather_[s32]index svprfd_gather_[u32]index | | |
| prefetch (contiguous) | | | | |
| intrinsic | description |
|---|---|
| svaese[_u8] | Perform a single round of AES encryption |
| svaesd[_u8] | Perform a single round of AES decryption |
| svaesmc[_u8] | Perform a single round of the AES "mix columns" transformation |
| svaesimc[_u8] | Perform a single round of the AES "inverse mix columns" transformation |
| svsm4e[_u32] | Perform four rounds of SM4 encryption |
| svsm4ekey[_u32] | Derive four rounds of SM4 key values |
| svrax1[_{s,u}64] | Rotate 64-bit values left by 1 bit, then xor |
Additional operation groups (no intrinsics listed):

- Add multiple of vector length in bytes
- Add multiple of predicate length in bytes
- Get multiple of predicate length in bytes
- Compare and terminate loop
- Move operations that may only be used as prefixes to certain instructions