Author:    Richard Henderson <richard.henderson@linaro.org>  2021-12-20 10:25:40 -0800
Committer: Richard Henderson <richard.henderson@linaro.org>  2021-12-20 10:25:40 -0800
commit c7d773ae49688463b59ade6989f8d612fecb973d
tree   d987ce236bc938d81cb5bc247bd8ef655a70fbb7
parent 212a33d3b0c65ae2583bb1d06cb140cd0890894c
parent 7e322a7f23a60b0e181b55ef722fdf390ec4e463
Merge tag 'pull-riscv-to-apply-20211220-1' of github.com:alistair23/qemu into staging
First RISC-V PR for QEMU 7.0

 - Add support for ratified 1.0 Vector extension
 - Drop support for draft 0.7.1 Vector extension
 - Support Zfhmin and Zfh extensions
 - Improve kernel loading for non-Linux platforms

# gpg: Signature made Sun 19 Dec 2021 08:56:08 PM PST
# gpg:                using RSA key F6C4AC46D4934868D3B8CE8F21E10D29DF977054
# gpg: Good signature from "Alistair Francis <alistair@alistair23.me>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: F6C4 AC46 D493 4868 D3B8 CE8F 21E1 0D29 DF97 7054

* tag 'pull-riscv-to-apply-20211220-1' of github.com:alistair23/qemu: (88 commits)
  hw/riscv: Use load address rather than entry point for fw_dynamic next_addr
  target/riscv: Enable bitmanip Zb[abcs] instructions
  riscv: Set 5.4 as minimum kernel version for riscv32
  target/riscv: rvv-1.0: Add ELEN checks for widening and narrowing instructions
  target/riscv: rvv-1.0: update opivv_vadc_check() comment
  target/riscv: rvv-1.0: rename vmandnot.mm and vmornot.mm to vmandn.mm and vmorn.mm
  target/riscv: rvv-1.0: add vector unit-stride mask load/store insns
  target/riscv: rvv-1.0: add evl parameter to vext_ldst_us()
  target/riscv: rvv-1.0: add vsetivli instruction
  target/riscv: rvv-1.0: rename r2_zimm to r2_zimm11
  target/riscv: rvv-1.0: floating-point reciprocal estimate instruction
  target/riscv: rvv-1.0: floating-point reciprocal square-root estimate instruction
  target/riscv: gdb: support vector registers for rv64 & rv32
  target/riscv: rvv-1.0: trigger illegal instruction exception if frm is not valid
  target/riscv: rvv-1.0: implement vstart CSR
  target/riscv: rvv-1.0: relax RV_VLEN_MAX to 1024-bits
  target/riscv: rvv-1.0: narrowing floating-point/integer type-convert
  target/riscv: add "set round to odd" rounding mode helper function
  target/riscv: rvv-1.0: widening floating-point/integer type-convert
  target/riscv: rvv-1.0: floating-point/integer type-convert instructions
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 hw/riscv/boot.c                           |   13
 linux-user/riscv/target_syscall.h         |    3
 target/riscv/cpu.c                        |   28
 target/riscv/cpu.h                        |   63
 target/riscv/cpu_bits.h                   |   10
 target/riscv/cpu_helper.c                 |   39
 target/riscv/csr.c                        |   63
 target/riscv/fpu_helper.c                 |  197
 target/riscv/gdbstub.c                    |  184
 target/riscv/helper.h                     |  464
 target/riscv/insn32.decode                |  332
 target/riscv/insn_trans/trans_rvv.c.inc   | 2429
 target/riscv/insn_trans/trans_rvzfh.c.inc |  537
 target/riscv/internals.h                  |   40
 target/riscv/translate.c                  |   93
 target/riscv/vector_helper.c              | 3601
 16 files changed, 4997 insertions(+), 3099 deletions(-)
diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index 519fa455a1..f67264374e 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -151,12 +151,19 @@ target_ulong riscv_load_kernel(const char *kernel_filename,
target_ulong kernel_start_addr,
symbol_fn_t sym_cb)
{
- uint64_t kernel_entry;
+ uint64_t kernel_load_base, kernel_entry;
+ /*
+ * NB: Use low address not ELF entry point to ensure that the fw_dynamic
+ * behaviour when loading an ELF matches the fw_payload, fw_jump and BBL
+ * behaviour, as well as fw_dynamic with a raw binary, all of which jump to
+ * the (expected) load address. This allows kernels to have
+ * separate SBI and ELF entry points (used by FreeBSD, for example).
+ */
if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL,
- &kernel_entry, NULL, NULL, NULL, 0,
+ NULL, &kernel_load_base, NULL, NULL, 0,
EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) {
- return kernel_entry;
+ return kernel_load_base;
}
if (load_uimage_as(kernel_filename, &kernel_entry, NULL, NULL,
diff --git a/linux-user/riscv/target_syscall.h b/linux-user/riscv/target_syscall.h
index dc597c8972..9b13161324 100644
--- a/linux-user/riscv/target_syscall.h
+++ b/linux-user/riscv/target_syscall.h
@@ -45,10 +45,11 @@ struct target_pt_regs {
#ifdef TARGET_RISCV32
#define UNAME_MACHINE "riscv32"
+#define UNAME_MINIMUM_RELEASE "5.4.0"
#else
#define UNAME_MACHINE "riscv64"
-#endif
#define UNAME_MINIMUM_RELEASE "4.15.0"
+#endif
#define TARGET_MINSIGSTKSZ 2048
#define TARGET_MCL_CURRENT 1
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index f812998123..6ef3314bce 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -523,7 +523,7 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
ext |= RVH;
}
if (cpu->cfg.ext_v) {
- int vext_version = VEXT_VERSION_0_07_1;
+ int vext_version = VEXT_VERSION_1_00_0;
ext |= RVV;
if (!is_power_of_2(cpu->cfg.vlen)) {
error_setg(errp,
@@ -548,8 +548,8 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
return;
}
if (cpu->cfg.vext_spec) {
- if (!g_strcmp0(cpu->cfg.vext_spec, "v0.7.1")) {
- vext_version = VEXT_VERSION_0_07_1;
+ if (!g_strcmp0(cpu->cfg.vext_spec, "v1.0")) {
+ vext_version = VEXT_VERSION_1_00_0;
} else {
error_setg(errp,
"Unsupported vector spec version '%s'",
@@ -558,7 +558,7 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
}
} else {
qemu_log("vector version is not specified, "
- "use the default value v0.7.1\n");
+ "use the default value v1.0\n");
}
set_vext_version(env, vext_version);
}
@@ -626,25 +626,27 @@ static Property riscv_cpu_properties[] = {
DEFINE_PROP_BOOL("c", RISCVCPU, cfg.ext_c, true),
DEFINE_PROP_BOOL("s", RISCVCPU, cfg.ext_s, true),
DEFINE_PROP_BOOL("u", RISCVCPU, cfg.ext_u, true),
+ DEFINE_PROP_BOOL("v", RISCVCPU, cfg.ext_v, false),
DEFINE_PROP_BOOL("Counters", RISCVCPU, cfg.ext_counters, true),
DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
+ DEFINE_PROP_BOOL("Zfh", RISCVCPU, cfg.ext_zfh, false),
+ DEFINE_PROP_BOOL("Zfhmin", RISCVCPU, cfg.ext_zfhmin, false),
DEFINE_PROP_BOOL("mmu", RISCVCPU, cfg.mmu, true),
DEFINE_PROP_BOOL("pmp", RISCVCPU, cfg.pmp, true),
DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec),
+ DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec),
+ DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128),
+ DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64),
/* These are experimental so mark with 'x-' */
- DEFINE_PROP_BOOL("x-zba", RISCVCPU, cfg.ext_zba, false),
- DEFINE_PROP_BOOL("x-zbb", RISCVCPU, cfg.ext_zbb, false),
- DEFINE_PROP_BOOL("x-zbc", RISCVCPU, cfg.ext_zbc, false),
- DEFINE_PROP_BOOL("x-zbs", RISCVCPU, cfg.ext_zbs, false),
+ DEFINE_PROP_BOOL("zba", RISCVCPU, cfg.ext_zba, true),
+ DEFINE_PROP_BOOL("zbb", RISCVCPU, cfg.ext_zbb, true),
+ DEFINE_PROP_BOOL("zbc", RISCVCPU, cfg.ext_zbc, true),
+ DEFINE_PROP_BOOL("zbs", RISCVCPU, cfg.ext_zbs, true),
DEFINE_PROP_BOOL("x-h", RISCVCPU, cfg.ext_h, false),
DEFINE_PROP_BOOL("x-j", RISCVCPU, cfg.ext_j, false),
- DEFINE_PROP_BOOL("x-v", RISCVCPU, cfg.ext_v, false),
- DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec),
- DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128),
- DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64),
/* ePMP 0.9.3 */
DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
@@ -673,6 +675,8 @@ static const char *riscv_gdb_get_dynamic_xml(CPUState *cs, const char *xmlname)
if (strcmp(xmlname, "riscv-csr.xml") == 0) {
return cpu->dyn_csr_xml;
+ } else if (strcmp(xmlname, "riscv-vector.xml") == 0) {
+ return cpu->dyn_vreg_xml;
}
return NULL;
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 0760c0af93..dc10f27093 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -81,7 +81,7 @@ enum {
#define PRIV_VERSION_1_10_0 0x00011000
#define PRIV_VERSION_1_11_0 0x00011100
-#define VEXT_VERSION_0_07_1 0x00000701
+#define VEXT_VERSION_1_00_0 0x00010000
enum {
TRANSLATE_SUCCESS,
@@ -100,12 +100,14 @@ typedef struct CPURISCVState CPURISCVState;
#include "pmp.h"
#endif
-#define RV_VLEN_MAX 256
+#define RV_VLEN_MAX 1024
-FIELD(VTYPE, VLMUL, 0, 2)
-FIELD(VTYPE, VSEW, 2, 3)
-FIELD(VTYPE, VEDIV, 5, 2)
-FIELD(VTYPE, RESERVED, 7, sizeof(target_ulong) * 8 - 9)
+FIELD(VTYPE, VLMUL, 0, 3)
+FIELD(VTYPE, VSEW, 3, 3)
+FIELD(VTYPE, VTA, 6, 1)
+FIELD(VTYPE, VMA, 7, 1)
+FIELD(VTYPE, VEDIV, 8, 2)
+FIELD(VTYPE, RESERVED, 10, sizeof(target_ulong) * 8 - 11)
FIELD(VTYPE, VILL, sizeof(target_ulong) * 8 - 1, 1)
struct CPURISCVState {
@@ -289,6 +291,7 @@ struct RISCVCPU {
CPURISCVState env;
char *dyn_csr_xml;
+ char *dyn_vreg_xml;
/* Configuration Settings */
struct {
@@ -312,6 +315,8 @@ struct RISCVCPU {
bool ext_counters;
bool ext_ifencei;
bool ext_icsr;
+ bool ext_zfh;
+ bool ext_zfhmin;
char *priv_spec;
char *user_spec;
@@ -350,6 +355,7 @@ int riscv_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
int riscv_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int riscv_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
bool riscv_cpu_fp_enabled(CPURISCVState *env);
+bool riscv_cpu_vector_enabled(CPURISCVState *env);
bool riscv_cpu_virt_enabled(CPURISCVState *env);
void riscv_cpu_set_virt_enabled(CPURISCVState *env, bool enable);
bool riscv_cpu_two_stage_lookup(int mmu_idx);
@@ -393,23 +399,27 @@ void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong);
#define TB_FLAGS_PRIV_MMU_MASK 3
#define TB_FLAGS_PRIV_HYP_ACCESS_MASK (1 << 2)
#define TB_FLAGS_MSTATUS_FS MSTATUS_FS
+#define TB_FLAGS_MSTATUS_VS MSTATUS_VS
typedef CPURISCVState CPUArchState;
typedef RISCVCPU ArchCPU;
#include "exec/cpu-all.h"
FIELD(TB_FLAGS, MEM_IDX, 0, 3)
-FIELD(TB_FLAGS, VL_EQ_VLMAX, 3, 1)
-FIELD(TB_FLAGS, LMUL, 4, 2)
+FIELD(TB_FLAGS, LMUL, 3, 3)
FIELD(TB_FLAGS, SEW, 6, 3)
-FIELD(TB_FLAGS, VILL, 9, 1)
+/* Skip MSTATUS_VS (0x600) bits */
+FIELD(TB_FLAGS, VL_EQ_VLMAX, 11, 1)
+FIELD(TB_FLAGS, VILL, 12, 1)
+/* Skip MSTATUS_FS (0x6000) bits */
/* Is a Hypervisor instruction load/store allowed? */
-FIELD(TB_FLAGS, HLSX, 10, 1)
-FIELD(TB_FLAGS, MSTATUS_HS_FS, 11, 2)
+FIELD(TB_FLAGS, HLSX, 15, 1)
+FIELD(TB_FLAGS, MSTATUS_HS_FS, 16, 2)
+FIELD(TB_FLAGS, MSTATUS_HS_VS, 18, 2)
/* The combination of MXL/SXL/UXL that applies to the current cpu mode. */
-FIELD(TB_FLAGS, XL, 13, 2)
+FIELD(TB_FLAGS, XL, 20, 2)
/* If PointerMasking should be applied */
-FIELD(TB_FLAGS, PM_ENABLED, 15, 1)
+FIELD(TB_FLAGS, PM_ENABLED, 22, 1)
#ifdef TARGET_RISCV32
#define riscv_cpu_mxl(env) ((void)(env), MXL_RV32)
@@ -421,18 +431,27 @@ static inline RISCVMXL riscv_cpu_mxl(CPURISCVState *env)
#endif
/*
- * A simplification for VLMAX
- * = (1 << LMUL) * VLEN / (8 * (1 << SEW))
- * = (VLEN << LMUL) / (8 << SEW)
- * = (VLEN << LMUL) >> (SEW + 3)
- * = VLEN >> (SEW + 3 - LMUL)
+ * Encode LMUL to lmul as follows:
+ * LMUL vlmul lmul
+ * 1 000 0
+ * 2 001 1
+ * 4 010 2
+ * 8 011 3
+ * - 100 -
+ * 1/8 101 -3
+ * 1/4 110 -2
+ * 1/2 111 -1
+ *
+ * then, we can calculate VLMAX = vlen >> (vsew + 3 - lmul)
+ * e.g. vlen = 256 bits, SEW = 16, LMUL = 1/8
+ * => VLMAX = vlen >> (1 + 3 - (-3))
+ * = 256 >> 7
+ * = 2
*/
static inline uint32_t vext_get_vlmax(RISCVCPU *cpu, target_ulong vtype)
{
- uint8_t sew, lmul;
-
- sew = FIELD_EX64(vtype, VTYPE, VSEW);
- lmul = FIELD_EX64(vtype, VTYPE, VLMUL);
+ uint8_t sew = FIELD_EX64(vtype, VTYPE, VSEW);
+ int8_t lmul = sextract32(FIELD_EX64(vtype, VTYPE, VLMUL), 0, 3);
return cpu->cfg.vlen >> (sew + 3 - lmul);
}
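
To make the new fractional-LMUL encoding concrete, here is a minimal standalone sketch of the same VLMAX arithmetic; the function and parameter names are hypothetical, and it deliberately avoids QEMU's FIELD_EX64()/sextract32() helpers.

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Standalone sketch of the VLMAX arithmetic from the comment above.
     * vsew and vlmul are the raw 3-bit vtype fields; vlen is VLEN in bits.
     */
    static uint32_t vlmax(uint32_t vlen, uint32_t vsew, uint32_t vlmul)
    {
        /* sign-extend the 3-bit vlmul encoding: 0b101 -> -3, 0b111 -> -1 */
        int lmul = (vlmul & 4) ? (int)vlmul - 8 : (int)vlmul;

        return vlen >> ((int)vsew + 3 - lmul);
    }

    int main(void)
    {
        /* worked example from the comment: VLEN=256, SEW=16 (vsew=1), LMUL=1/8 */
        printf("%u\n", vlmax(256, 1, 5));    /* prints 2 */
        return 0;
    }
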
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 9913fa9f77..1e31f4d35f 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -60,8 +60,16 @@
#define CSR_VSTART 0x008
#define CSR_VXSAT 0x009
#define CSR_VXRM 0x00a
+#define CSR_VCSR 0x00f
#define CSR_VL 0xc20
#define CSR_VTYPE 0xc21
+#define CSR_VLENB 0xc22
+
+/* VCSR fields */
+#define VCSR_VXSAT_SHIFT 0
+#define VCSR_VXSAT (0x1 << VCSR_VXSAT_SHIFT)
+#define VCSR_VXRM_SHIFT 1
+#define VCSR_VXRM (0x3 << VCSR_VXRM_SHIFT)
/* User Timers and Counters */
#define CSR_CYCLE 0xc00
@@ -375,6 +383,7 @@
#define MSTATUS_UBE 0x00000040
#define MSTATUS_MPIE 0x00000080
#define MSTATUS_SPP 0x00000100
+#define MSTATUS_VS 0x00000600
#define MSTATUS_MPP 0x00001800
#define MSTATUS_FS 0x00006000
#define MSTATUS_XS 0x00018000
@@ -408,6 +417,7 @@ typedef enum {
#define SSTATUS_UPIE 0x00000010
#define SSTATUS_SPIE 0x00000020
#define SSTATUS_SPP 0x00000100
+#define SSTATUS_VS 0x00000600
#define SSTATUS_FS 0x00006000
#define SSTATUS_XS 0x00018000
#define SSTATUS_SUM 0x00040000 /* since: priv-1.10 */
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 9eeed38c7e..10f3baba53 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -75,12 +75,22 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc,
*cs_base = 0;
if (riscv_has_ext(env, RVV)) {
+ /*
+ * If env->vl equals VLMAX, we can use the generic vector operation
+ * expanders (GVEC) to accelerate the vector operations.
+ * However, as LMUL could be a fractional number, the maximum vector
+ * size that can be operated on might be less than 8 bytes, which is
+ * not supported by GVEC. So we only set the vl_eq_vlmax flag to true
+ * when maxsz >= 8 bytes.
+ */
uint32_t vlmax = vext_get_vlmax(env_archcpu(env), env->vtype);
- bool vl_eq_vlmax = (env->vstart == 0) && (vlmax == env->vl);
+ uint32_t sew = FIELD_EX64(env->vtype, VTYPE, VSEW);
+ uint32_t maxsz = vlmax << sew;
+ bool vl_eq_vlmax = (env->vstart == 0) && (vlmax == env->vl) &&
+ (maxsz >= 8);
flags = FIELD_DP32(flags, TB_FLAGS, VILL,
FIELD_EX64(env->vtype, VTYPE, VILL));
- flags = FIELD_DP32(flags, TB_FLAGS, SEW,
- FIELD_EX64(env->vtype, VTYPE, VSEW));
+ flags = FIELD_DP32(flags, TB_FLAGS, SEW, sew);
flags = FIELD_DP32(flags, TB_FLAGS, LMUL,
FIELD_EX64(env->vtype, VTYPE, VLMUL));
flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax);
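
The vl_eq_vlmax condition above can be summarised in a small standalone sketch (a hypothetical helper, not QEMU code); it reproduces the maxsz check that keeps fractional-LMUL configurations off the GVEC fast path.

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * Sketch of the vl_eq_vlmax decision described in the comment above.
     * sew is the raw VSEW field, so an element is (1 << sew) bytes wide and
     * maxsz = vlmax << sew is the number of bytes one operation touches.
     */
    static bool vl_eq_vlmax(uint32_t vstart, uint32_t vl, uint32_t vlmax,
                            uint32_t sew)
    {
        uint32_t maxsz = vlmax << sew;

        /* GVEC needs at least an 8-byte operation; smaller fractional-LMUL
         * cases fall back to the out-of-line helpers. */
        return vstart == 0 && vl == vlmax && maxsz >= 8;
    }
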
@@ -90,12 +100,17 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc,
#ifdef CONFIG_USER_ONLY
flags |= TB_FLAGS_MSTATUS_FS;
+ flags |= TB_FLAGS_MSTATUS_VS;
#else
flags |= cpu_mmu_index(env, 0);
if (riscv_cpu_fp_enabled(env)) {
flags |= env->mstatus & MSTATUS_FS;
}
+ if (riscv_cpu_vector_enabled(env)) {
+ flags |= env->mstatus & MSTATUS_VS;
+ }
+
if (riscv_has_ext(env, RVH)) {
if (env->priv == PRV_M ||
(env->priv == PRV_S && !riscv_cpu_virt_enabled(env)) ||
@@ -106,6 +121,9 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc,
flags = FIELD_DP32(flags, TB_FLAGS, MSTATUS_HS_FS,
get_field(env->mstatus_hs, MSTATUS_FS));
+
+ flags = FIELD_DP32(flags, TB_FLAGS, MSTATUS_HS_VS,
+ get_field(env->mstatus_hs, MSTATUS_VS));
}
if (riscv_has_ext(env, RVJ)) {
int priv = flags & TB_FLAGS_PRIV_MMU_MASK;
@@ -189,11 +207,24 @@ bool riscv_cpu_fp_enabled(CPURISCVState *env)
return false;
}
+/* Return true if vector support is currently enabled */
+bool riscv_cpu_vector_enabled(CPURISCVState *env)
+{
+ if (env->mstatus & MSTATUS_VS) {
+ if (riscv_cpu_virt_enabled(env) && !(env->mstatus_hs & MSTATUS_VS)) {
+ return false;
+ }
+ return true;
+ }
+
+ return false;
+}
+
void riscv_cpu_swap_hypervisor_regs(CPURISCVState *env)
{
uint64_t mstatus_mask = MSTATUS_MXR | MSTATUS_SUM | MSTATUS_FS |
MSTATUS_SPP | MSTATUS_SPIE | MSTATUS_SIE |
- MSTATUS64_UXL;
+ MSTATUS64_UXL | MSTATUS_VS;
bool current_virt = riscv_cpu_virt_enabled(env);
g_assert(riscv_has_ext(env, RVH));
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 9f41954894..146447eac5 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -38,10 +38,6 @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops)
static RISCVException fs(CPURISCVState *env, int csrno)
{
#if !defined(CONFIG_USER_ONLY)
- /* loose check condition for fcsr in vector extension */
- if ((csrno == CSR_FCSR) && (env->misa_ext & RVV)) {
- return RISCV_EXCP_NONE;
- }
if (!env->debugger && !riscv_cpu_fp_enabled(env)) {
return RISCV_EXCP_ILLEGAL_INST;
}
@@ -52,6 +48,11 @@ static RISCVException fs(CPURISCVState *env, int csrno)
static RISCVException vs(CPURISCVState *env, int csrno)
{
if (env->misa_ext & RVV) {
+#if !defined(CONFIG_USER_ONLY)
+ if (!env->debugger && !riscv_cpu_vector_enabled(env)) {
+ return RISCV_EXCP_ILLEGAL_INST;
+ }
+#endif
return RISCV_EXCP_NONE;
}
return RISCV_EXCP_ILLEGAL_INST;
@@ -261,10 +262,6 @@ static RISCVException read_fcsr(CPURISCVState *env, int csrno,
{
*val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
| (env->frm << FSR_RD_SHIFT);
- if (vs(env, csrno) >= 0) {
- *val |= (env->vxrm << FSR_VXRM_SHIFT)
- | (env->vxsat << FSR_VXSAT_SHIFT);
- }
return RISCV_EXCP_NONE;
}
@@ -275,10 +272,6 @@ static RISCVException write_fcsr(CPURISCVState *env, int csrno,
env->mstatus |= MSTATUS_FS;
#endif
env->frm = (val & FSR_RD) >> FSR_RD_SHIFT;
- if (vs(env, csrno) >= 0) {
- env->vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT;
- env->vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT;
- }
riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT);
return RISCV_EXCP_NONE;
}
@@ -297,6 +290,12 @@ static RISCVException read_vl(CPURISCVState *env, int csrno,
return RISCV_EXCP_NONE;
}
+static int read_vlenb(CPURISCVState *env, int csrno, target_ulong *val)
+{
+ *val = env_archcpu(env)->cfg.vlen >> 3;
+ return RISCV_EXCP_NONE;
+}
+
static RISCVException read_vxrm(CPURISCVState *env, int csrno,
target_ulong *val)
{
@@ -307,6 +306,9 @@ static RISCVException read_vxrm(CPURISCVState *env, int csrno,
static RISCVException write_vxrm(CPURISCVState *env, int csrno,
target_ulong val)
{
+#if !defined(CONFIG_USER_ONLY)
+ env->mstatus |= MSTATUS_VS;
+#endif
env->vxrm = val;
return RISCV_EXCP_NONE;
}
@@ -321,6 +323,9 @@ static RISCVException read_vxsat(CPURISCVState *env, int csrno,
static RISCVException write_vxsat(CPURISCVState *env, int csrno,
target_ulong val)
{
+#if !defined(CONFIG_USER_ONLY)
+ env->mstatus |= MSTATUS_VS;
+#endif
env->vxsat = val;
return RISCV_EXCP_NONE;
}
@@ -335,7 +340,30 @@ static RISCVException read_vstart(CPURISCVState *env, int csrno,
static RISCVException write_vstart(CPURISCVState *env, int csrno,
target_ulong val)
{
- env->vstart = val;
+#if !defined(CONFIG_USER_ONLY)
+ env->mstatus |= MSTATUS_VS;
+#endif
+ /*
+ * The vstart CSR is defined to have only enough writable bits
+ * to hold the largest element index, i.e. lg2(VLEN) bits.
+ */
+ env->vstart = val & ~(~0ULL << ctzl(env_archcpu(env)->cfg.vlen));
+ return RISCV_EXCP_NONE;
+}
+
+static int read_vcsr(CPURISCVState *env, int csrno, target_ulong *val)
+{
+ *val = (env->vxrm << VCSR_VXRM_SHIFT) | (env->vxsat << VCSR_VXSAT_SHIFT);
+ return RISCV_EXCP_NONE;
+}
+
+static int write_vcsr(CPURISCVState *env, int csrno, target_ulong val)
+{
+#if !defined(CONFIG_USER_ONLY)
+ env->mstatus |= MSTATUS_VS;
+#endif
+ env->vxrm = (val & VCSR_VXRM) >> VCSR_VXRM_SHIFT;
+ env->vxsat = (val & VCSR_VXSAT) >> VCSR_VXSAT_SHIFT;
return RISCV_EXCP_NONE;
}
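
As a worked example of the new vstart write mask and VCSR packing above, the following standalone sketch (hypothetical helpers, using a GCC/Clang builtin in place of QEMU's ctzl()) shows the resulting bit layout.

    #include <stdint.h>
    #include <stdio.h>

    #define VCSR_VXSAT_SHIFT 0
    #define VCSR_VXRM_SHIFT  1

    /* vstart keeps only log2(VLEN) writable bits, e.g. VLEN=128 -> mask 0x7f */
    static uint64_t vstart_mask(uint32_t vlen)
    {
        return ~(~0ULL << __builtin_ctz(vlen));   /* GCC/Clang builtin */
    }

    /* vcsr packs vxrm into bits [2:1] and vxsat into bit 0 */
    static uint64_t pack_vcsr(uint64_t vxrm, uint64_t vxsat)
    {
        return (vxrm << VCSR_VXRM_SHIFT) | (vxsat << VCSR_VXSAT_SHIFT);
    }

    int main(void)
    {
        printf("0x%llx\n", (unsigned long long)vstart_mask(128)); /* 0x7f */
        printf("0x%llx\n", (unsigned long long)pack_vcsr(3, 1));  /* 0x7  */
        return 0;
    }
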
@@ -453,7 +481,7 @@ static const target_ulong vs_delegable_excps = DELEGABLE_EXCPS &
(1ULL << (RISCV_EXCP_STORE_GUEST_AMO_ACCESS_FAULT)));
static const target_ulong sstatus_v1_10_mask = SSTATUS_SIE | SSTATUS_SPIE |
SSTATUS_UIE | SSTATUS_UPIE | SSTATUS_SPP | SSTATUS_FS | SSTATUS_XS |
- SSTATUS_SUM | SSTATUS_MXR;
+ SSTATUS_SUM | SSTATUS_MXR | SSTATUS_VS;
static const target_ulong sip_writable_mask = SIP_SSIP | MIP_USIP | MIP_UEIP;
static const target_ulong hip_writable_mask = MIP_VSSIP;
static const target_ulong hvip_writable_mask = MIP_VSSIP | MIP_VSTIP | MIP_VSEIP;
@@ -492,6 +520,7 @@ static RISCVException read_mhartid(CPURISCVState *env, int csrno,
static uint64_t add_status_sd(RISCVMXL xl, uint64_t status)
{
if ((status & MSTATUS_FS) == MSTATUS_FS ||
+ (status & MSTATUS_VS) == MSTATUS_VS ||
(status & MSTATUS_XS) == MSTATUS_XS) {
switch (xl) {
case MXL_RV32:
@@ -535,7 +564,7 @@ static RISCVException write_mstatus(CPURISCVState *env, int csrno,
mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE |
MSTATUS_SPP | MSTATUS_FS | MSTATUS_MPRV | MSTATUS_SUM |
MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR |
- MSTATUS_TW;
+ MSTATUS_TW | MSTATUS_VS;
if (riscv_cpu_mxl(env) != MXL_RV32) {
/*
@@ -632,7 +661,7 @@ static RISCVException write_misa(CPURISCVState *env, int csrno,
val &= env->misa_ext_mask;
/* Mask extensions that are not supported by QEMU */
- val &= (RVI | RVE | RVM | RVA | RVF | RVD | RVC | RVS | RVU);
+ val &= (RVI | RVE | RVM | RVA | RVF | RVD | RVC | RVS | RVU | RVV);
/* 'D' depends on 'F', so clear 'D' if 'F' is not present */
if ((val & RVD) && !(val & RVF)) {
@@ -1818,8 +1847,10 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
[CSR_VSTART] = { "vstart", vs, read_vstart, write_vstart },
[CSR_VXSAT] = { "vxsat", vs, read_vxsat, write_vxsat },
[CSR_VXRM] = { "vxrm", vs, read_vxrm, write_vxrm },
+ [CSR_VCSR] = { "vcsr", vs, read_vcsr, write_vcsr },
[CSR_VL] = { "vl", vs, read_vl },
[CSR_VTYPE] = { "vtype", vs, read_vtype },
+ [CSR_VLENB] = { "vlenb", vs, read_vlenb },
/* User Timers and Counters */
[CSR_CYCLE] = { "cycle", ctr, read_instret },
[CSR_INSTRET] = { "instret", ctr, read_instret },
diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c
index d62f470900..4a5982d594 100644
--- a/target/riscv/fpu_helper.c
+++ b/target/riscv/fpu_helper.c
@@ -55,23 +55,23 @@ void helper_set_rounding_mode(CPURISCVState *env, uint32_t rm)
{
int softrm;
- if (rm == 7) {
+ if (rm == RISCV_FRM_DYN) {
rm = env->frm;
}
switch (rm) {
- case 0:
+ case RISCV_FRM_RNE:
softrm = float_round_nearest_even;
break;
- case 1:
+ case RISCV_FRM_RTZ:
softrm = float_round_to_zero;
break;
- case 2:
+ case RISCV_FRM_RDN:
softrm = float_round_down;
break;
- case 3:
+ case RISCV_FRM_RUP:
softrm = float_round_up;
break;
- case 4:
+ case RISCV_FRM_RMM:
softrm = float_round_ties_away;
break;
default:
@@ -81,6 +81,20 @@ void helper_set_rounding_mode(CPURISCVState *env, uint32_t rm)
set_float_rounding_mode(softrm, &env->fp_status);
}
+void helper_set_rod_rounding_mode(CPURISCVState *env)
+{
+ set_float_rounding_mode(float_round_to_odd, &env->fp_status);
+}
+
+static uint64_t do_fmadd_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2,
+ uint64_t rs3, int flags)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ float16 frs2 = check_nanbox_h(rs2);
+ float16 frs3 = check_nanbox_h(rs3);
+ return nanbox_h(float16_muladd(frs1, frs2, frs3, flags, &env->fp_status));
+}
+
static uint64_t do_fmadd_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2,
uint64_t rs3, int flags)
{
@@ -102,6 +116,12 @@ uint64_t helper_fmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
return float64_muladd(frs1, frs2, frs3, 0, &env->fp_status);
}
+uint64_t helper_fmadd_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
+ uint64_t frs3)
+{
+ return do_fmadd_h(env, frs1, frs2, frs3, 0);
+}
+
uint64_t helper_fmsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
uint64_t frs3)
{
@@ -115,6 +135,12 @@ uint64_t helper_fmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
&env->fp_status);
}
+uint64_t helper_fmsub_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
+ uint64_t frs3)
+{
+ return do_fmadd_h(env, frs1, frs2, frs3, float_muladd_negate_c);
+}
+
uint64_t helper_fnmsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
uint64_t frs3)
{
@@ -128,6 +154,12 @@ uint64_t helper_fnmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
&env->fp_status);
}
+uint64_t helper_fnmsub_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
+ uint64_t frs3)
+{
+ return do_fmadd_h(env, frs1, frs2, frs3, float_muladd_negate_product);
+}
+
uint64_t helper_fnmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
uint64_t frs3)
{
@@ -142,6 +174,13 @@ uint64_t helper_fnmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
float_muladd_negate_product, &env->fp_status);
}
+uint64_t helper_fnmadd_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
+ uint64_t frs3)
+{
+ return do_fmadd_h(env, frs1, frs2, frs3,
+ float_muladd_negate_c | float_muladd_negate_product);
+}
+
uint64_t helper_fadd_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
float32 frs1 = check_nanbox_s(rs1);
@@ -374,3 +413,149 @@ target_ulong helper_fclass_d(uint64_t frs1)
{
return fclass_d(frs1);
}
+
+uint64_t helper_fadd_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ float16 frs2 = check_nanbox_h(rs2);
+ return nanbox_h(float16_add(frs1, frs2, &env->fp_status));
+}
+
+uint64_t helper_fsub_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ float16 frs2 = check_nanbox_h(rs2);
+ return nanbox_h(float16_sub(frs1, frs2, &env->fp_status));
+}
+
+uint64_t helper_fmul_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ float16 frs2 = check_nanbox_h(rs2);
+ return nanbox_h(float16_mul(frs1, frs2, &env->fp_status));
+}
+
+uint64_t helper_fdiv_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ float16 frs2 = check_nanbox_h(rs2);
+ return nanbox_h(float16_div(frs1, frs2, &env->fp_status));
+}
+
+uint64_t helper_fmin_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ float16 frs2 = check_nanbox_h(rs2);
+ return nanbox_h(env->priv_ver < PRIV_VERSION_1_11_0 ?
+ float16_minnum(frs1, frs2, &env->fp_status) :
+ float16_minimum_number(frs1, frs2, &env->fp_status));
+}
+
+uint64_t helper_fmax_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ float16 frs2 = check_nanbox_h(rs2);
+ return nanbox_h(env->priv_ver < PRIV_VERSION_1_11_0 ?
+ float16_maxnum(frs1, frs2, &env->fp_status) :
+ float16_maximum_number(frs1, frs2, &env->fp_status));
+}
+
+uint64_t helper_fsqrt_h(CPURISCVState *env, uint64_t rs1)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ return nanbox_h(float16_sqrt(frs1, &env->fp_status));
+}
+
+target_ulong helper_fle_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ float16 frs2 = check_nanbox_h(rs2);
+ return float16_le(frs1, frs2, &env->fp_status);
+}
+
+target_ulong helper_flt_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ float16 frs2 = check_nanbox_h(rs2);
+ return float16_lt(frs1, frs2, &env->fp_status);
+}
+
+target_ulong helper_feq_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ float16 frs2 = check_nanbox_h(rs2);
+ return float16_eq_quiet(frs1, frs2, &env->fp_status);
+}
+
+target_ulong helper_fclass_h(uint64_t rs1)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ return fclass_h(frs1);
+}
+
+target_ulong helper_fcvt_w_h(CPURISCVState *env, uint64_t rs1)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ return float16_to_int32(frs1, &env->fp_status);
+}
+
+target_ulong helper_fcvt_wu_h(CPURISCVState *env, uint64_t rs1)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ return (int32_t)float16_to_uint32(frs1, &env->fp_status);
+}
+
+target_ulong helper_fcvt_l_h(CPURISCVState *env, uint64_t rs1)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ return float16_to_int64(frs1, &env->fp_status);
+}
+
+target_ulong helper_fcvt_lu_h(CPURISCVState *env, uint64_t rs1)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ return float16_to_uint64(frs1, &env->fp_status);
+}
+
+uint64_t helper_fcvt_h_w(CPURISCVState *env, target_ulong rs1)
+{
+ return nanbox_h(int32_to_float16((int32_t)rs1, &env->fp_status));
+}
+
+uint64_t helper_fcvt_h_wu(CPURISCVState *env, target_ulong rs1)
+{
+ return nanbox_h(uint32_to_float16((uint32_t)rs1, &env->fp_status));
+}
+
+uint64_t helper_fcvt_h_l(CPURISCVState *env, target_ulong rs1)
+{
+ return nanbox_h(int64_to_float16(rs1, &env->fp_status));
+}
+
+uint64_t helper_fcvt_h_lu(CPURISCVState *env, target_ulong rs1)
+{
+ return nanbox_h(uint64_to_float16(rs1, &env->fp_status));
+}
+
+uint64_t helper_fcvt_h_s(CPURISCVState *env, uint64_t rs1)
+{
+ float32 frs1 = check_nanbox_s(rs1);
+ return nanbox_h(float32_to_float16(frs1, true, &env->fp_status));
+}
+
+uint64_t helper_fcvt_s_h(CPURISCVState *env, uint64_t rs1)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ return nanbox_s(float16_to_float32(frs1, true, &env->fp_status));
+}
+
+uint64_t helper_fcvt_h_d(CPURISCVState *env, uint64_t rs1)
+{
+ return nanbox_h(float64_to_float16(rs1, true, &env->fp_status));
+}
+
+uint64_t helper_fcvt_d_h(CPURISCVState *env, uint64_t rs1)
+{
+ float16 frs1 = check_nanbox_h(rs1);
+ return float16_to_float64(frs1, true, &env->fp_status);
+}
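
All of the Zfh helpers above unbox their half-precision inputs with check_nanbox_h() and re-box results with nanbox_h(). Those helpers live in target/riscv/internals.h, which is not shown in this diff, so the sketch below only illustrates the usual RISC-V NaN-boxing rule they are assumed to follow: a 16-bit value in a wider FP register is valid only if every upper bit is 1, otherwise it is treated as the canonical half-precision quiet NaN.

    #include <stdint.h>

    /* Illustrative approximation of the NaN-boxing convention assumed above
     * (not the actual QEMU definitions). */
    static inline uint64_t nanbox_h_sketch(uint16_t f)
    {
        return (uint64_t)f | 0xffffffffffff0000ULL;    /* set bits 63..16 */
    }

    static inline uint16_t check_nanbox_h_sketch(uint64_t f)
    {
        const uint64_t mask = 0xffffffffffff0000ULL;

        /* improperly boxed values read back as the canonical qNaN 0x7e00 */
        return (f & mask) == mask ? (uint16_t)f : 0x7e00;
    }
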
diff --git a/target/riscv/gdbstub.c b/target/riscv/gdbstub.c
index 23429179e2..881ab33392 100644
--- a/target/riscv/gdbstub.c
+++ b/target/riscv/gdbstub.c
@@ -20,6 +20,32 @@
#include "exec/gdbstub.h"
#include "cpu.h"
+struct TypeSize {
+ const char *gdb_type;
+ const char *id;
+ int size;
+ const char suffix;
+};
+
+static const struct TypeSize vec_lanes[] = {
+ /* quads */
+ { "uint128", "quads", 128, 'q' },
+ /* 64 bit */
+ { "uint64", "longs", 64, 'l' },
+ /* 32 bit */
+ { "uint32", "words", 32, 'w' },
+ /* 16 bit */
+ { "uint16", "shorts", 16, 's' },
+ /*
+ * TODO: currently there is no reliable way of telling
+ * if the remote gdb actually understands ieee_half so
+ * we don't expose it in the target description for now.
+ * { "ieee_half", 16, 'h', 'f' },
+ */
+ /* bytes */
+ { "uint8", "bytes", 8, 'b' },
+};
+
int riscv_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
{
RISCVCPU *cpu = RISCV_CPU(cs);
@@ -101,6 +127,96 @@ static int riscv_gdb_set_fpu(CPURISCVState *env, uint8_t *mem_buf, int n)
return 0;
}
+/*
+ * Convert the register index number passed by GDB to the corresponding
+ * vector CSR number. Vector CSRs are defined after vector registers
+ * in the dynamically generated riscv-vector.xml, thus the starting register index
+ * of vector CSRs is 32.
+ * Return 0 if register index number is out of range.
+ */
+static int riscv_gdb_vector_csrno(int num_regs)
+{
+ /*
+ * The order of vector CSRs in the switch case
+ * should match with the order defined in csr_ops[].
+ */
+ switch (num_regs) {
+ case 32:
+ return CSR_VSTART;
+ case 33:
+ return CSR_VXSAT;
+ case 34:
+ return CSR_VXRM;
+ case 35:
+ return CSR_VCSR;
+ case 36:
+ return CSR_VL;
+ case 37:
+ return CSR_VTYPE;
+ case 38:
+ return CSR_VLENB;
+ default:
+ /* Unknown register. */
+ return 0;
+ }
+}
+
+static int riscv_gdb_get_vector(CPURISCVState *env, GByteArray *buf, int n)
+{
+ uint16_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
+ if (n < 32) {
+ int i;
+ int cnt = 0;
+ for (i = 0; i < vlenb; i += 8) {
+ cnt += gdb_get_reg64(buf,
+ env->vreg[(n * vlenb + i) / 8]);
+ }
+ return cnt;
+ }
+
+ int csrno = riscv_gdb_vector_csrno(n);
+
+ if (!csrno) {
+ return 0;
+ }
+
+ target_ulong val = 0;
+ int result = riscv_csrrw_debug(env, csrno, &val, 0, 0);
+
+ if (result == 0) {
+ return gdb_get_regl(buf, val);
+ }
+
+ return 0;
+}
+
+static int riscv_gdb_set_vector(CPURISCVState *env, uint8_t *mem_buf, int n)
+{
+ uint16_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
+ if (n < 32) {
+ int i;
+ for (i = 0; i < vlenb; i += 8) {
+ env->vreg[(n * vlenb + i) / 8] = ldq_p(mem_buf + i);
+ }
+ return vlenb;
+ }
+
+ int csrno = riscv_gdb_vector_csrno(n);
+
+ if (!csrno) {
+ return 0;
+ }
+
+ target_ulong val = ldtul_p(mem_buf);
+ int result = riscv_csrrw_debug(env, csrno, NULL, val, -1);
+
+ if (result == 0) {
+ return sizeof(target_ulong);
+ }
+
+ return 0;
+}
+
static int riscv_gdb_get_csr(CPURISCVState *env, GByteArray *buf, int n)
{
if (n < CSR_TABLE_SIZE) {
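
The index math in riscv_gdb_get_vector()/riscv_gdb_set_vector() above is easier to see with a tiny standalone sketch (a hypothetical function): vector register n occupies vlenb bytes of env->vreg[], streamed to GDB as vlenb/8 64-bit chunks, while regnums 32..38 are remapped onto the vector CSRs.

    #include <stdint.h>
    #include <stdio.h>

    /* Print which env->vreg[] words back GDB register number n. */
    static void dump_vreg_layout(int n, uint16_t vlenb)
    {
        for (int i = 0; i < vlenb; i += 8) {
            printf("v%d chunk %d -> env->vreg[%d]\n", n, i / 8, (n * vlenb + i) / 8);
        }
    }

    int main(void)
    {
        dump_vreg_layout(3, 16);   /* VLEN=128: v3 -> env->vreg[6], env->vreg[7] */
        return 0;
    }
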
@@ -187,6 +303,68 @@ static int riscv_gen_dynamic_csr_xml(CPUState *cs, int base_reg)
return CSR_TABLE_SIZE;
}
+static int ricsv_gen_dynamic_vector_xml(CPUState *cs, int base_reg)
+{
+ RISCVCPU *cpu = RISCV_CPU(cs);
+ GString *s = g_string_new(NULL);
+ g_autoptr(GString) ts = g_string_new("");
+ int reg_width = cpu->cfg.vlen;
+ int num_regs = 0;
+ int i;
+
+ g_string_printf(s, "<?xml version=\"1.0\"?>");
+ g_string_append_printf(s, "<!DOCTYPE target SYSTEM \"gdb-target.dtd\">");
+ g_string_append_printf(s, "<feature name=\"org.gnu.gdb.riscv.vector\">");
+
+ /* First define types and totals in a whole VL */
+ for (i = 0; i < ARRAY_SIZE(vec_lanes); i++) {
+ int count = reg_width / vec_lanes[i].size;
+ g_string_printf(ts, "%s", vec_lanes[i].id);
+ g_string_append_printf(s,
+ "<vector id=\"%s\" type=\"%s\" count=\"%d\"/>",
+ ts->str, vec_lanes[i].gdb_type, count);
+ }
+
+ /* Define unions */
+ g_string_append_printf(s, "<union id=\"riscv_vector\">");
+ for (i = 0; i < ARRAY_SIZE(vec_lanes); i++) {
+ g_string_append_printf(s, "<field name=\"%c\" type=\"%s\"/>",
+ vec_lanes[i].suffix,
+ vec_lanes[i].id);
+ }
+ g_string_append(s, "</union>");
+
+ /* Define vector registers */
+ for (i = 0; i < 32; i++) {
+ g_string_append_printf(s,
+ "<reg name=\"v%d\" bitsize=\"%d\""
+ " regnum=\"%d\" group=\"vector\""
+ " type=\"riscv_vector\"/>",
+ i, reg_width, base_reg++);
+ num_regs++;
+ }
+
+ /* Define vector CSRs */
+ const char *vector_csrs[7] = {
+ "vstart", "vxsat", "vxrm", "vcsr",
+ "vl", "vtype", "vlenb"
+ };
+
+ for (i = 0; i < 7; i++) {
+ g_string_append_printf(s,
+ "<reg name=\"%s\" bitsize=\"%d\""
+ " regnum=\"%d\" group=\"vector\""
+ " type=\"int\"/>",
+ vector_csrs[i], TARGET_LONG_BITS, base_reg++);
+ num_regs++;
+ }
+
+ g_string_append_printf(s, "</feature>");
+
+ cpu->dyn_vreg_xml = g_string_free(s, false);
+ return num_regs;
+}
+
void riscv_cpu_register_gdb_regs_for_features(CPUState *cs)
{
RISCVCPU *cpu = RISCV_CPU(cs);
@@ -198,6 +376,12 @@ void riscv_cpu_register_gdb_regs_for_features(CPUState *cs)
gdb_register_coprocessor(cs, riscv_gdb_get_fpu, riscv_gdb_set_fpu,
36, "riscv-32bit-fpu.xml", 0);
}
+ if (env->misa_ext & RVV) {
+ gdb_register_coprocessor(cs, riscv_gdb_get_vector, riscv_gdb_set_vector,
+ ricsv_gen_dynamic_vector_xml(cs,
+ cs->gdb_num_regs),
+ "riscv-vector.xml", 0);
+ }
#if defined(TARGET_RISCV32)
gdb_register_coprocessor(cs, riscv_gdb_get_virtual, riscv_gdb_set_virtual,
1, "riscv-32bit-virtual.xml", 0);
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index c7a5376227..c15497e4a1 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -3,16 +3,21 @@ DEF_HELPER_2(raise_exception, noreturn, env, i32)
/* Floating Point - rounding mode */
DEF_HELPER_FLAGS_2(set_rounding_mode, TCG_CALL_NO_WG, void, env, i32)
+DEF_HELPER_FLAGS_1(set_rod_rounding_mode, TCG_CALL_NO_WG, void, env)
/* Floating Point - fused */
DEF_HELPER_FLAGS_4(fmadd_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
DEF_HELPER_FLAGS_4(fmadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
+DEF_HELPER_FLAGS_4(fmadd_h, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
DEF_HELPER_FLAGS_4(fmsub_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
DEF_HELPER_FLAGS_4(fmsub_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
+DEF_HELPER_FLAGS_4(fmsub_h, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
DEF_HELPER_FLAGS_4(fnmsub_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
DEF_HELPER_FLAGS_4(fnmsub_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
+DEF_HELPER_FLAGS_4(fnmsub_h, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
DEF_HELPER_FLAGS_4(fnmadd_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
DEF_HELPER_FLAGS_4(fnmadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
+DEF_HELPER_FLAGS_4(fnmadd_h, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
/* Floating Point - Single Precision */
DEF_HELPER_FLAGS_3(fadd_s, TCG_CALL_NO_RWG, i64, env, i64, i64)
@@ -62,6 +67,31 @@ DEF_HELPER_FLAGS_1(fclass_d, TCG_CALL_NO_RWG_SE, tl, i64)
DEF_HELPER_FLAGS_2(clmul, TCG_CALL_NO_RWG_SE, tl, tl, tl)
DEF_HELPER_FLAGS_2(clmulr, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+/* Floating Point - Half Precision */
+DEF_HELPER_FLAGS_3(fadd_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fsub_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fmul_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fdiv_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fmin_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fmax_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_2(fsqrt_h, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_3(fle_h, TCG_CALL_NO_RWG, tl, env, i64, i64)
+DEF_HELPER_FLAGS_3(flt_h, TCG_CALL_NO_RWG, tl, env, i64, i64)
+DEF_HELPER_FLAGS_3(feq_h, TCG_CALL_NO_RWG, tl, env, i64, i64)
+DEF_HELPER_FLAGS_2(fcvt_s_h, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(fcvt_h_s, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(fcvt_d_h, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(fcvt_h_d, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(fcvt_w_h, TCG_CALL_NO_RWG, tl, env, i64)
+DEF_HELPER_FLAGS_2(fcvt_wu_h, TCG_CALL_NO_RWG, tl, env, i64)
+DEF_HELPER_FLAGS_2(fcvt_l_h, TCG_CALL_NO_RWG, tl, env, i64)
+DEF_HELPER_FLAGS_2(fcvt_lu_h, TCG_CALL_NO_RWG, tl, env, i64)
+DEF_HELPER_FLAGS_2(fcvt_h_w, TCG_CALL_NO_RWG, i64, env, tl)
+DEF_HELPER_FLAGS_2(fcvt_h_wu, TCG_CALL_NO_RWG, i64, env, tl)
+DEF_HELPER_FLAGS_2(fcvt_h_l, TCG_CALL_NO_RWG, i64, env, tl)
+DEF_HELPER_FLAGS_2(fcvt_h_lu, TCG_CALL_NO_RWG, i64, env, tl)
+DEF_HELPER_FLAGS_1(fclass_h, TCG_CALL_NO_RWG_SE, tl, i64)
+
/* Special functions */
DEF_HELPER_2(csrr, tl, env, int)
DEF_HELPER_3(csrw, void, env, int, tl)
@@ -83,195 +113,89 @@ DEF_HELPER_2(hyp_hlvx_wu, tl, env, tl)
/* Vector functions */
DEF_HELPER_3(vsetvl, tl, env, tl, tl)
-DEF_HELPER_5(vlb_v_b, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlb_v_b_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlb_v_h, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlb_v_h_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlb_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlb_v_w_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlb_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlb_v_d_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlh_v_h, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlh_v_h_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlh_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlh_v_w_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlh_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlh_v_d_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlw_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlw_v_w_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlw_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlw_v_d_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vle_v_b, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vle_v_b_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vle_v_h, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vle_v_h_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vle_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vle_v_w_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vle_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vle_v_d_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlbu_v_b, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlbu_v_b_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlbu_v_h, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlbu_v_h_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlbu_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlbu_v_w_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlbu_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlbu_v_d_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlhu_v_h, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlhu_v_h_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlhu_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlhu_v_w_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlhu_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlhu_v_d_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlwu_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlwu_v_w_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlwu_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlwu_v_d_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsb_v_b, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsb_v_b_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsb_v_h, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsb_v_h_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsb_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsb_v_w_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsb_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsb_v_d_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsh_v_h, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsh_v_h_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsh_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsh_v_w_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsh_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsh_v_d_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsw_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsw_v_w_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsw_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vsw_v_d_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vse_v_b, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vse_v_b_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vse_v_h, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vse_v_h_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vse_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vse_v_w_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vse_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vse_v_d_mask, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_6(vlsb_v_b, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlsb_v_h, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlsb_v_w, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlsb_v_d, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlsh_v_h, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlsh_v_w, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlsh_v_d, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlsw_v_w, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlsw_v_d, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlse_v_b, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlse_v_h, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlse_v_w, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlse_v_d, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlsbu_v_b, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlsbu_v_h, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlsbu_v_w, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlsbu_v_d, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlshu_v_h, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlshu_v_w, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlshu_v_d, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlswu_v_w, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlswu_v_d, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vssb_v_b, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vssb_v_h, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vssb_v_w, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vssb_v_d, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vssh_v_h, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vssh_v_w, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vssh_v_d, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vssw_v_w, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vssw_v_d, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vsse_v_b, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vsse_v_h, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vsse_v_w, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vsse_v_d, void, ptr, ptr, tl, tl, env, i32)
-DEF_HELPER_6(vlxb_v_b, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxb_v_h, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxb_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxb_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxh_v_h, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxh_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxh_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxw_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxw_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxe_v_b, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxe_v_h, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxe_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxe_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxbu_v_b, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxbu_v_h, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxbu_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxbu_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxhu_v_h, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxhu_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxhu_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxwu_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vlxwu_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vsxb_v_b, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vsxb_v_h, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vsxb_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vsxb_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vsxh_v_h, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vsxh_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vsxh_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vsxw_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vsxw_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vsxe_v_b, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vsxe_v_h, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vsxe_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vsxe_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_5(vlbff_v_b, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlbff_v_h, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlbff_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlbff_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlhff_v_h, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlhff_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlhff_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlwff_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlwff_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vleff_v_b, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vleff_v_h, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vleff_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vleff_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlbuff_v_b, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlbuff_v_h, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlbuff_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlbuff_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlhuff_v_h, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlhuff_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlhuff_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlwuff_v_w, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_5(vlwuff_v_d, void, ptr, ptr, tl, env, i32)
-DEF_HELPER_6(vamoswapw_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamoswapd_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamoaddw_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamoaddd_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamoxorw_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamoxord_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamoandw_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamoandd_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamoorw_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamoord_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamominw_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamomind_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamomaxw_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamomaxd_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamominuw_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamominud_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamomaxuw_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamomaxud_v_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamoswapw_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamoaddw_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamoxorw_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamoandw_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamoorw_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamominw_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamomaxw_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamominuw_v_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vamomaxuw_v_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_5(vle8_v, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vle16_v, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vle32_v, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vle64_v, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vle8_v_mask, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vle16_v_mask, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vle32_v_mask, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vle64_v_mask, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vse8_v, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vse16_v, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vse32_v, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vse64_v, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vse8_v_mask, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vse16_v_mask, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vse32_v_mask, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vse64_v_mask, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vlm_v, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vsm_v, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_6(vlse8_v, void, ptr, ptr, tl, tl, env, i32)
+DEF_HELPER_6(vlse16_v, void, ptr, ptr, tl, tl, env, i32)
+DEF_HELPER_6(vlse32_v, void, ptr, ptr, tl, tl, env, i32)
+DEF_HELPER_6(vlse64_v, void, ptr, ptr, tl, tl, env, i32)
+DEF_HELPER_6(vsse8_v, void, ptr, ptr, tl, tl, env, i32)
+DEF_HELPER_6(vsse16_v, void, ptr, ptr, tl, tl, env, i32)
+DEF_HELPER_6(vsse32_v, void, ptr, ptr, tl, tl, env, i32)
+DEF_HELPER_6(vsse64_v, void, ptr, ptr, tl, tl, env, i32)
+DEF_HELPER_6(vlxei8_8_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vlxei8_16_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vlxei8_32_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vlxei8_64_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vlxei16_8_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vlxei16_16_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vlxei16_32_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vlxei16_64_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vlxei32_8_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vlxei32_16_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vlxei32_32_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vlxei32_64_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vlxei64_8_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vlxei64_16_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vlxei64_32_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vlxei64_64_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsxei8_8_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsxei8_16_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsxei8_32_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsxei8_64_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsxei16_8_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsxei16_16_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsxei16_32_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsxei16_64_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsxei32_8_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsxei32_16_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsxei32_32_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsxei32_64_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsxei64_8_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsxei64_16_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsxei64_32_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsxei64_64_v, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_5(vle8ff_v, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vle16ff_v, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vle32ff_v, void, ptr, ptr, tl, env, i32)
+DEF_HELPER_5(vle64ff_v, void, ptr, ptr, tl, env, i32)
+
+DEF_HELPER_4(vl1re8_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vl1re16_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vl1re32_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vl1re64_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vl2re8_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vl2re16_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vl2re32_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vl2re64_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vl4re8_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vl4re16_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vl4re32_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vl4re64_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vl8re8_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vl8re16_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vl8re32_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vl8re64_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vs1r_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vs2r_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vs4r_v, void, ptr, tl, env, i32)
+DEF_HELPER_4(vs8r_v, void, ptr, tl, env, i32)
DEF_HELPER_6(vadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
@@ -430,18 +354,18 @@ DEF_HELPER_6(vsra_vx_h, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vsra_vx_w, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vsra_vx_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vnsrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vnsrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vnsrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vnsra_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vnsra_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vnsra_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vnsrl_vx_b, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vnsrl_vx_h, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vnsrl_vx_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vnsra_vx_b, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vnsra_vx_h, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vnsra_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vnsrl_wv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vnsrl_wv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vnsrl_wv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vnsra_wv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vnsra_wv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vnsra_wv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vnsrl_wx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vnsrl_wx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vnsrl_wx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vnsra_wx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vnsra_wx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vnsra_wx_w, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vmseq_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vmseq_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
@@ -727,18 +651,34 @@ DEF_HELPER_6(vaadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vaadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vaadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vaadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vasub_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vasub_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vasub_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vasub_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vasubu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vasubu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vasubu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vasubu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vaadd_vx_b, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vaadd_vx_h, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vaadd_vx_w, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vaadd_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vaaddu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vasub_vx_b, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vasub_vx_h, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vasub_vx_w, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vasub_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vasubu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vasubu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vasubu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vasubu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vsmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vsmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
@@ -749,28 +689,6 @@ DEF_HELPER_6(vsmul_vx_h, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vsmul_vx_w, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vsmul_vx_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vwsmaccu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vwsmaccu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vwsmaccu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vwsmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vwsmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vwsmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vwsmaccsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vwsmaccsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vwsmaccsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vwsmaccu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vwsmaccu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vwsmaccu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vwsmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vwsmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vwsmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vwsmaccsu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vwsmaccsu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vwsmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vwsmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vwsmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vwsmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32)
-
DEF_HELPER_6(vssrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vssrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vssrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
@@ -788,18 +706,18 @@ DEF_HELPER_6(vssra_vx_h, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vssra_vx_w, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vssra_vx_d, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vnclip_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vnclip_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vnclip_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vnclipu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vnclipu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vnclipu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vnclipu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vnclipu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vnclipu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vnclip_vx_b, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vnclip_vx_h, void, ptr, ptr, tl, ptr, env, i32)
-DEF_HELPER_6(vnclip_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vnclip_wv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vnclip_wv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vnclip_wv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vnclipu_wv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vnclipu_wv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vnclipu_wv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vnclipu_wx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vnclipu_wx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vnclipu_wx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vnclip_wx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vnclip_wx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vnclip_wx_w, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vfadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vfadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
@@ -925,6 +843,14 @@ DEF_HELPER_5(vfsqrt_v_h, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vfsqrt_v_w, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vfsqrt_v_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfrsqrt7_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfrsqrt7_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfrsqrt7_v_d, void, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_5(vfrec7_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfrec7_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfrec7_v_d, void, ptr, ptr, ptr, env, i32)
+
DEF_HELPER_6(vfmin_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vfmin_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vfmin_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
@@ -987,12 +913,6 @@ DEF_HELPER_6(vmfgt_vf_d, void, ptr, ptr, i64, ptr, env, i32)
DEF_HELPER_6(vmfge_vf_h, void, ptr, ptr, i64, ptr, env, i32)
DEF_HELPER_6(vmfge_vf_w, void, ptr, ptr, i64, ptr, env, i32)
DEF_HELPER_6(vmfge_vf_d, void, ptr, ptr, i64, ptr, env, i32)
-DEF_HELPER_6(vmford_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vmford_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vmford_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vmford_vf_h, void, ptr, ptr, i64, ptr, env, i32)
-DEF_HELPER_6(vmford_vf_w, void, ptr, ptr, i64, ptr, env, i32)
-DEF_HELPER_6(vmford_vf_d, void, ptr, ptr, i64, ptr, env, i32)
DEF_HELPER_5(vfclass_v_h, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vfclass_v_w, void, ptr, ptr, ptr, env, i32)
@@ -1019,23 +939,27 @@ DEF_HELPER_5(vfwcvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vfwcvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vfwcvt_x_f_v_h, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vfwcvt_x_f_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfwcvt_f_xu_v_b, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vfwcvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vfwcvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfwcvt_f_x_v_b, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vfwcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vfwcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vfwcvt_f_f_v_h, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vfwcvt_f_f_v_w, void, ptr, ptr, ptr, env, i32)
-DEF_HELPER_5(vfncvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32)
-DEF_HELPER_5(vfncvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32)
-DEF_HELPER_5(vfncvt_x_f_v_h, void, ptr, ptr, ptr, env, i32)
-DEF_HELPER_5(vfncvt_x_f_v_w, void, ptr, ptr, ptr, env, i32)
-DEF_HELPER_5(vfncvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32)
-DEF_HELPER_5(vfncvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32)
-DEF_HELPER_5(vfncvt_f_x_v_h, void, ptr, ptr, ptr, env, i32)
-DEF_HELPER_5(vfncvt_f_x_v_w, void, ptr, ptr, ptr, env, i32)
-DEF_HELPER_5(vfncvt_f_f_v_h, void, ptr, ptr, ptr, env, i32)
-DEF_HELPER_5(vfncvt_f_f_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_xu_f_w_b, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_xu_f_w_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_xu_f_w_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_x_f_w_b, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_x_f_w_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_x_f_w_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_f_xu_w_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_f_xu_w_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_f_x_w_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_f_x_w_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_f_f_w_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_f_f_w_w, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
@@ -1092,16 +1016,16 @@ DEF_HELPER_6(vfwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vmand_mm, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vmnand_mm, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vmandnot_mm, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmandn_mm, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vmxor_mm, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vmor_mm, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vmnor_mm, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_6(vmornot_mm, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmorn_mm, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vmxnor_mm, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_4(vmpopc_m, tl, ptr, ptr, env, i32)
+DEF_HELPER_4(vcpop_m, tl, ptr, ptr, env, i32)
-DEF_HELPER_4(vmfirst_m, tl, ptr, ptr, env, i32)
+DEF_HELPER_4(vfirst_m, tl, ptr, ptr, env, i32)
DEF_HELPER_5(vmsbf_m, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vmsif_m, void, ptr, ptr, ptr, env, i32)
@@ -1134,10 +1058,21 @@ DEF_HELPER_6(vslide1down_vx_h, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vslide1down_vx_w, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vslide1down_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vfslide1up_vf_h, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(vfslide1up_vf_w, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(vfslide1up_vf_d, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(vfslide1down_vf_h, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(vfslide1down_vf_w, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(vfslide1down_vf_d, void, ptr, ptr, i64, ptr, env, i32)
+
DEF_HELPER_6(vrgather_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vrgather_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vrgather_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vrgather_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vrgatherei16_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vrgatherei16_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vrgatherei16_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vrgatherei16_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vrgather_vx_b, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vrgather_vx_h, void, ptr, ptr, tl, ptr, env, i32)
DEF_HELPER_6(vrgather_vx_w, void, ptr, ptr, tl, ptr, env, i32)
@@ -1147,3 +1082,22 @@ DEF_HELPER_6(vcompress_vm_b, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vcompress_vm_h, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vcompress_vm_w, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vcompress_vm_d, void, ptr, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_4(vmv1r_v, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vmv2r_v, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vmv4r_v, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vmv8r_v, void, ptr, ptr, env, i32)
+
+DEF_HELPER_5(vzext_vf2_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vzext_vf2_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vzext_vf2_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vzext_vf4_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vzext_vf4_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vzext_vf8_d, void, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_5(vsext_vf2_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsext_vf2_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsext_vf2_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsext_vf4_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsext_vf4_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsext_vf8_d, void, ptr, ptr, ptr, env, i32)
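[Editor's note, not part of the patch] The DEF_HELPER_* lines above only declare TCG helpers; QEMU generates the C prototype from the type letters (ptr, tl, env, i32, ...). As a rough sketch of what one of the new declarations expands to, with illustrative parameter names (only the types follow from the declaration itself):

void helper_vzext_vf2_h(void *vd,           /* destination vector register group */
                        void *v0,           /* mask register */
                        void *vs2,          /* source vector register group */
                        CPURISCVState *env,
                        uint32_t desc);     /* simd_desc() descriptor */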
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 2f251dac1b..8617307b29 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -49,7 +49,6 @@
&atomic aq rl rs2 rs1 rd
&rmrr vm rd rs1 rs2
&rmr vm rd rs2
-&rwdvm vm wd rd rs1 rs2
&r2nfvm vm rd rs1 nf
&rnfvm vm rd rs1 rs2 nf
@@ -79,8 +78,8 @@
@r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd
@r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd
@r_vm_0 ...... . ..... ..... ... ..... ....... &rmrr vm=0 %rs2 %rs1 %rd
-@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd
-@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd
+@r2_zimm11 . zimm:11 ..... ... ..... ....... %rs1 %rd
+@r2_zimm10 .. zimm:10 ..... ... ..... ....... %rs1 %rd
@r2_s ....... ..... ..... ... ..... ....... %rs2 %rs1
@hfence_gvma ....... ..... ..... ... ..... ....... %rs2 %rs1
@@ -296,60 +295,69 @@ hlv_d 0110110 00000 ..... 100 ..... 1110011 @r2
hsv_d 0110111 ..... ..... 100 00000 1110011 @r2_s
# *** Vector loads and stores are encoded within LOADFP/STORE-FP ***
-vlb_v ... 100 . 00000 ..... 000 ..... 0000111 @r2_nfvm
-vlh_v ... 100 . 00000 ..... 101 ..... 0000111 @r2_nfvm
-vlw_v ... 100 . 00000 ..... 110 ..... 0000111 @r2_nfvm
-vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm
-vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm
-vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm
-vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm
-vlbff_v ... 100 . 10000 ..... 000 ..... 0000111 @r2_nfvm
-vlhff_v ... 100 . 10000 ..... 101 ..... 0000111 @r2_nfvm
-vlwff_v ... 100 . 10000 ..... 110 ..... 0000111 @r2_nfvm
-vleff_v ... 000 . 10000 ..... 111 ..... 0000111 @r2_nfvm
-vlbuff_v ... 000 . 10000 ..... 000 ..... 0000111 @r2_nfvm
-vlhuff_v ... 000 . 10000 ..... 101 ..... 0000111 @r2_nfvm
-vlwuff_v ... 000 . 10000 ..... 110 ..... 0000111 @r2_nfvm
-vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm
-vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm
-vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm
-vse_v ... 000 . 00000 ..... 111 ..... 0100111 @r2_nfvm
-
-vlsb_v ... 110 . ..... ..... 000 ..... 0000111 @r_nfvm
-vlsh_v ... 110 . ..... ..... 101 ..... 0000111 @r_nfvm
-vlsw_v ... 110 . ..... ..... 110 ..... 0000111 @r_nfvm
-vlse_v ... 010 . ..... ..... 111 ..... 0000111 @r_nfvm
-vlsbu_v ... 010 . ..... ..... 000 ..... 0000111 @r_nfvm
-vlshu_v ... 010 . ..... ..... 101 ..... 0000111 @r_nfvm
-vlswu_v ... 010 . ..... ..... 110 ..... 0000111 @r_nfvm
-vssb_v ... 010 . ..... ..... 000 ..... 0100111 @r_nfvm
-vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm
-vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm
-vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm
-
-vlxb_v ... 111 . ..... ..... 000 ..... 0000111 @r_nfvm
-vlxh_v ... 111 . ..... ..... 101 ..... 0000111 @r_nfvm
-vlxw_v ... 111 . ..... ..... 110 ..... 0000111 @r_nfvm
-vlxe_v ... 011 . ..... ..... 111 ..... 0000111 @r_nfvm
-vlxbu_v ... 011 . ..... ..... 000 ..... 0000111 @r_nfvm
-vlxhu_v ... 011 . ..... ..... 101 ..... 0000111 @r_nfvm
-vlxwu_v ... 011 . ..... ..... 110 ..... 0000111 @r_nfvm
+# Vector unit-stride load/store insns.
+vle8_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm
+vle16_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm
+vle32_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm
+vle64_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm
+vse8_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm
+vse16_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm
+vse32_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm
+vse64_v ... 000 . 00000 ..... 111 ..... 0100111 @r2_nfvm
+
+# Vector unit-stride mask load/store insns.
+vlm_v 000 000 1 01011 ..... 000 ..... 0000111 @r2
+vsm_v 000 000 1 01011 ..... 000 ..... 0100111 @r2
+
+# Vector strided insns.
+vlse8_v ... 010 . ..... ..... 000 ..... 0000111 @r_nfvm
+vlse16_v ... 010 . ..... ..... 101 ..... 0000111 @r_nfvm
+vlse32_v ... 010 . ..... ..... 110 ..... 0000111 @r_nfvm
+vlse64_v ... 010 . ..... ..... 111 ..... 0000111 @r_nfvm
+vsse8_v ... 010 . ..... ..... 000 ..... 0100111 @r_nfvm
+vsse16_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm
+vsse32_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm
+vsse64_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm
+
+# Vector ordered-indexed and unordered-indexed load insns.
+vlxei8_v ... 0-1 . ..... ..... 000 ..... 0000111 @r_nfvm
+vlxei16_v ... 0-1 . ..... ..... 101 ..... 0000111 @r_nfvm
+vlxei32_v ... 0-1 . ..... ..... 110 ..... 0000111 @r_nfvm
+vlxei64_v ... 0-1 . ..... ..... 111 ..... 0000111 @r_nfvm
+
# Vector ordered-indexed and unordered-indexed store insns.
-vsxb_v ... -11 . ..... ..... 000 ..... 0100111 @r_nfvm
-vsxh_v ... -11 . ..... ..... 101 ..... 0100111 @r_nfvm
-vsxw_v ... -11 . ..... ..... 110 ..... 0100111 @r_nfvm
-vsxe_v ... -11 . ..... ..... 111 ..... 0100111 @r_nfvm
-
-#*** Vector AMO operations are encoded under the standard AMO major opcode ***
-vamoswapw_v 00001 . . ..... ..... 110 ..... 0101111 @r_wdvm
-vamoaddw_v 00000 . . ..... ..... 110 ..... 0101111 @r_wdvm
-vamoxorw_v 00100 . . ..... ..... 110 ..... 0101111 @r_wdvm
-vamoandw_v 01100 . . ..... ..... 110 ..... 0101111 @r_wdvm
-vamoorw_v 01000 . . ..... ..... 110 ..... 0101111 @r_wdvm
-vamominw_v 10000 . . ..... ..... 110 ..... 0101111 @r_wdvm
-vamomaxw_v 10100 . . ..... ..... 110 ..... 0101111 @r_wdvm
-vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm
-vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm
+vsxei8_v ... 0-1 . ..... ..... 000 ..... 0100111 @r_nfvm
+vsxei16_v ... 0-1 . ..... ..... 101 ..... 0100111 @r_nfvm
+vsxei32_v ... 0-1 . ..... ..... 110 ..... 0100111 @r_nfvm
+vsxei64_v ... 0-1 . ..... ..... 111 ..... 0100111 @r_nfvm
+
+# Vector unit-stride fault-only-first load insns.
+vle8ff_v ... 000 . 10000 ..... 000 ..... 0000111 @r2_nfvm
+vle16ff_v ... 000 . 10000 ..... 101 ..... 0000111 @r2_nfvm
+vle32ff_v ... 000 . 10000 ..... 110 ..... 0000111 @r2_nfvm
+vle64ff_v ... 000 . 10000 ..... 111 ..... 0000111 @r2_nfvm
+
+# Vector whole register insns
+vl1re8_v 000 000 1 01000 ..... 000 ..... 0000111 @r2
+vl1re16_v 000 000 1 01000 ..... 101 ..... 0000111 @r2
+vl1re32_v 000 000 1 01000 ..... 110 ..... 0000111 @r2
+vl1re64_v 000 000 1 01000 ..... 111 ..... 0000111 @r2
+vl2re8_v 001 000 1 01000 ..... 000 ..... 0000111 @r2
+vl2re16_v 001 000 1 01000 ..... 101 ..... 0000111 @r2
+vl2re32_v 001 000 1 01000 ..... 110 ..... 0000111 @r2
+vl2re64_v 001 000 1 01000 ..... 111 ..... 0000111 @r2
+vl4re8_v 011 000 1 01000 ..... 000 ..... 0000111 @r2
+vl4re16_v 011 000 1 01000 ..... 101 ..... 0000111 @r2
+vl4re32_v 011 000 1 01000 ..... 110 ..... 0000111 @r2
+vl4re64_v 011 000 1 01000 ..... 111 ..... 0000111 @r2
+vl8re8_v 111 000 1 01000 ..... 000 ..... 0000111 @r2
+vl8re16_v 111 000 1 01000 ..... 101 ..... 0000111 @r2
+vl8re32_v 111 000 1 01000 ..... 110 ..... 0000111 @r2
+vl8re64_v 111 000 1 01000 ..... 111 ..... 0000111 @r2
+vs1r_v 000 000 1 01000 ..... 000 ..... 0100111 @r2
+vs2r_v 001 000 1 01000 ..... 000 ..... 0100111 @r2
+vs4r_v 011 000 1 01000 ..... 000 ..... 0100111 @r2
+vs8r_v 111 000 1 01000 ..... 000 ..... 0100111 @r2
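# [Editor's note, not part of the patch] In the whole-register patterns above,
# the leading three bits are the nf field and encode the register count minus
# one, so 000/001/011/111 select 1, 2, 4 or 8 registers respectively.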
# *** new major opcode OP-V ***
vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm
@@ -375,16 +383,16 @@ vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm
vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm
vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm
vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm
-vadc_vvm 010000 1 ..... ..... 000 ..... 1010111 @r_vm_1
-vadc_vxm 010000 1 ..... ..... 100 ..... 1010111 @r_vm_1
-vadc_vim 010000 1 ..... ..... 011 ..... 1010111 @r_vm_1
-vmadc_vvm 010001 1 ..... ..... 000 ..... 1010111 @r_vm_1
-vmadc_vxm 010001 1 ..... ..... 100 ..... 1010111 @r_vm_1
-vmadc_vim 010001 1 ..... ..... 011 ..... 1010111 @r_vm_1
-vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r_vm_1
-vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r_vm_1
-vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r_vm_1
-vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r_vm_1
+vadc_vvm 010000 0 ..... ..... 000 ..... 1010111 @r_vm_1
+vadc_vxm 010000 0 ..... ..... 100 ..... 1010111 @r_vm_1
+vadc_vim 010000 0 ..... ..... 011 ..... 1010111 @r_vm_1
+vmadc_vvm 010001 . ..... ..... 000 ..... 1010111 @r_vm
+vmadc_vxm 010001 . ..... ..... 100 ..... 1010111 @r_vm
+vmadc_vim 010001 . ..... ..... 011 ..... 1010111 @r_vm
+vsbc_vvm 010010 0 ..... ..... 000 ..... 1010111 @r_vm_1
+vsbc_vxm 010010 0 ..... ..... 100 ..... 1010111 @r_vm_1
+vmsbc_vvm 010011 . ..... ..... 000 ..... 1010111 @r_vm
+vmsbc_vxm 010011 . ..... ..... 100 ..... 1010111 @r_vm
vand_vv 001001 . ..... ..... 000 ..... 1010111 @r_vm
vand_vx 001001 . ..... ..... 100 ..... 1010111 @r_vm
vand_vi 001001 . ..... ..... 011 ..... 1010111 @r_vm
@@ -403,12 +411,12 @@ vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm
vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm
vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm
vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm
-vnsrl_vv 101100 . ..... ..... 000 ..... 1010111 @r_vm
-vnsrl_vx 101100 . ..... ..... 100 ..... 1010111 @r_vm
-vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm
-vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm
-vnsra_vx 101101 . ..... ..... 100 ..... 1010111 @r_vm
-vnsra_vi 101101 . ..... ..... 011 ..... 1010111 @r_vm
+vnsrl_wv 101100 . ..... ..... 000 ..... 1010111 @r_vm
+vnsrl_wx 101100 . ..... ..... 100 ..... 1010111 @r_vm
+vnsrl_wi 101100 . ..... ..... 011 ..... 1010111 @r_vm
+vnsra_wv 101101 . ..... ..... 000 ..... 1010111 @r_vm
+vnsra_wx 101101 . ..... ..... 100 ..... 1010111 @r_vm
+vnsra_wi 101101 . ..... ..... 011 ..... 1010111 @r_vm
vmseq_vv 011000 . ..... ..... 000 ..... 1010111 @r_vm
vmseq_vx 011000 . ..... ..... 100 ..... 1010111 @r_vm
vmseq_vi 011000 . ..... ..... 011 ..... 1010111 @r_vm
@@ -471,9 +479,9 @@ vwmaccu_vv 111100 . ..... ..... 010 ..... 1010111 @r_vm
vwmaccu_vx 111100 . ..... ..... 110 ..... 1010111 @r_vm
vwmacc_vv 111101 . ..... ..... 010 ..... 1010111 @r_vm
vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm
-vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm
-vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm
-vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm
+vwmaccsu_vv 111111 . ..... ..... 010 ..... 1010111 @r_vm
+vwmaccsu_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm
+vwmaccus_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm
vmv_v_v 010111 1 00000 ..... 000 ..... 1010111 @r2
vmv_v_x 010111 1 00000 ..... 100 ..... 1010111 @r2
vmv_v_i 010111 1 00000 ..... 011 ..... 1010111 @r2
@@ -490,32 +498,28 @@ vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm
vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm
vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm
vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm
-vaadd_vv 100100 . ..... ..... 000 ..... 1010111 @r_vm
-vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm
-vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm
-vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm
-vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm
+vaadd_vv 001001 . ..... ..... 010 ..... 1010111 @r_vm
+vaadd_vx 001001 . ..... ..... 110 ..... 1010111 @r_vm
+vaaddu_vv 001000 . ..... ..... 010 ..... 1010111 @r_vm
+vaaddu_vx 001000 . ..... ..... 110 ..... 1010111 @r_vm
+vasub_vv 001011 . ..... ..... 010 ..... 1010111 @r_vm
+vasub_vx 001011 . ..... ..... 110 ..... 1010111 @r_vm
+vasubu_vv 001010 . ..... ..... 010 ..... 1010111 @r_vm
+vasubu_vx 001010 . ..... ..... 110 ..... 1010111 @r_vm
vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm
vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm
-vwsmaccu_vv 111100 . ..... ..... 000 ..... 1010111 @r_vm
-vwsmaccu_vx 111100 . ..... ..... 100 ..... 1010111 @r_vm
-vwsmacc_vv 111101 . ..... ..... 000 ..... 1010111 @r_vm
-vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm
-vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm
-vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm
-vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm
vssrl_vv 101010 . ..... ..... 000 ..... 1010111 @r_vm
vssrl_vx 101010 . ..... ..... 100 ..... 1010111 @r_vm
vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm
vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm
vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm
vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm
-vnclipu_vv 101110 . ..... ..... 000 ..... 1010111 @r_vm
-vnclipu_vx 101110 . ..... ..... 100 ..... 1010111 @r_vm
-vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm
-vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm
-vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm
-vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm
+vnclipu_wv 101110 . ..... ..... 000 ..... 1010111 @r_vm
+vnclipu_wx 101110 . ..... ..... 100 ..... 1010111 @r_vm
+vnclipu_wi 101110 . ..... ..... 011 ..... 1010111 @r_vm
+vnclip_wv 101111 . ..... ..... 000 ..... 1010111 @r_vm
+vnclip_wx 101111 . ..... ..... 100 ..... 1010111 @r_vm
+vnclip_wi 101111 . ..... ..... 011 ..... 1010111 @r_vm
vfadd_vv 000000 . ..... ..... 001 ..... 1010111 @r_vm
vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm
vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm
@@ -560,7 +564,9 @@ vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm
vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm
vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm
vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm
-vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm
+vfsqrt_v 010011 . ..... 00000 001 ..... 1010111 @r2_vm
+vfrsqrt7_v 010011 . ..... 00100 001 ..... 1010111 @r2_vm
+vfrec7_v 010011 . ..... 00101 001 ..... 1010111 @r2_vm
vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm
vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm
vfmax_vv 000110 . ..... ..... 001 ..... 1010111 @r_vm
@@ -571,6 +577,8 @@ vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm
vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm
vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm
vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm
+vfslide1up_vf 001110 . ..... ..... 101 ..... 1010111 @r_vm
+vfslide1down_vf 001111 . ..... ..... 101 ..... 1010111 @r_vm
vmfeq_vv 011000 . ..... ..... 001 ..... 1010111 @r_vm
vmfeq_vf 011000 . ..... ..... 101 ..... 1010111 @r_vm
vmfne_vv 011100 . ..... ..... 001 ..... 1010111 @r_vm
@@ -581,25 +589,34 @@ vmfle_vv 011001 . ..... ..... 001 ..... 1010111 @r_vm
vmfle_vf 011001 . ..... ..... 101 ..... 1010111 @r_vm
vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm
vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm
-vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm
-vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm
-vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm
+vfclass_v 010011 . ..... 10000 001 ..... 1010111 @r2_vm
vfmerge_vfm 010111 0 ..... ..... 101 ..... 1010111 @r_vm_0
vfmv_v_f 010111 1 00000 ..... 101 ..... 1010111 @r2
-vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm
-vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm
-vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm
-vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm
-vfwcvt_xu_f_v 100010 . ..... 01000 001 ..... 1010111 @r2_vm
-vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm
-vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm
-vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm
-vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm
-vfncvt_xu_f_v 100010 . ..... 10000 001 ..... 1010111 @r2_vm
-vfncvt_x_f_v 100010 . ..... 10001 001 ..... 1010111 @r2_vm
-vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm
-vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm
-vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm
+
+vfcvt_xu_f_v 010010 . ..... 00000 001 ..... 1010111 @r2_vm
+vfcvt_x_f_v 010010 . ..... 00001 001 ..... 1010111 @r2_vm
+vfcvt_f_xu_v 010010 . ..... 00010 001 ..... 1010111 @r2_vm
+vfcvt_f_x_v 010010 . ..... 00011 001 ..... 1010111 @r2_vm
+vfcvt_rtz_xu_f_v 010010 . ..... 00110 001 ..... 1010111 @r2_vm
+vfcvt_rtz_x_f_v 010010 . ..... 00111 001 ..... 1010111 @r2_vm
+
+vfwcvt_xu_f_v 010010 . ..... 01000 001 ..... 1010111 @r2_vm
+vfwcvt_x_f_v 010010 . ..... 01001 001 ..... 1010111 @r2_vm
+vfwcvt_f_xu_v 010010 . ..... 01010 001 ..... 1010111 @r2_vm
+vfwcvt_f_x_v 010010 . ..... 01011 001 ..... 1010111 @r2_vm
+vfwcvt_f_f_v 010010 . ..... 01100 001 ..... 1010111 @r2_vm
+vfwcvt_rtz_xu_f_v 010010 . ..... 01110 001 ..... 1010111 @r2_vm
+vfwcvt_rtz_x_f_v 010010 . ..... 01111 001 ..... 1010111 @r2_vm
+
+vfncvt_xu_f_w 010010 . ..... 10000 001 ..... 1010111 @r2_vm
+vfncvt_x_f_w 010010 . ..... 10001 001 ..... 1010111 @r2_vm
+vfncvt_f_xu_w 010010 . ..... 10010 001 ..... 1010111 @r2_vm
+vfncvt_f_x_w 010010 . ..... 10011 001 ..... 1010111 @r2_vm
+vfncvt_f_f_w 010010 . ..... 10100 001 ..... 1010111 @r2_vm
+vfncvt_rod_f_f_w 010010 . ..... 10101 001 ..... 1010111 @r2_vm
+vfncvt_rtz_xu_f_w 010010 . ..... 10110 001 ..... 1010111 @r2_vm
+vfncvt_rtz_x_f_w 010010 . ..... 10111 001 ..... 1010111 @r2_vm
+
vredsum_vs 000000 . ..... ..... 010 ..... 1010111 @r_vm
vredand_vs 000001 . ..... ..... 010 ..... 1010111 @r_vm
vredor_vs 000010 . ..... ..... 010 ..... 1010111 @r_vm
@@ -618,23 +635,23 @@ vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm
vfwredsum_vs 1100-1 . ..... ..... 001 ..... 1010111 @r_vm
vmand_mm 011001 - ..... ..... 010 ..... 1010111 @r
vmnand_mm 011101 - ..... ..... 010 ..... 1010111 @r
-vmandnot_mm 011000 - ..... ..... 010 ..... 1010111 @r
+vmandn_mm 011000 - ..... ..... 010 ..... 1010111 @r
vmxor_mm 011011 - ..... ..... 010 ..... 1010111 @r
vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r
vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r
-vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r
+vmorn_mm 011100 - ..... ..... 010 ..... 1010111 @r
vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r
-vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm
-vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm
-vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm
-vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm
-vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm
-viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm
-vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm
-vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r
-vmv_s_x 001101 1 00000 ..... 110 ..... 1010111 @r2
-vfmv_f_s 001100 1 ..... 00000 001 ..... 1010111 @r2rd
-vfmv_s_f 001101 1 00000 ..... 101 ..... 1010111 @r2
+vcpop_m 010000 . ..... 10000 010 ..... 1010111 @r2_vm
+vfirst_m 010000 . ..... 10001 010 ..... 1010111 @r2_vm
+vmsbf_m 010100 . ..... 00001 010 ..... 1010111 @r2_vm
+vmsif_m 010100 . ..... 00011 010 ..... 1010111 @r2_vm
+vmsof_m 010100 . ..... 00010 010 ..... 1010111 @r2_vm
+viota_m 010100 . ..... 10000 010 ..... 1010111 @r2_vm
+vid_v 010100 . 00000 10001 010 ..... 1010111 @r1_vm
+vmv_x_s 010000 1 ..... 00000 010 ..... 1010111 @r2rd
+vmv_s_x 010000 1 00000 ..... 110 ..... 1010111 @r2
+vfmv_f_s 010000 1 ..... 00000 001 ..... 1010111 @r2rd
+vfmv_s_f 010000 1 00000 ..... 101 ..... 1010111 @r2
vslideup_vx 001110 . ..... ..... 100 ..... 1010111 @r_vm
vslideup_vi 001110 . ..... ..... 011 ..... 1010111 @r_vm
vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm
@@ -642,24 +659,27 @@ vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm
vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm
vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm
vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm
+vrgatherei16_vv 001110 . ..... ..... 000 ..... 1010111 @r_vm
vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm
vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm
vcompress_vm 010111 - ..... ..... 010 ..... 1010111 @r
-
-vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
+vmv1r_v 100111 1 ..... 00000 011 ..... 1010111 @r2rd
+vmv2r_v 100111 1 ..... 00001 011 ..... 1010111 @r2rd
+vmv4r_v 100111 1 ..... 00011 011 ..... 1010111 @r2rd
+vmv8r_v 100111 1 ..... 00111 011 ..... 1010111 @r2rd
+
+# Vector Integer Extension
+vzext_vf2 010010 . ..... 00110 010 ..... 1010111 @r2_vm
+vzext_vf4 010010 . ..... 00100 010 ..... 1010111 @r2_vm
+vzext_vf8 010010 . ..... 00010 010 ..... 1010111 @r2_vm
+vsext_vf2 010010 . ..... 00111 010 ..... 1010111 @r2_vm
+vsext_vf4 010010 . ..... 00101 010 ..... 1010111 @r2_vm
+vsext_vf8 010010 . ..... 00011 010 ..... 1010111 @r2_vm
+
+vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm11
+vsetivli 11 .......... ..... 111 ..... 1010111 @r2_zimm10
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
-#*** Vector AMO operations (in addition to Zvamo) ***
-vamoswapd_v 00001 . . ..... ..... 111 ..... 0101111 @r_wdvm
-vamoaddd_v 00000 . . ..... ..... 111 ..... 0101111 @r_wdvm
-vamoxord_v 00100 . . ..... ..... 111 ..... 0101111 @r_wdvm
-vamoandd_v 01100 . . ..... ..... 111 ..... 0101111 @r_wdvm
-vamoord_v 01000 . . ..... ..... 111 ..... 0101111 @r_wdvm
-vamomind_v 10000 . . ..... ..... 111 ..... 0101111 @r_wdvm
-vamomaxd_v 10100 . . ..... ..... 111 ..... 0101111 @r_wdvm
-vamominud_v 11000 . . ..... ..... 111 ..... 0101111 @r_wdvm
-vamomaxud_v 11100 . . ..... ..... 111 ..... 0101111 @r_wdvm
-
# *** RV32 Zba Standard Extension ***
sh1add 0010000 .......... 010 ..... 0110011 @r
sh2add 0010000 .......... 100 ..... 0110011 @r
@@ -726,3 +746,41 @@ binv 0110100 .......... 001 ..... 0110011 @r
binvi 01101. ........... 001 ..... 0010011 @sh
bset 0010100 .......... 001 ..... 0110011 @r
bseti 00101. ........... 001 ..... 0010011 @sh
+
+# *** RV32 Zfh Extension ***
+flh ............ ..... 001 ..... 0000111 @i
+fsh ....... ..... ..... 001 ..... 0100111 @s
+fmadd_h ..... 10 ..... ..... ... ..... 1000011 @r4_rm
+fmsub_h ..... 10 ..... ..... ... ..... 1000111 @r4_rm
+fnmsub_h ..... 10 ..... ..... ... ..... 1001011 @r4_rm
+fnmadd_h ..... 10 ..... ..... ... ..... 1001111 @r4_rm
+fadd_h 0000010 ..... ..... ... ..... 1010011 @r_rm
+fsub_h 0000110 ..... ..... ... ..... 1010011 @r_rm
+fmul_h 0001010 ..... ..... ... ..... 1010011 @r_rm
+fdiv_h 0001110 ..... ..... ... ..... 1010011 @r_rm
+fsqrt_h 0101110 00000 ..... ... ..... 1010011 @r2_rm
+fsgnj_h 0010010 ..... ..... 000 ..... 1010011 @r
+fsgnjn_h 0010010 ..... ..... 001 ..... 1010011 @r
+fsgnjx_h 0010010 ..... ..... 010 ..... 1010011 @r
+fmin_h 0010110 ..... ..... 000 ..... 1010011 @r
+fmax_h 0010110 ..... ..... 001 ..... 1010011 @r
+fcvt_h_s 0100010 00000 ..... ... ..... 1010011 @r2_rm
+fcvt_s_h 0100000 00010 ..... ... ..... 1010011 @r2_rm
+fcvt_h_d 0100010 00001 ..... ... ..... 1010011 @r2_rm
+fcvt_d_h 0100001 00010 ..... ... ..... 1010011 @r2_rm
+fcvt_w_h 1100010 00000 ..... ... ..... 1010011 @r2_rm
+fcvt_wu_h 1100010 00001 ..... ... ..... 1010011 @r2_rm
+fmv_x_h 1110010 00000 ..... 000 ..... 1010011 @r2
+feq_h 1010010 ..... ..... 010 ..... 1010011 @r
+flt_h 1010010 ..... ..... 001 ..... 1010011 @r
+fle_h 1010010 ..... ..... 000 ..... 1010011 @r
+fclass_h 1110010 00000 ..... 001 ..... 1010011 @r2
+fcvt_h_w 1101010 00000 ..... ... ..... 1010011 @r2_rm
+fcvt_h_wu 1101010 00001 ..... ... ..... 1010011 @r2_rm
+fmv_h_x 1111010 00000 ..... 000 ..... 1010011 @r2
+
+# *** RV64 Zfh Extension (in addition to RV32 Zfh) ***
+fcvt_l_h 1100010 00010 ..... ... ..... 1010011 @r2_rm
+fcvt_lu_h 1100010 00011 ..... ... ..... 1010011 @r2_rm
+fcvt_h_l 1101010 00010 ..... ... ..... 1010011 @r2_rm
+fcvt_h_lu 1101010 00011 ..... ... ..... 1010011 @r2_rm
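[Editor's note, not part of the patch] As a reading aid for the decodetree formats used above (for instance @r2_zimm11 and @r2_zimm10 for vsetvli/vsetivli): rd lives in bits [11:7], rs1 (or the 5-bit AVL immediate of vsetivli) in bits [19:15], and the vtype immediate in bits [30:20] (11 bits) or [29:20] (10 bits). A minimal hand-decoding sketch, assuming nothing beyond <stdint.h>; extract_bits is a stand-in for QEMU's extract32():

#include <stdint.h>

/* (insn >> pos) & ((1 << len) - 1), as in QEMU's extract32(). */
static inline uint32_t extract_bits(uint32_t insn, int pos, int len)
{
    return (insn >> pos) & ((1u << len) - 1u);
}

static void sketch_decode_vset(uint32_t insn)
{
    uint32_t rd     = extract_bits(insn,  7, 5);
    uint32_t rs1    = extract_bits(insn, 15, 5);  /* uimm for vsetivli */
    uint32_t zimm11 = extract_bits(insn, 20, 11); /* vsetvli: bit 31 == 0 */
    uint32_t zimm10 = extract_bits(insn, 20, 10); /* vsetivli: bits 31:30 == 11 */
    (void)rd; (void)rs1; (void)zimm11; (void)zimm10;
}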
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 17ee3babef..5e3f7fdb77 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -1,5 +1,4 @@
/*
- * RISC-V translation routines for the RVV Standard Extension.
*
* Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
*
@@ -19,59 +18,187 @@
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
-static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a)
+static inline bool is_overlapped(const int8_t astart, int8_t asize,
+ const int8_t bstart, int8_t bsize)
{
- TCGv s1, s2, dst;
+ const int8_t aend = astart + asize;
+ const int8_t bend = bstart + bsize;
- if (!has_ext(ctx, RVV)) {
+ return MAX(aend, bend) - MIN(astart, bstart) < asize + bsize;
+}
+
+static bool require_rvv(DisasContext *s)
+{
+ return s->mstatus_vs != 0;
+}
+
+static bool require_rvf(DisasContext *s)
+{
+ if (s->mstatus_fs == 0) {
return false;
}
- s2 = get_gpr(ctx, a->rs2, EXT_ZERO);
- dst = dest_gpr(ctx, a->rd);
+ switch (s->sew) {
+ case MO_16:
+ case MO_32:
+ return has_ext(s, RVF);
+ case MO_64:
+ return has_ext(s, RVD);
+ default:
+ return false;
+ }
+}
- /* Using x0 as the rs1 register specifier, encodes an infinite AVL */
- if (a->rs1 == 0) {
+static bool require_scale_rvf(DisasContext *s)
+{
+ if (s->mstatus_fs == 0) {
+ return false;
+ }
+
+ switch (s->sew) {
+ case MO_8:
+ case MO_16:
+ return has_ext(s, RVF);
+ case MO_32:
+ return has_ext(s, RVD);
+ default:
+ return false;
+ }
+}
+
+/* Destination vector register group cannot overlap source mask register. */
+static bool require_vm(int vm, int vd)
+{
+ return (vm != 0 || vd != 0);
+}
+
+static bool require_nf(int vd, int nf, int lmul)
+{
+ int size = nf << MAX(lmul, 0);
+ return size <= 8 && vd + size <= 32;
+}
+
+/*
+ * Vector register should be aligned with the passed-in LMUL (EMUL).
+ * If LMUL < 0, i.e. fractional LMUL, any vector register is allowed.
+ */
+static bool require_align(const int8_t val, const int8_t lmul)
+{
+ return lmul <= 0 || extract32(val, 0, lmul) == 0;
+}
+
+/*
+ * A destination vector register group can overlap a source vector
+ * register group only if one of the following holds:
+ * 1. The destination EEW equals the source EEW.
+ * 2. The destination EEW is smaller than the source EEW and the overlap
+ * is in the lowest-numbered part of the source register group.
+ * 3. The destination EEW is greater than the source EEW, the source EMUL
+ * is at least 1, and the overlap is in the highest-numbered part of
+ * the destination register group.
+ * (Section 5.2)
+ *
+ * This function returns true if one of the following holds:
+ * * Destination vector register group does not overlap a source vector
+ * register group.
+ * * Rule 3 is met.
+ * For rule 1, overlap is allowed so this function doesn't need to be called.
+ * For rule 2, (vd == vs); the caller has to check that (vd != vs) before
+ * calling this function.
+ */
+static bool require_noover(const int8_t dst, const int8_t dst_lmul,
+ const int8_t src, const int8_t src_lmul)
+{
+ int8_t dst_size = dst_lmul <= 0 ? 1 : 1 << dst_lmul;
+ int8_t src_size = src_lmul <= 0 ? 1 : 1 << src_lmul;
+
+ /* Destination EEW is greater than the source EEW, check rule 3. */
+ if (dst_size > src_size) {
+ if (dst < src &&
+ src_lmul >= 0 &&
+ is_overlapped(dst, dst_size, src, src_size) &&
+ !is_overlapped(dst, dst_size, src + src_size, src_size)) {
+ return true;
+ }
+ }
+
+ return !is_overlapped(dst, dst_size, src, src_size);
+}
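/*
 * [Editor's note, not part of the patch] Worked example: for a widening op
 * with vd = v4 at EMUL 2 (dst_lmul = 1, group v4..v5) and vs = v5 at EMUL 1
 * (src_lmul = 0), dst_size(2) > src_size(1) and the overlapping register v5
 * is the highest-numbered part of the destination group, so rule 3 applies
 * and require_noover(4, 1, 5, 0) returns true. With vs = v4 instead, the
 * overlap falls in the lowest-numbered part of the destination, rule 3 does
 * not apply, and the final is_overlapped() check makes the function return
 * false.
 */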
+
+static bool do_vsetvl(DisasContext *s, int rd, int rs1, TCGv s2)
+{
+ TCGv s1, dst;
+
+ if (!require_rvv(s) || !has_ext(s, RVV)) {
+ return false;
+ }
+
+ dst = dest_gpr(s, rd);
+
+ if (rd == 0 && rs1 == 0) {
+ s1 = tcg_temp_new();
+ tcg_gen_mov_tl(s1, cpu_vl);
+ } else if (rs1 == 0) {
/* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */
s1 = tcg_constant_tl(RV_VLEN_MAX);
} else {
- s1 = get_gpr(ctx, a->rs1, EXT_ZERO);
+ s1 = get_gpr(s, rs1, EXT_ZERO);
}
+
gen_helper_vsetvl(dst, cpu_env, s1, s2);
- gen_set_gpr(ctx, a->rd, dst);
+ gen_set_gpr(s, rd, dst);
+ mark_vs_dirty(s);
- tcg_gen_movi_tl(cpu_pc, ctx->pc_succ_insn);
+ tcg_gen_movi_tl(cpu_pc, s->pc_succ_insn);
tcg_gen_lookup_and_goto_ptr();
- ctx->base.is_jmp = DISAS_NORETURN;
+ s->base.is_jmp = DISAS_NORETURN;
+
+ if (rd == 0 && rs1 == 0) {
+ tcg_temp_free(s1);
+ }
+
return true;
}
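/*
 * [Editor's note, not part of the patch] The three AVL cases above mirror the
 * ratified vsetvl{i} behaviour: rd = x0 and rs1 = x0 keeps the current vl and
 * only changes vtype; rs1 = x0 with rd != x0 requests the maximum AVL, for
 * which RV_VLEN_MAX is a safe over-approximation of VLMAX; otherwise the AVL
 * comes from rs1. For example, "vsetvli t0, a0, e32, m2, ta, ma" takes its
 * AVL from a0, while "vsetvli t0, x0, e32, m2, ta, ma" sets vl to VLMAX.
 */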
-static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a)
+static bool do_vsetivli(DisasContext *s, int rd, TCGv s1, TCGv s2)
{
- TCGv s1, s2, dst;
+ TCGv dst;
- if (!has_ext(ctx, RVV)) {
+ if (!require_rvv(s) || !has_ext(s, RVV)) {
return false;
}
- s2 = tcg_constant_tl(a->zimm);
- dst = dest_gpr(ctx, a->rd);
+ dst = dest_gpr(s, rd);
- /* Using x0 as the rs1 register specifier, encodes an infinite AVL */
- if (a->rs1 == 0) {
- /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */
- s1 = tcg_constant_tl(RV_VLEN_MAX);
- } else {
- s1 = get_gpr(ctx, a->rs1, EXT_ZERO);
- }
gen_helper_vsetvl(dst, cpu_env, s1, s2);
- gen_set_gpr(ctx, a->rd, dst);
+ gen_set_gpr(s, rd, dst);
+ mark_vs_dirty(s);
+ tcg_gen_movi_tl(cpu_pc, s->pc_succ_insn);
+ tcg_gen_lookup_and_goto_ptr();
+ s->base.is_jmp = DISAS_NORETURN;
- gen_goto_tb(ctx, 0, ctx->pc_succ_insn);
- ctx->base.is_jmp = DISAS_NORETURN;
return true;
}
+static bool trans_vsetvl(DisasContext *s, arg_vsetvl *a)
+{
+ TCGv s2 = get_gpr(s, a->rs2, EXT_ZERO);
+ return do_vsetvl(s, a->rd, a->rs1, s2);
+}
+
+static bool trans_vsetvli(DisasContext *s, arg_vsetvli *a)
+{
+ TCGv s2 = tcg_constant_tl(a->zimm);
+ return do_vsetvl(s, a->rd, a->rs1, s2);
+}
+
+static bool trans_vsetivli(DisasContext *s, arg_vsetivli *a)
+{
+ TCGv s1 = tcg_const_tl(a->rs1);
+ TCGv s2 = tcg_const_tl(a->zimm);
+ return do_vsetivli(s, a->rd, s1, s2);
+}
+
/* vector register offset from env */
static uint32_t vreg_ofs(DisasContext *s, int reg)
{
@@ -81,70 +208,372 @@ static uint32_t vreg_ofs(DisasContext *s, int reg)
/* check functions */
/*
- * In cpu_get_tb_cpu_state(), set VILL if RVV was not present.
- * So RVV is also be checked in this function.
+ * Vector unit-stride, strided, unit-stride segment, strided segment
+ * store check function.
+ *
+ * Rules to be checked here:
+ * 1. EMUL must be within the range: 1/8 <= EMUL <= 8. (Section 7.3)
+ * 2. Destination vector register number is a multiple of EMUL.
+ * (Section 3.4.2, 7.3)
+ * 3. The EMUL setting must be such that EMUL * NFIELDS ≤ 8. (Section 7.8)
+ * 4. Vector register numbers accessed by the segment load or store
+ * cannot increment past 31. (Section 7.8)
*/
-static bool vext_check_isa_ill(DisasContext *s)
+static bool vext_check_store(DisasContext *s, int vd, int nf, uint8_t eew)
{
- return !s->vill;
+ int8_t emul = eew - s->sew + s->lmul;
+ return (emul >= -3 && emul <= 3) &&
+ require_align(vd, emul) &&
+ require_nf(vd, nf, emul);
}
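/*
 * [Editor's note, not part of the patch] Worked example: a vse8.v executed
 * with SEW = 32 (s->sew = MO_32 = 2) and LMUL = 1 (s->lmul = 0) gives
 * emul = eew - sew + lmul = 0 - 2 + 0 = -2, i.e. EMUL = 1/4. That lies in
 * [-3, 3] (1/8 <= EMUL <= 8), any vd is acceptably aligned for a fractional
 * EMUL, and require_nf() then only demands nf <= 8 and vd + nf <= 32.
 */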
/*
- * There are two rules check here.
+ * Vector unit-stride, strided, unit-stride segment, strided segment
+ * load check function.
*
- * 1. Vector register numbers are multiples of LMUL. (Section 3.2)
+ * Rules to be checked here:
+ * 1. All rules that apply to store instructions also apply
+ * to load instructions.
+ * 2. Destination vector register group for a masked vector
+ * instruction cannot overlap the source mask register (v0).
+ * (Section 5.3)
+ */
+static bool vext_check_load(DisasContext *s, int vd, int nf, int vm,
+ uint8_t eew)
+{
+ return vext_check_store(s, vd, nf, eew) && require_vm(vm, vd);
+}
+
+/*
+ * Vector indexed, indexed segment store check function.
*
- * 2. For all widening instructions, the destination LMUL value must also be
- * a supported LMUL value. (Section 11.2)
+ * Rules to be checked here:
+ * 1. EMUL must be within the range: 1/8 <= EMUL <= 8. (Section 7.3)
+ * 2. Index vector register number is a multiple of EMUL.
+ * (Section 3.4.2, 7.3)
+ * 3. Destination vector register number is a multiple of LMUL.
+ * (Section 3.4.2, 7.3)
+ * 4. The EMUL setting must be such that EMUL * NFIELDS ≤ 8. (Section 7.8)
+ * 5. Vector register numbers accessed by the segment load or store
+ * cannot increment past 31. (Section 7.8)
*/
-static bool vext_check_reg(DisasContext *s, uint32_t reg, bool widen)
+static bool vext_check_st_index(DisasContext *s, int vd, int vs2, int nf,
+ uint8_t eew)
{
- /*
- * The destination vector register group results are arranged as if both
- * SEW and LMUL were at twice their current settings. (Section 11.2).
- */
- int legal = widen ? 2 << s->lmul : 1 << s->lmul;
+ int8_t emul = eew - s->sew + s->lmul;
+ return (emul >= -3 && emul <= 3) &&
+ require_align(vs2, emul) &&
+ require_align(vd, s->lmul) &&
+ require_nf(vd, nf, s->lmul);
+}
- return !((s->lmul == 0x3 && widen) || (reg % legal));
+/*
+ * Vector indexed, indexed segment load check function.
+ *
+ * Rules to be checked here:
+ * 1. All rules that apply to store instructions also apply
+ * to load instructions.
+ * 2. Destination vector register group for a masked vector
+ * instruction cannot overlap the source mask register (v0).
+ * (Section 5.3)
+ * 3. Destination vector register cannot overlap a source vector
+ * register (vs2) group.
+ * (Section 5.2)
+ * 4. Destination vector register groups cannot overlap
+ * the source vector register (vs2) group for
+ * indexed segment load instructions. (Section 7.8.3)
+ */
+static bool vext_check_ld_index(DisasContext *s, int vd, int vs2,
+ int nf, int vm, uint8_t eew)
+{
+ int8_t seg_vd;
+ int8_t emul = eew - s->sew + s->lmul;
+ bool ret = vext_check_st_index(s, vd, vs2, nf, eew) &&
+ require_vm(vm, vd);
+
+ /* Each segment register group has to follow overlap rules. */
+ for (int i = 0; i < nf; ++i) {
+ seg_vd = vd + (1 << MAX(s->lmul, 0)) * i;
+
+ if (eew > s->sew) {
+ if (seg_vd != vs2) {
+ ret &= require_noover(seg_vd, s->lmul, vs2, emul);
+ }
+ } else if (eew < s->sew) {
+ ret &= require_noover(seg_vd, s->lmul, vs2, emul);
+ }
+
+ /*
+ * Destination vector register groups cannot overlap
+ * the source vector register (vs2) group for
+ * indexed segment load instructions.
+ */
+ if (nf > 1) {
+ ret &= !is_overlapped(seg_vd, 1 << MAX(s->lmul, 0),
+ vs2, 1 << MAX(emul, 0));
+ }
+ }
+ return ret;
+}
+
+static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm)
+{
+ return require_vm(vm, vd) &&
+ require_align(vd, s->lmul) &&
+ require_align(vs, s->lmul);
+}
+
+/*
+ * Check function for vector instruction with format:
+ * single-width result and single-width sources (SEW = SEW op SEW)
+ *
+ * Rules to be checked here:
+ * 1. Destination vector register group for a masked vector
+ * instruction cannot overlap the source mask register (v0).
+ * (Section 5.3)
+ * 2. Destination vector register number is a multiple of LMUL.
+ * (Section 3.4.2)
+ * 3. Source (vs2, vs1) vector register numbers are multiples of LMUL.
+ * (Section 3.4.2)
+ */
+static bool vext_check_sss(DisasContext *s, int vd, int vs1, int vs2, int vm)
+{
+ return vext_check_ss(s, vd, vs2, vm) &&
+ require_align(vs1, s->lmul);
+}
+
+static bool vext_check_ms(DisasContext *s, int vd, int vs)
+{
+ bool ret = require_align(vs, s->lmul);
+ if (vd != vs) {
+ ret &= require_noover(vd, 0, vs, s->lmul);
+ }
+ return ret;
+}
+
+/*
+ * Check function for maskable vector instruction with format:
+ * single-width result and single-width sources (SEW = SEW op SEW)
+ *
+ * Rules to be checked here:
+ * 1. Source (vs2, vs1) vector register numbers are multiples of LMUL.
+ * (Section 3.4.2)
+ * 2. Destination vector register cannot overlap a source vector
+ * register (vs2, vs1) group.
+ * (Section 5.2)
+ * 3. The destination vector register group for a masked vector
+ * instruction cannot overlap the source mask register (v0),
+ * unless the destination vector register is being written
+ * with a mask value (e.g., comparisons) or the scalar result
+ * of a reduction. (Section 5.3)
+ */
+static bool vext_check_mss(DisasContext *s, int vd, int vs1, int vs2)
+{
+ bool ret = vext_check_ms(s, vd, vs2) &&
+ require_align(vs1, s->lmul);
+ if (vd != vs1) {
+ ret &= require_noover(vd, 0, vs1, s->lmul);
+ }
+ return ret;
}
/*
- * There are two rules check here.
+ * Common check function for vector widening instructions
+ * of double-width result (2*SEW).
*
- * 1. The destination vector register group for a masked vector instruction can
- * only overlap the source mask register (v0) when LMUL=1. (Section 5.3)
+ * Rules to be checked here:
+ * 1. The largest vector register group used by an instruction
+ * can not be greater than 8 vector registers (Section 5.2):
+ * => LMUL < 8.
+ * => SEW < 64.
+ * 2. Double-width SEW cannot be greater than ELEN.
+ * 3. Destination vector register number is a multiple of 2 * LMUL.
+ * (Section 3.4.2)
+ * 4. Destination vector register group for a masked vector
+ * instruction cannot overlap the source mask register (v0).
+ * (Section 5.3)
+ */
+static bool vext_wide_check_common(DisasContext *s, int vd, int vm)
+{
+ return (s->lmul <= 2) &&
+ (s->sew < MO_64) &&
+ ((s->sew + 1) <= (s->elen >> 4)) &&
+ require_align(vd, s->lmul + 1) &&
+ require_vm(vm, vd);
+}
+
+/*
+ * Common check function for vector narrowing instructions
+ * of single-width result (SEW) and double-width source (2*SEW).
*
- * 2. In widen instructions and some other insturctions, like vslideup.vx,
- * there is no need to check whether LMUL=1.
+ * Rules to be checked here:
+ * 1. The largest vector register group used by an instruction
+ * can not be greater than 8 vector registers (Section 5.2):
+ * => LMUL < 8.
+ * => SEW < 64.
+ * 2. Double-width SEW cannot be greater than ELEN.
+ * 3. Source vector register number is a multiple of 2 * LMUL.
+ * (Section 3.4.2)
+ * 4. Destination vector register number is a multiple of LMUL.
+ * (Section 3.4.2)
+ * 5. Destination vector register group for a masked vector
+ * instruction cannot overlap the source mask register (v0).
+ * (Section 5.3)
*/
-static bool vext_check_overlap_mask(DisasContext *s, uint32_t vd, bool vm,
- bool force)
+static bool vext_narrow_check_common(DisasContext *s, int vd, int vs2,
+ int vm)
{
- return (vm != 0 || vd != 0) || (!force && (s->lmul == 0));
+ return (s->lmul <= 2) &&
+ (s->sew < MO_64) &&
+ ((s->sew + 1) <= (s->elen >> 4)) &&
+ require_align(vs2, s->lmul + 1) &&
+ require_align(vd, s->lmul) &&
+ require_vm(vm, vd);
}
-/* The LMUL setting must be such that LMUL * NFIELDS <= 8. (Section 7.8) */
-static bool vext_check_nf(DisasContext *s, uint32_t nf)
+static bool vext_check_ds(DisasContext *s, int vd, int vs, int vm)
{
- return (1 << s->lmul) * nf <= 8;
+ return vext_wide_check_common(s, vd, vm) &&
+ require_align(vs, s->lmul) &&
+ require_noover(vd, s->lmul + 1, vs, s->lmul);
+}
+
+static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm)
+{
+ return vext_wide_check_common(s, vd, vm) &&
+ require_align(vs, s->lmul + 1);
}
/*
- * The destination vector register group cannot overlap a source vector register
- * group of a different element width. (Section 11.2)
+ * Check function for vector instruction with format:
+ * double-width result and single-width sources (2*SEW = SEW op SEW)
+ *
+ * Rules to be checked here:
+ * 1. All rules defined in the widen common rules apply.
+ * 2. Source (vs2, vs1) vector register numbers are multiples of LMUL.
+ * (Section 3.4.2)
+ * 3. Destination vector register cannot overlap a source vector
+ * register (vs2, vs1) group.
+ * (Section 5.2)
*/
-static inline bool vext_check_overlap_group(int rd, int dlen, int rs, int slen)
+static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm)
{
- return ((rd >= rs + slen) || (rs >= rd + dlen));
+ return vext_check_ds(s, vd, vs2, vm) &&
+ require_align(vs1, s->lmul) &&
+ require_noover(vd, s->lmul + 1, vs1, s->lmul);
}
+
+/*
+ * Check function for vector instruction with format:
+ * double-width result and double-width source1 and single-width
+ * source2 (2*SEW = 2*SEW op SEW)
+ *
+ * Rules to be checked here:
+ * 1. All rules defined in the widen common rules apply.
+ * 2. Source 1 (vs2) vector register number is a multiple of 2 * LMUL.
+ * (Section 3.4.2)
+ * 3. Source 2 (vs1) vector register number is a multiple of LMUL.
+ * (Section 3.4.2)
+ * 4. Destination vector register cannot overlap a source vector
+ * register (vs1) group.
+ * (Section 5.2)
+ */
+static bool vext_check_dds(DisasContext *s, int vd, int vs1, int vs2, int vm)
+{
+ return vext_check_ds(s, vd, vs1, vm) &&
+ require_align(vs2, s->lmul + 1);
+}
+
+static bool vext_check_sd(DisasContext *s, int vd, int vs, int vm)
+{
+ bool ret = vext_narrow_check_common(s, vd, vs, vm);
+ if (vd != vs) {
+ ret &= require_noover(vd, s->lmul, vs, s->lmul + 1);
+ }
+ return ret;
+}
+
+/*
+ * Check function for vector instruction with format:
+ * single-width result and double-width source 1 and single-width
+ * source 2 (SEW = 2*SEW op SEW)
+ *
+ * Rules to be checked here:
+ * 1. All rules defined in the narrow common rules apply.
+ * 2. Destination vector register cannot overlap a source vector
+ * register (vs2) group.
+ * (Section 5.2)
+ * 3. Source 2 (vs1) vector register number is a multiple of LMUL.
+ * (Section 3.4.2)
+ */
+static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm)
+{
+ return vext_check_sd(s, vd, vs2, vm) &&
+ require_align(vs1, s->lmul);
+}
+
+/*
+ * Check function for vector reduction instructions.
+ *
+ * Rules to be checked here:
+ * 1. Source 1 (vs2) vector register number is a multiple of LMUL.
+ * (Section 3.4.2)
+ */
+static bool vext_check_reduction(DisasContext *s, int vs2)
+{
+ return require_align(vs2, s->lmul) && (s->vstart == 0);
+}
+
+/*
+ * Check function for vector slide instructions.
+ *
+ * Rules to be checked here:
+ * 1. Source 1 (vs2) vector register number is a multiple of LMUL.
+ * (Section 3.4.2)
+ * 2. Destination vector register number is a multiple of LMUL.
+ * (Section 3.4.2)
+ * 3. Destination vector register group for a masked vector
+ * instruction cannot overlap the source mask register (v0).
+ * (Section 5.3)
+ * 4. The destination vector register group for vslideup, vslide1up,
+ * and vfslide1up cannot overlap the source vector register (vs2) group.
+ * (Section 5.2, 16.3.1, 16.3.3)
+ */
+static bool vext_check_slide(DisasContext *s, int vd, int vs2,
+ int vm, bool is_over)
+{
+ bool ret = require_align(vs2, s->lmul) &&
+ require_align(vd, s->lmul) &&
+ require_vm(vm, vd);
+ if (is_over) {
+ ret &= (vd != vs2);
+ }
+ return ret;
+}
+
+/*
+ * In cpu_get_tb_cpu_state(), VILL is set if RVV was not present.
+ * So RVV is also checked by this function.
+ */
+static bool vext_check_isa_ill(DisasContext *s)
+{
+ return !s->vill;
+}
+
/* common translation macro */
-#define GEN_VEXT_TRANS(NAME, SEQ, ARGTYPE, OP, CHECK) \
-static bool trans_##NAME(DisasContext *s, arg_##ARGTYPE *a)\
-{ \
- if (CHECK(s, a)) { \
- return OP(s, a, SEQ); \
- } \
- return false; \
+#define GEN_VEXT_TRANS(NAME, EEW, ARGTYPE, OP, CHECK) \
+static bool trans_##NAME(DisasContext *s, arg_##ARGTYPE * a) \
+{ \
+ if (CHECK(s, a, EEW)) { \
+ return OP(s, a, EEW); \
+ } \
+ return false; \
+}
+
+static uint8_t vext_get_emul(DisasContext *s, uint8_t eew)
+{
+ int8_t emul = eew - s->sew + s->lmul;
+ return emul < 0 ? 0 : emul;
}
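/*
 * [Editor's note, not part of the patch] Example: vle8.v with SEW = 32
 * (MO_32 = 2) and LMUL = 1 (lmul = 0) yields emul = 0 - 2 + 0 = -2, i.e. a
 * fractional EMUL of 1/4; vext_get_emul() clamps this to 0 because even a
 * fractional register group still occupies one whole vector register as far
 * as the descriptor is concerned.
 */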
/*
@@ -154,7 +583,8 @@ typedef void gen_helper_ldst_us(TCGv_ptr, TCGv_ptr, TCGv,
TCGv_env, TCGv_i32);
static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
- gen_helper_ldst_us *fn, DisasContext *s)
+ gen_helper_ldst_us *fn, DisasContext *s,
+ bool is_store)
{
TCGv_ptr dest, mask;
TCGv base;
@@ -168,8 +598,8 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
base = get_gpr(s, rs1, EXT_NONE);
/*
- * As simd_desc supports at most 256 bytes, and in this implementation,
- * the max vector group length is 2048 bytes. So split it into two parts.
+ * As simd_desc supports at most 2048 bytes, and in this implementation,
+ * the max vector group length is 4096 bytes. So split it into two parts.
*
* The first part is vlen in bytes, encoded in maxsz of simd_desc.
* The second part is lmul, encoded in data of simd_desc.
@@ -183,123 +613,133 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
tcg_temp_free_ptr(dest);
tcg_temp_free_ptr(mask);
+
+ if (!is_store) {
+ mark_vs_dirty(s);
+ }
+
gen_set_label(over);
return true;
}
-static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
+static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
{
uint32_t data = 0;
gen_helper_ldst_us *fn;
- static gen_helper_ldst_us * const fns[2][7][4] = {
+ static gen_helper_ldst_us * const fns[2][4] = {
/* masked unit stride load */
- { { gen_helper_vlb_v_b_mask, gen_helper_vlb_v_h_mask,
- gen_helper_vlb_v_w_mask, gen_helper_vlb_v_d_mask },
- { NULL, gen_helper_vlh_v_h_mask,
- gen_helper_vlh_v_w_mask, gen_helper_vlh_v_d_mask },
- { NULL, NULL,
- gen_helper_vlw_v_w_mask, gen_helper_vlw_v_d_mask },
- { gen_helper_vle_v_b_mask, gen_helper_vle_v_h_mask,
- gen_helper_vle_v_w_mask, gen_helper_vle_v_d_mask },
- { gen_helper_vlbu_v_b_mask, gen_helper_vlbu_v_h_mask,
- gen_helper_vlbu_v_w_mask, gen_helper_vlbu_v_d_mask },
- { NULL, gen_helper_vlhu_v_h_mask,
- gen_helper_vlhu_v_w_mask, gen_helper_vlhu_v_d_mask },
- { NULL, NULL,
- gen_helper_vlwu_v_w_mask, gen_helper_vlwu_v_d_mask } },
+ { gen_helper_vle8_v_mask, gen_helper_vle16_v_mask,
+ gen_helper_vle32_v_mask, gen_helper_vle64_v_mask },
/* unmasked unit stride load */
- { { gen_helper_vlb_v_b, gen_helper_vlb_v_h,
- gen_helper_vlb_v_w, gen_helper_vlb_v_d },
- { NULL, gen_helper_vlh_v_h,
- gen_helper_vlh_v_w, gen_helper_vlh_v_d },
- { NULL, NULL,
- gen_helper_vlw_v_w, gen_helper_vlw_v_d },
- { gen_helper_vle_v_b, gen_helper_vle_v_h,
- gen_helper_vle_v_w, gen_helper_vle_v_d },
- { gen_helper_vlbu_v_b, gen_helper_vlbu_v_h,
- gen_helper_vlbu_v_w, gen_helper_vlbu_v_d },
- { NULL, gen_helper_vlhu_v_h,
- gen_helper_vlhu_v_w, gen_helper_vlhu_v_d },
- { NULL, NULL,
- gen_helper_vlwu_v_w, gen_helper_vlwu_v_d } }
+ { gen_helper_vle8_v, gen_helper_vle16_v,
+ gen_helper_vle32_v, gen_helper_vle64_v }
};
- fn = fns[a->vm][seq][s->sew];
+ fn = fns[a->vm][eew];
if (fn == NULL) {
return false;
}
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
+ /*
+ * Vector load/store instructions have the EEW encoded
+ * directly in the instructions. The maximum vector size is
+ * calculated with EMUL rather than LMUL.
+ */
+ uint8_t emul = vext_get_emul(s, eew);
data = FIELD_DP32(data, VDATA, VM, a->vm);
- data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, LMUL, emul);
data = FIELD_DP32(data, VDATA, NF, a->nf);
- return ldst_us_trans(a->rd, a->rs1, data, fn, s);
+ return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
}
-static bool ld_us_check(DisasContext *s, arg_r2nfvm* a)
+static bool ld_us_check(DisasContext *s, arg_r2nfvm* a, uint8_t eew)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, false) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_nf(s, a->nf));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_load(s, a->rd, a->nf, a->vm, eew);
}
-GEN_VEXT_TRANS(vlb_v, 0, r2nfvm, ld_us_op, ld_us_check)
-GEN_VEXT_TRANS(vlh_v, 1, r2nfvm, ld_us_op, ld_us_check)
-GEN_VEXT_TRANS(vlw_v, 2, r2nfvm, ld_us_op, ld_us_check)
-GEN_VEXT_TRANS(vle_v, 3, r2nfvm, ld_us_op, ld_us_check)
-GEN_VEXT_TRANS(vlbu_v, 4, r2nfvm, ld_us_op, ld_us_check)
-GEN_VEXT_TRANS(vlhu_v, 5, r2nfvm, ld_us_op, ld_us_check)
-GEN_VEXT_TRANS(vlwu_v, 6, r2nfvm, ld_us_op, ld_us_check)
+GEN_VEXT_TRANS(vle8_v, MO_8, r2nfvm, ld_us_op, ld_us_check)
+GEN_VEXT_TRANS(vle16_v, MO_16, r2nfvm, ld_us_op, ld_us_check)
+GEN_VEXT_TRANS(vle32_v, MO_32, r2nfvm, ld_us_op, ld_us_check)
+GEN_VEXT_TRANS(vle64_v, MO_64, r2nfvm, ld_us_op, ld_us_check)
-static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
+static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
{
uint32_t data = 0;
gen_helper_ldst_us *fn;
- static gen_helper_ldst_us * const fns[2][4][4] = {
- /* masked unit stride load and store */
- { { gen_helper_vsb_v_b_mask, gen_helper_vsb_v_h_mask,
- gen_helper_vsb_v_w_mask, gen_helper_vsb_v_d_mask },
- { NULL, gen_helper_vsh_v_h_mask,
- gen_helper_vsh_v_w_mask, gen_helper_vsh_v_d_mask },
- { NULL, NULL,
- gen_helper_vsw_v_w_mask, gen_helper_vsw_v_d_mask },
- { gen_helper_vse_v_b_mask, gen_helper_vse_v_h_mask,
- gen_helper_vse_v_w_mask, gen_helper_vse_v_d_mask } },
+ static gen_helper_ldst_us * const fns[2][4] = {
+ /* masked unit stride store */
+ { gen_helper_vse8_v_mask, gen_helper_vse16_v_mask,
+ gen_helper_vse32_v_mask, gen_helper_vse64_v_mask },
/* unmasked unit stride store */
- { { gen_helper_vsb_v_b, gen_helper_vsb_v_h,
- gen_helper_vsb_v_w, gen_helper_vsb_v_d },
- { NULL, gen_helper_vsh_v_h,
- gen_helper_vsh_v_w, gen_helper_vsh_v_d },
- { NULL, NULL,
- gen_helper_vsw_v_w, gen_helper_vsw_v_d },
- { gen_helper_vse_v_b, gen_helper_vse_v_h,
- gen_helper_vse_v_w, gen_helper_vse_v_d } }
+ { gen_helper_vse8_v, gen_helper_vse16_v,
+ gen_helper_vse32_v, gen_helper_vse64_v }
};
- fn = fns[a->vm][seq][s->sew];
+ fn = fns[a->vm][eew];
if (fn == NULL) {
return false;
}
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
+ uint8_t emul = vext_get_emul(s, eew);
data = FIELD_DP32(data, VDATA, VM, a->vm);
- data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, LMUL, emul);
data = FIELD_DP32(data, VDATA, NF, a->nf);
- return ldst_us_trans(a->rd, a->rs1, data, fn, s);
+ return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
}
-static bool st_us_check(DisasContext *s, arg_r2nfvm* a)
+static bool st_us_check(DisasContext *s, arg_r2nfvm* a, uint8_t eew)
{
- return (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_nf(s, a->nf));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_store(s, a->rd, a->nf, eew);
}
-GEN_VEXT_TRANS(vsb_v, 0, r2nfvm, st_us_op, st_us_check)
-GEN_VEXT_TRANS(vsh_v, 1, r2nfvm, st_us_op, st_us_check)
-GEN_VEXT_TRANS(vsw_v, 2, r2nfvm, st_us_op, st_us_check)
-GEN_VEXT_TRANS(vse_v, 3, r2nfvm, st_us_op, st_us_check)
+GEN_VEXT_TRANS(vse8_v, MO_8, r2nfvm, st_us_op, st_us_check)
+GEN_VEXT_TRANS(vse16_v, MO_16, r2nfvm, st_us_op, st_us_check)
+GEN_VEXT_TRANS(vse32_v, MO_32, r2nfvm, st_us_op, st_us_check)
+GEN_VEXT_TRANS(vse64_v, MO_64, r2nfvm, st_us_op, st_us_check)
+
+/*
+ *** unit stride mask load and store
+ */
+static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, uint8_t eew)
+{
+ uint32_t data = 0;
+ gen_helper_ldst_us *fn = gen_helper_vlm_v;
+
+ /* EMUL = 1, NFIELDS = 1 */
+ data = FIELD_DP32(data, VDATA, LMUL, 0);
+ data = FIELD_DP32(data, VDATA, NF, 1);
+ return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
+}
+
+static bool ld_us_mask_check(DisasContext *s, arg_vlm_v *a, uint8_t eew)
+{
+ /* EMUL = 1, NFIELDS = 1 */
+ return require_rvv(s) && vext_check_isa_ill(s);
+}
+
+static bool st_us_mask_op(DisasContext *s, arg_vsm_v *a, uint8_t eew)
+{
+ uint32_t data = 0;
+ gen_helper_ldst_us *fn = gen_helper_vsm_v;
+
+ /* EMUL = 1, NFIELDS = 1 */
+ data = FIELD_DP32(data, VDATA, LMUL, 0);
+ data = FIELD_DP32(data, VDATA, NF, 1);
+ return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
+}
+
+static bool st_us_mask_check(DisasContext *s, arg_vsm_v *a, uint8_t eew)
+{
+ /* EMUL = 1, NFIELDS = 1 */
+ return require_rvv(s) && vext_check_isa_ill(s);
+}
+
+GEN_VEXT_TRANS(vlm_v, MO_8, vlm_v, ld_us_mask_op, ld_us_mask_check)
+GEN_VEXT_TRANS(vsm_v, MO_8, vsm_v, st_us_mask_op, st_us_mask_check)
/*
*** stride load and store
@@ -309,7 +749,7 @@ typedef void gen_helper_ldst_stride(TCGv_ptr, TCGv_ptr, TCGv,
static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2,
uint32_t data, gen_helper_ldst_stride *fn,
- DisasContext *s)
+ DisasContext *s, bool is_store)
{
TCGv_ptr dest, mask;
TCGv base, stride;
@@ -331,98 +771,81 @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2,
tcg_temp_free_ptr(dest);
tcg_temp_free_ptr(mask);
+
+ if (!is_store) {
+ mark_vs_dirty(s);
+ }
+
gen_set_label(over);
return true;
}
-static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
+static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
{
uint32_t data = 0;
gen_helper_ldst_stride *fn;
- static gen_helper_ldst_stride * const fns[7][4] = {
- { gen_helper_vlsb_v_b, gen_helper_vlsb_v_h,
- gen_helper_vlsb_v_w, gen_helper_vlsb_v_d },
- { NULL, gen_helper_vlsh_v_h,
- gen_helper_vlsh_v_w, gen_helper_vlsh_v_d },
- { NULL, NULL,
- gen_helper_vlsw_v_w, gen_helper_vlsw_v_d },
- { gen_helper_vlse_v_b, gen_helper_vlse_v_h,
- gen_helper_vlse_v_w, gen_helper_vlse_v_d },
- { gen_helper_vlsbu_v_b, gen_helper_vlsbu_v_h,
- gen_helper_vlsbu_v_w, gen_helper_vlsbu_v_d },
- { NULL, gen_helper_vlshu_v_h,
- gen_helper_vlshu_v_w, gen_helper_vlshu_v_d },
- { NULL, NULL,
- gen_helper_vlswu_v_w, gen_helper_vlswu_v_d },
+ static gen_helper_ldst_stride * const fns[4] = {
+ gen_helper_vlse8_v, gen_helper_vlse16_v,
+ gen_helper_vlse32_v, gen_helper_vlse64_v
};
- fn = fns[seq][s->sew];
+ fn = fns[eew];
if (fn == NULL) {
return false;
}
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
+ uint8_t emul = vext_get_emul(s, eew);
data = FIELD_DP32(data, VDATA, VM, a->vm);
- data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, LMUL, emul);
data = FIELD_DP32(data, VDATA, NF, a->nf);
- return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s);
+ return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
}
-static bool ld_stride_check(DisasContext *s, arg_rnfvm* a)
+static bool ld_stride_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, false) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_nf(s, a->nf));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_load(s, a->rd, a->nf, a->vm, eew);
}
-GEN_VEXT_TRANS(vlsb_v, 0, rnfvm, ld_stride_op, ld_stride_check)
-GEN_VEXT_TRANS(vlsh_v, 1, rnfvm, ld_stride_op, ld_stride_check)
-GEN_VEXT_TRANS(vlsw_v, 2, rnfvm, ld_stride_op, ld_stride_check)
-GEN_VEXT_TRANS(vlse_v, 3, rnfvm, ld_stride_op, ld_stride_check)
-GEN_VEXT_TRANS(vlsbu_v, 4, rnfvm, ld_stride_op, ld_stride_check)
-GEN_VEXT_TRANS(vlshu_v, 5, rnfvm, ld_stride_op, ld_stride_check)
-GEN_VEXT_TRANS(vlswu_v, 6, rnfvm, ld_stride_op, ld_stride_check)
+GEN_VEXT_TRANS(vlse8_v, MO_8, rnfvm, ld_stride_op, ld_stride_check)
+GEN_VEXT_TRANS(vlse16_v, MO_16, rnfvm, ld_stride_op, ld_stride_check)
+GEN_VEXT_TRANS(vlse32_v, MO_32, rnfvm, ld_stride_op, ld_stride_check)
+GEN_VEXT_TRANS(vlse64_v, MO_64, rnfvm, ld_stride_op, ld_stride_check)
-static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
+static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
{
uint32_t data = 0;
gen_helper_ldst_stride *fn;
- static gen_helper_ldst_stride * const fns[4][4] = {
+ static gen_helper_ldst_stride * const fns[4] = {
/* masked stride store */
- { gen_helper_vssb_v_b, gen_helper_vssb_v_h,
- gen_helper_vssb_v_w, gen_helper_vssb_v_d },
- { NULL, gen_helper_vssh_v_h,
- gen_helper_vssh_v_w, gen_helper_vssh_v_d },
- { NULL, NULL,
- gen_helper_vssw_v_w, gen_helper_vssw_v_d },
- { gen_helper_vsse_v_b, gen_helper_vsse_v_h,
- gen_helper_vsse_v_w, gen_helper_vsse_v_d }
+ gen_helper_vsse8_v, gen_helper_vsse16_v,
+ gen_helper_vsse32_v, gen_helper_vsse64_v
};
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
+ uint8_t emul = vext_get_emul(s, eew);
data = FIELD_DP32(data, VDATA, VM, a->vm);
- data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, LMUL, emul);
data = FIELD_DP32(data, VDATA, NF, a->nf);
- fn = fns[seq][s->sew];
+ fn = fns[eew];
if (fn == NULL) {
return false;
}
- return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s);
+ return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, true);
}
-static bool st_stride_check(DisasContext *s, arg_rnfvm* a)
+static bool st_stride_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
{
- return (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_nf(s, a->nf));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_store(s, a->rd, a->nf, eew);
}
-GEN_VEXT_TRANS(vssb_v, 0, rnfvm, st_stride_op, st_stride_check)
-GEN_VEXT_TRANS(vssh_v, 1, rnfvm, st_stride_op, st_stride_check)
-GEN_VEXT_TRANS(vssw_v, 2, rnfvm, st_stride_op, st_stride_check)
-GEN_VEXT_TRANS(vsse_v, 3, rnfvm, st_stride_op, st_stride_check)
+GEN_VEXT_TRANS(vsse8_v, MO_8, rnfvm, st_stride_op, st_stride_check)
+GEN_VEXT_TRANS(vsse16_v, MO_16, rnfvm, st_stride_op, st_stride_check)
+GEN_VEXT_TRANS(vsse32_v, MO_32, rnfvm, st_stride_op, st_stride_check)
+GEN_VEXT_TRANS(vsse64_v, MO_64, rnfvm, st_stride_op, st_stride_check)
/*
*** index load and store
@@ -432,7 +855,7 @@ typedef void gen_helper_ldst_index(TCGv_ptr, TCGv_ptr, TCGv,
static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
uint32_t data, gen_helper_ldst_index *fn,
- DisasContext *s)
+ DisasContext *s, bool is_store)
{
TCGv_ptr dest, mask, index;
TCGv base;
@@ -456,107 +879,118 @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
tcg_temp_free_ptr(dest);
tcg_temp_free_ptr(mask);
tcg_temp_free_ptr(index);
+
+ if (!is_store) {
+ mark_vs_dirty(s);
+ }
+
gen_set_label(over);
return true;
}
-static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
+static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
{
uint32_t data = 0;
gen_helper_ldst_index *fn;
- static gen_helper_ldst_index * const fns[7][4] = {
- { gen_helper_vlxb_v_b, gen_helper_vlxb_v_h,
- gen_helper_vlxb_v_w, gen_helper_vlxb_v_d },
- { NULL, gen_helper_vlxh_v_h,
- gen_helper_vlxh_v_w, gen_helper_vlxh_v_d },
- { NULL, NULL,
- gen_helper_vlxw_v_w, gen_helper_vlxw_v_d },
- { gen_helper_vlxe_v_b, gen_helper_vlxe_v_h,
- gen_helper_vlxe_v_w, gen_helper_vlxe_v_d },
- { gen_helper_vlxbu_v_b, gen_helper_vlxbu_v_h,
- gen_helper_vlxbu_v_w, gen_helper_vlxbu_v_d },
- { NULL, gen_helper_vlxhu_v_h,
- gen_helper_vlxhu_v_w, gen_helper_vlxhu_v_d },
- { NULL, NULL,
- gen_helper_vlxwu_v_w, gen_helper_vlxwu_v_d },
+ static gen_helper_ldst_index * const fns[4][4] = {
+ /*
+ * offset vector register group EEW = 8,
+ * data vector register group EEW = SEW
+ */
+ { gen_helper_vlxei8_8_v, gen_helper_vlxei8_16_v,
+ gen_helper_vlxei8_32_v, gen_helper_vlxei8_64_v },
+ /*
+ * offset vector register group EEW = 16,
+ * data vector register group EEW = SEW
+ */
+ { gen_helper_vlxei16_8_v, gen_helper_vlxei16_16_v,
+ gen_helper_vlxei16_32_v, gen_helper_vlxei16_64_v },
+ /*
+ * offset vector register group EEW = 32,
+ * data vector register group EEW = SEW
+ */
+ { gen_helper_vlxei32_8_v, gen_helper_vlxei32_16_v,
+ gen_helper_vlxei32_32_v, gen_helper_vlxei32_64_v },
+ /*
+ * offset vector register group EEW = 64,
+ * data vector register group EEW = SEW
+ */
+ { gen_helper_vlxei64_8_v, gen_helper_vlxei64_16_v,
+ gen_helper_vlxei64_32_v, gen_helper_vlxei64_64_v }
};
- fn = fns[seq][s->sew];
- if (fn == NULL) {
- return false;
- }
+ fn = fns[eew][s->sew];
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
+ uint8_t emul = vext_get_emul(s, s->sew);
data = FIELD_DP32(data, VDATA, VM, a->vm);
- data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, LMUL, emul);
data = FIELD_DP32(data, VDATA, NF, a->nf);
- return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
+ return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
}
-/*
- * For vector indexed segment loads, the destination vector register
- * groups cannot overlap the source vector register group (specified by
- * `vs2`), else an illegal instruction exception is raised.
- */
-static bool ld_index_check(DisasContext *s, arg_rnfvm* a)
+static bool ld_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, false) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs2, false) &&
- vext_check_nf(s, a->nf) &&
- ((a->nf == 1) ||
- vext_check_overlap_group(a->rd, a->nf << s->lmul,
- a->rs2, 1 << s->lmul)));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_ld_index(s, a->rd, a->rs2, a->nf, a->vm, eew);
}
-GEN_VEXT_TRANS(vlxb_v, 0, rnfvm, ld_index_op, ld_index_check)
-GEN_VEXT_TRANS(vlxh_v, 1, rnfvm, ld_index_op, ld_index_check)
-GEN_VEXT_TRANS(vlxw_v, 2, rnfvm, ld_index_op, ld_index_check)
-GEN_VEXT_TRANS(vlxe_v, 3, rnfvm, ld_index_op, ld_index_check)
-GEN_VEXT_TRANS(vlxbu_v, 4, rnfvm, ld_index_op, ld_index_check)
-GEN_VEXT_TRANS(vlxhu_v, 5, rnfvm, ld_index_op, ld_index_check)
-GEN_VEXT_TRANS(vlxwu_v, 6, rnfvm, ld_index_op, ld_index_check)
+GEN_VEXT_TRANS(vlxei8_v, MO_8, rnfvm, ld_index_op, ld_index_check)
+GEN_VEXT_TRANS(vlxei16_v, MO_16, rnfvm, ld_index_op, ld_index_check)
+GEN_VEXT_TRANS(vlxei32_v, MO_32, rnfvm, ld_index_op, ld_index_check)
+GEN_VEXT_TRANS(vlxei64_v, MO_64, rnfvm, ld_index_op, ld_index_check)
-static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
+static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
{
uint32_t data = 0;
gen_helper_ldst_index *fn;
static gen_helper_ldst_index * const fns[4][4] = {
- { gen_helper_vsxb_v_b, gen_helper_vsxb_v_h,
- gen_helper_vsxb_v_w, gen_helper_vsxb_v_d },
- { NULL, gen_helper_vsxh_v_h,
- gen_helper_vsxh_v_w, gen_helper_vsxh_v_d },
- { NULL, NULL,
- gen_helper_vsxw_v_w, gen_helper_vsxw_v_d },
- { gen_helper_vsxe_v_b, gen_helper_vsxe_v_h,
- gen_helper_vsxe_v_w, gen_helper_vsxe_v_d }
+ /*
+ * offset vector register group EEW = 8,
+ * data vector register group EEW = SEW
+ */
+ { gen_helper_vsxei8_8_v, gen_helper_vsxei8_16_v,
+ gen_helper_vsxei8_32_v, gen_helper_vsxei8_64_v },
+ /*
+ * offset vector register group EEW = 16,
+ * data vector register group EEW = SEW
+ */
+ { gen_helper_vsxei16_8_v, gen_helper_vsxei16_16_v,
+ gen_helper_vsxei16_32_v, gen_helper_vsxei16_64_v },
+ /*
+ * offset vector register group EEW = 32,
+ * data vector register group EEW = SEW
+ */
+ { gen_helper_vsxei32_8_v, gen_helper_vsxei32_16_v,
+ gen_helper_vsxei32_32_v, gen_helper_vsxei32_64_v },
+ /*
+ * offset vector register group EEW = 64,
+ * data vector register group EEW = SEW
+ */
+ { gen_helper_vsxei64_8_v, gen_helper_vsxei64_16_v,
+ gen_helper_vsxei64_32_v, gen_helper_vsxei64_64_v }
};
- fn = fns[seq][s->sew];
- if (fn == NULL) {
- return false;
- }
+ fn = fns[eew][s->sew];
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
+ uint8_t emul = vext_get_emul(s, s->sew);
data = FIELD_DP32(data, VDATA, VM, a->vm);
- data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, LMUL, emul);
data = FIELD_DP32(data, VDATA, NF, a->nf);
- return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
+ return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, true);
}
-static bool st_index_check(DisasContext *s, arg_rnfvm* a)
+static bool st_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
{
- return (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs2, false) &&
- vext_check_nf(s, a->nf));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_st_index(s, a->rd, a->rs2, a->nf, eew);
}
-GEN_VEXT_TRANS(vsxb_v, 0, rnfvm, st_index_op, st_index_check)
-GEN_VEXT_TRANS(vsxh_v, 1, rnfvm, st_index_op, st_index_check)
-GEN_VEXT_TRANS(vsxw_v, 2, rnfvm, st_index_op, st_index_check)
-GEN_VEXT_TRANS(vsxe_v, 3, rnfvm, st_index_op, st_index_check)
+GEN_VEXT_TRANS(vsxei8_v, MO_8, rnfvm, st_index_op, st_index_check)
+GEN_VEXT_TRANS(vsxei16_v, MO_16, rnfvm, st_index_op, st_index_check)
+GEN_VEXT_TRANS(vsxei32_v, MO_32, rnfvm, st_index_op, st_index_check)
+GEN_VEXT_TRANS(vsxei64_v, MO_64, rnfvm, st_index_op, st_index_check)
/*
*** unit stride fault-only-first load
@@ -583,203 +1017,125 @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data,
tcg_temp_free_ptr(dest);
tcg_temp_free_ptr(mask);
+ mark_vs_dirty(s);
gen_set_label(over);
return true;
}
-static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
+static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
{
uint32_t data = 0;
gen_helper_ldst_us *fn;
- static gen_helper_ldst_us * const fns[7][4] = {
- { gen_helper_vlbff_v_b, gen_helper_vlbff_v_h,
- gen_helper_vlbff_v_w, gen_helper_vlbff_v_d },
- { NULL, gen_helper_vlhff_v_h,
- gen_helper_vlhff_v_w, gen_helper_vlhff_v_d },
- { NULL, NULL,
- gen_helper_vlwff_v_w, gen_helper_vlwff_v_d },
- { gen_helper_vleff_v_b, gen_helper_vleff_v_h,
- gen_helper_vleff_v_w, gen_helper_vleff_v_d },
- { gen_helper_vlbuff_v_b, gen_helper_vlbuff_v_h,
- gen_helper_vlbuff_v_w, gen_helper_vlbuff_v_d },
- { NULL, gen_helper_vlhuff_v_h,
- gen_helper_vlhuff_v_w, gen_helper_vlhuff_v_d },
- { NULL, NULL,
- gen_helper_vlwuff_v_w, gen_helper_vlwuff_v_d }
+ static gen_helper_ldst_us * const fns[4] = {
+ gen_helper_vle8ff_v, gen_helper_vle16ff_v,
+ gen_helper_vle32ff_v, gen_helper_vle64ff_v
};
- fn = fns[seq][s->sew];
+ fn = fns[eew];
if (fn == NULL) {
return false;
}
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
+ uint8_t emul = vext_get_emul(s, eew);
data = FIELD_DP32(data, VDATA, VM, a->vm);
- data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, LMUL, emul);
data = FIELD_DP32(data, VDATA, NF, a->nf);
return ldff_trans(a->rd, a->rs1, data, fn, s);
}
-GEN_VEXT_TRANS(vlbff_v, 0, r2nfvm, ldff_op, ld_us_check)
-GEN_VEXT_TRANS(vlhff_v, 1, r2nfvm, ldff_op, ld_us_check)
-GEN_VEXT_TRANS(vlwff_v, 2, r2nfvm, ldff_op, ld_us_check)
-GEN_VEXT_TRANS(vleff_v, 3, r2nfvm, ldff_op, ld_us_check)
-GEN_VEXT_TRANS(vlbuff_v, 4, r2nfvm, ldff_op, ld_us_check)
-GEN_VEXT_TRANS(vlhuff_v, 5, r2nfvm, ldff_op, ld_us_check)
-GEN_VEXT_TRANS(vlwuff_v, 6, r2nfvm, ldff_op, ld_us_check)
+GEN_VEXT_TRANS(vle8ff_v, MO_8, r2nfvm, ldff_op, ld_us_check)
+GEN_VEXT_TRANS(vle16ff_v, MO_16, r2nfvm, ldff_op, ld_us_check)
+GEN_VEXT_TRANS(vle32ff_v, MO_32, r2nfvm, ldff_op, ld_us_check)
+GEN_VEXT_TRANS(vle64ff_v, MO_64, r2nfvm, ldff_op, ld_us_check)
/*
- *** vector atomic operation
+ * load and store whole register instructions
*/
-typedef void gen_helper_amo(TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr,
- TCGv_env, TCGv_i32);
+typedef void gen_helper_ldst_whole(TCGv_ptr, TCGv, TCGv_env, TCGv_i32);
-static bool amo_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
- uint32_t data, gen_helper_amo *fn, DisasContext *s)
+static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
+ gen_helper_ldst_whole *fn, DisasContext *s,
+ bool is_store)
{
- TCGv_ptr dest, mask, index;
+ TCGv_ptr dest;
TCGv base;
TCGv_i32 desc;
- TCGLabel *over = gen_new_label();
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
-
+ uint32_t data = FIELD_DP32(0, VDATA, NF, nf);
dest = tcg_temp_new_ptr();
- mask = tcg_temp_new_ptr();
- index = tcg_temp_new_ptr();
- base = get_gpr(s, rs1, EXT_NONE);
desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
+ base = get_gpr(s, rs1, EXT_NONE);
tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
- tcg_gen_addi_ptr(index, cpu_env, vreg_ofs(s, vs2));
- tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
- fn(dest, mask, base, index, cpu_env, desc);
+ fn(dest, base, cpu_env, desc);
tcg_temp_free_ptr(dest);
- tcg_temp_free_ptr(mask);
- tcg_temp_free_ptr(index);
- gen_set_label(over);
- return true;
-}
-
-static bool amo_op(DisasContext *s, arg_rwdvm *a, uint8_t seq)
-{
- uint32_t data = 0;
- gen_helper_amo *fn;
- static gen_helper_amo *const fnsw[9] = {
- /* no atomic operation */
- gen_helper_vamoswapw_v_w,
- gen_helper_vamoaddw_v_w,
- gen_helper_vamoxorw_v_w,
- gen_helper_vamoandw_v_w,
- gen_helper_vamoorw_v_w,
- gen_helper_vamominw_v_w,
- gen_helper_vamomaxw_v_w,
- gen_helper_vamominuw_v_w,
- gen_helper_vamomaxuw_v_w
- };
- static gen_helper_amo *const fnsd[18] = {
- gen_helper_vamoswapw_v_d,
- gen_helper_vamoaddw_v_d,
- gen_helper_vamoxorw_v_d,
- gen_helper_vamoandw_v_d,
- gen_helper_vamoorw_v_d,
- gen_helper_vamominw_v_d,
- gen_helper_vamomaxw_v_d,
- gen_helper_vamominuw_v_d,
- gen_helper_vamomaxuw_v_d,
- gen_helper_vamoswapd_v_d,
- gen_helper_vamoaddd_v_d,
- gen_helper_vamoxord_v_d,
- gen_helper_vamoandd_v_d,
- gen_helper_vamoord_v_d,
- gen_helper_vamomind_v_d,
- gen_helper_vamomaxd_v_d,
- gen_helper_vamominud_v_d,
- gen_helper_vamomaxud_v_d
- };
- if (tb_cflags(s->base.tb) & CF_PARALLEL) {
- gen_helper_exit_atomic(cpu_env);
- s->base.is_jmp = DISAS_NORETURN;
- return true;
+ if (!is_store) {
+ mark_vs_dirty(s);
}
- switch (s->sew) {
- case 0 ... 2:
- assert(seq < ARRAY_SIZE(fnsw));
- fn = fnsw[seq];
- break;
- case 3:
- /* XLEN check done in amo_check(). */
- assert(seq < ARRAY_SIZE(fnsd));
- fn = fnsd[seq];
- break;
- default:
- g_assert_not_reached();
- }
-
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
- data = FIELD_DP32(data, VDATA, VM, a->vm);
- data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
- data = FIELD_DP32(data, VDATA, WD, a->wd);
- return amo_trans(a->rd, a->rs1, a->rs2, data, fn, s);
+ return true;
}
+
/*
- * There are two rules check here.
- *
- * 1. SEW must be at least as wide as the AMO memory element size.
- *
- * 2. If SEW is greater than XLEN, an illegal instruction exception is raised.
+ * Load and store whole register instructions ignore the vtype and vl
+ * settings, so we don't need to check the vill bit. (Section 7.9)
*/
-static bool amo_check(DisasContext *s, arg_rwdvm* a)
-{
- return (!s->vill && has_ext(s, RVA) &&
- (!a->wd || vext_check_overlap_mask(s, a->rd, a->vm, false)) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs2, false) &&
- ((1 << s->sew) <= sizeof(target_ulong)) &&
- ((1 << s->sew) >= 4));
-}
-
-static bool amo_check64(DisasContext *s, arg_rwdvm* a)
-{
- REQUIRE_64BIT(s);
- return amo_check(s, a);
-}
-
-GEN_VEXT_TRANS(vamoswapw_v, 0, rwdvm, amo_op, amo_check)
-GEN_VEXT_TRANS(vamoaddw_v, 1, rwdvm, amo_op, amo_check)
-GEN_VEXT_TRANS(vamoxorw_v, 2, rwdvm, amo_op, amo_check)
-GEN_VEXT_TRANS(vamoandw_v, 3, rwdvm, amo_op, amo_check)
-GEN_VEXT_TRANS(vamoorw_v, 4, rwdvm, amo_op, amo_check)
-GEN_VEXT_TRANS(vamominw_v, 5, rwdvm, amo_op, amo_check)
-GEN_VEXT_TRANS(vamomaxw_v, 6, rwdvm, amo_op, amo_check)
-GEN_VEXT_TRANS(vamominuw_v, 7, rwdvm, amo_op, amo_check)
-GEN_VEXT_TRANS(vamomaxuw_v, 8, rwdvm, amo_op, amo_check)
-GEN_VEXT_TRANS(vamoswapd_v, 9, rwdvm, amo_op, amo_check64)
-GEN_VEXT_TRANS(vamoaddd_v, 10, rwdvm, amo_op, amo_check64)
-GEN_VEXT_TRANS(vamoxord_v, 11, rwdvm, amo_op, amo_check64)
-GEN_VEXT_TRANS(vamoandd_v, 12, rwdvm, amo_op, amo_check64)
-GEN_VEXT_TRANS(vamoord_v, 13, rwdvm, amo_op, amo_check64)
-GEN_VEXT_TRANS(vamomind_v, 14, rwdvm, amo_op, amo_check64)
-GEN_VEXT_TRANS(vamomaxd_v, 15, rwdvm, amo_op, amo_check64)
-GEN_VEXT_TRANS(vamominud_v, 16, rwdvm, amo_op, amo_check64)
-GEN_VEXT_TRANS(vamomaxud_v, 17, rwdvm, amo_op, amo_check64)
+#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF, IS_STORE) \
+static bool trans_##NAME(DisasContext *s, arg_##NAME * a) \
+{ \
+ if (require_rvv(s) && \
+ QEMU_IS_ALIGNED(a->rd, ARG_NF)) { \
+ return ldst_whole_trans(a->rd, a->rs1, ARG_NF, gen_helper_##NAME, \
+ s, IS_STORE); \
+ } \
+ return false; \
+}
+
+GEN_LDST_WHOLE_TRANS(vl1re8_v, 1, false)
+GEN_LDST_WHOLE_TRANS(vl1re16_v, 1, false)
+GEN_LDST_WHOLE_TRANS(vl1re32_v, 1, false)
+GEN_LDST_WHOLE_TRANS(vl1re64_v, 1, false)
+GEN_LDST_WHOLE_TRANS(vl2re8_v, 2, false)
+GEN_LDST_WHOLE_TRANS(vl2re16_v, 2, false)
+GEN_LDST_WHOLE_TRANS(vl2re32_v, 2, false)
+GEN_LDST_WHOLE_TRANS(vl2re64_v, 2, false)
+GEN_LDST_WHOLE_TRANS(vl4re8_v, 4, false)
+GEN_LDST_WHOLE_TRANS(vl4re16_v, 4, false)
+GEN_LDST_WHOLE_TRANS(vl4re32_v, 4, false)
+GEN_LDST_WHOLE_TRANS(vl4re64_v, 4, false)
+GEN_LDST_WHOLE_TRANS(vl8re8_v, 8, false)
+GEN_LDST_WHOLE_TRANS(vl8re16_v, 8, false)
+GEN_LDST_WHOLE_TRANS(vl8re32_v, 8, false)
+GEN_LDST_WHOLE_TRANS(vl8re64_v, 8, false)
+
+GEN_LDST_WHOLE_TRANS(vs1r_v, 1, true)
+GEN_LDST_WHOLE_TRANS(vs2r_v, 2, true)
+GEN_LDST_WHOLE_TRANS(vs4r_v, 4, true)
+GEN_LDST_WHOLE_TRANS(vs8r_v, 8, true)
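/*
 * A minimal standalone sketch of the QEMU_IS_ALIGNED(a->rd, ARG_NF) check
 * used by GEN_LDST_WHOLE_TRANS above: whole-register accesses touching NF
 * registers require a register number that is a multiple of NF, e.g.
 * vl4re8.v with vd = v3 is rejected while vd = v4 is accepted.
 */
#include <stdbool.h>
#include <stdint.h>

static inline bool whole_reg_vd_ok_sketch(uint32_t vd, uint32_t nf)
{
    return (vd % nf) == 0;    /* nf is 1, 2, 4 or 8 */
}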
/*
*** Vector Integer Arithmetic Instructions
*/
-#define MAXSZ(s) (s->vlen >> (3 - s->lmul))
+
+/*
+ * MAXSZ returns the maximum vector size that can be operated on, in bytes,
+ * which is used in GVEC IR when the vl_eq_vlmax flag is set to true
+ * to accelerate vector operations.
+ */
+static inline uint32_t MAXSZ(DisasContext *s)
+{
+ int scale = s->lmul - 3;
+ return scale < 0 ? s->vlen >> -scale : s->vlen << scale;
+}
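/*
 * A standalone worked example of the MAXSZ() scaling above, assuming
 * VLEN = 128 bits (s->vlen = 128) and lmul as the signed log2 of LMUL:
 *   lmul = -1 (LMUL = 1/2) -> scale = -4 -> 128 >> 4 =   8 bytes
 *   lmul =  0 (LMUL = 1)   -> scale = -3 -> 128 >> 3 =  16 bytes
 *   lmul =  3 (LMUL = 8)   -> scale =  0 -> 128 << 0 = 128 bytes
 */
#include <stdint.h>

static inline uint32_t maxsz_sketch(uint32_t vlen_bits, int lmul_log2)
{
    int scale = lmul_log2 - 3;    /* the -3 converts bits to bytes */
    return scale < 0 ? vlen_bits >> -scale : vlen_bits << scale;
}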
static bool opivv_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, false) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs2, false) &&
- vext_check_reg(s, a->rs1, false));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm);
}
typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
@@ -803,13 +1159,13 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
} else {
uint32_t data = 0;
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
cpu_env, s->vlen / 8, s->vlen / 8, data, fn);
}
+ mark_vs_dirty(s);
gen_set_label(over);
return true;
}
@@ -847,7 +1203,6 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
src2 = tcg_temp_new_ptr();
src1 = get_gpr(s, rs1, EXT_NONE);
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
data = FIELD_DP32(data, VDATA, VM, vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
@@ -861,16 +1216,16 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
tcg_temp_free_ptr(dest);
tcg_temp_free_ptr(mask);
tcg_temp_free_ptr(src2);
+ mark_vs_dirty(s);
gen_set_label(over);
return true;
}
static bool opivx_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, false) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs2, false));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_ss(s, a->rd, a->rs2, a->vm);
}
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, TCGv_i64,
@@ -892,6 +1247,7 @@ do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn,
src1, MAXSZ(s), MAXSZ(s));
tcg_temp_free_i64(src1);
+ mark_vs_dirty(s);
return true;
}
return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
@@ -970,8 +1326,32 @@ static void tcg_gen_gvec_rsubs(unsigned vece, uint32_t dofs, uint32_t aofs,
GEN_OPIVX_GVEC_TRANS(vrsub_vx, rsubs)
+typedef enum {
+ IMM_ZX, /* Zero-extended */
+ IMM_SX, /* Sign-extended */
+ IMM_TRUNC_SEW, /* Truncate to log2(SEW) bits */
+ IMM_TRUNC_2SEW, /* Truncate to log2(2*SEW) bits */
+} imm_mode_t;
+
+static int64_t extract_imm(DisasContext *s, uint32_t imm, imm_mode_t imm_mode)
+{
+ switch (imm_mode) {
+ case IMM_ZX:
+ return extract64(imm, 0, 5);
+ case IMM_SX:
+ return sextract64(imm, 0, 5);
+ case IMM_TRUNC_SEW:
+ return extract64(imm, 0, s->sew + 3);
+ case IMM_TRUNC_2SEW:
+ return extract64(imm, 0, s->sew + 4);
+ default:
+ g_assert_not_reached();
+ }
+}
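/*
 * A standalone sketch of the IMM_TRUNC_SEW case above: s->sew encodes
 * log2(SEW / 8), so s->sew + 3 == log2(SEW in bits), which is exactly the
 * number of valid shift-amount bits.  For SEW = 32 (s->sew = 2) five bits
 * are kept, so an immediate of 33 behaves as a shift by 1.  extract_bits()
 * is a plain reimplementation, not the QEMU extract64() helper.
 */
#include <stdint.h>

static inline uint64_t extract_bits(uint64_t value, unsigned start,
                                    unsigned length)
{
    return (value >> start) & (~0ull >> (64 - length));
}

static inline int64_t imm_trunc_sew_sketch(uint32_t imm, int sew_log2_bytes)
{
    return extract_bits(imm, 0, sew_log2_bytes + 3);
}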
+
static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
- gen_helper_opivx *fn, DisasContext *s, int zx)
+ gen_helper_opivx *fn, DisasContext *s,
+ imm_mode_t imm_mode)
{
TCGv_ptr dest, src2, mask;
TCGv src1;
@@ -984,12 +1364,8 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
dest = tcg_temp_new_ptr();
mask = tcg_temp_new_ptr();
src2 = tcg_temp_new_ptr();
- if (zx) {
- src1 = tcg_constant_tl(imm);
- } else {
- src1 = tcg_constant_tl(sextract64(imm, 0, 5));
- }
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
+ src1 = tcg_constant_tl(extract_imm(s, imm, imm_mode));
+
data = FIELD_DP32(data, VDATA, VM, vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
@@ -1003,6 +1379,7 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
tcg_temp_free_ptr(dest);
tcg_temp_free_ptr(mask);
tcg_temp_free_ptr(src2);
+ mark_vs_dirty(s);
gen_set_label(over);
return true;
}
@@ -1012,28 +1389,23 @@ typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
static inline bool
do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn,
- gen_helper_opivx *fn, int zx)
+ gen_helper_opivx *fn, imm_mode_t imm_mode)
{
if (!opivx_check(s, a)) {
return false;
}
if (a->vm && s->vl_eq_vlmax) {
- if (zx) {
- gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
- extract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s));
- } else {
- gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
- sextract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s));
- }
- } else {
- return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, zx);
+ gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
+ extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s));
+ mark_vs_dirty(s);
+ return true;
}
- return true;
+ return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, imm_mode);
}
/* OPIVI with GVEC IR */
-#define GEN_OPIVI_GVEC_TRANS(NAME, ZX, OPIVX, SUF) \
+#define GEN_OPIVI_GVEC_TRANS(NAME, IMM_MODE, OPIVX, SUF) \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
{ \
static gen_helper_opivx * const fns[4] = { \
@@ -1041,10 +1413,10 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \
}; \
return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, \
- fns[s->sew], ZX); \
+ fns[s->sew], IMM_MODE); \
}
-GEN_OPIVI_GVEC_TRANS(vadd_vi, 0, vadd_vx, addi)
+GEN_OPIVI_GVEC_TRANS(vadd_vi, IMM_SX, vadd_vx, addi)
static void tcg_gen_gvec_rsubi(unsigned vece, uint32_t dofs, uint32_t aofs,
int64_t c, uint32_t oprsz, uint32_t maxsz)
@@ -1053,23 +1425,16 @@ static void tcg_gen_gvec_rsubi(unsigned vece, uint32_t dofs, uint32_t aofs,
tcg_gen_gvec_rsubs(vece, dofs, aofs, tmp, oprsz, maxsz);
}
-GEN_OPIVI_GVEC_TRANS(vrsub_vi, 0, vrsub_vx, rsubi)
+GEN_OPIVI_GVEC_TRANS(vrsub_vi, IMM_SX, vrsub_vx, rsubi)
/* Vector Widening Integer Add/Subtract */
/* OPIVV with WIDEN */
static bool opivv_widen_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, true) &&
- vext_check_reg(s, a->rd, true) &&
- vext_check_reg(s, a->rs2, false) &&
- vext_check_reg(s, a->rs1, false) &&
- vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2,
- 1 << s->lmul) &&
- vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1,
- 1 << s->lmul) &&
- (s->lmul < 0x3) && (s->sew < 0x3));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm);
}
static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
@@ -1081,7 +1446,6 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
@@ -1089,6 +1453,7 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
vreg_ofs(s, a->rs2),
cpu_env, s->vlen / 8, s->vlen / 8,
data, fn);
+ mark_vs_dirty(s);
gen_set_label(over);
return true;
}
@@ -1114,13 +1479,9 @@ GEN_OPIVV_WIDEN_TRANS(vwsub_vv, opivv_widen_check)
/* OPIVX with WIDEN */
static bool opivx_widen_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, true) &&
- vext_check_reg(s, a->rd, true) &&
- vext_check_reg(s, a->rs2, false) &&
- vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2,
- 1 << s->lmul) &&
- (s->lmul < 0x3) && (s->sew < 0x3));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_ds(s, a->rd, a->rs2, a->vm);
}
static bool do_opivx_widen(DisasContext *s, arg_rmrr *a,
@@ -1151,14 +1512,9 @@ GEN_OPIVX_WIDEN_TRANS(vwsub_vx)
/* WIDEN OPIVV with WIDEN */
static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, true) &&
- vext_check_reg(s, a->rd, true) &&
- vext_check_reg(s, a->rs2, true) &&
- vext_check_reg(s, a->rs1, false) &&
- vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1,
- 1 << s->lmul) &&
- (s->lmul < 0x3) && (s->sew < 0x3));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_dds(s, a->rd, a->rs1, a->rs2, a->vm);
}
static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
@@ -1169,13 +1525,13 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs1),
vreg_ofs(s, a->rs2),
cpu_env, s->vlen / 8, s->vlen / 8, data, fn);
+ mark_vs_dirty(s);
gen_set_label(over);
return true;
}
@@ -1201,11 +1557,9 @@ GEN_OPIWV_WIDEN_TRANS(vwsub_wv)
/* WIDEN OPIVX with WIDEN */
static bool opiwx_widen_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, true) &&
- vext_check_reg(s, a->rd, true) &&
- vext_check_reg(s, a->rs2, true) &&
- (s->lmul < 0x3) && (s->sew < 0x3));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_dd(s, a->rd, a->rs2, a->vm);
}
static bool do_opiwx_widen(DisasContext *s, arg_rmrr *a,
@@ -1247,7 +1601,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
TCGLabel *over = gen_new_label(); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
\
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
@@ -1255,6 +1608,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
vreg_ofs(s, a->rs2), cpu_env, \
s->vlen / 8, s->vlen / 8, data, \
fns[s->sew]); \
+ mark_vs_dirty(s); \
gen_set_label(over); \
return true; \
} \
@@ -1263,15 +1617,14 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
/*
* For vadc and vsbc, an illegal instruction exception is raised if the
- * destination vector register is v0 and LMUL > 1. (Section 12.3)
+ * destination vector register is v0 and LMUL > 1. (Section 11.4)
*/
static bool opivv_vadc_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs2, false) &&
- vext_check_reg(s, a->rs1, false) &&
- ((a->rd != 0) || (s->lmul == 0)));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ (a->rd != 0) &&
+ vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm);
}
GEN_OPIVV_TRANS(vadc_vvm, opivv_vadc_check)
@@ -1283,11 +1636,9 @@ GEN_OPIVV_TRANS(vsbc_vvm, opivv_vadc_check)
*/
static bool opivv_vmadc_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rs2, false) &&
- vext_check_reg(s, a->rs1, false) &&
- vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) &&
- vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_mss(s, a->rd, a->rs1, a->rs2);
}
GEN_OPIVV_TRANS(vmadc_vvm, opivv_vmadc_check)
@@ -1295,10 +1646,10 @@ GEN_OPIVV_TRANS(vmsbc_vvm, opivv_vmadc_check)
static bool opivx_vadc_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs2, false) &&
- ((a->rd != 0) || (s->lmul == 0)));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ (a->rd != 0) &&
+ vext_check_ss(s, a->rd, a->rs2, a->vm);
}
/* OPIVX without GVEC IR */
@@ -1321,16 +1672,16 @@ GEN_OPIVX_TRANS(vsbc_vxm, opivx_vadc_check)
static bool opivx_vmadc_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rs2, false) &&
- vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_ms(s, a->rd, a->rs2);
}
GEN_OPIVX_TRANS(vmadc_vxm, opivx_vmadc_check)
GEN_OPIVX_TRANS(vmsbc_vxm, opivx_vmadc_check)
/* OPIVI without GVEC IR */
-#define GEN_OPIVI_TRANS(NAME, ZX, OPIVX, CHECK) \
+#define GEN_OPIVI_TRANS(NAME, IMM_MODE, OPIVX, CHECK) \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
{ \
if (CHECK(s, a)) { \
@@ -1339,13 +1690,13 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \
}; \
return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \
- fns[s->sew], s, ZX); \
+ fns[s->sew], s, IMM_MODE); \
} \
return false; \
}
-GEN_OPIVI_TRANS(vadc_vim, 0, vadc_vxm, opivx_vadc_check)
-GEN_OPIVI_TRANS(vmadc_vim, 0, vmadc_vxm, opivx_vmadc_check)
+GEN_OPIVI_TRANS(vadc_vim, IMM_SX, vadc_vxm, opivx_vadc_check)
+GEN_OPIVI_TRANS(vmadc_vim, IMM_SX, vmadc_vxm, opivx_vmadc_check)
/* Vector Bitwise Logical Instructions */
GEN_OPIVV_GVEC_TRANS(vand_vv, and)
@@ -1354,9 +1705,9 @@ GEN_OPIVV_GVEC_TRANS(vxor_vv, xor)
GEN_OPIVX_GVEC_TRANS(vand_vx, ands)
GEN_OPIVX_GVEC_TRANS(vor_vx, ors)
GEN_OPIVX_GVEC_TRANS(vxor_vx, xors)
-GEN_OPIVI_GVEC_TRANS(vand_vi, 0, vand_vx, andi)
-GEN_OPIVI_GVEC_TRANS(vor_vi, 0, vor_vx, ori)
-GEN_OPIVI_GVEC_TRANS(vxor_vi, 0, vxor_vx, xori)
+GEN_OPIVI_GVEC_TRANS(vand_vi, IMM_SX, vand_vx, andi)
+GEN_OPIVI_GVEC_TRANS(vor_vi, IMM_SX, vor_vx, ori)
+GEN_OPIVI_GVEC_TRANS(vxor_vi, IMM_SX, vxor_vx, xori)
/* Vector Single-Width Bit Shift Instructions */
GEN_OPIVV_GVEC_TRANS(vsll_vv, shlv)
@@ -1383,6 +1734,7 @@ do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn,
src1, MAXSZ(s), MAXSZ(s));
tcg_temp_free_i32(src1);
+ mark_vs_dirty(s);
return true;
}
return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
@@ -1403,28 +1755,23 @@ GEN_OPIVX_GVEC_SHIFT_TRANS(vsll_vx, shls)
GEN_OPIVX_GVEC_SHIFT_TRANS(vsrl_vx, shrs)
GEN_OPIVX_GVEC_SHIFT_TRANS(vsra_vx, sars)
-GEN_OPIVI_GVEC_TRANS(vsll_vi, 1, vsll_vx, shli)
-GEN_OPIVI_GVEC_TRANS(vsrl_vi, 1, vsrl_vx, shri)
-GEN_OPIVI_GVEC_TRANS(vsra_vi, 1, vsra_vx, sari)
+GEN_OPIVI_GVEC_TRANS(vsll_vi, IMM_TRUNC_SEW, vsll_vx, shli)
+GEN_OPIVI_GVEC_TRANS(vsrl_vi, IMM_TRUNC_SEW, vsrl_vx, shri)
+GEN_OPIVI_GVEC_TRANS(vsra_vi, IMM_TRUNC_SEW, vsra_vx, sari)
/* Vector Narrowing Integer Right Shift Instructions */
-static bool opivv_narrow_check(DisasContext *s, arg_rmrr *a)
+static bool opiwv_narrow_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, false) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs2, true) &&
- vext_check_reg(s, a->rs1, false) &&
- vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2,
- 2 << s->lmul) &&
- (s->lmul < 0x3) && (s->sew < 0x3));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_sds(s, a->rd, a->rs1, a->rs2, a->vm);
}
/* OPIVV with NARROW */
-#define GEN_OPIVV_NARROW_TRANS(NAME) \
+#define GEN_OPIWV_NARROW_TRANS(NAME) \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
{ \
- if (opivv_narrow_check(s, a)) { \
+ if (opiwv_narrow_check(s, a)) { \
uint32_t data = 0; \
static gen_helper_gvec_4_ptr * const fns[3] = { \
gen_helper_##NAME##_b, \
@@ -1434,7 +1781,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
TCGLabel *over = gen_new_label(); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
\
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
@@ -1442,30 +1788,27 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
vreg_ofs(s, a->rs2), cpu_env, \
s->vlen / 8, s->vlen / 8, data, \
fns[s->sew]); \
+ mark_vs_dirty(s); \
gen_set_label(over); \
return true; \
} \
return false; \
}
-GEN_OPIVV_NARROW_TRANS(vnsra_vv)
-GEN_OPIVV_NARROW_TRANS(vnsrl_vv)
+GEN_OPIWV_NARROW_TRANS(vnsra_wv)
+GEN_OPIWV_NARROW_TRANS(vnsrl_wv)
-static bool opivx_narrow_check(DisasContext *s, arg_rmrr *a)
+static bool opiwx_narrow_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, false) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs2, true) &&
- vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2,
- 2 << s->lmul) &&
- (s->lmul < 0x3) && (s->sew < 0x3));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_sd(s, a->rd, a->rs2, a->vm);
}
/* OPIVX with NARROW */
-#define GEN_OPIVX_NARROW_TRANS(NAME) \
+#define GEN_OPIWX_NARROW_TRANS(NAME) \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
{ \
- if (opivx_narrow_check(s, a)) { \
+ if (opiwx_narrow_check(s, a)) { \
static gen_helper_opivx * const fns[3] = { \
gen_helper_##NAME##_b, \
gen_helper_##NAME##_h, \
@@ -1476,27 +1819,27 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
return false; \
}
-GEN_OPIVX_NARROW_TRANS(vnsra_vx)
-GEN_OPIVX_NARROW_TRANS(vnsrl_vx)
+GEN_OPIWX_NARROW_TRANS(vnsra_wx)
+GEN_OPIWX_NARROW_TRANS(vnsrl_wx)
-/* OPIVI with NARROW */
-#define GEN_OPIVI_NARROW_TRANS(NAME, ZX, OPIVX) \
+/* OPIWI with NARROW */
+#define GEN_OPIWI_NARROW_TRANS(NAME, IMM_MODE, OPIVX) \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
{ \
- if (opivx_narrow_check(s, a)) { \
+ if (opiwx_narrow_check(s, a)) { \
static gen_helper_opivx * const fns[3] = { \
gen_helper_##OPIVX##_b, \
gen_helper_##OPIVX##_h, \
gen_helper_##OPIVX##_w, \
}; \
return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \
- fns[s->sew], s, ZX); \
+ fns[s->sew], s, IMM_MODE); \
} \
return false; \
}
-GEN_OPIVI_NARROW_TRANS(vnsra_vi, 1, vnsra_vx)
-GEN_OPIVI_NARROW_TRANS(vnsrl_vi, 1, vnsrl_vx)
+GEN_OPIWI_NARROW_TRANS(vnsra_wi, IMM_ZX, vnsra_wx)
+GEN_OPIWI_NARROW_TRANS(vnsrl_wi, IMM_ZX, vnsrl_wx)
/* Vector Integer Comparison Instructions */
/*
@@ -1506,13 +1849,11 @@ GEN_OPIVI_NARROW_TRANS(vnsrl_vi, 1, vnsrl_vx)
*/
static bool opivv_cmp_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rs2, false) &&
- vext_check_reg(s, a->rs1, false) &&
- ((vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) &&
- vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)) ||
- (s->lmul == 0)));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_mss(s, a->rd, a->rs1, a->rs2);
}
+
GEN_OPIVV_TRANS(vmseq_vv, opivv_cmp_check)
GEN_OPIVV_TRANS(vmsne_vv, opivv_cmp_check)
GEN_OPIVV_TRANS(vmsltu_vv, opivv_cmp_check)
@@ -1522,10 +1863,9 @@ GEN_OPIVV_TRANS(vmsle_vv, opivv_cmp_check)
static bool opivx_cmp_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rs2, false) &&
- (vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul) ||
- (s->lmul == 0)));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_ms(s, a->rd, a->rs2);
}
GEN_OPIVX_TRANS(vmseq_vx, opivx_cmp_check)
@@ -1537,12 +1877,12 @@ GEN_OPIVX_TRANS(vmsle_vx, opivx_cmp_check)
GEN_OPIVX_TRANS(vmsgtu_vx, opivx_cmp_check)
GEN_OPIVX_TRANS(vmsgt_vx, opivx_cmp_check)
-GEN_OPIVI_TRANS(vmseq_vi, 0, vmseq_vx, opivx_cmp_check)
-GEN_OPIVI_TRANS(vmsne_vi, 0, vmsne_vx, opivx_cmp_check)
-GEN_OPIVI_TRANS(vmsleu_vi, 1, vmsleu_vx, opivx_cmp_check)
-GEN_OPIVI_TRANS(vmsle_vi, 0, vmsle_vx, opivx_cmp_check)
-GEN_OPIVI_TRANS(vmsgtu_vi, 1, vmsgtu_vx, opivx_cmp_check)
-GEN_OPIVI_TRANS(vmsgt_vi, 0, vmsgt_vx, opivx_cmp_check)
+GEN_OPIVI_TRANS(vmseq_vi, IMM_SX, vmseq_vx, opivx_cmp_check)
+GEN_OPIVI_TRANS(vmsne_vi, IMM_SX, vmsne_vx, opivx_cmp_check)
+GEN_OPIVI_TRANS(vmsleu_vi, IMM_SX, vmsleu_vx, opivx_cmp_check)
+GEN_OPIVI_TRANS(vmsle_vi, IMM_SX, vmsle_vx, opivx_cmp_check)
+GEN_OPIVI_TRANS(vmsgtu_vi, IMM_SX, vmsgtu_vx, opivx_cmp_check)
+GEN_OPIVI_TRANS(vmsgt_vi, IMM_SX, vmsgt_vx, opivx_cmp_check)
/* Vector Integer Min/Max Instructions */
GEN_OPIVV_GVEC_TRANS(vminu_vv, umin)
@@ -1604,10 +1944,10 @@ GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx)
/* Vector Integer Merge and Move Instructions */
static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
{
- if (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs1, false)) {
-
+ if (require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ /* vmv.v.v has rs2 = 0 and vm = 1 */
+ vext_check_sss(s, a->rd, a->rs1, 0, 1)) {
if (s->vl_eq_vlmax) {
tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd),
vreg_ofs(s, a->rs1),
@@ -1626,6 +1966,7 @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
fns[s->sew]);
gen_set_label(over);
}
+ mark_vs_dirty(s);
return true;
}
return false;
@@ -1634,9 +1975,10 @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
typedef void gen_helper_vmv_vx(TCGv_ptr, TCGv_i64, TCGv_env, TCGv_i32);
static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
{
- if (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rd, false)) {
-
+ if (require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ /* vmv.v.x has rs2 = 0 and vm = 1 */
+ vext_check_ss(s, a->rd, 0, 1)) {
TCGv s1;
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
@@ -1665,6 +2007,7 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
tcg_temp_free_i64(s1_i64);
}
+ mark_vs_dirty(s);
gen_set_label(over);
return true;
}
@@ -1673,13 +2016,15 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
{
- if (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rd, false)) {
-
+ if (require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ /* vmv.v.i has rs2 = 0 and vm = 1 */
+ vext_check_ss(s, a->rd, 0, 1)) {
int64_t simm = sextract64(a->rs1, 0, 5);
if (s->vl_eq_vlmax) {
tcg_gen_gvec_dup_imm(s->sew, vreg_ofs(s, a->rd),
MAXSZ(s), MAXSZ(s), simm);
+ mark_vs_dirty(s);
} else {
TCGv_i32 desc;
TCGv_i64 s1;
@@ -1699,6 +2044,7 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
fns[s->sew](dest, s1, cpu_env, desc);
tcg_temp_free_ptr(dest);
+ mark_vs_dirty(s);
gen_set_label(over);
}
return true;
@@ -1708,7 +2054,7 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
GEN_OPIVV_TRANS(vmerge_vvm, opivv_vadc_check)
GEN_OPIVX_TRANS(vmerge_vxm, opivx_vadc_check)
-GEN_OPIVI_TRANS(vmerge_vim, 0, vmerge_vxm, opivx_vadc_check)
+GEN_OPIVI_TRANS(vmerge_vim, IMM_SX, vmerge_vxm, opivx_vadc_check)
/*
*** Vector Fixed-Point Arithmetic Instructions
@@ -1723,48 +2069,69 @@ GEN_OPIVX_TRANS(vsaddu_vx, opivx_check)
GEN_OPIVX_TRANS(vsadd_vx, opivx_check)
GEN_OPIVX_TRANS(vssubu_vx, opivx_check)
GEN_OPIVX_TRANS(vssub_vx, opivx_check)
-GEN_OPIVI_TRANS(vsaddu_vi, 1, vsaddu_vx, opivx_check)
-GEN_OPIVI_TRANS(vsadd_vi, 0, vsadd_vx, opivx_check)
+GEN_OPIVI_TRANS(vsaddu_vi, IMM_SX, vsaddu_vx, opivx_check)
+GEN_OPIVI_TRANS(vsadd_vi, IMM_SX, vsadd_vx, opivx_check)
/* Vector Single-Width Averaging Add and Subtract */
GEN_OPIVV_TRANS(vaadd_vv, opivv_check)
+GEN_OPIVV_TRANS(vaaddu_vv, opivv_check)
GEN_OPIVV_TRANS(vasub_vv, opivv_check)
+GEN_OPIVV_TRANS(vasubu_vv, opivv_check)
GEN_OPIVX_TRANS(vaadd_vx, opivx_check)
+GEN_OPIVX_TRANS(vaaddu_vx, opivx_check)
GEN_OPIVX_TRANS(vasub_vx, opivx_check)
-GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check)
+GEN_OPIVX_TRANS(vasubu_vx, opivx_check)
/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
GEN_OPIVV_TRANS(vsmul_vv, opivv_check)
GEN_OPIVX_TRANS(vsmul_vx, opivx_check)
-/* Vector Widening Saturating Scaled Multiply-Add */
-GEN_OPIVV_WIDEN_TRANS(vwsmaccu_vv, opivv_widen_check)
-GEN_OPIVV_WIDEN_TRANS(vwsmacc_vv, opivv_widen_check)
-GEN_OPIVV_WIDEN_TRANS(vwsmaccsu_vv, opivv_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwsmaccu_vx)
-GEN_OPIVX_WIDEN_TRANS(vwsmacc_vx)
-GEN_OPIVX_WIDEN_TRANS(vwsmaccsu_vx)
-GEN_OPIVX_WIDEN_TRANS(vwsmaccus_vx)
-
/* Vector Single-Width Scaling Shift Instructions */
GEN_OPIVV_TRANS(vssrl_vv, opivv_check)
GEN_OPIVV_TRANS(vssra_vv, opivv_check)
GEN_OPIVX_TRANS(vssrl_vx, opivx_check)
GEN_OPIVX_TRANS(vssra_vx, opivx_check)
-GEN_OPIVI_TRANS(vssrl_vi, 1, vssrl_vx, opivx_check)
-GEN_OPIVI_TRANS(vssra_vi, 0, vssra_vx, opivx_check)
+GEN_OPIVI_TRANS(vssrl_vi, IMM_TRUNC_SEW, vssrl_vx, opivx_check)
+GEN_OPIVI_TRANS(vssra_vi, IMM_TRUNC_SEW, vssra_vx, opivx_check)
/* Vector Narrowing Fixed-Point Clip Instructions */
-GEN_OPIVV_NARROW_TRANS(vnclipu_vv)
-GEN_OPIVV_NARROW_TRANS(vnclip_vv)
-GEN_OPIVX_NARROW_TRANS(vnclipu_vx)
-GEN_OPIVX_NARROW_TRANS(vnclip_vx)
-GEN_OPIVI_NARROW_TRANS(vnclipu_vi, 1, vnclipu_vx)
-GEN_OPIVI_NARROW_TRANS(vnclip_vi, 1, vnclip_vx)
+GEN_OPIWV_NARROW_TRANS(vnclipu_wv)
+GEN_OPIWV_NARROW_TRANS(vnclip_wv)
+GEN_OPIWX_NARROW_TRANS(vnclipu_wx)
+GEN_OPIWX_NARROW_TRANS(vnclip_wx)
+GEN_OPIWI_NARROW_TRANS(vnclipu_wi, IMM_ZX, vnclipu_wx)
+GEN_OPIWI_NARROW_TRANS(vnclip_wi, IMM_ZX, vnclip_wx)
/*
*** Vector Float Point Arithmetic Instructions
*/
+
+/*
+ * As RVF-only CPUs always have values NaN-boxed to 64 bits,
+ * RVF and RVD can be treated equally.
+ * We don't have to deal with the case of SEW > FLEN.
+ *
+ * If SEW < FLEN, check whether the input fp register is a valid
+ * NaN-boxed value, in which case the least-significant SEW bits
+ * of the f register are used, else the canonical NaN value is used.
+ */
+static void do_nanbox(DisasContext *s, TCGv_i64 out, TCGv_i64 in)
+{
+ switch (s->sew) {
+ case 1:
+ gen_check_nanbox_h(out, in);
+ break;
+ case 2:
+ gen_check_nanbox_s(out, in);
+ break;
+ case 3:
+ tcg_gen_mov_i64(out, in);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
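/*
 * A minimal host-side sketch of the check generated by
 * gen_check_nanbox_s() above, assuming the standard RISC-V NaN-boxing
 * convention: a 32-bit value held in a 64-bit f register is valid only if
 * its upper 32 bits are all ones; otherwise the (boxed) canonical NaN is
 * substituted.  This models the intent, not the actual TCG code path.
 */
#include <stdint.h>

static inline uint64_t check_nanbox_s_sketch(uint64_t freg)
{
    const uint64_t box = 0xffffffff00000000ull;
    const uint64_t canonical_nan = box | 0x7fc00000ull;

    return (freg & box) == box ? freg : canonical_nan;
}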
+
/* Vector Single-Width Floating-Point Add/Subtract Instructions */
/*
@@ -1773,12 +2140,10 @@ GEN_OPIVI_NARROW_TRANS(vnclip_vi, 1, vnclip_vx)
*/
static bool opfvv_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, false) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs2, false) &&
- vext_check_reg(s, a->rs1, false) &&
- (s->sew != 0));
+ return require_rvv(s) &&
+ require_rvf(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm);
}
/* OPFVV without GVEC IR */
@@ -1793,10 +2158,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
gen_helper_##NAME##_d, \
}; \
TCGLabel *over = gen_new_label(); \
- gen_set_rm(s, 7); \
+ gen_set_rm(s, RISCV_FRM_DYN); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
\
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
@@ -1804,6 +2168,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
vreg_ofs(s, a->rs2), cpu_env, \
s->vlen / 8, s->vlen / 8, data, \
fns[s->sew - 1]); \
+ mark_vs_dirty(s); \
gen_set_label(over); \
return true; \
} \
@@ -1820,6 +2185,7 @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
{
TCGv_ptr dest, src2, mask;
TCGv_i32 desc;
+ TCGv_i64 t1;
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
@@ -1833,26 +2199,31 @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2));
tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
- fn(dest, mask, cpu_fpr[rs1], src2, cpu_env, desc);
+ /* NaN-box f[rs1] */
+ t1 = tcg_temp_new_i64();
+ do_nanbox(s, t1, cpu_fpr[rs1]);
+
+ fn(dest, mask, t1, src2, cpu_env, desc);
tcg_temp_free_ptr(dest);
tcg_temp_free_ptr(mask);
tcg_temp_free_ptr(src2);
+ tcg_temp_free_i64(t1);
+ mark_vs_dirty(s);
gen_set_label(over);
return true;
}
-static bool opfvf_check(DisasContext *s, arg_rmrr *a)
-{
/*
* If the current SEW does not correspond to a supported IEEE floating-point
* type, an illegal instruction exception is raised
*/
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, false) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs2, false) &&
- (s->sew != 0));
+static bool opfvf_check(DisasContext *s, arg_rmrr *a)
+{
+ return require_rvv(s) &&
+ require_rvf(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_ss(s, a->rd, a->rs2, a->vm);
}
/* OPFVF without GVEC IR */
@@ -1866,8 +2237,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
gen_helper_##NAME##_w, \
gen_helper_##NAME##_d, \
}; \
- gen_set_rm(s, 7); \
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
+ gen_set_rm(s, RISCV_FRM_DYN); \
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
return opfvf_trans(a->rd, a->rs1, a->rs2, data, \
@@ -1883,16 +2253,10 @@ GEN_OPFVF_TRANS(vfrsub_vf, opfvf_check)
/* Vector Widening Floating-Point Add/Subtract Instructions */
static bool opfvv_widen_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, true) &&
- vext_check_reg(s, a->rd, true) &&
- vext_check_reg(s, a->rs2, false) &&
- vext_check_reg(s, a->rs1, false) &&
- vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2,
- 1 << s->lmul) &&
- vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1,
- 1 << s->lmul) &&
- (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0));
+ return require_rvv(s) &&
+ require_rvf(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm);
}
/* OPFVV with WIDEN */
@@ -1905,10 +2269,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
gen_helper_##NAME##_h, gen_helper_##NAME##_w, \
}; \
TCGLabel *over = gen_new_label(); \
- gen_set_rm(s, 7); \
+ gen_set_rm(s, RISCV_FRM_DYN); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
\
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
@@ -1916,6 +2279,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
vreg_ofs(s, a->rs2), cpu_env, \
s->vlen / 8, s->vlen / 8, data, \
fns[s->sew - 1]); \
+ mark_vs_dirty(s); \
gen_set_label(over); \
return true; \
} \
@@ -1927,13 +2291,10 @@ GEN_OPFVV_WIDEN_TRANS(vfwsub_vv, opfvv_widen_check)
static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, true) &&
- vext_check_reg(s, a->rd, true) &&
- vext_check_reg(s, a->rs2, false) &&
- vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2,
- 1 << s->lmul) &&
- (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0));
+ return require_rvv(s) &&
+ require_rvf(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_ds(s, a->rd, a->rs2, a->vm);
}
/* OPFVF with WIDEN */
@@ -1945,8 +2306,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
static gen_helper_opfvf *const fns[2] = { \
gen_helper_##NAME##_h, gen_helper_##NAME##_w, \
}; \
- gen_set_rm(s, 7); \
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
+ gen_set_rm(s, RISCV_FRM_DYN); \
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
return opfvf_trans(a->rd, a->rs1, a->rs2, data, \
@@ -1960,14 +2320,10 @@ GEN_OPFVF_WIDEN_TRANS(vfwsub_vf)
static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, true) &&
- vext_check_reg(s, a->rd, true) &&
- vext_check_reg(s, a->rs2, true) &&
- vext_check_reg(s, a->rs1, false) &&
- vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1,
- 1 << s->lmul) &&
- (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0));
+ return require_rvv(s) &&
+ require_rvf(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_dds(s, a->rd, a->rs1, a->rs2, a->vm);
}
/* WIDEN OPFVV with WIDEN */
@@ -1980,10 +2336,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
gen_helper_##NAME##_h, gen_helper_##NAME##_w, \
}; \
TCGLabel *over = gen_new_label(); \
- gen_set_rm(s, 7); \
+ gen_set_rm(s, RISCV_FRM_DYN); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
\
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
@@ -1991,6 +2346,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
vreg_ofs(s, a->rs2), cpu_env, \
s->vlen / 8, s->vlen / 8, data, \
fns[s->sew - 1]); \
+ mark_vs_dirty(s); \
gen_set_label(over); \
return true; \
} \
@@ -2002,11 +2358,10 @@ GEN_OPFWV_WIDEN_TRANS(vfwsub_wv)
static bool opfwf_widen_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, true) &&
- vext_check_reg(s, a->rd, true) &&
- vext_check_reg(s, a->rs2, true) &&
- (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0));
+ return require_rvv(s) &&
+ require_rvf(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_dd(s, a->rd, a->rs2, a->vm);
}
/* WIDEN OPFVF with WIDEN */
@@ -2018,8 +2373,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
static gen_helper_opfvf *const fns[2] = { \
gen_helper_##NAME##_h, gen_helper_##NAME##_w, \
}; \
- gen_set_rm(s, 7); \
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
+ gen_set_rm(s, RISCV_FRM_DYN); \
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
return opfvf_trans(a->rd, a->rs1, a->rs2, data, \
@@ -2078,41 +2432,54 @@ GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf)
*/
static bool opfv_check(DisasContext *s, arg_rmr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, false) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs2, false) &&
- (s->sew != 0));
+ return require_rvv(s) &&
+ require_rvf(s) &&
+ vext_check_isa_ill(s) &&
+ /* OPFV instructions ignore vs1 check */
+ vext_check_ss(s, a->rd, a->rs2, a->vm);
}
-#define GEN_OPFV_TRANS(NAME, CHECK) \
-static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
-{ \
- if (CHECK(s, a)) { \
- uint32_t data = 0; \
- static gen_helper_gvec_3_ptr * const fns[3] = { \
- gen_helper_##NAME##_h, \
- gen_helper_##NAME##_w, \
- gen_helper_##NAME##_d, \
- }; \
- TCGLabel *over = gen_new_label(); \
- gen_set_rm(s, 7); \
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
- \
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
- data = FIELD_DP32(data, VDATA, VM, a->vm); \
- data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
- tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
- vreg_ofs(s, a->rs2), cpu_env, \
- s->vlen / 8, s->vlen / 8, data, \
- fns[s->sew - 1]); \
- gen_set_label(over); \
- return true; \
- } \
- return false; \
+static bool do_opfv(DisasContext *s, arg_rmr *a,
+ gen_helper_gvec_3_ptr *fn,
+ bool (*checkfn)(DisasContext *, arg_rmr *),
+ int rm)
+{
+ if (checkfn(s, a)) {
+ if (rm != RISCV_FRM_DYN) {
+ gen_set_rm(s, RISCV_FRM_DYN);
+ }
+
+ uint32_t data = 0;
+ TCGLabel *over = gen_new_label();
+ gen_set_rm(s, rm);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
+ vreg_ofs(s, a->rs2), cpu_env,
+ s->vlen / 8, s->vlen / 8, data, fn);
+ mark_vs_dirty(s);
+ gen_set_label(over);
+ return true;
+ }
+ return false;
}
-GEN_OPFV_TRANS(vfsqrt_v, opfv_check)
+#define GEN_OPFV_TRANS(NAME, CHECK, FRM) \
+static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
+{ \
+ static gen_helper_gvec_3_ptr * const fns[3] = { \
+ gen_helper_##NAME##_h, \
+ gen_helper_##NAME##_w, \
+ gen_helper_##NAME##_d \
+ }; \
+ return do_opfv(s, a, fns[s->sew - 1], CHECK, FRM); \
+}
+
+GEN_OPFV_TRANS(vfsqrt_v, opfv_check, RISCV_FRM_DYN)
+GEN_OPFV_TRANS(vfrsqrt7_v, opfv_check, RISCV_FRM_DYN)
+GEN_OPFV_TRANS(vfrec7_v, opfv_check, RISCV_FRM_DYN)
/* Vector Floating-Point MIN/MAX Instructions */
GEN_OPFVV_TRANS(vfmin_vv, opfvv_check)
@@ -2131,28 +2498,23 @@ GEN_OPFVF_TRANS(vfsgnjx_vf, opfvf_check)
/* Vector Floating-Point Compare Instructions */
static bool opfvv_cmp_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rs2, false) &&
- vext_check_reg(s, a->rs1, false) &&
- (s->sew != 0) &&
- ((vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) &&
- vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)) ||
- (s->lmul == 0)));
+ return require_rvv(s) &&
+ require_rvf(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_mss(s, a->rd, a->rs1, a->rs2);
}
GEN_OPFVV_TRANS(vmfeq_vv, opfvv_cmp_check)
GEN_OPFVV_TRANS(vmfne_vv, opfvv_cmp_check)
GEN_OPFVV_TRANS(vmflt_vv, opfvv_cmp_check)
GEN_OPFVV_TRANS(vmfle_vv, opfvv_cmp_check)
-GEN_OPFVV_TRANS(vmford_vv, opfvv_cmp_check)
static bool opfvf_cmp_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rs2, false) &&
- (s->sew != 0) &&
- (vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul) ||
- (s->lmul == 0)));
+ return require_rvv(s) &&
+ require_rvf(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_ms(s, a->rd, a->rs2);
}
GEN_OPFVF_TRANS(vmfeq_vf, opfvf_cmp_check)
@@ -2161,23 +2523,31 @@ GEN_OPFVF_TRANS(vmflt_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmfle_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmfgt_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmfge_vf, opfvf_cmp_check)
-GEN_OPFVF_TRANS(vmford_vf, opfvf_cmp_check)
/* Vector Floating-Point Classify Instruction */
-GEN_OPFV_TRANS(vfclass_v, opfv_check)
+GEN_OPFV_TRANS(vfclass_v, opfv_check, RISCV_FRM_DYN)
/* Vector Floating-Point Merge Instruction */
GEN_OPFVF_TRANS(vfmerge_vfm, opfvf_check)
static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
{
- if (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rd, false) &&
- (s->sew != 0)) {
+ if (require_rvv(s) &&
+ require_rvf(s) &&
+ vext_check_isa_ill(s) &&
+ require_align(a->rd, s->lmul)) {
+ gen_set_rm(s, RISCV_FRM_DYN);
+
+ TCGv_i64 t1;
if (s->vl_eq_vlmax) {
+ t1 = tcg_temp_new_i64();
+ /* NaN-box f[rs1] */
+ do_nanbox(s, t1, cpu_fpr[a->rs1]);
+
tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd),
- MAXSZ(s), MAXSZ(s), cpu_fpr[a->rs1]);
+ MAXSZ(s), MAXSZ(s), t1);
+ mark_vs_dirty(s);
} else {
TCGv_ptr dest;
TCGv_i32 desc;
@@ -2190,24 +2560,45 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ t1 = tcg_temp_new_i64();
+ /* NaN-box f[rs1] */
+ do_nanbox(s, t1, cpu_fpr[a->rs1]);
+
dest = tcg_temp_new_ptr();
desc = tcg_constant_i32(simd_desc(s->vlen / 8, s->vlen / 8, data));
tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd));
- fns[s->sew - 1](dest, cpu_fpr[a->rs1], cpu_env, desc);
+
+ fns[s->sew - 1](dest, t1, cpu_env, desc);
tcg_temp_free_ptr(dest);
+ mark_vs_dirty(s);
gen_set_label(over);
}
+ tcg_temp_free_i64(t1);
return true;
}
return false;
}
/* Single-Width Floating-Point/Integer Type-Convert Instructions */
-GEN_OPFV_TRANS(vfcvt_xu_f_v, opfv_check)
-GEN_OPFV_TRANS(vfcvt_x_f_v, opfv_check)
-GEN_OPFV_TRANS(vfcvt_f_xu_v, opfv_check)
-GEN_OPFV_TRANS(vfcvt_f_x_v, opfv_check)
+#define GEN_OPFV_CVT_TRANS(NAME, HELPER, FRM) \
+static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
+{ \
+ static gen_helper_gvec_3_ptr * const fns[3] = { \
+ gen_helper_##HELPER##_h, \
+ gen_helper_##HELPER##_w, \
+ gen_helper_##HELPER##_d \
+ }; \
+ return do_opfv(s, a, fns[s->sew - 1], opfv_check, FRM); \
+}
+
+GEN_OPFV_CVT_TRANS(vfcvt_xu_f_v, vfcvt_xu_f_v, RISCV_FRM_DYN)
+GEN_OPFV_CVT_TRANS(vfcvt_x_f_v, vfcvt_x_f_v, RISCV_FRM_DYN)
+GEN_OPFV_CVT_TRANS(vfcvt_f_xu_v, vfcvt_f_xu_v, RISCV_FRM_DYN)
+GEN_OPFV_CVT_TRANS(vfcvt_f_x_v, vfcvt_f_x_v, RISCV_FRM_DYN)
+/* Reuse the helper functions from vfcvt.xu.f.v and vfcvt.x.f.v */
+GEN_OPFV_CVT_TRANS(vfcvt_rtz_xu_f_v, vfcvt_xu_f_v, RISCV_FRM_RTZ)
+GEN_OPFV_CVT_TRANS(vfcvt_rtz_x_f_v, vfcvt_x_f_v, RISCV_FRM_RTZ)
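
The vfcvt.rtz.* encodings above deliberately reuse the vfcvt.x[u].f.v helpers; the only difference is the rounding mode installed before the call (RISCV_FRM_RTZ instead of the dynamic frm). A minimal standalone sketch of that sharing idea, not QEMU's softfloat path; fcvt_w_s() and the FRM_* names are illustrative only (compile with -lm; the RNE case assumes the default FE_TONEAREST environment):

#include <stdint.h>
#include <stdio.h>
#include <math.h>

enum { FRM_RNE = 0, FRM_RTZ = 1 };          /* subset of the frm encodings */

/* One shared conversion "helper"; only the rounding mode differs. */
static int32_t fcvt_w_s(float f, int frm)
{
    switch (frm) {
    case FRM_RTZ:
        return (int32_t)f;                  /* C's float-to-int cast truncates */
    case FRM_RNE:
    default:
        return (int32_t)nearbyintf(f);      /* honours FE_TONEAREST by default */
    }
}

int main(void)
{
    float x = 1.7f;
    /* the "dynamic" caller and the "rtz" caller share fcvt_w_s() */
    printf("dyn(RNE)=%d rtz=%d\n", fcvt_w_s(x, FRM_RNE), fcvt_w_s(x, FRM_RTZ));
    return 0;                               /* prints: dyn(RNE)=2 rtz=1 */
}
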
/* Widening Floating-Point/Integer Type-Convert Instructions */
@@ -2217,46 +2608,87 @@ GEN_OPFV_TRANS(vfcvt_f_x_v, opfv_check)
*/
static bool opfv_widen_check(DisasContext *s, arg_rmr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, true) &&
- vext_check_reg(s, a->rd, true) &&
- vext_check_reg(s, a->rs2, false) &&
- vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2,
- 1 << s->lmul) &&
- (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0));
+ return require_rvv(s) &&
+ require_scale_rvf(s) &&
+ (s->sew != MO_8) &&
+ vext_check_isa_ill(s) &&
+ vext_check_ds(s, a->rd, a->rs2, a->vm);
}
-#define GEN_OPFV_WIDEN_TRANS(NAME) \
+#define GEN_OPFV_WIDEN_TRANS(NAME, HELPER, FRM) \
static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
{ \
if (opfv_widen_check(s, a)) { \
+ if (FRM != RISCV_FRM_DYN) { \
+ gen_set_rm(s, RISCV_FRM_DYN); \
+ } \
+ \
uint32_t data = 0; \
static gen_helper_gvec_3_ptr * const fns[2] = { \
- gen_helper_##NAME##_h, \
- gen_helper_##NAME##_w, \
+ gen_helper_##HELPER##_h, \
+ gen_helper_##HELPER##_w, \
}; \
TCGLabel *over = gen_new_label(); \
- gen_set_rm(s, 7); \
+ gen_set_rm(s, FRM); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
\
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs2), cpu_env, \
s->vlen / 8, s->vlen / 8, data, \
fns[s->sew - 1]); \
+ mark_vs_dirty(s); \
+ gen_set_label(over); \
+ return true; \
+ } \
+ return false; \
+}
+
+GEN_OPFV_WIDEN_TRANS(vfwcvt_xu_f_v, vfwcvt_xu_f_v, RISCV_FRM_DYN)
+GEN_OPFV_WIDEN_TRANS(vfwcvt_x_f_v, vfwcvt_x_f_v, RISCV_FRM_DYN)
+GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v, vfwcvt_f_f_v, RISCV_FRM_DYN)
+/* Reuse the helper functions from vfwcvt.xu.f.v and vfwcvt.x.f.v */
+GEN_OPFV_WIDEN_TRANS(vfwcvt_rtz_xu_f_v, vfwcvt_xu_f_v, RISCV_FRM_RTZ)
+GEN_OPFV_WIDEN_TRANS(vfwcvt_rtz_x_f_v, vfwcvt_x_f_v, RISCV_FRM_RTZ)
+
+static bool opfxv_widen_check(DisasContext *s, arg_rmr *a)
+{
+ return require_rvv(s) &&
+ require_scale_rvf(s) &&
+ vext_check_isa_ill(s) &&
+ /* OPFV widening instructions ignore vs1 check */
+ vext_check_ds(s, a->rd, a->rs2, a->vm);
+}
+
+#define GEN_OPFXV_WIDEN_TRANS(NAME) \
+static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
+{ \
+ if (opfxv_widen_check(s, a)) { \
+ uint32_t data = 0; \
+ static gen_helper_gvec_3_ptr * const fns[3] = { \
+ gen_helper_##NAME##_b, \
+ gen_helper_##NAME##_h, \
+ gen_helper_##NAME##_w, \
+ }; \
+ TCGLabel *over = gen_new_label(); \
+ gen_set_rm(s, RISCV_FRM_DYN); \
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+ \
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
+ vreg_ofs(s, a->rs2), cpu_env, \
+ s->vlen / 8, s->vlen / 8, data, \
+ fns[s->sew]); \
+ mark_vs_dirty(s); \
gen_set_label(over); \
return true; \
} \
return false; \
}
-GEN_OPFV_WIDEN_TRANS(vfwcvt_xu_f_v)
-GEN_OPFV_WIDEN_TRANS(vfwcvt_x_f_v)
-GEN_OPFV_WIDEN_TRANS(vfwcvt_f_xu_v)
-GEN_OPFV_WIDEN_TRANS(vfwcvt_f_x_v)
-GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v)
+GEN_OPFXV_WIDEN_TRANS(vfwcvt_f_xu_v)
+GEN_OPFXV_WIDEN_TRANS(vfwcvt_f_x_v)
/* Narrowing Floating-Point/Integer Type-Convert Instructions */
@@ -2266,46 +2698,94 @@ GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v)
*/
static bool opfv_narrow_check(DisasContext *s, arg_rmr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, false) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs2, true) &&
- vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2,
- 2 << s->lmul) &&
- (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0));
+ return require_rvv(s) &&
+ require_rvf(s) &&
+ (s->sew != MO_64) &&
+ vext_check_isa_ill(s) &&
+ /* OPFV narrowing instructions ignore vs1 check */
+ vext_check_sd(s, a->rd, a->rs2, a->vm);
}
-#define GEN_OPFV_NARROW_TRANS(NAME) \
+#define GEN_OPFV_NARROW_TRANS(NAME, HELPER, FRM) \
static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
{ \
if (opfv_narrow_check(s, a)) { \
+ if (FRM != RISCV_FRM_DYN) { \
+ gen_set_rm(s, RISCV_FRM_DYN); \
+ } \
+ \
uint32_t data = 0; \
static gen_helper_gvec_3_ptr * const fns[2] = { \
- gen_helper_##NAME##_h, \
- gen_helper_##NAME##_w, \
+ gen_helper_##HELPER##_h, \
+ gen_helper_##HELPER##_w, \
}; \
TCGLabel *over = gen_new_label(); \
- gen_set_rm(s, 7); \
+ gen_set_rm(s, FRM); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
\
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs2), cpu_env, \
s->vlen / 8, s->vlen / 8, data, \
fns[s->sew - 1]); \
+ mark_vs_dirty(s); \
gen_set_label(over); \
return true; \
} \
return false; \
}
-GEN_OPFV_NARROW_TRANS(vfncvt_xu_f_v)
-GEN_OPFV_NARROW_TRANS(vfncvt_x_f_v)
-GEN_OPFV_NARROW_TRANS(vfncvt_f_xu_v)
-GEN_OPFV_NARROW_TRANS(vfncvt_f_x_v)
-GEN_OPFV_NARROW_TRANS(vfncvt_f_f_v)
+GEN_OPFV_NARROW_TRANS(vfncvt_f_xu_w, vfncvt_f_xu_w, RISCV_FRM_DYN)
+GEN_OPFV_NARROW_TRANS(vfncvt_f_x_w, vfncvt_f_x_w, RISCV_FRM_DYN)
+GEN_OPFV_NARROW_TRANS(vfncvt_f_f_w, vfncvt_f_f_w, RISCV_FRM_DYN)
+/* Reuse the helper function from vfncvt.f.f.w */
+GEN_OPFV_NARROW_TRANS(vfncvt_rod_f_f_w, vfncvt_f_f_w, RISCV_FRM_ROD)
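
vfncvt.rod.f.f.w likewise reuses the vfncvt.f.f.w helper and only swaps in RISCV_FRM_ROD, the round-to-odd mode added to the frm enum later in this diff. Round to odd ORs any discarded precision into the result's least-significant bit so that a later rounding step cannot double-round; a hedged integer sketch of that jamming step (round_to_odd() here is illustrative, not a QEMU function):

#include <stdint.h>
#include <stdio.h>

/*
 * Drop the low 'k' bits of 'x'; if any dropped bit was set, force the
 * result's least-significant bit to 1 ("round to odd" / jamming), so a
 * later rounding step cannot double-round.
 */
static uint32_t round_to_odd(uint64_t x, unsigned k)
{
    uint64_t kept    = x >> k;
    uint64_t dropped = x & ((UINT64_C(1) << k) - 1);
    return (uint32_t)(kept | (dropped != 0));
}

int main(void)
{
    /* 0x123456789a >> 16 = 0x123456, dropped 0x789a != 0 -> 0x123457 */
    printf("0x%x\n", round_to_odd(UINT64_C(0x123456789a), 16));
    return 0;
}
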
+
+static bool opxfv_narrow_check(DisasContext *s, arg_rmr *a)
+{
+ return require_rvv(s) &&
+ require_scale_rvf(s) &&
+ vext_check_isa_ill(s) &&
+ /* OPFV narrowing instructions ignore vs1 check */
+ vext_check_sd(s, a->rd, a->rs2, a->vm);
+}
+
+#define GEN_OPXFV_NARROW_TRANS(NAME, HELPER, FRM) \
+static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
+{ \
+ if (opxfv_narrow_check(s, a)) { \
+ if (FRM != RISCV_FRM_DYN) { \
+ gen_set_rm(s, RISCV_FRM_DYN); \
+ } \
+ \
+ uint32_t data = 0; \
+ static gen_helper_gvec_3_ptr * const fns[3] = { \
+ gen_helper_##HELPER##_b, \
+ gen_helper_##HELPER##_h, \
+ gen_helper_##HELPER##_w, \
+ }; \
+ TCGLabel *over = gen_new_label(); \
+ gen_set_rm(s, FRM); \
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+ \
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
+ vreg_ofs(s, a->rs2), cpu_env, \
+ s->vlen / 8, s->vlen / 8, data, \
+ fns[s->sew]); \
+ mark_vs_dirty(s); \
+ gen_set_label(over); \
+ return true; \
+ } \
+ return false; \
+}
+
+GEN_OPXFV_NARROW_TRANS(vfncvt_xu_f_w, vfncvt_xu_f_w, RISCV_FRM_DYN)
+GEN_OPXFV_NARROW_TRANS(vfncvt_x_f_w, vfncvt_x_f_w, RISCV_FRM_DYN)
+/* Reuse the helper functions from vfncvt.xu.f.w and vfncvt.x.f.w */
+GEN_OPXFV_NARROW_TRANS(vfncvt_rtz_xu_f_w, vfncvt_xu_f_w, RISCV_FRM_RTZ)
+GEN_OPXFV_NARROW_TRANS(vfncvt_rtz_x_f_w, vfncvt_x_f_w, RISCV_FRM_RTZ)
/*
*** Vector Reduction Operations
@@ -2313,7 +2793,9 @@ GEN_OPFV_NARROW_TRANS(vfncvt_f_f_v)
/* Vector Single-Width Integer Reduction Instructions */
static bool reduction_check(DisasContext *s, arg_rmrr *a)
{
- return vext_check_isa_ill(s) && vext_check_reg(s, a->rs2, false);
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_reduction(s, a->rs2);
}
GEN_OPIVV_TRANS(vredsum_vs, reduction_check)
@@ -2326,16 +2808,35 @@ GEN_OPIVV_TRANS(vredor_vs, reduction_check)
GEN_OPIVV_TRANS(vredxor_vs, reduction_check)
/* Vector Widening Integer Reduction Instructions */
-GEN_OPIVV_WIDEN_TRANS(vwredsum_vs, reduction_check)
-GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_check)
+static bool reduction_widen_check(DisasContext *s, arg_rmrr *a)
+{
+ return reduction_check(s, a) && (s->sew < MO_64) &&
+ ((s->sew + 1) <= (s->elen >> 4));
+}
+
+GEN_OPIVV_WIDEN_TRANS(vwredsum_vs, reduction_widen_check)
+GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_widen_check)
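
reduction_widen_check() adds the (s->sew + 1) <= (s->elen >> 4) term on top of the SEW < 64 requirement so that the doubled element width of a widening reduction still fits in ELEN. A small sketch comparing that shortcut against the direct 2*SEW <= ELEN condition, assuming the usual ELEN values of 16, 32 and 64 (check_as_written/check_direct are illustrative names):

#include <stdio.h>
#include <stdbool.h>

/* sew is log2(element bytes): 0 = 8-bit ... 3 = 64-bit (MO_8..MO_64) */
static bool check_as_written(int sew, int elen)
{
    return (sew < 3) && ((sew + 1) <= (elen >> 4));
}

static bool check_direct(int sew, int elen)
{
    return (sew < 3) && (2 * (8 << sew) <= elen);   /* doubled SEW fits ELEN */
}

int main(void)
{
    for (int elen = 16; elen <= 64; elen <<= 1) {
        for (int sew = 0; sew <= 3; sew++) {
            printf("elen=%2d sew=%d(%2d-bit): written=%d direct=%d\n",
                   elen, sew, 8 << sew,
                   check_as_written(sew, elen), check_direct(sew, elen));
        }
    }
    return 0;   /* the two columns agree for ELEN in {16, 32, 64} */
}
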
/* Vector Single-Width Floating-Point Reduction Instructions */
-GEN_OPFVV_TRANS(vfredsum_vs, reduction_check)
-GEN_OPFVV_TRANS(vfredmax_vs, reduction_check)
-GEN_OPFVV_TRANS(vfredmin_vs, reduction_check)
+static bool freduction_check(DisasContext *s, arg_rmrr *a)
+{
+ return reduction_check(s, a) &&
+ require_rvf(s);
+}
+
+GEN_OPFVV_TRANS(vfredsum_vs, freduction_check)
+GEN_OPFVV_TRANS(vfredmax_vs, freduction_check)
+GEN_OPFVV_TRANS(vfredmin_vs, freduction_check)
/* Vector Widening Floating-Point Reduction Instructions */
-GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, reduction_check)
+static bool freduction_widen_check(DisasContext *s, arg_rmrr *a)
+{
+ return reduction_widen_check(s, a) &&
+ require_scale_rvf(s) &&
+ (s->sew != MO_8);
+}
+
+GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, freduction_widen_check)
/*
*** Vector Mask Operations
@@ -2345,18 +2846,19 @@ GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, reduction_check)
#define GEN_MM_TRANS(NAME) \
static bool trans_##NAME(DisasContext *s, arg_r *a) \
{ \
- if (vext_check_isa_ill(s)) { \
+ if (require_rvv(s) && \
+ vext_check_isa_ill(s)) { \
uint32_t data = 0; \
gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \
TCGLabel *over = gen_new_label(); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
\
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs1), \
vreg_ofs(s, a->rs2), cpu_env, \
s->vlen / 8, s->vlen / 8, data, fn); \
+ mark_vs_dirty(s); \
gen_set_label(over); \
return true; \
} \
@@ -2365,22 +2867,23 @@ static bool trans_##NAME(DisasContext *s, arg_r *a) \
GEN_MM_TRANS(vmand_mm)
GEN_MM_TRANS(vmnand_mm)
-GEN_MM_TRANS(vmandnot_mm)
+GEN_MM_TRANS(vmandn_mm)
GEN_MM_TRANS(vmxor_mm)
GEN_MM_TRANS(vmor_mm)
GEN_MM_TRANS(vmnor_mm)
-GEN_MM_TRANS(vmornot_mm)
+GEN_MM_TRANS(vmorn_mm)
GEN_MM_TRANS(vmxnor_mm)
-/* Vector mask population count vmpopc */
-static bool trans_vmpopc_m(DisasContext *s, arg_rmr *a)
+/* Vector count population in mask vcpop */
+static bool trans_vcpop_m(DisasContext *s, arg_rmr *a)
{
- if (vext_check_isa_ill(s)) {
+ if (require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ s->vstart == 0) {
TCGv_ptr src2, mask;
TCGv dst;
TCGv_i32 desc;
uint32_t data = 0;
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
@@ -2392,25 +2895,27 @@ static bool trans_vmpopc_m(DisasContext *s, arg_rmr *a)
tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2));
tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
- gen_helper_vmpopc_m(dst, mask, src2, cpu_env, desc);
+ gen_helper_vcpop_m(dst, mask, src2, cpu_env, desc);
gen_set_gpr(s, a->rd, dst);
tcg_temp_free_ptr(mask);
tcg_temp_free_ptr(src2);
+
return true;
}
return false;
}
/* vmfirst find-first-set mask bit */
-static bool trans_vmfirst_m(DisasContext *s, arg_rmr *a)
+static bool trans_vfirst_m(DisasContext *s, arg_rmr *a)
{
- if (vext_check_isa_ill(s)) {
+ if (require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ s->vstart == 0) {
TCGv_ptr src2, mask;
TCGv dst;
TCGv_i32 desc;
uint32_t data = 0;
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
@@ -2422,7 +2927,7 @@ static bool trans_vmfirst_m(DisasContext *s, arg_rmr *a)
tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2));
tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
- gen_helper_vmfirst_m(dst, mask, src2, cpu_env, desc);
+ gen_helper_vfirst_m(dst, mask, src2, cpu_env, desc);
gen_set_gpr(s, a->rd, dst);
tcg_temp_free_ptr(mask);
@@ -2438,19 +2943,23 @@ static bool trans_vmfirst_m(DisasContext *s, arg_rmr *a)
#define GEN_M_TRANS(NAME) \
static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
{ \
- if (vext_check_isa_ill(s)) { \
+ if (require_rvv(s) && \
+ vext_check_isa_ill(s) && \
+ require_vm(a->vm, a->rd) && \
+ (a->rd != a->rs2) && \
+ (s->vstart == 0)) { \
uint32_t data = 0; \
gen_helper_gvec_3_ptr *fn = gen_helper_##NAME; \
TCGLabel *over = gen_new_label(); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
\
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), \
vreg_ofs(s, 0), vreg_ofs(s, a->rs2), \
cpu_env, s->vlen / 8, s->vlen / 8, \
data, fn); \
+ mark_vs_dirty(s); \
gen_set_label(over); \
return true; \
} \
@@ -2461,18 +2970,25 @@ GEN_M_TRANS(vmsbf_m)
GEN_M_TRANS(vmsif_m)
GEN_M_TRANS(vmsof_m)
-/* Vector Iota Instruction */
+/*
+ * Vector Iota Instruction
+ *
+ * 1. The destination register cannot overlap the source register.
+ * 2. If masked, cannot overlap the mask register ('v0').
+ * 3. An illegal instruction exception is raised if vstart is non-zero.
+ */
static bool trans_viota_m(DisasContext *s, arg_viota_m *a)
{
- if (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, 1) &&
- (a->vm != 0 || a->rd != 0)) {
+ if (require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs2, 1) &&
+ require_vm(a->vm, a->rd) &&
+ require_align(a->rd, s->lmul) &&
+ (s->vstart == 0)) {
uint32_t data = 0;
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
static gen_helper_gvec_3_ptr * const fns[4] = {
@@ -2482,6 +2998,7 @@ static bool trans_viota_m(DisasContext *s, arg_viota_m *a)
tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs2), cpu_env,
s->vlen / 8, s->vlen / 8, data, fns[s->sew]);
+ mark_vs_dirty(s);
gen_set_label(over);
return true;
}
@@ -2491,14 +3008,14 @@ static bool trans_viota_m(DisasContext *s, arg_viota_m *a)
/* Vector Element Index Instruction */
static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
{
- if (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_overlap_mask(s, a->rd, a->vm, false)) {
+ if (require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ require_align(a->rd, s->lmul) &&
+ require_vm(a->vm, a->rd)) {
uint32_t data = 0;
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
static gen_helper_gvec_2_ptr * const fns[4] = {
@@ -2508,6 +3025,7 @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
cpu_env, s->vlen / 8, s->vlen / 8,
data, fns[s->sew]);
+ mark_vs_dirty(s);
gen_set_label(over);
return true;
}
@@ -2518,20 +3036,30 @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
*** Vector Permutation Instructions
*/
-/* Integer Extract Instruction */
-
static void load_element(TCGv_i64 dest, TCGv_ptr base,
- int ofs, int sew)
+ int ofs, int sew, bool sign)
{
switch (sew) {
case MO_8:
- tcg_gen_ld8u_i64(dest, base, ofs);
+ if (!sign) {
+ tcg_gen_ld8u_i64(dest, base, ofs);
+ } else {
+ tcg_gen_ld8s_i64(dest, base, ofs);
+ }
break;
case MO_16:
- tcg_gen_ld16u_i64(dest, base, ofs);
+ if (!sign) {
+ tcg_gen_ld16u_i64(dest, base, ofs);
+ } else {
+ tcg_gen_ld16s_i64(dest, base, ofs);
+ }
break;
case MO_32:
- tcg_gen_ld32u_i64(dest, base, ofs);
+ if (!sign) {
+ tcg_gen_ld32u_i64(dest, base, ofs);
+ } else {
+ tcg_gen_ld32s_i64(dest, base, ofs);
+ }
break;
case MO_64:
tcg_gen_ld_i64(dest, base, ofs);
@@ -2586,7 +3114,7 @@ static void vec_element_loadx(DisasContext *s, TCGv_i64 dest,
/* Perform the load. */
load_element(dest, base,
- vreg_ofs(s, vreg), s->sew);
+ vreg_ofs(s, vreg), s->sew, false);
tcg_temp_free_ptr(base);
tcg_temp_free_i32(ofs);
@@ -2602,30 +3130,9 @@ static void vec_element_loadx(DisasContext *s, TCGv_i64 dest,
}
static void vec_element_loadi(DisasContext *s, TCGv_i64 dest,
- int vreg, int idx)
+ int vreg, int idx, bool sign)
{
- load_element(dest, cpu_env, endian_ofs(s, vreg, idx), s->sew);
-}
-
-static bool trans_vext_x_v(DisasContext *s, arg_r *a)
-{
- TCGv_i64 tmp = tcg_temp_new_i64();
- TCGv dest = dest_gpr(s, a->rd);
-
- if (a->rs1 == 0) {
- /* Special case vmv.x.s rd, vs2. */
- vec_element_loadi(s, tmp, a->rs2, 0);
- } else {
- /* This instruction ignores LMUL and vector register groups */
- int vlmax = s->vlen >> (3 + s->sew);
- vec_element_loadx(s, tmp, a->rs2, cpu_gpr[a->rs1], vlmax);
- }
-
- tcg_gen_trunc_i64_tl(dest, tmp);
- gen_set_gpr(s, a->rd, dest);
-
- tcg_temp_free_i64(tmp);
- return true;
+ load_element(dest, cpu_env, endian_ofs(s, vreg, idx), s->sew, sign);
}
/* Integer Scalar Move Instruction */
@@ -2662,26 +3169,55 @@ static void vec_element_storei(DisasContext *s, int vreg,
store_element(val, cpu_env, endian_ofs(s, vreg, idx), s->sew);
}
+/* vmv.x.s rd, vs2 # x[rd] = vs2[0] */
+static bool trans_vmv_x_s(DisasContext *s, arg_vmv_x_s *a)
+{
+ if (require_rvv(s) &&
+ vext_check_isa_ill(s)) {
+ TCGv_i64 t1;
+ TCGv dest;
+
+ t1 = tcg_temp_new_i64();
+ dest = tcg_temp_new();
+ /*
+ * load vreg and sign-extend to 64 bits,
+ * then truncate to XLEN bits before storing to gpr.
+ */
+ vec_element_loadi(s, t1, a->rs2, 0, true);
+ tcg_gen_trunc_i64_tl(dest, t1);
+ gen_set_gpr(s, a->rd, dest);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free(dest);
+
+ return true;
+ }
+ return false;
+}
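
As the comment above says, the element is first sign-extended to 64 bits and only then truncated to XLEN when written to x[rd]. A tiny sketch of that two-step sequence with illustrative values (SEW = 16, XLEN = 32):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint16_t elem = 0x8001;                 /* vs2[0], SEW = 16 */

    /* 1. sign-extend the element to 64 bits (the signed element load) */
    int64_t wide = (int16_t)elem;           /* 0xffffffffffff8001 */

    /* 2. truncate to XLEN (32 bits here) before writing x[rd] */
    int32_t xreg = (int32_t)wide;

    printf("wide=0x%016llx xreg=0x%08x\n",
           (unsigned long long)(uint64_t)wide, (uint32_t)xreg);
    return 0;                               /* xreg = 0xffff8001 */
}
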
+
/* vmv.s.x vd, rs1 # vd[0] = rs1 */
static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
{
- if (vext_check_isa_ill(s)) {
+ if (require_rvv(s) &&
+ vext_check_isa_ill(s)) {
/* This instruction ignores LMUL and vector register groups */
- int maxsz = s->vlen >> 3;
TCGv_i64 t1;
+ TCGv s1;
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
- tcg_gen_gvec_dup_imm(SEW64, vreg_ofs(s, a->rd), maxsz, maxsz, 0);
- if (a->rs1 == 0) {
- goto done;
- }
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
t1 = tcg_temp_new_i64();
- tcg_gen_extu_tl_i64(t1, cpu_gpr[a->rs1]);
+
+ /*
+ * load gpr and sign-extend to 64 bits,
+ * then truncate to SEW bits when storing to vreg.
+ */
+ s1 = get_gpr(s, a->rs1, EXT_NONE);
+ tcg_gen_ext_tl_i64(t1, s1);
vec_element_storei(s, a->rd, 0, t1);
tcg_temp_free_i64(t1);
- done:
+ mark_vs_dirty(s);
gen_set_label(over);
return true;
}
@@ -2691,14 +3227,21 @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
/* Floating-Point Scalar Move Instructions */
static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s *a)
{
- if (!s->vill && has_ext(s, RVF) &&
- (s->mstatus_fs != 0) && (s->sew != 0)) {
- unsigned int len = 8 << s->sew;
-
- vec_element_loadi(s, cpu_fpr[a->rd], a->rs2, 0);
- if (len < 64) {
- tcg_gen_ori_i64(cpu_fpr[a->rd], cpu_fpr[a->rd],
- MAKE_64BIT_MASK(len, 64 - len));
+ if (require_rvv(s) &&
+ require_rvf(s) &&
+ vext_check_isa_ill(s)) {
+ gen_set_rm(s, RISCV_FRM_DYN);
+
+ unsigned int ofs = (8 << s->sew);
+ unsigned int len = 64 - ofs;
+ TCGv_i64 t_nan;
+
+ vec_element_loadi(s, cpu_fpr[a->rd], a->rs2, 0, false);
+ /* NaN-box f[rd] as necessary for SEW */
+ if (len) {
+ t_nan = tcg_constant_i64(UINT64_MAX);
+ tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rd],
+ t_nan, ofs, len);
}
mark_fs_dirty(s);
@@ -2710,27 +3253,26 @@ static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s *a)
/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2=0) */
static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
{
- if (!s->vill && has_ext(s, RVF) && (s->sew != 0)) {
- TCGv_i64 t1;
- /* The instructions ignore LMUL and vector register group. */
- uint32_t vlmax = s->vlen >> 3;
+ if (require_rvv(s) &&
+ require_rvf(s) &&
+ vext_check_isa_ill(s)) {
+ gen_set_rm(s, RISCV_FRM_DYN);
- /* if vl == 0, skip vector register write back */
+ /* The instructions ignore LMUL and vector register group. */
+ TCGv_i64 t1;
TCGLabel *over = gen_new_label();
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
- /* zeroed all elements */
- tcg_gen_gvec_dup_imm(SEW64, vreg_ofs(s, a->rd), vlmax, vlmax, 0);
+ /* if vl == 0 or vstart >= vl, skip vector register write back */
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
- /* NaN-box f[rs1] as necessary for SEW */
+ /* NaN-box f[rs1] */
t1 = tcg_temp_new_i64();
- if (s->sew == MO_64 && !has_ext(s, RVD)) {
- tcg_gen_ori_i64(t1, cpu_fpr[a->rs1], MAKE_64BIT_MASK(32, 32));
- } else {
- tcg_gen_mov_i64(t1, cpu_fpr[a->rs1]);
- }
+ do_nanbox(s, t1, cpu_fpr[a->rs1]);
+
vec_element_storei(s, a->rd, 0, t1);
tcg_temp_free_i64(t1);
+ mark_vs_dirty(s);
gen_set_label(over);
return true;
}
@@ -2740,41 +3282,82 @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
/* Vector Slide Instructions */
static bool slideup_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, true) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs2, false) &&
- (a->rd != a->rs2));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_slide(s, a->rd, a->rs2, a->vm, true);
}
GEN_OPIVX_TRANS(vslideup_vx, slideup_check)
GEN_OPIVX_TRANS(vslide1up_vx, slideup_check)
-GEN_OPIVI_TRANS(vslideup_vi, 1, vslideup_vx, slideup_check)
+GEN_OPIVI_TRANS(vslideup_vi, IMM_ZX, vslideup_vx, slideup_check)
+
+static bool slidedown_check(DisasContext *s, arg_rmrr *a)
+{
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_slide(s, a->rd, a->rs2, a->vm, false);
+}
+
+GEN_OPIVX_TRANS(vslidedown_vx, slidedown_check)
+GEN_OPIVX_TRANS(vslide1down_vx, slidedown_check)
+GEN_OPIVI_TRANS(vslidedown_vi, IMM_ZX, vslidedown_vx, slidedown_check)
+
+/* Vector Floating-Point Slide Instructions */
+static bool fslideup_check(DisasContext *s, arg_rmrr *a)
+{
+ return slideup_check(s, a) &&
+ require_rvf(s);
+}
+
+static bool fslidedown_check(DisasContext *s, arg_rmrr *a)
+{
+ return slidedown_check(s, a) &&
+ require_rvf(s);
+}
-GEN_OPIVX_TRANS(vslidedown_vx, opivx_check)
-GEN_OPIVX_TRANS(vslide1down_vx, opivx_check)
-GEN_OPIVI_TRANS(vslidedown_vi, 1, vslidedown_vx, opivx_check)
+GEN_OPFVF_TRANS(vfslide1up_vf, fslideup_check)
+GEN_OPFVF_TRANS(vfslide1down_vf, fslidedown_check)
/* Vector Register Gather Instruction */
static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, true) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs1, false) &&
- vext_check_reg(s, a->rs2, false) &&
- (a->rd != a->rs2) && (a->rd != a->rs1));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ require_align(a->rd, s->lmul) &&
+ require_align(a->rs1, s->lmul) &&
+ require_align(a->rs2, s->lmul) &&
+ (a->rd != a->rs2 && a->rd != a->rs1) &&
+ require_vm(a->vm, a->rd);
+}
+
+static bool vrgatherei16_vv_check(DisasContext *s, arg_rmrr *a)
+{
+ int8_t emul = MO_16 - s->sew + s->lmul;
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ (emul >= -3 && emul <= 3) &&
+ require_align(a->rd, s->lmul) &&
+ require_align(a->rs1, emul) &&
+ require_align(a->rs2, s->lmul) &&
+ (a->rd != a->rs2 && a->rd != a->rs1) &&
+ !is_overlapped(a->rd, 1 << MAX(s->lmul, 0),
+ a->rs1, 1 << MAX(emul, 0)) &&
+ !is_overlapped(a->rd, 1 << MAX(s->lmul, 0),
+ a->rs2, 1 << MAX(s->lmul, 0)) &&
+ require_vm(a->vm, a->rd);
}
GEN_OPIVV_TRANS(vrgather_vv, vrgather_vv_check)
+GEN_OPIVV_TRANS(vrgatherei16_vv, vrgatherei16_vv_check)
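
For vrgatherei16.vv the index operand always has EEW = 16, so its register-group multiplier is EMUL = (16 / SEW) * LMUL; in the log2 encoding used here that is the emul = MO_16 - s->sew + s->lmul computed above, which must stay in the legal [-3, 3] range. A worked sketch of that arithmetic (the struct and variable names are illustrative):

#include <stdio.h>

int main(void)
{
    int mo_16 = 1;                          /* log2(2 bytes), the index EEW */
    struct { int sew, lmul; } ex[] = {
        { 2,  0 },  /* SEW=32, LMUL=1   -> EMUL = 16/32 * 1   = 1/2  */
        { 0,  3 },  /* SEW=8,  LMUL=8   -> EMUL = 16/8  * 8   = 16   */
        { 3, -2 },  /* SEW=64, LMUL=1/4 -> EMUL = 16/64 * 1/4 = 1/16 */
    };

    for (unsigned i = 0; i < sizeof(ex) / sizeof(ex[0]); i++) {
        int emul = mo_16 - ex[i].sew + ex[i].lmul;   /* log2(EMUL) */
        printf("sew=%d lmul=%+d -> emul=%+d (%s)\n",
               ex[i].sew, ex[i].lmul, emul,
               (emul >= -3 && emul <= 3) ? "ok" : "reserved");
    }
    return 0;
}
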
static bool vrgather_vx_check(DisasContext *s, arg_rmrr *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_overlap_mask(s, a->rd, a->vm, true) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs2, false) &&
- (a->rd != a->rs2));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ require_align(a->rd, s->lmul) &&
+ require_align(a->rs2, s->lmul) &&
+ (a->rd != a->rs2) &&
+ require_vm(a->vm, a->rd);
}
/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
@@ -2785,11 +3368,12 @@ static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a)
}
if (a->vm && s->vl_eq_vlmax) {
- int vlmax = s->vlen / s->mlen;
+ int scale = s->lmul - (s->sew + 3);
+ int vlmax = scale < 0 ? s->vlen >> -scale : s->vlen << scale;
TCGv_i64 dest = tcg_temp_new_i64();
if (a->rs1 == 0) {
- vec_element_loadi(s, dest, a->rs2, 0);
+ vec_element_loadi(s, dest, a->rs2, 0, false);
} else {
vec_element_loadx(s, dest, a->rs2, cpu_gpr[a->rs1], vlmax);
}
@@ -2797,6 +3381,7 @@ static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a)
tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd),
MAXSZ(s), MAXSZ(s), dest);
tcg_temp_free_i64(dest);
+ mark_vs_dirty(s);
} else {
static gen_helper_opivx * const fns[4] = {
gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h,
@@ -2815,32 +3400,43 @@ static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a)
}
if (a->vm && s->vl_eq_vlmax) {
- if (a->rs1 >= s->vlen / s->mlen) {
- tcg_gen_gvec_dup_imm(SEW64, vreg_ofs(s, a->rd),
+ int scale = s->lmul - (s->sew + 3);
+ int vlmax = scale < 0 ? s->vlen >> -scale : s->vlen << scale;
+ if (a->rs1 >= vlmax) {
+ tcg_gen_gvec_dup_imm(MO_64, vreg_ofs(s, a->rd),
MAXSZ(s), MAXSZ(s), 0);
} else {
tcg_gen_gvec_dup_mem(s->sew, vreg_ofs(s, a->rd),
endian_ofs(s, a->rs2, a->rs1),
MAXSZ(s), MAXSZ(s));
}
+ mark_vs_dirty(s);
} else {
static gen_helper_opivx * const fns[4] = {
gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h,
gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d
};
- return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s, 1);
+ return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew],
+ s, IMM_ZX);
}
return true;
}
-/* Vector Compress Instruction */
+/*
+ * Vector Compress Instruction
+ *
+ * The destination vector register group cannot overlap the
+ * source vector register group or the source mask register.
+ */
static bool vcompress_vm_check(DisasContext *s, arg_r *a)
{
- return (vext_check_isa_ill(s) &&
- vext_check_reg(s, a->rd, false) &&
- vext_check_reg(s, a->rs2, false) &&
- vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs1, 1) &&
- (a->rd != a->rs2));
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ require_align(a->rd, s->lmul) &&
+ require_align(a->rs2, s->lmul) &&
+ (a->rd != a->rs2) &&
+ !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs1, 1) &&
+ (s->vstart == 0);
}
static bool trans_vcompress_vm(DisasContext *s, arg_r *a)
@@ -2854,14 +3450,133 @@ static bool trans_vcompress_vm(DisasContext *s, arg_r *a)
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
- data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
cpu_env, s->vlen / 8, s->vlen / 8, data,
fns[s->sew]);
+ mark_vs_dirty(s);
gen_set_label(over);
return true;
}
return false;
}
+
+/*
+ * Whole Vector Register Move Instructions ignore vtype and vl setting.
+ * Thus, we don't need to check the vill bit. (Section 16.6)
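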
+ */
+#define GEN_VMV_WHOLE_TRANS(NAME, LEN, SEQ) \
+static bool trans_##NAME(DisasContext *s, arg_##NAME * a) \
+{ \
+ if (require_rvv(s) && \
+ QEMU_IS_ALIGNED(a->rd, LEN) && \
+ QEMU_IS_ALIGNED(a->rs2, LEN)) { \
+ uint32_t maxsz = (s->vlen >> 3) * LEN; \
+ if (s->vstart == 0) { \
+ /* EEW = 8 */ \
+ tcg_gen_gvec_mov(MO_8, vreg_ofs(s, a->rd), \
+ vreg_ofs(s, a->rs2), maxsz, maxsz); \
+ mark_vs_dirty(s); \
+ } else { \
+ TCGLabel *over = gen_new_label(); \
+ tcg_gen_brcondi_tl(TCG_COND_GEU, cpu_vstart, maxsz, over); \
+ \
+ static gen_helper_gvec_2_ptr * const fns[4] = { \
+ gen_helper_vmv1r_v, gen_helper_vmv2r_v, \
+ gen_helper_vmv4r_v, gen_helper_vmv8r_v, \
+ }; \
+ tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), \
+ cpu_env, maxsz, maxsz, 0, fns[SEQ]); \
+ mark_vs_dirty(s); \
+ gen_set_label(over); \
+ } \
+ return true; \
+ } \
+ return false; \
+}
+
+GEN_VMV_WHOLE_TRANS(vmv1r_v, 1, 0)
+GEN_VMV_WHOLE_TRANS(vmv2r_v, 2, 1)
+GEN_VMV_WHOLE_TRANS(vmv4r_v, 4, 2)
+GEN_VMV_WHOLE_TRANS(vmv8r_v, 8, 3)
+
+static bool int_ext_check(DisasContext *s, arg_rmr *a, uint8_t div)
+{
+ uint8_t from = (s->sew + 3) - div;
+ bool ret = require_rvv(s) &&
+ (from >= 3 && from <= 8) &&
+ (a->rd != a->rs2) &&
+ require_align(a->rd, s->lmul) &&
+ require_align(a->rs2, s->lmul - div) &&
+ require_vm(a->vm, a->rd) &&
+ require_noover(a->rd, s->lmul, a->rs2, s->lmul - div);
+ return ret;
+}
+
+static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq)
+{
+ uint32_t data = 0;
+ gen_helper_gvec_3_ptr *fn;
+ TCGLabel *over = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+
+ static gen_helper_gvec_3_ptr * const fns[6][4] = {
+ {
+ NULL, gen_helper_vzext_vf2_h,
+ gen_helper_vzext_vf2_w, gen_helper_vzext_vf2_d
+ },
+ {
+ NULL, NULL,
+ gen_helper_vzext_vf4_w, gen_helper_vzext_vf4_d,
+ },
+ {
+ NULL, NULL,
+ NULL, gen_helper_vzext_vf8_d
+ },
+ {
+ NULL, gen_helper_vsext_vf2_h,
+ gen_helper_vsext_vf2_w, gen_helper_vsext_vf2_d
+ },
+ {
+ NULL, NULL,
+ gen_helper_vsext_vf4_w, gen_helper_vsext_vf4_d,
+ },
+ {
+ NULL, NULL,
+ NULL, gen_helper_vsext_vf8_d
+ }
+ };
+
+ fn = fns[seq][s->sew];
+ if (fn == NULL) {
+ return false;
+ }
+
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
+
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
+ vreg_ofs(s, a->rs2), cpu_env,
+ s->vlen / 8, s->vlen / 8, data, fn);
+
+ mark_vs_dirty(s);
+ gen_set_label(over);
+ return true;
+}
+
+/* Vector Integer Extension */
+#define GEN_INT_EXT_TRANS(NAME, DIV, SEQ) \
+static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
+{ \
+ if (int_ext_check(s, a, DIV)) { \
+ return int_ext_op(s, a, SEQ); \
+ } \
+ return false; \
+}
+
+GEN_INT_EXT_TRANS(vzext_vf2, 1, 0)
+GEN_INT_EXT_TRANS(vzext_vf4, 2, 1)
+GEN_INT_EXT_TRANS(vzext_vf8, 3, 2)
+GEN_INT_EXT_TRANS(vsext_vf2, 1, 3)
+GEN_INT_EXT_TRANS(vsext_vf4, 2, 4)
+GEN_INT_EXT_TRANS(vsext_vf8, 3, 5)
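
vzext.vf2 and vsext.vf2 read source elements of half the destination SEW and zero- or sign-extend them (vf4 and vf8 divide by 4 and 8 in the same way). A hedged scalar sketch of the per-element operation for SEW = 32:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint16_t src[4] = { 0x0001, 0x7fff, 0x8000, 0xffff };  /* EEW = 16 */
    uint32_t zext[4];
    int32_t  sext[4];

    for (int i = 0; i < 4; i++) {
        zext[i] = src[i];                   /* vzext.vf2: zero-extend */
        sext[i] = (int16_t)src[i];          /* vsext.vf2: sign-extend */
    }

    for (int i = 0; i < 4; i++) {
        printf("%#06x -> z:%#010x s:%#010x\n",
               (unsigned)src[i], zext[i], (uint32_t)sext[i]);
    }
    return 0;
}
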
diff --git a/target/riscv/insn_trans/trans_rvzfh.c.inc b/target/riscv/insn_trans/trans_rvzfh.c.inc
new file mode 100644
index 0000000000..5a7cac8958
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvzfh.c.inc
@@ -0,0 +1,537 @@
+/*
+ * RISC-V translation routines for the RV64Zfh Standard Extension.
+ *
+ * Copyright (c) 2020 Chih-Min Chao, chihmin.chao@sifive.com
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define REQUIRE_ZFH(ctx) do { \
+ if (!ctx->ext_zfh) { \
+ return false; \
+ } \
+} while (0)
+
+#define REQUIRE_ZFH_OR_ZFHMIN(ctx) do { \
+ if (!(ctx->ext_zfh || ctx->ext_zfhmin)) { \
+ return false; \
+ } \
+} while (0)
+
+static bool trans_flh(DisasContext *ctx, arg_flh *a)
+{
+ TCGv_i64 dest;
+ TCGv t0;
+
+ REQUIRE_FPU;
+ REQUIRE_ZFH_OR_ZFHMIN(ctx);
+
+ t0 = get_gpr(ctx, a->rs1, EXT_NONE);
+ if (a->imm) {
+ TCGv temp = temp_new(ctx);
+ tcg_gen_addi_tl(temp, t0, a->imm);
+ t0 = temp;
+ }
+
+ dest = cpu_fpr[a->rd];
+ tcg_gen_qemu_ld_i64(dest, t0, ctx->mem_idx, MO_TEUW);
+ gen_nanbox_h(dest, dest);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsh(DisasContext *ctx, arg_fsh *a)
+{
+ TCGv t0;
+
+ REQUIRE_FPU;
+ REQUIRE_ZFH_OR_ZFHMIN(ctx);
+
+ t0 = get_gpr(ctx, a->rs1, EXT_NONE);
+ if (a->imm) {
+ TCGv temp = tcg_temp_new();
+ tcg_gen_addi_tl(temp, t0, a->imm);
+ t0 = temp;
+ }
+
+ tcg_gen_qemu_st_i64(cpu_fpr[a->rs2], t0, ctx->mem_idx, MO_TEUW);
+
+ return true;
+}
+
+static bool trans_fmadd_h(DisasContext *ctx, arg_fmadd_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fmadd_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+ cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fmsub_h(DisasContext *ctx, arg_fmsub_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fmsub_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+ cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fnmsub_h(DisasContext *ctx, arg_fnmsub_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fnmsub_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+ cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fnmadd_h(DisasContext *ctx, arg_fnmadd_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fnmadd_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+ cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fadd_h(DisasContext *ctx, arg_fadd_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fadd_h(cpu_fpr[a->rd], cpu_env,
+ cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsub_h(DisasContext *ctx, arg_fsub_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fsub_h(cpu_fpr[a->rd], cpu_env,
+ cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fmul_h(DisasContext *ctx, arg_fmul_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fmul_h(cpu_fpr[a->rd], cpu_env,
+ cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fdiv_h(DisasContext *ctx, arg_fdiv_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fdiv_h(cpu_fpr[a->rd], cpu_env,
+ cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsqrt_h(DisasContext *ctx, arg_fsqrt_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fsqrt_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsgnj_h(DisasContext *ctx, arg_fsgnj_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ if (a->rs1 == a->rs2) { /* FMOV */
+ gen_check_nanbox_h(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
+ } else {
+ TCGv_i64 rs1 = tcg_temp_new_i64();
+ TCGv_i64 rs2 = tcg_temp_new_i64();
+
+ gen_check_nanbox_h(rs1, cpu_fpr[a->rs1]);
+ gen_check_nanbox_h(rs2, cpu_fpr[a->rs2]);
+
+ /* This formulation retains the nanboxing of rs2. */
+ tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 15);
+ tcg_temp_free_i64(rs1);
+ tcg_temp_free_i64(rs2);
+ }
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsgnjn_h(DisasContext *ctx, arg_fsgnjn_h *a)
+{
+ TCGv_i64 rs1, rs2, mask;
+
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ rs1 = tcg_temp_new_i64();
+ gen_check_nanbox_h(rs1, cpu_fpr[a->rs1]);
+
+ if (a->rs1 == a->rs2) { /* FNEG */
+ tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(15, 1));
+ } else {
+ rs2 = tcg_temp_new_i64();
+ gen_check_nanbox_h(rs2, cpu_fpr[a->rs2]);
+
+ /*
+ * Replace bit 15 in rs1 with inverse in rs2.
+ * This formulation retains the nanboxing of rs1.
+ */
+ mask = tcg_const_i64(~MAKE_64BIT_MASK(15, 1));
+ tcg_gen_not_i64(rs2, rs2);
+ tcg_gen_andc_i64(rs2, rs2, mask);
+ tcg_gen_and_i64(rs1, mask, rs1);
+ tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);
+
+ tcg_temp_free_i64(mask);
+ tcg_temp_free_i64(rs2);
+ }
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsgnjx_h(DisasContext *ctx, arg_fsgnjx_h *a)
+{
+ TCGv_i64 rs1, rs2;
+
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ rs1 = tcg_temp_new_i64();
+    gen_check_nanbox_h(rs1, cpu_fpr[a->rs1]);
+
+ if (a->rs1 == a->rs2) { /* FABS */
+ tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(15, 1));
+ } else {
+ rs2 = tcg_temp_new_i64();
+        gen_check_nanbox_h(rs2, cpu_fpr[a->rs2]);
+
+ /*
+ * Xor bit 15 in rs1 with that in rs2.
+ * This formulation retains the nanboxing of rs1.
+ */
+ tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(15, 1));
+ tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);
+
+ tcg_temp_free_i64(rs2);
+ }
+
+ mark_fs_dirty(ctx);
+ return true;
+}
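
All three sign-injection forms only touch bit 15 of the binary16 pattern: fsgnj takes rs2's sign, fsgnjn the inverted sign, and fsgnjx the XOR of the two signs; the FMOV/FNEG/FABS special cases above are just the rs1 == rs2 instances. A sketch on raw 16-bit patterns (the helper names are illustrative):

#include <stdint.h>
#include <stdio.h>

#define F16_SIGN UINT16_C(0x8000)           /* bit 15 of a binary16 pattern */

static uint16_t fsgnj16(uint16_t a, uint16_t b)
{
    return (a & ~F16_SIGN) | (b & F16_SIGN);        /* take rs2's sign */
}

static uint16_t fsgnjn16(uint16_t a, uint16_t b)
{
    return (a & ~F16_SIGN) | (~b & F16_SIGN);       /* take inverted sign */
}

static uint16_t fsgnjx16(uint16_t a, uint16_t b)
{
    return a ^ (b & F16_SIGN);                      /* xor the two signs */
}

int main(void)
{
    uint16_t one  = 0x3c00;                 /* +1.0 in binary16 */
    uint16_t mthr = 0xc200;                 /* -3.0 in binary16 */

    printf("fsgnj  = %#06x\n", (unsigned)fsgnj16(one, mthr));   /* 0xbc00 (-1.0) */
    printf("fsgnjn = %#06x\n", (unsigned)fsgnjn16(one, mthr));  /* 0x3c00 (+1.0) */
    printf("fsgnjx = %#06x\n", (unsigned)fsgnjx16(one, mthr));  /* 0xbc00 (-1.0) */
    printf("fneg   = %#06x\n", (unsigned)fsgnjn16(mthr, mthr)); /* 0x4200 (+3.0) */
    return 0;
}
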
+
+static bool trans_fmin_h(DisasContext *ctx, arg_fmin_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ gen_helper_fmin_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+ cpu_fpr[a->rs2]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fmax_h(DisasContext *ctx, arg_fmax_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ gen_helper_fmax_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+ cpu_fpr[a->rs2]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fcvt_s_h(DisasContext *ctx, arg_fcvt_s_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH_OR_ZFHMIN(ctx);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_s_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+
+ mark_fs_dirty(ctx);
+
+ return true;
+}
+
+static bool trans_fcvt_d_h(DisasContext *ctx, arg_fcvt_d_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH_OR_ZFHMIN(ctx);
+ REQUIRE_EXT(ctx, RVD);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_d_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+
+ mark_fs_dirty(ctx);
+
+ return true;
+}
+
+static bool trans_fcvt_h_s(DisasContext *ctx, arg_fcvt_h_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH_OR_ZFHMIN(ctx);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_h_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+
+ mark_fs_dirty(ctx);
+
+ return true;
+}
+
+static bool trans_fcvt_h_d(DisasContext *ctx, arg_fcvt_h_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH_OR_ZFHMIN(ctx);
+ REQUIRE_EXT(ctx, RVD);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_h_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+
+ mark_fs_dirty(ctx);
+
+ return true;
+}
+
+static bool trans_feq_h(DisasContext *ctx, arg_feq_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ TCGv dest = dest_gpr(ctx, a->rd);
+
+ gen_helper_feq_h(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_set_gpr(ctx, a->rd, dest);
+ return true;
+}
+
+static bool trans_flt_h(DisasContext *ctx, arg_flt_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ TCGv dest = dest_gpr(ctx, a->rd);
+
+ gen_helper_flt_h(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_set_gpr(ctx, a->rd, dest);
+
+ return true;
+}
+
+static bool trans_fle_h(DisasContext *ctx, arg_fle_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ TCGv dest = dest_gpr(ctx, a->rd);
+
+ gen_helper_fle_h(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_set_gpr(ctx, a->rd, dest);
+ return true;
+}
+
+static bool trans_fclass_h(DisasContext *ctx, arg_fclass_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ TCGv dest = dest_gpr(ctx, a->rd);
+
+ gen_helper_fclass_h(dest, cpu_fpr[a->rs1]);
+ gen_set_gpr(ctx, a->rd, dest);
+ return true;
+}
+
+static bool trans_fcvt_w_h(DisasContext *ctx, arg_fcvt_w_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ TCGv dest = dest_gpr(ctx, a->rd);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_w_h(dest, cpu_env, cpu_fpr[a->rs1]);
+ gen_set_gpr(ctx, a->rd, dest);
+ return true;
+}
+
+static bool trans_fcvt_wu_h(DisasContext *ctx, arg_fcvt_wu_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ TCGv dest = dest_gpr(ctx, a->rd);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_wu_h(dest, cpu_env, cpu_fpr[a->rs1]);
+ gen_set_gpr(ctx, a->rd, dest);
+ return true;
+}
+
+static bool trans_fcvt_h_w(DisasContext *ctx, arg_fcvt_h_w *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ TCGv t0 = get_gpr(ctx, a->rs1, EXT_SIGN);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_h_w(cpu_fpr[a->rd], cpu_env, t0);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fcvt_h_wu(DisasContext *ctx, arg_fcvt_h_wu *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ TCGv t0 = get_gpr(ctx, a->rs1, EXT_SIGN);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_h_wu(cpu_fpr[a->rd], cpu_env, t0);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fmv_x_h(DisasContext *ctx, arg_fmv_x_h *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH_OR_ZFHMIN(ctx);
+
+ TCGv dest = dest_gpr(ctx, a->rd);
+
+#if defined(TARGET_RISCV64)
+ /* 16 bits -> 64 bits */
+ tcg_gen_ext16s_tl(dest, cpu_fpr[a->rs1]);
+#else
+ /* 16 bits -> 32 bits */
+ tcg_gen_extrl_i64_i32(dest, cpu_fpr[a->rs1]);
+ tcg_gen_ext16s_tl(dest, dest);
+#endif
+
+ gen_set_gpr(ctx, a->rd, dest);
+ return true;
+}
+
+static bool trans_fmv_h_x(DisasContext *ctx, arg_fmv_h_x *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZFH_OR_ZFHMIN(ctx);
+
+ TCGv t0 = get_gpr(ctx, a->rs1, EXT_ZERO);
+
+ tcg_gen_extu_tl_i64(cpu_fpr[a->rd], t0);
+ gen_nanbox_h(cpu_fpr[a->rd], cpu_fpr[a->rd]);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fcvt_l_h(DisasContext *ctx, arg_fcvt_l_h *a)
+{
+ REQUIRE_64BIT(ctx);
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ TCGv dest = dest_gpr(ctx, a->rd);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_l_h(dest, cpu_env, cpu_fpr[a->rs1]);
+ gen_set_gpr(ctx, a->rd, dest);
+ return true;
+}
+
+static bool trans_fcvt_lu_h(DisasContext *ctx, arg_fcvt_lu_h *a)
+{
+ REQUIRE_64BIT(ctx);
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ TCGv dest = dest_gpr(ctx, a->rd);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_lu_h(dest, cpu_env, cpu_fpr[a->rs1]);
+ gen_set_gpr(ctx, a->rd, dest);
+ return true;
+}
+
+static bool trans_fcvt_h_l(DisasContext *ctx, arg_fcvt_h_l *a)
+{
+ REQUIRE_64BIT(ctx);
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ TCGv t0 = get_gpr(ctx, a->rs1, EXT_SIGN);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_h_l(cpu_fpr[a->rd], cpu_env, t0);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fcvt_h_lu(DisasContext *ctx, arg_fcvt_h_lu *a)
+{
+ REQUIRE_64BIT(ctx);
+ REQUIRE_FPU;
+ REQUIRE_ZFH(ctx);
+
+ TCGv t0 = get_gpr(ctx, a->rs1, EXT_SIGN);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_h_lu(cpu_fpr[a->rd], cpu_env, t0);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index b15ad394bb..065e8162a2 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -22,26 +22,30 @@
#include "hw/registerfields.h"
/* share data between vector helpers and decode code */
-FIELD(VDATA, MLEN, 0, 8)
-FIELD(VDATA, VM, 8, 1)
-FIELD(VDATA, LMUL, 9, 2)
-FIELD(VDATA, NF, 11, 4)
-FIELD(VDATA, WD, 11, 1)
+FIELD(VDATA, VM, 0, 1)
+FIELD(VDATA, LMUL, 1, 3)
+FIELD(VDATA, NF, 4, 4)
+FIELD(VDATA, WD, 4, 1)
/* float point classify helpers */
target_ulong fclass_h(uint64_t frs1);
target_ulong fclass_s(uint64_t frs1);
target_ulong fclass_d(uint64_t frs1);
-#define SEW8 0
-#define SEW16 1
-#define SEW32 2
-#define SEW64 3
-
#ifndef CONFIG_USER_ONLY
extern const VMStateDescription vmstate_riscv_cpu;
#endif
+enum {
+ RISCV_FRM_RNE = 0, /* Round to Nearest, ties to Even */
+ RISCV_FRM_RTZ = 1, /* Round towards Zero */
+ RISCV_FRM_RDN = 2, /* Round Down */
+ RISCV_FRM_RUP = 3, /* Round Up */
+ RISCV_FRM_RMM = 4, /* Round to Nearest, ties to Max Magnitude */
+ RISCV_FRM_DYN = 7, /* Dynamic rounding mode */
+ RISCV_FRM_ROD = 8, /* Round to Odd */
+};
+
static inline uint64_t nanbox_s(float32 f)
{
return f | MAKE_64BIT_MASK(32, 32);
@@ -58,4 +62,20 @@ static inline float32 check_nanbox_s(uint64_t f)
}
}
+static inline uint64_t nanbox_h(float16 f)
+{
+ return f | MAKE_64BIT_MASK(16, 48);
+}
+
+static inline float16 check_nanbox_h(uint64_t f)
+{
+ uint64_t mask = MAKE_64BIT_MASK(16, 48);
+
+ if (likely((f & mask) == mask)) {
+ return (uint16_t)f;
+ } else {
+ return 0x7E00u; /* default qnan */
+ }
+}
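
For experimenting outside QEMU, the two half-precision helpers above can be restated as plain C: a value is accepted only if all 48 upper bits are set, otherwise it is read back as the default quiet NaN 0x7e00 (names renamed to avoid clashing with the real inlines):

#include <stdint.h>
#include <stdio.h>

static uint64_t nanbox_h16(uint16_t f)
{
    return (uint64_t)f | 0xffffffffffff0000ull;     /* set bits [63:16] */
}

static uint16_t unbox_h16(uint64_t f)
{
    const uint64_t mask = 0xffffffffffff0000ull;
    /* properly boxed -> low 16 bits, otherwise the default qNaN */
    return ((f & mask) == mask) ? (uint16_t)f : 0x7e00;
}

int main(void)
{
    uint64_t boxed = nanbox_h16(0x3c00);            /* +1.0 in binary16 */

    printf("boxed       = 0x%016llx\n", (unsigned long long)boxed);
    printf("unboxed     = %#06x\n", (unsigned)unbox_h16(boxed));   /* 0x3c00 */
    printf("badly boxed = %#06x\n", (unsigned)unbox_h16(0x3c00));  /* 0x7e00 */
    return 0;
}
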
+
#endif
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 1d57bc97b5..5df6c0d800 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -30,9 +30,10 @@
#include "exec/log.h"
#include "instmap.h"
+#include "internals.h"
/* global register indices */
-static TCGv cpu_gpr[32], cpu_pc, cpu_vl;
+static TCGv cpu_gpr[32], cpu_pc, cpu_vl, cpu_vstart;
static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */
static TCGv load_res;
static TCGv load_val;
@@ -62,7 +63,9 @@ typedef struct DisasContext {
uint32_t misa_ext;
uint32_t opcode;
uint32_t mstatus_fs;
+ uint32_t mstatus_vs;
uint32_t mstatus_hs_fs;
+ uint32_t mstatus_hs_vs;
uint32_t mem_idx;
/* Remember the rounding mode encoded in the previous fp instruction,
which we have already installed into env->fp_status. Or -1 for
@@ -73,13 +76,28 @@ typedef struct DisasContext {
RISCVMXL ol;
bool virt_enabled;
bool ext_ifencei;
+ bool ext_zfh;
+ bool ext_zfhmin;
bool hlsx;
/* vector extension */
bool vill;
- uint8_t lmul;
+ /*
+ * Encode LMUL to lmul as follows:
+ * LMUL vlmul lmul
+ * 1 000 0
+ * 2 001 1
+ * 4 010 2
+ * 8 011 3
+ * - 100 -
+ * 1/8 101 -3
+ * 1/4 110 -2
+ * 1/2 111 -1
+ */
+ int8_t lmul;
uint8_t sew;
uint16_t vlen;
- uint16_t mlen;
+ uint16_t elen;
+ target_ulong vstart;
bool vl_eq_vlmax;
uint8_t ntemp;
CPUState *cs;
@@ -134,6 +152,11 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)
tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));
}
+static void gen_nanbox_h(TCGv_i64 out, TCGv_i64 in)
+{
+ tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(16, 48));
+}
+
/*
* A narrow n-bit operation, where n < FLEN, checks that input operands
* are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.
@@ -142,6 +165,16 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)
*
* Here, the result is always nan-boxed, even the canonical nan.
*/
+static void gen_check_nanbox_h(TCGv_i64 out, TCGv_i64 in)
+{
+ TCGv_i64 t_max = tcg_const_i64(0xffffffffffff0000ull);
+ TCGv_i64 t_nan = tcg_const_i64(0xffffffffffff7e00ull);
+
+ tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan);
+ tcg_temp_free_i64(t_max);
+ tcg_temp_free_i64(t_nan);
+}
+
static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)
{
TCGv_i64 t_max = tcg_constant_i64(0xffffffff00000000ull);
@@ -331,12 +364,54 @@ static void mark_fs_dirty(DisasContext *ctx)
static inline void mark_fs_dirty(DisasContext *ctx) { }
#endif
+#ifndef CONFIG_USER_ONLY
+/* The states of mstatus_vs are:
+ * 0 = disabled, 1 = initial, 2 = clean, 3 = dirty
+ * We will have already diagnosed disabled state,
+ * and need to turn initial/clean into dirty.
+ */
+static void mark_vs_dirty(DisasContext *ctx)
+{
+ TCGv tmp;
+
+ if (ctx->mstatus_vs != MSTATUS_VS) {
+ /* Remember the state change for the rest of the TB. */
+ ctx->mstatus_vs = MSTATUS_VS;
+
+ tmp = tcg_temp_new();
+ tcg_gen_ld_tl(tmp, cpu_env, offsetof(CPURISCVState, mstatus));
+ tcg_gen_ori_tl(tmp, tmp, MSTATUS_VS);
+ tcg_gen_st_tl(tmp, cpu_env, offsetof(CPURISCVState, mstatus));
+ tcg_temp_free(tmp);
+ }
+
+ if (ctx->virt_enabled && ctx->mstatus_hs_vs != MSTATUS_VS) {
+ /* Remember the stage change for the rest of the TB. */
+ ctx->mstatus_hs_vs = MSTATUS_VS;
+
+ tmp = tcg_temp_new();
+ tcg_gen_ld_tl(tmp, cpu_env, offsetof(CPURISCVState, mstatus_hs));
+ tcg_gen_ori_tl(tmp, tmp, MSTATUS_VS);
+ tcg_gen_st_tl(tmp, cpu_env, offsetof(CPURISCVState, mstatus_hs));
+ tcg_temp_free(tmp);
+ }
+}
+#else
+static inline void mark_vs_dirty(DisasContext *ctx) { }
+#endif
+
static void gen_set_rm(DisasContext *ctx, int rm)
{
if (ctx->frm == rm) {
return;
}
ctx->frm = rm;
+
+ if (rm == RISCV_FRM_ROD) {
+ gen_helper_set_rod_rounding_mode(cpu_env);
+ return;
+ }
+
gen_helper_set_rounding_mode(cpu_env, tcg_constant_i32(rm));
}
@@ -574,6 +649,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc)
#include "insn_trans/trans_rvh.c.inc"
#include "insn_trans/trans_rvv.c.inc"
#include "insn_trans/trans_rvb.c.inc"
+#include "insn_trans/trans_rvzfh.c.inc"
#include "insn_trans/trans_privileged.c.inc"
/* Include the auto-generated decoder for 16 bit insn */
@@ -613,6 +689,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
ctx->pc_succ_insn = ctx->base.pc_first;
ctx->mem_idx = FIELD_EX32(tb_flags, TB_FLAGS, MEM_IDX);
ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS;
+ ctx->mstatus_vs = tb_flags & TB_FLAGS_MSTATUS_VS;
ctx->priv_ver = env->priv_ver;
#if !defined(CONFIG_USER_ONLY)
if (riscv_has_ext(env, RVH)) {
@@ -626,13 +703,17 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
ctx->misa_ext = env->misa_ext;
ctx->frm = -1; /* unknown rounding mode */
ctx->ext_ifencei = cpu->cfg.ext_ifencei;
+ ctx->ext_zfh = cpu->cfg.ext_zfh;
+ ctx->ext_zfhmin = cpu->cfg.ext_zfhmin;
ctx->vlen = cpu->cfg.vlen;
+ ctx->elen = cpu->cfg.elen;
ctx->mstatus_hs_fs = FIELD_EX32(tb_flags, TB_FLAGS, MSTATUS_HS_FS);
+ ctx->mstatus_hs_vs = FIELD_EX32(tb_flags, TB_FLAGS, MSTATUS_HS_VS);
ctx->hlsx = FIELD_EX32(tb_flags, TB_FLAGS, HLSX);
ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL);
ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW);
- ctx->lmul = FIELD_EX32(tb_flags, TB_FLAGS, LMUL);
- ctx->mlen = 1 << (ctx->sew + 3 - ctx->lmul);
+ ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3);
+ ctx->vstart = env->vstart;
ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
ctx->xl = FIELD_EX32(tb_flags, TB_FLAGS, XL);
ctx->cs = cs;
@@ -751,6 +832,8 @@ void riscv_translate_init(void)
cpu_pc = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, pc), "pc");
cpu_vl = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, vl), "vl");
+ cpu_vstart = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, vstart),
+ "vstart");
load_res = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, load_res),
"load_res");
load_val = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, load_val),
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 12c31aa4b4..ad505ec9b2 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -17,6 +17,8 @@
*/
#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
@@ -31,12 +33,24 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
{
int vlmax, vl;
RISCVCPU *cpu = env_archcpu(env);
+ uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
bool vill = FIELD_EX64(s2, VTYPE, VILL);
target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);
- if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
+ if (lmul & 4) {
+ /* Fractional LMUL. */
+ if (lmul == 4 ||
+ cpu->cfg.elen >> (8 - lmul) < sew) {
+ vill = true;
+ }
+ }
+
+ if ((sew > cpu->cfg.elen)
+ || vill
+ || (ediv != 0)
+ || (reserved != 0)) {
/* only set vill bit. */
env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
env->vl = 0;
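
For fractional LMUL (vlmul encodings 5..7) the check above requires ELEN * LMUL >= SEW, and encoding 4 is reserved. A hedged stand-alone restatement of that legality test (frac_lmul_ok is a hypothetical name) might look like:

#include <stdbool.h>
#include <stdint.h>

static bool frac_lmul_ok(uint32_t elen, uint32_t sew, uint32_t vlmul)
{
    if (!(vlmul & 4)) {
        return true;                  /* integer LMUL: no extra constraint */
    }
    if (vlmul == 4) {
        return false;                 /* reserved encoding */
    }
    /* e.g. ELEN = 64, vlmul = 5 (LMUL = 1/8): 64 >> 3 = 8, so only SEW = 8 is legal */
    return (elen >> (8 - vlmul)) >= sew;
}
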
@@ -81,35 +95,44 @@ static inline uint32_t vext_nf(uint32_t desc)
return FIELD_EX32(simd_data(desc), VDATA, NF);
}
-static inline uint32_t vext_mlen(uint32_t desc)
-{
- return FIELD_EX32(simd_data(desc), VDATA, MLEN);
-}
-
static inline uint32_t vext_vm(uint32_t desc)
{
return FIELD_EX32(simd_data(desc), VDATA, VM);
}
-static inline uint32_t vext_lmul(uint32_t desc)
-{
- return FIELD_EX32(simd_data(desc), VDATA, LMUL);
-}
-
-static uint32_t vext_wd(uint32_t desc)
+/*
+ * Encode LMUL to lmul as follows:
+ * LMUL vlmul lmul
+ * 1 000 0
+ * 2 001 1
+ * 4 010 2
+ * 8 011 3
+ * - 100 -
+ * 1/8 101 -3
+ * 1/4 110 -2
+ * 1/2 111 -1
+ */
+static inline int32_t vext_lmul(uint32_t desc)
{
- return (simd_data(desc) >> 11) & 0x1;
+ return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}
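
The 3-bit vlmul field is now treated as a signed quantity, so the effective group multiplier is 2^lmul. A small sketch of the same decoding without QEMU's sextract32 (decode_lmul is a hypothetical name):

#include <stdint.h>

/* 0..3 encode LMUL = 1, 2, 4, 8; 5..7 encode LMUL = 1/8, 1/4, 1/2. */
static int32_t decode_lmul(uint32_t vlmul)
{
    return (vlmul & 4) ? (int32_t)vlmul - 8 : (int32_t)vlmul;   /* e.g. 7 -> -1 */
}
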
/*
- * Get vector group length in bytes. Its range is [64, 2048].
+ * Get the maximum number of elements that can be operated on.
*
- * As simd_desc support at most 256, the max vlen is 512 bits.
- * So vlen in bytes is encoded as maxsz.
+ * esz: log2 of element size in bytes.
*/
-static inline uint32_t vext_maxsz(uint32_t desc)
+static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
- return simd_maxsz(desc) << vext_lmul(desc);
+ /*
+     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
+ * so vlen in bytes (vlenb) is encoded as maxsz.
+ */
+ uint32_t vlenb = simd_maxsz(desc);
+
+ /* Return VLMAX */
+ int scale = vext_lmul(desc) - esz;
+ return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
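
Since both LMUL and the element size are powers of two, VLMAX = vlenb * LMUL / (element bytes) reduces to a single shift. A stand-alone version of the computation above, with a worked example in the comment (vlmax_of is a hypothetical name):

#include <stdint.h>

static uint32_t vlmax_of(uint32_t vlenb, int32_t lmul, uint32_t esz /* log2(bytes) */)
{
    int scale = lmul - (int)esz;
    /* e.g. vlenb = 16 (VLEN = 128), lmul = -1 (LMUL = 1/2), esz = 2 (SEW = 32):
     * scale = -3, so VLMAX = 16 >> 3 = 2 elements. */
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
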
/*
@@ -139,108 +162,43 @@ static void probe_pages(CPURISCVState *env, target_ulong addr,
}
}
-#ifdef HOST_WORDS_BIGENDIAN
-static void vext_clear(void *tail, uint32_t cnt, uint32_t tot)
-{
- /*
- * Split the remaining range to two parts.
- * The first part is in the last uint64_t unit.
- * The second part start from the next uint64_t unit.
- */
- int part1 = 0, part2 = tot - cnt;
- if (cnt % 8) {
- part1 = 8 - (cnt % 8);
- part2 = tot - cnt - part1;
- memset(QEMU_ALIGN_PTR_DOWN(tail, 8), 0, part1);
- memset(QEMU_ALIGN_PTR_UP(tail, 8), 0, part2);
- } else {
- memset(tail, 0, part2);
- }
-}
-#else
-static void vext_clear(void *tail, uint32_t cnt, uint32_t tot)
+static inline void vext_set_elem_mask(void *v0, int index,
+ uint8_t value)
{
- memset(tail, 0, tot - cnt);
-}
-#endif
-
-static void clearb(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
-{
- int8_t *cur = ((int8_t *)vd + H1(idx));
- vext_clear(cur, cnt, tot);
-}
-
-static void clearh(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
-{
- int16_t *cur = ((int16_t *)vd + H2(idx));
- vext_clear(cur, cnt, tot);
-}
-
-static void clearl(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
-{
- int32_t *cur = ((int32_t *)vd + H4(idx));
- vext_clear(cur, cnt, tot);
-}
-
-static void clearq(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
-{
- int64_t *cur = (int64_t *)vd + idx;
- vext_clear(cur, cnt, tot);
-}
-
-static inline void vext_set_elem_mask(void *v0, int mlen, int index,
- uint8_t value)
-{
- int idx = (index * mlen) / 64;
- int pos = (index * mlen) % 64;
+ int idx = index / 64;
+ int pos = index % 64;
uint64_t old = ((uint64_t *)v0)[idx];
- ((uint64_t *)v0)[idx] = deposit64(old, pos, mlen, value);
+ ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}
-static inline int vext_elem_mask(void *v0, int mlen, int index)
+/*
+ * Earlier designs (pre-0.9) had a varying number of bits
+ * per mask value (MLEN). In the 0.9 design, MLEN=1.
+ * (Section 4.5)
+ */
+static inline int vext_elem_mask(void *v0, int index)
{
- int idx = (index * mlen) / 64;
- int pos = (index * mlen) % 64;
+ int idx = index / 64;
+ int pos = index % 64;
return (((uint64_t *)v0)[idx] >> pos) & 1;
}
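
With MLEN fixed at 1, mask element i is simply bit (i mod 64) of 64-bit word (i / 64) of v0. A self-contained sketch of the read side (mask_bit is a hypothetical name):

#include <stdint.h>

static int mask_bit(const uint64_t *v0, int i)
{
    return (v0[i / 64] >> (i % 64)) & 1;    /* e.g. i = 70 -> word 1, bit 6 */
}
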
/* elements operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
uint32_t idx, void *vd, uintptr_t retaddr);
-typedef void clear_fn(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot);
-#define GEN_VEXT_LD_ELEM(NAME, MTYPE, ETYPE, H, LDSUF) \
+#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
static void NAME(CPURISCVState *env, abi_ptr addr, \
uint32_t idx, void *vd, uintptr_t retaddr)\
{ \
- MTYPE data; \
ETYPE *cur = ((ETYPE *)vd + H(idx)); \
- data = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
- *cur = data; \
+ *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
} \
-GEN_VEXT_LD_ELEM(ldb_b, int8_t, int8_t, H1, ldsb)
-GEN_VEXT_LD_ELEM(ldb_h, int8_t, int16_t, H2, ldsb)
-GEN_VEXT_LD_ELEM(ldb_w, int8_t, int32_t, H4, ldsb)
-GEN_VEXT_LD_ELEM(ldb_d, int8_t, int64_t, H8, ldsb)
-GEN_VEXT_LD_ELEM(ldh_h, int16_t, int16_t, H2, ldsw)
-GEN_VEXT_LD_ELEM(ldh_w, int16_t, int32_t, H4, ldsw)
-GEN_VEXT_LD_ELEM(ldh_d, int16_t, int64_t, H8, ldsw)
-GEN_VEXT_LD_ELEM(ldw_w, int32_t, int32_t, H4, ldl)
-GEN_VEXT_LD_ELEM(ldw_d, int32_t, int64_t, H8, ldl)
-GEN_VEXT_LD_ELEM(lde_b, int8_t, int8_t, H1, ldsb)
-GEN_VEXT_LD_ELEM(lde_h, int16_t, int16_t, H2, ldsw)
-GEN_VEXT_LD_ELEM(lde_w, int32_t, int32_t, H4, ldl)
-GEN_VEXT_LD_ELEM(lde_d, int64_t, int64_t, H8, ldq)
-GEN_VEXT_LD_ELEM(ldbu_b, uint8_t, uint8_t, H1, ldub)
-GEN_VEXT_LD_ELEM(ldbu_h, uint8_t, uint16_t, H2, ldub)
-GEN_VEXT_LD_ELEM(ldbu_w, uint8_t, uint32_t, H4, ldub)
-GEN_VEXT_LD_ELEM(ldbu_d, uint8_t, uint64_t, H8, ldub)
-GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, uint16_t, H2, lduw)
-GEN_VEXT_LD_ELEM(ldhu_w, uint16_t, uint32_t, H4, lduw)
-GEN_VEXT_LD_ELEM(ldhu_d, uint16_t, uint64_t, H8, lduw)
-GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, uint32_t, H4, ldl)
-GEN_VEXT_LD_ELEM(ldwu_d, uint32_t, uint64_t, H8, ldl)
+GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
+GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
+GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
+GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
static void NAME(CPURISCVState *env, abi_ptr addr, \
@@ -250,15 +208,6 @@ static void NAME(CPURISCVState *env, abi_ptr addr, \
cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
}
-GEN_VEXT_ST_ELEM(stb_b, int8_t, H1, stb)
-GEN_VEXT_ST_ELEM(stb_h, int16_t, H2, stb)
-GEN_VEXT_ST_ELEM(stb_w, int32_t, H4, stb)
-GEN_VEXT_ST_ELEM(stb_d, int64_t, H8, stb)
-GEN_VEXT_ST_ELEM(sth_h, int16_t, H2, stw)
-GEN_VEXT_ST_ELEM(sth_w, int32_t, H4, stw)
-GEN_VEXT_ST_ELEM(sth_d, int64_t, H8, stw)
-GEN_VEXT_ST_ELEM(stw_w, int32_t, H4, stl)
-GEN_VEXT_ST_ELEM(stw_d, int64_t, H8, stl)
GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
@@ -271,100 +220,57 @@ static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
target_ulong stride, CPURISCVState *env,
uint32_t desc, uint32_t vm,
- vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem,
- uint32_t esz, uint32_t msz, uintptr_t ra,
- MMUAccessType access_type)
+ vext_ldst_elem_fn *ldst_elem,
+ uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
uint32_t i, k;
uint32_t nf = vext_nf(desc);
- uint32_t mlen = vext_mlen(desc);
- uint32_t vlmax = vext_maxsz(desc) / esz;
+ uint32_t max_elems = vext_max_elems(desc, esz);
- /* probe every access*/
- for (i = 0; i < env->vl; i++) {
- if (!vm && !vext_elem_mask(v0, mlen, i)) {
+ for (i = env->vstart; i < env->vl; i++, env->vstart++) {
+ if (!vm && !vext_elem_mask(v0, i)) {
continue;
}
- probe_pages(env, base + stride * i, nf * msz, ra, access_type);
- }
- /* do real access */
- for (i = 0; i < env->vl; i++) {
+
k = 0;
- if (!vm && !vext_elem_mask(v0, mlen, i)) {
- continue;
- }
while (k < nf) {
- target_ulong addr = base + stride * i + k * msz;
- ldst_elem(env, addr, i + k * vlmax, vd, ra);
+ target_ulong addr = base + stride * i + (k << esz);
+ ldst_elem(env, addr, i + k * max_elems, vd, ra);
k++;
}
}
- /* clear tail elements */
- if (clear_elem) {
- for (k = 0; k < nf; k++) {
- clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
- }
- }
+ env->vstart = 0;
}
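
The loop above implements the restart protocol for vstart: begin at env->vstart, advance it as each element completes so that a memory fault leaves it pointing at the faulting element, and zero it once the instruction finishes. A minimal sketch of that pattern, using a stand-in Env struct rather than CPURISCVState:

#include <stdint.h>

typedef struct { uint32_t vstart, vl; } Env;    /* stand-in, not CPURISCVState */

static void process_elements(Env *env, void (*do_elem)(Env *, uint32_t))
{
    for (uint32_t i = env->vstart; i < env->vl; i++, env->vstart++) {
        do_elem(env, i);         /* may trap; vstart then names this element */
    }
    env->vstart = 0;             /* completed: the next execution starts from 0 */
}
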
-#define GEN_VEXT_LD_STRIDE(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \
+#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
target_ulong stride, CPURISCVState *env, \
uint32_t desc) \
{ \
uint32_t vm = vext_vm(desc); \
vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
- CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \
- GETPC(), MMU_DATA_LOAD); \
-}
-
-GEN_VEXT_LD_STRIDE(vlsb_v_b, int8_t, int8_t, ldb_b, clearb)
-GEN_VEXT_LD_STRIDE(vlsb_v_h, int8_t, int16_t, ldb_h, clearh)
-GEN_VEXT_LD_STRIDE(vlsb_v_w, int8_t, int32_t, ldb_w, clearl)
-GEN_VEXT_LD_STRIDE(vlsb_v_d, int8_t, int64_t, ldb_d, clearq)
-GEN_VEXT_LD_STRIDE(vlsh_v_h, int16_t, int16_t, ldh_h, clearh)
-GEN_VEXT_LD_STRIDE(vlsh_v_w, int16_t, int32_t, ldh_w, clearl)
-GEN_VEXT_LD_STRIDE(vlsh_v_d, int16_t, int64_t, ldh_d, clearq)
-GEN_VEXT_LD_STRIDE(vlsw_v_w, int32_t, int32_t, ldw_w, clearl)
-GEN_VEXT_LD_STRIDE(vlsw_v_d, int32_t, int64_t, ldw_d, clearq)
-GEN_VEXT_LD_STRIDE(vlse_v_b, int8_t, int8_t, lde_b, clearb)
-GEN_VEXT_LD_STRIDE(vlse_v_h, int16_t, int16_t, lde_h, clearh)
-GEN_VEXT_LD_STRIDE(vlse_v_w, int32_t, int32_t, lde_w, clearl)
-GEN_VEXT_LD_STRIDE(vlse_v_d, int64_t, int64_t, lde_d, clearq)
-GEN_VEXT_LD_STRIDE(vlsbu_v_b, uint8_t, uint8_t, ldbu_b, clearb)
-GEN_VEXT_LD_STRIDE(vlsbu_v_h, uint8_t, uint16_t, ldbu_h, clearh)
-GEN_VEXT_LD_STRIDE(vlsbu_v_w, uint8_t, uint32_t, ldbu_w, clearl)
-GEN_VEXT_LD_STRIDE(vlsbu_v_d, uint8_t, uint64_t, ldbu_d, clearq)
-GEN_VEXT_LD_STRIDE(vlshu_v_h, uint16_t, uint16_t, ldhu_h, clearh)
-GEN_VEXT_LD_STRIDE(vlshu_v_w, uint16_t, uint32_t, ldhu_w, clearl)
-GEN_VEXT_LD_STRIDE(vlshu_v_d, uint16_t, uint64_t, ldhu_d, clearq)
-GEN_VEXT_LD_STRIDE(vlswu_v_w, uint32_t, uint32_t, ldwu_w, clearl)
-GEN_VEXT_LD_STRIDE(vlswu_v_d, uint32_t, uint64_t, ldwu_d, clearq)
-
-#define GEN_VEXT_ST_STRIDE(NAME, MTYPE, ETYPE, STORE_FN) \
+ ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
+}
+
+GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
+GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
+GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
+GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
+
+#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
target_ulong stride, CPURISCVState *env, \
uint32_t desc) \
{ \
uint32_t vm = vext_vm(desc); \
vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
- NULL, sizeof(ETYPE), sizeof(MTYPE), \
- GETPC(), MMU_DATA_STORE); \
-}
-
-GEN_VEXT_ST_STRIDE(vssb_v_b, int8_t, int8_t, stb_b)
-GEN_VEXT_ST_STRIDE(vssb_v_h, int8_t, int16_t, stb_h)
-GEN_VEXT_ST_STRIDE(vssb_v_w, int8_t, int32_t, stb_w)
-GEN_VEXT_ST_STRIDE(vssb_v_d, int8_t, int64_t, stb_d)
-GEN_VEXT_ST_STRIDE(vssh_v_h, int16_t, int16_t, sth_h)
-GEN_VEXT_ST_STRIDE(vssh_v_w, int16_t, int32_t, sth_w)
-GEN_VEXT_ST_STRIDE(vssh_v_d, int16_t, int64_t, sth_d)
-GEN_VEXT_ST_STRIDE(vssw_v_w, int32_t, int32_t, stw_w)
-GEN_VEXT_ST_STRIDE(vssw_v_d, int32_t, int64_t, stw_d)
-GEN_VEXT_ST_STRIDE(vsse_v_b, int8_t, int8_t, ste_b)
-GEN_VEXT_ST_STRIDE(vsse_v_h, int16_t, int16_t, ste_h)
-GEN_VEXT_ST_STRIDE(vsse_v_w, int32_t, int32_t, ste_w)
-GEN_VEXT_ST_STRIDE(vsse_v_d, int64_t, int64_t, ste_d)
+ ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
+}
+
+GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
+GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
+GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
+GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
/*
*** unit-stride: access elements stored contiguously in memory
@@ -373,31 +279,23 @@ GEN_VEXT_ST_STRIDE(vsse_v_d, int64_t, int64_t, ste_d)
/* unmasked unit-stride load and store operation*/
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
- vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem,
- uint32_t esz, uint32_t msz, uintptr_t ra,
- MMUAccessType access_type)
+ vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
+ uintptr_t ra, MMUAccessType access_type)
{
uint32_t i, k;
uint32_t nf = vext_nf(desc);
- uint32_t vlmax = vext_maxsz(desc) / esz;
+ uint32_t max_elems = vext_max_elems(desc, esz);
- /* probe every access */
- probe_pages(env, base, env->vl * nf * msz, ra, access_type);
/* load bytes from guest memory */
- for (i = 0; i < env->vl; i++) {
+ for (i = env->vstart; i < evl; i++, env->vstart++) {
k = 0;
while (k < nf) {
- target_ulong addr = base + (i * nf + k) * msz;
- ldst_elem(env, addr, i + k * vlmax, vd, ra);
+ target_ulong addr = base + ((i * nf + k) << esz);
+ ldst_elem(env, addr, i + k * max_elems, vd, ra);
k++;
}
}
- /* clear tail elements */
- if (clear_elem) {
- for (k = 0; k < nf; k++) {
- clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
- }
- }
+ env->vstart = 0;
}
/*
@@ -405,76 +303,68 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
* stride = NF * sizeof (MTYPE)
*/
-#define GEN_VEXT_LD_US(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \
+#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t stride = vext_nf(desc) * sizeof(MTYPE); \
+ uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
- CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \
- GETPC(), MMU_DATA_LOAD); \
-} \
- \
-void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
- CPURISCVState *env, uint32_t desc) \
-{ \
- vext_ldst_us(vd, base, env, desc, LOAD_FN, CLEAR_FN, \
- sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_LOAD); \
-}
-
-GEN_VEXT_LD_US(vlb_v_b, int8_t, int8_t, ldb_b, clearb)
-GEN_VEXT_LD_US(vlb_v_h, int8_t, int16_t, ldb_h, clearh)
-GEN_VEXT_LD_US(vlb_v_w, int8_t, int32_t, ldb_w, clearl)
-GEN_VEXT_LD_US(vlb_v_d, int8_t, int64_t, ldb_d, clearq)
-GEN_VEXT_LD_US(vlh_v_h, int16_t, int16_t, ldh_h, clearh)
-GEN_VEXT_LD_US(vlh_v_w, int16_t, int32_t, ldh_w, clearl)
-GEN_VEXT_LD_US(vlh_v_d, int16_t, int64_t, ldh_d, clearq)
-GEN_VEXT_LD_US(vlw_v_w, int32_t, int32_t, ldw_w, clearl)
-GEN_VEXT_LD_US(vlw_v_d, int32_t, int64_t, ldw_d, clearq)
-GEN_VEXT_LD_US(vle_v_b, int8_t, int8_t, lde_b, clearb)
-GEN_VEXT_LD_US(vle_v_h, int16_t, int16_t, lde_h, clearh)
-GEN_VEXT_LD_US(vle_v_w, int32_t, int32_t, lde_w, clearl)
-GEN_VEXT_LD_US(vle_v_d, int64_t, int64_t, lde_d, clearq)
-GEN_VEXT_LD_US(vlbu_v_b, uint8_t, uint8_t, ldbu_b, clearb)
-GEN_VEXT_LD_US(vlbu_v_h, uint8_t, uint16_t, ldbu_h, clearh)
-GEN_VEXT_LD_US(vlbu_v_w, uint8_t, uint32_t, ldbu_w, clearl)
-GEN_VEXT_LD_US(vlbu_v_d, uint8_t, uint64_t, ldbu_d, clearq)
-GEN_VEXT_LD_US(vlhu_v_h, uint16_t, uint16_t, ldhu_h, clearh)
-GEN_VEXT_LD_US(vlhu_v_w, uint16_t, uint32_t, ldhu_w, clearl)
-GEN_VEXT_LD_US(vlhu_v_d, uint16_t, uint64_t, ldhu_d, clearq)
-GEN_VEXT_LD_US(vlwu_v_w, uint32_t, uint32_t, ldwu_w, clearl)
-GEN_VEXT_LD_US(vlwu_v_d, uint32_t, uint64_t, ldwu_d, clearq)
-
-#define GEN_VEXT_ST_US(NAME, MTYPE, ETYPE, STORE_FN) \
-void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
- CPURISCVState *env, uint32_t desc) \
-{ \
- uint32_t stride = vext_nf(desc) * sizeof(MTYPE); \
- vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
- NULL, sizeof(ETYPE), sizeof(MTYPE), \
- GETPC(), MMU_DATA_STORE); \
+ ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
} \
\
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
CPURISCVState *env, uint32_t desc) \
{ \
- vext_ldst_us(vd, base, env, desc, STORE_FN, NULL, \
- sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_STORE);\
-}
-
-GEN_VEXT_ST_US(vsb_v_b, int8_t, int8_t , stb_b)
-GEN_VEXT_ST_US(vsb_v_h, int8_t, int16_t, stb_h)
-GEN_VEXT_ST_US(vsb_v_w, int8_t, int32_t, stb_w)
-GEN_VEXT_ST_US(vsb_v_d, int8_t, int64_t, stb_d)
-GEN_VEXT_ST_US(vsh_v_h, int16_t, int16_t, sth_h)
-GEN_VEXT_ST_US(vsh_v_w, int16_t, int32_t, sth_w)
-GEN_VEXT_ST_US(vsh_v_d, int16_t, int64_t, sth_d)
-GEN_VEXT_ST_US(vsw_v_w, int32_t, int32_t, stw_w)
-GEN_VEXT_ST_US(vsw_v_d, int32_t, int64_t, stw_d)
-GEN_VEXT_ST_US(vse_v_b, int8_t, int8_t , ste_b)
-GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h)
-GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w)
-GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d)
+ vext_ldst_us(vd, base, env, desc, LOAD_FN, \
+ ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
+}
+
+GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
+GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
+GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
+GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
+
+#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
+void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
+ CPURISCVState *env, uint32_t desc) \
+{ \
+ uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
+ vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
+ ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
+} \
+ \
+void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
+ CPURISCVState *env, uint32_t desc) \
+{ \
+ vext_ldst_us(vd, base, env, desc, STORE_FN, \
+ ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
+}
+
+GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
+GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
+GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
+GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
+
+/*
+ *** unit stride mask load and store, EEW = 1
+ */
+void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
+ CPURISCVState *env, uint32_t desc)
+{
+ /* evl = ceil(vl/8) */
+ uint8_t evl = (env->vl + 7) >> 3;
+ vext_ldst_us(vd, base, env, desc, lde_b,
+ 0, evl, GETPC(), MMU_DATA_LOAD);
+}
+
+void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
+ CPURISCVState *env, uint32_t desc)
+{
+ /* evl = ceil(vl/8) */
+ uint8_t evl = (env->vl + 7) >> 3;
+ vext_ldst_us(vd, base, env, desc, ste_b,
+ 0, evl, GETPC(), MMU_DATA_STORE);
+}
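
vlm.v/vsm.v always transfer whole bytes of the mask register, so the effective length is ceil(vl/8) bytes regardless of SEW. A tiny worked example of the evl computation above (hypothetical helper):

#include <stdint.h>

static uint32_t mask_bytes(uint32_t vl)
{
    return (vl + 7) >> 3;        /* e.g. vl = 17 active mask bits -> 3 bytes */
}
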
/*
*** index: access vector element from indexed memory
@@ -489,108 +379,89 @@ static target_ulong NAME(target_ulong base, \
return (base + *((ETYPE *)vs2 + H(idx))); \
}
-GEN_VEXT_GET_INDEX_ADDR(idx_b, int8_t, H1)
-GEN_VEXT_GET_INDEX_ADDR(idx_h, int16_t, H2)
-GEN_VEXT_GET_INDEX_ADDR(idx_w, int32_t, H4)
-GEN_VEXT_GET_INDEX_ADDR(idx_d, int64_t, H8)
+GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
+GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
+GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
+GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
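
Switching the idx_* element types to unsigned matters because indexed offsets are zero-extended byte offsets: an 8-bit index of 0x80 must add 128, not -128. A small illustration with hypothetical values:

#include <stdint.h>

static uintptr_t indexed_addr(uintptr_t base, uint8_t idx)
{
    return base + idx;                       /* 0x1000 + 0x80 = 0x1080 */
}
/* With a signed int8_t element type the same index would sign-extend:
 * 0x1000 + (int8_t)0x80 = 0x0f80, which is why uintN_t is used above. */
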
static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
void *vs2, CPURISCVState *env, uint32_t desc,
vext_get_index_addr get_index_addr,
vext_ldst_elem_fn *ldst_elem,
- clear_fn *clear_elem,
- uint32_t esz, uint32_t msz, uintptr_t ra,
- MMUAccessType access_type)
+ uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
uint32_t i, k;
uint32_t nf = vext_nf(desc);
uint32_t vm = vext_vm(desc);
- uint32_t mlen = vext_mlen(desc);
- uint32_t vlmax = vext_maxsz(desc) / esz;
+ uint32_t max_elems = vext_max_elems(desc, esz);
- /* probe every access*/
- for (i = 0; i < env->vl; i++) {
- if (!vm && !vext_elem_mask(v0, mlen, i)) {
- continue;
- }
- probe_pages(env, get_index_addr(base, i, vs2), nf * msz, ra,
- access_type);
- }
/* load bytes from guest memory */
- for (i = 0; i < env->vl; i++) {
- k = 0;
- if (!vm && !vext_elem_mask(v0, mlen, i)) {
+ for (i = env->vstart; i < env->vl; i++, env->vstart++) {
+ if (!vm && !vext_elem_mask(v0, i)) {
continue;
}
+
+ k = 0;
while (k < nf) {
- abi_ptr addr = get_index_addr(base, i, vs2) + k * msz;
- ldst_elem(env, addr, i + k * vlmax, vd, ra);
+ abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
+ ldst_elem(env, addr, i + k * max_elems, vd, ra);
k++;
}
}
- /* clear tail elements */
- if (clear_elem) {
- for (k = 0; k < nf; k++) {
- clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
- }
- }
+ env->vstart = 0;
}
-#define GEN_VEXT_LD_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, LOAD_FN, CLEAR_FN) \
+#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
- LOAD_FN, CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \
- GETPC(), MMU_DATA_LOAD); \
-}
-
-GEN_VEXT_LD_INDEX(vlxb_v_b, int8_t, int8_t, idx_b, ldb_b, clearb)
-GEN_VEXT_LD_INDEX(vlxb_v_h, int8_t, int16_t, idx_h, ldb_h, clearh)
-GEN_VEXT_LD_INDEX(vlxb_v_w, int8_t, int32_t, idx_w, ldb_w, clearl)
-GEN_VEXT_LD_INDEX(vlxb_v_d, int8_t, int64_t, idx_d, ldb_d, clearq)
-GEN_VEXT_LD_INDEX(vlxh_v_h, int16_t, int16_t, idx_h, ldh_h, clearh)
-GEN_VEXT_LD_INDEX(vlxh_v_w, int16_t, int32_t, idx_w, ldh_w, clearl)
-GEN_VEXT_LD_INDEX(vlxh_v_d, int16_t, int64_t, idx_d, ldh_d, clearq)
-GEN_VEXT_LD_INDEX(vlxw_v_w, int32_t, int32_t, idx_w, ldw_w, clearl)
-GEN_VEXT_LD_INDEX(vlxw_v_d, int32_t, int64_t, idx_d, ldw_d, clearq)
-GEN_VEXT_LD_INDEX(vlxe_v_b, int8_t, int8_t, idx_b, lde_b, clearb)
-GEN_VEXT_LD_INDEX(vlxe_v_h, int16_t, int16_t, idx_h, lde_h, clearh)
-GEN_VEXT_LD_INDEX(vlxe_v_w, int32_t, int32_t, idx_w, lde_w, clearl)
-GEN_VEXT_LD_INDEX(vlxe_v_d, int64_t, int64_t, idx_d, lde_d, clearq)
-GEN_VEXT_LD_INDEX(vlxbu_v_b, uint8_t, uint8_t, idx_b, ldbu_b, clearb)
-GEN_VEXT_LD_INDEX(vlxbu_v_h, uint8_t, uint16_t, idx_h, ldbu_h, clearh)
-GEN_VEXT_LD_INDEX(vlxbu_v_w, uint8_t, uint32_t, idx_w, ldbu_w, clearl)
-GEN_VEXT_LD_INDEX(vlxbu_v_d, uint8_t, uint64_t, idx_d, ldbu_d, clearq)
-GEN_VEXT_LD_INDEX(vlxhu_v_h, uint16_t, uint16_t, idx_h, ldhu_h, clearh)
-GEN_VEXT_LD_INDEX(vlxhu_v_w, uint16_t, uint32_t, idx_w, ldhu_w, clearl)
-GEN_VEXT_LD_INDEX(vlxhu_v_d, uint16_t, uint64_t, idx_d, ldhu_d, clearq)
-GEN_VEXT_LD_INDEX(vlxwu_v_w, uint32_t, uint32_t, idx_w, ldwu_w, clearl)
-GEN_VEXT_LD_INDEX(vlxwu_v_d, uint32_t, uint64_t, idx_d, ldwu_d, clearq)
-
-#define GEN_VEXT_ST_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, STORE_FN)\
+ LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
+}
+
+GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
+GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
+GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
+GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
+GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
+GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
+GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
+GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
+GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
+GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
+GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
+GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
+GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
+GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
+GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
+GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
+
+#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
- STORE_FN, NULL, sizeof(ETYPE), sizeof(MTYPE),\
+ STORE_FN, ctzl(sizeof(ETYPE)), \
GETPC(), MMU_DATA_STORE); \
}
-GEN_VEXT_ST_INDEX(vsxb_v_b, int8_t, int8_t, idx_b, stb_b)
-GEN_VEXT_ST_INDEX(vsxb_v_h, int8_t, int16_t, idx_h, stb_h)
-GEN_VEXT_ST_INDEX(vsxb_v_w, int8_t, int32_t, idx_w, stb_w)
-GEN_VEXT_ST_INDEX(vsxb_v_d, int8_t, int64_t, idx_d, stb_d)
-GEN_VEXT_ST_INDEX(vsxh_v_h, int16_t, int16_t, idx_h, sth_h)
-GEN_VEXT_ST_INDEX(vsxh_v_w, int16_t, int32_t, idx_w, sth_w)
-GEN_VEXT_ST_INDEX(vsxh_v_d, int16_t, int64_t, idx_d, sth_d)
-GEN_VEXT_ST_INDEX(vsxw_v_w, int32_t, int32_t, idx_w, stw_w)
-GEN_VEXT_ST_INDEX(vsxw_v_d, int32_t, int64_t, idx_d, stw_d)
-GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t, int8_t, idx_b, ste_b)
-GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h)
-GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w)
-GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d)
+GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
+GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
+GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
+GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
+GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
+GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
+GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
+GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
+GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
+GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
+GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
+GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
+GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
+GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
+GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
+GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
/*
 *** unit-stride fault-only-first load instructions
@@ -599,40 +470,38 @@ static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
CPURISCVState *env, uint32_t desc,
vext_ldst_elem_fn *ldst_elem,
- clear_fn *clear_elem,
- uint32_t esz, uint32_t msz, uintptr_t ra)
+ uint32_t esz, uintptr_t ra)
{
void *host;
uint32_t i, k, vl = 0;
- uint32_t mlen = vext_mlen(desc);
uint32_t nf = vext_nf(desc);
uint32_t vm = vext_vm(desc);
- uint32_t vlmax = vext_maxsz(desc) / esz;
+ uint32_t max_elems = vext_max_elems(desc, esz);
target_ulong addr, offset, remain;
/* probe every access*/
- for (i = 0; i < env->vl; i++) {
- if (!vm && !vext_elem_mask(v0, mlen, i)) {
+ for (i = env->vstart; i < env->vl; i++) {
+ if (!vm && !vext_elem_mask(v0, i)) {
continue;
}
- addr = base + nf * i * msz;
+ addr = base + i * (nf << esz);
if (i == 0) {
- probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
+ probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
} else {
/* if it triggers an exception, no need to check watchpoint */
- remain = nf * msz;
+ remain = nf << esz;
while (remain > 0) {
offset = -(addr | TARGET_PAGE_MASK);
host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
cpu_mmu_index(env, false));
if (host) {
#ifdef CONFIG_USER_ONLY
- if (page_check_range(addr, nf * msz, PAGE_READ) < 0) {
+ if (page_check_range(addr, nf << esz, PAGE_READ) < 0) {
vl = i;
goto ProbeSuccess;
}
#else
- probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
+ probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
#endif
} else {
vl = i;
@@ -651,89 +520,39 @@ ProbeSuccess:
if (vl != 0) {
env->vl = vl;
}
- for (i = 0; i < env->vl; i++) {
+ for (i = env->vstart; i < env->vl; i++) {
k = 0;
- if (!vm && !vext_elem_mask(v0, mlen, i)) {
+ if (!vm && !vext_elem_mask(v0, i)) {
continue;
}
while (k < nf) {
- target_ulong addr = base + (i * nf + k) * msz;
- ldst_elem(env, addr, i + k * vlmax, vd, ra);
+ target_ulong addr = base + ((i * nf + k) << esz);
+ ldst_elem(env, addr, i + k * max_elems, vd, ra);
k++;
}
}
- /* clear tail elements */
- if (vl != 0) {
- return;
- }
- for (k = 0; k < nf; k++) {
- clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
- }
+ env->vstart = 0;
}
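
The probing loop above computes the bytes remaining on the current guest page as -(addr | TARGET_PAGE_MASK); because TARGET_PAGE_MASK is ~(page_size - 1), this equals page_size - (addr & (page_size - 1)). A stand-alone check of the identity with a hypothetical 4 KiB page:

#include <stdint.h>

#define EX_PAGE_SIZE  4096ull
#define EX_PAGE_MASK  (~(EX_PAGE_SIZE - 1))     /* hypothetical, mirrors TARGET_PAGE_MASK */

static uint64_t bytes_to_page_end(uint64_t addr)
{
    return -(addr | EX_PAGE_MASK);              /* e.g. addr = 0x1ff0 -> 0x10 */
}
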
-#define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \
-void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
- CPURISCVState *env, uint32_t desc) \
-{ \
- vext_ldff(vd, v0, base, env, desc, LOAD_FN, CLEAR_FN, \
- sizeof(ETYPE), sizeof(MTYPE), GETPC()); \
-}
-
-GEN_VEXT_LDFF(vlbff_v_b, int8_t, int8_t, ldb_b, clearb)
-GEN_VEXT_LDFF(vlbff_v_h, int8_t, int16_t, ldb_h, clearh)
-GEN_VEXT_LDFF(vlbff_v_w, int8_t, int32_t, ldb_w, clearl)
-GEN_VEXT_LDFF(vlbff_v_d, int8_t, int64_t, ldb_d, clearq)
-GEN_VEXT_LDFF(vlhff_v_h, int16_t, int16_t, ldh_h, clearh)
-GEN_VEXT_LDFF(vlhff_v_w, int16_t, int32_t, ldh_w, clearl)
-GEN_VEXT_LDFF(vlhff_v_d, int16_t, int64_t, ldh_d, clearq)
-GEN_VEXT_LDFF(vlwff_v_w, int32_t, int32_t, ldw_w, clearl)
-GEN_VEXT_LDFF(vlwff_v_d, int32_t, int64_t, ldw_d, clearq)
-GEN_VEXT_LDFF(vleff_v_b, int8_t, int8_t, lde_b, clearb)
-GEN_VEXT_LDFF(vleff_v_h, int16_t, int16_t, lde_h, clearh)
-GEN_VEXT_LDFF(vleff_v_w, int32_t, int32_t, lde_w, clearl)
-GEN_VEXT_LDFF(vleff_v_d, int64_t, int64_t, lde_d, clearq)
-GEN_VEXT_LDFF(vlbuff_v_b, uint8_t, uint8_t, ldbu_b, clearb)
-GEN_VEXT_LDFF(vlbuff_v_h, uint8_t, uint16_t, ldbu_h, clearh)
-GEN_VEXT_LDFF(vlbuff_v_w, uint8_t, uint32_t, ldbu_w, clearl)
-GEN_VEXT_LDFF(vlbuff_v_d, uint8_t, uint64_t, ldbu_d, clearq)
-GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h, clearh)
-GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w, clearl)
-GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d, clearq)
-GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w, clearl)
-GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d, clearq)
+#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
+void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
+ CPURISCVState *env, uint32_t desc) \
+{ \
+ vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
+ ctzl(sizeof(ETYPE)), GETPC()); \
+}
-/*
- *** Vector AMO Operations (Zvamo)
- */
-typedef void vext_amo_noatomic_fn(void *vs3, target_ulong addr,
- uint32_t wd, uint32_t idx, CPURISCVState *env,
- uintptr_t retaddr);
+GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
+GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
+GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
+GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
-/* no atomic opreation for vector atomic insructions */
#define DO_SWAP(N, M) (M)
#define DO_AND(N, M) (N & M)
#define DO_XOR(N, M) (N ^ M)
#define DO_OR(N, M) (N | M)
#define DO_ADD(N, M) (N + M)
-#define GEN_VEXT_AMO_NOATOMIC_OP(NAME, ESZ, MSZ, H, DO_OP, SUF) \
-static void \
-vext_##NAME##_noatomic_op(void *vs3, target_ulong addr, \
- uint32_t wd, uint32_t idx, \
- CPURISCVState *env, uintptr_t retaddr)\
-{ \
- typedef int##ESZ##_t ETYPE; \
- typedef int##MSZ##_t MTYPE; \
- typedef uint##MSZ##_t UMTYPE __attribute__((unused)); \
- ETYPE *pe3 = (ETYPE *)vs3 + H(idx); \
- MTYPE a = cpu_ld##SUF##_data(env, addr), b = *pe3; \
- \
- cpu_st##SUF##_data(env, addr, DO_OP(a, b)); \
- if (wd) { \
- *pe3 = a; \
- } \
-}
-
/* Signed min/max */
#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
@@ -742,103 +561,81 @@ vext_##NAME##_noatomic_op(void *vs3, target_ulong addr, \
#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
-GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_w, 32, 32, H4, DO_SWAP, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_w, 32, 32, H4, DO_ADD, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_w, 32, 32, H4, DO_XOR, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_w, 32, 32, H4, DO_AND, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_w, 32, 32, H4, DO_OR, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_w, 32, 32, H4, DO_MIN, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_w, 32, 32, H4, DO_MAX, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_w, 32, 32, H4, DO_MINU, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_w, 32, 32, H4, DO_MAXU, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_d, 64, 32, H8, DO_SWAP, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamoswapd_v_d, 64, 64, H8, DO_SWAP, q)
-GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_d, 64, 32, H8, DO_ADD, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamoaddd_v_d, 64, 64, H8, DO_ADD, q)
-GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_d, 64, 32, H8, DO_XOR, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamoxord_v_d, 64, 64, H8, DO_XOR, q)
-GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_d, 64, 32, H8, DO_AND, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamoandd_v_d, 64, 64, H8, DO_AND, q)
-GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_d, 64, 32, H8, DO_OR, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamoord_v_d, 64, 64, H8, DO_OR, q)
-GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_d, 64, 32, H8, DO_MIN, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamomind_v_d, 64, 64, H8, DO_MIN, q)
-GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_d, 64, 32, H8, DO_MAX, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamomaxd_v_d, 64, 64, H8, DO_MAX, q)
-GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_d, 64, 32, H8, DO_MINU, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamominud_v_d, 64, 64, H8, DO_MINU, q)
-GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_d, 64, 32, H8, DO_MAXU, l)
-GEN_VEXT_AMO_NOATOMIC_OP(vamomaxud_v_d, 64, 64, H8, DO_MAXU, q)
-
-static inline void
-vext_amo_noatomic(void *vs3, void *v0, target_ulong base,
- void *vs2, CPURISCVState *env, uint32_t desc,
- vext_get_index_addr get_index_addr,
- vext_amo_noatomic_fn *noatomic_op,
- clear_fn *clear_elem,
- uint32_t esz, uint32_t msz, uintptr_t ra)
+/*
+ *** load and store whole register instructions
+ */
+static void
+vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
+ vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
+ MMUAccessType access_type)
{
- uint32_t i;
- target_long addr;
- uint32_t wd = vext_wd(desc);
- uint32_t vm = vext_vm(desc);
- uint32_t mlen = vext_mlen(desc);
- uint32_t vlmax = vext_maxsz(desc) / esz;
+ uint32_t i, k, off, pos;
+ uint32_t nf = vext_nf(desc);
+ uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
+ uint32_t max_elems = vlenb >> esz;
- for (i = 0; i < env->vl; i++) {
- if (!vm && !vext_elem_mask(v0, mlen, i)) {
- continue;
+ k = env->vstart / max_elems;
+ off = env->vstart % max_elems;
+
+ if (off) {
+        /* load/store rest of elements of current segment pointed to by vstart */
+ for (pos = off; pos < max_elems; pos++, env->vstart++) {
+ target_ulong addr = base + ((pos + k * max_elems) << esz);
+ ldst_elem(env, addr, pos + k * max_elems, vd, ra);
}
- probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_LOAD);
- probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_STORE);
+ k++;
}
- for (i = 0; i < env->vl; i++) {
- if (!vm && !vext_elem_mask(v0, mlen, i)) {
- continue;
+
+ /* load/store elements for rest of segments */
+ for (; k < nf; k++) {
+ for (i = 0; i < max_elems; i++, env->vstart++) {
+ target_ulong addr = base + ((i + k * max_elems) << esz);
+ ldst_elem(env, addr, i + k * max_elems, vd, ra);
}
- addr = get_index_addr(base, i, vs2);
- noatomic_op(vs3, addr, wd, i, env, ra);
}
- clear_elem(vs3, env->vl, env->vl * esz, vlmax * esz);
+
+ env->vstart = 0;
}
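
For whole-register accesses, vstart is split into a register index and an offset within that register so a trapped access can resume mid-group. For instance, with max_elems = 16 elements per register, vstart = 21 resumes in register k = 1 at element off = 5:

#include <stdint.h>

static void split_vstart(uint32_t vstart, uint32_t max_elems,
                         uint32_t *k, uint32_t *off)
{
    *k   = vstart / max_elems;   /* register within the group, e.g. 21 / 16 = 1 */
    *off = vstart % max_elems;   /* first element left to transfer, e.g. 21 % 16 = 5 */
}
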
-#define GEN_VEXT_AMO(NAME, MTYPE, ETYPE, INDEX_FN, CLEAR_FN) \
-void HELPER(NAME)(void *vs3, void *v0, target_ulong base, \
- void *vs2, CPURISCVState *env, uint32_t desc) \
-{ \
- vext_amo_noatomic(vs3, v0, base, vs2, env, desc, \
- INDEX_FN, vext_##NAME##_noatomic_op, \
- CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \
- GETPC()); \
-}
-
-GEN_VEXT_AMO(vamoswapw_v_d, int32_t, int64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamoswapd_v_d, int64_t, int64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamoaddw_v_d, int32_t, int64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamoaddd_v_d, int64_t, int64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamoxorw_v_d, int32_t, int64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamoxord_v_d, int64_t, int64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamoandw_v_d, int32_t, int64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamoandd_v_d, int64_t, int64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamoorw_v_d, int32_t, int64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamoord_v_d, int64_t, int64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamominw_v_d, int32_t, int64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamomind_v_d, int64_t, int64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamomaxw_v_d, int32_t, int64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamomaxd_v_d, int64_t, int64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamominuw_v_d, uint32_t, uint64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamominud_v_d, uint64_t, uint64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamomaxuw_v_d, uint32_t, uint64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamomaxud_v_d, uint64_t, uint64_t, idx_d, clearq)
-GEN_VEXT_AMO(vamoswapw_v_w, int32_t, int32_t, idx_w, clearl)
-GEN_VEXT_AMO(vamoaddw_v_w, int32_t, int32_t, idx_w, clearl)
-GEN_VEXT_AMO(vamoxorw_v_w, int32_t, int32_t, idx_w, clearl)
-GEN_VEXT_AMO(vamoandw_v_w, int32_t, int32_t, idx_w, clearl)
-GEN_VEXT_AMO(vamoorw_v_w, int32_t, int32_t, idx_w, clearl)
-GEN_VEXT_AMO(vamominw_v_w, int32_t, int32_t, idx_w, clearl)
-GEN_VEXT_AMO(vamomaxw_v_w, int32_t, int32_t, idx_w, clearl)
-GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl)
-GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl)
+#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
+void HELPER(NAME)(void *vd, target_ulong base, \
+ CPURISCVState *env, uint32_t desc) \
+{ \
+ vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
+ ctzl(sizeof(ETYPE)), GETPC(), \
+ MMU_DATA_LOAD); \
+}
+
+GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
+GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
+GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
+GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
+GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
+GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
+GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
+GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
+GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
+GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
+GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
+GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
+GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
+GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
+GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
+GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
+
+#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
+void HELPER(NAME)(void *vd, target_ulong base, \
+ CPURISCVState *env, uint32_t desc) \
+{ \
+ vext_ldst_whole(vd, base, env, desc, STORE_FN, \
+ ctzl(sizeof(ETYPE)), GETPC(), \
+ MMU_DATA_STORE); \
+}
+
+GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
+GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
+GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
+GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
/*
*** Vector Integer Arithmetic Instructions
@@ -904,41 +701,39 @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
CPURISCVState *env, uint32_t desc,
uint32_t esz, uint32_t dsz,
- opivv2_fn *fn, clear_fn *clearfn)
+ opivv2_fn *fn)
{
- uint32_t vlmax = vext_maxsz(desc) / esz;
- uint32_t mlen = vext_mlen(desc);
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
uint32_t i;
- for (i = 0; i < vl; i++) {
- if (!vm && !vext_elem_mask(v0, mlen, i)) {
+ for (i = env->vstart; i < vl; i++) {
+ if (!vm && !vext_elem_mask(v0, i)) {
continue;
}
fn(vd, vs1, vs2, i);
}
- clearfn(vd, vl, vl * dsz, vlmax * dsz);
+ env->vstart = 0;
}
/* generate the helpers for OPIVV */
-#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN) \
+#define GEN_VEXT_VV(NAME, ESZ, DSZ) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
void *vs2, CPURISCVState *env, \
uint32_t desc) \
{ \
do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
- do_##NAME, CLEAR_FN); \
+ do_##NAME); \
}
-GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq)
-GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq)
+GEN_VEXT_VV(vadd_vv_b, 1, 1)
+GEN_VEXT_VV(vadd_vv_h, 2, 2)
+GEN_VEXT_VV(vadd_vv_w, 4, 4)
+GEN_VEXT_VV(vadd_vv_d, 8, 8)
+GEN_VEXT_VV(vsub_vv_b, 1, 1)
+GEN_VEXT_VV(vsub_vv_h, 2, 2)
+GEN_VEXT_VV(vsub_vv_w, 4, 4)
+GEN_VEXT_VV(vsub_vv_d, 8, 8)
typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
@@ -969,45 +764,43 @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
CPURISCVState *env, uint32_t desc,
uint32_t esz, uint32_t dsz,
- opivx2_fn fn, clear_fn *clearfn)
+ opivx2_fn fn)
{
- uint32_t vlmax = vext_maxsz(desc) / esz;
- uint32_t mlen = vext_mlen(desc);
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
uint32_t i;
- for (i = 0; i < vl; i++) {
- if (!vm && !vext_elem_mask(v0, mlen, i)) {
+ for (i = env->vstart; i < vl; i++) {
+ if (!vm && !vext_elem_mask(v0, i)) {
continue;
}
fn(vd, s1, vs2, i);
}
- clearfn(vd, vl, vl * dsz, vlmax * dsz);
+ env->vstart = 0;
}
/* generate the helpers for OPIVX */
-#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN) \
+#define GEN_VEXT_VX(NAME, ESZ, DSZ) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
void *vs2, CPURISCVState *env, \
uint32_t desc) \
{ \
do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \
- do_##NAME, CLEAR_FN); \
-}
-
-GEN_VEXT_VX(vadd_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vadd_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vadd_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vadd_vx_d, 8, 8, clearq)
-GEN_VEXT_VX(vsub_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vsub_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vsub_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vsub_vx_d, 8, 8, clearq)
-GEN_VEXT_VX(vrsub_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vrsub_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vrsub_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vrsub_vx_d, 8, 8, clearq)
+ do_##NAME); \
+}
+
+GEN_VEXT_VX(vadd_vx_b, 1, 1)
+GEN_VEXT_VX(vadd_vx_h, 2, 2)
+GEN_VEXT_VX(vadd_vx_w, 4, 4)
+GEN_VEXT_VX(vadd_vx_d, 8, 8)
+GEN_VEXT_VX(vsub_vx_b, 1, 1)
+GEN_VEXT_VX(vsub_vx_h, 2, 2)
+GEN_VEXT_VX(vsub_vx_w, 4, 4)
+GEN_VEXT_VX(vsub_vx_d, 8, 8)
+GEN_VEXT_VX(vrsub_vx_b, 1, 1)
+GEN_VEXT_VX(vrsub_vx_h, 2, 2)
+GEN_VEXT_VX(vrsub_vx_w, 4, 4)
+GEN_VEXT_VX(vrsub_vx_d, 8, 8)
void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
@@ -1086,30 +879,30 @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
-GEN_VEXT_VV(vwaddu_vv_b, 1, 2, clearh)
-GEN_VEXT_VV(vwaddu_vv_h, 2, 4, clearl)
-GEN_VEXT_VV(vwaddu_vv_w, 4, 8, clearq)
-GEN_VEXT_VV(vwsubu_vv_b, 1, 2, clearh)
-GEN_VEXT_VV(vwsubu_vv_h, 2, 4, clearl)
-GEN_VEXT_VV(vwsubu_vv_w, 4, 8, clearq)
-GEN_VEXT_VV(vwadd_vv_b, 1, 2, clearh)
-GEN_VEXT_VV(vwadd_vv_h, 2, 4, clearl)
-GEN_VEXT_VV(vwadd_vv_w, 4, 8, clearq)
-GEN_VEXT_VV(vwsub_vv_b, 1, 2, clearh)
-GEN_VEXT_VV(vwsub_vv_h, 2, 4, clearl)
-GEN_VEXT_VV(vwsub_vv_w, 4, 8, clearq)
-GEN_VEXT_VV(vwaddu_wv_b, 1, 2, clearh)
-GEN_VEXT_VV(vwaddu_wv_h, 2, 4, clearl)
-GEN_VEXT_VV(vwaddu_wv_w, 4, 8, clearq)
-GEN_VEXT_VV(vwsubu_wv_b, 1, 2, clearh)
-GEN_VEXT_VV(vwsubu_wv_h, 2, 4, clearl)
-GEN_VEXT_VV(vwsubu_wv_w, 4, 8, clearq)
-GEN_VEXT_VV(vwadd_wv_b, 1, 2, clearh)
-GEN_VEXT_VV(vwadd_wv_h, 2, 4, clearl)
-GEN_VEXT_VV(vwadd_wv_w, 4, 8, clearq)
-GEN_VEXT_VV(vwsub_wv_b, 1, 2, clearh)
-GEN_VEXT_VV(vwsub_wv_h, 2, 4, clearl)
-GEN_VEXT_VV(vwsub_wv_w, 4, 8, clearq)
+GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
+GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
+GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
+GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
+GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
+GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
+GEN_VEXT_VV(vwadd_vv_b, 1, 2)
+GEN_VEXT_VV(vwadd_vv_h, 2, 4)
+GEN_VEXT_VV(vwadd_vv_w, 4, 8)
+GEN_VEXT_VV(vwsub_vv_b, 1, 2)
+GEN_VEXT_VV(vwsub_vv_h, 2, 4)
+GEN_VEXT_VV(vwsub_vv_w, 4, 8)
+GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
+GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
+GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
+GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
+GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
+GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
+GEN_VEXT_VV(vwadd_wv_b, 1, 2)
+GEN_VEXT_VV(vwadd_wv_h, 2, 4)
+GEN_VEXT_VV(vwadd_wv_w, 4, 8)
+GEN_VEXT_VV(vwsub_wv_b, 1, 2)
+GEN_VEXT_VV(vwsub_wv_h, 2, 4)
+GEN_VEXT_VV(vwsub_wv_w, 4, 8)
RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
@@ -1135,93 +928,87 @@ RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
-GEN_VEXT_VX(vwaddu_vx_b, 1, 2, clearh)
-GEN_VEXT_VX(vwaddu_vx_h, 2, 4, clearl)
-GEN_VEXT_VX(vwaddu_vx_w, 4, 8, clearq)
-GEN_VEXT_VX(vwsubu_vx_b, 1, 2, clearh)
-GEN_VEXT_VX(vwsubu_vx_h, 2, 4, clearl)
-GEN_VEXT_VX(vwsubu_vx_w, 4, 8, clearq)
-GEN_VEXT_VX(vwadd_vx_b, 1, 2, clearh)
-GEN_VEXT_VX(vwadd_vx_h, 2, 4, clearl)
-GEN_VEXT_VX(vwadd_vx_w, 4, 8, clearq)
-GEN_VEXT_VX(vwsub_vx_b, 1, 2, clearh)
-GEN_VEXT_VX(vwsub_vx_h, 2, 4, clearl)
-GEN_VEXT_VX(vwsub_vx_w, 4, 8, clearq)
-GEN_VEXT_VX(vwaddu_wx_b, 1, 2, clearh)
-GEN_VEXT_VX(vwaddu_wx_h, 2, 4, clearl)
-GEN_VEXT_VX(vwaddu_wx_w, 4, 8, clearq)
-GEN_VEXT_VX(vwsubu_wx_b, 1, 2, clearh)
-GEN_VEXT_VX(vwsubu_wx_h, 2, 4, clearl)
-GEN_VEXT_VX(vwsubu_wx_w, 4, 8, clearq)
-GEN_VEXT_VX(vwadd_wx_b, 1, 2, clearh)
-GEN_VEXT_VX(vwadd_wx_h, 2, 4, clearl)
-GEN_VEXT_VX(vwadd_wx_w, 4, 8, clearq)
-GEN_VEXT_VX(vwsub_wx_b, 1, 2, clearh)
-GEN_VEXT_VX(vwsub_wx_h, 2, 4, clearl)
-GEN_VEXT_VX(vwsub_wx_w, 4, 8, clearq)
+GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
+GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
+GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
+GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
+GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
+GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
+GEN_VEXT_VX(vwadd_vx_b, 1, 2)
+GEN_VEXT_VX(vwadd_vx_h, 2, 4)
+GEN_VEXT_VX(vwadd_vx_w, 4, 8)
+GEN_VEXT_VX(vwsub_vx_b, 1, 2)
+GEN_VEXT_VX(vwsub_vx_h, 2, 4)
+GEN_VEXT_VX(vwsub_vx_w, 4, 8)
+GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
+GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
+GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
+GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
+GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
+GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
+GEN_VEXT_VX(vwadd_wx_b, 1, 2)
+GEN_VEXT_VX(vwadd_wx_h, 2, 4)
+GEN_VEXT_VX(vwadd_wx_w, 4, 8)
+GEN_VEXT_VX(vwsub_wx_b, 1, 2)
+GEN_VEXT_VX(vwsub_wx_h, 2, 4)
+GEN_VEXT_VX(vwsub_wx_w, 4, 8)
/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)
-#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP, CLEAR_FN) \
+#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vl = env->vl; \
- uint32_t esz = sizeof(ETYPE); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
+ for (i = env->vstart; i < vl; i++) { \
ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
- uint8_t carry = vext_elem_mask(v0, mlen, i); \
+ ETYPE carry = vext_elem_mask(v0, i); \
\
*((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
} \
- CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
+ env->vstart = 0; \
}
-GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC, clearb)
-GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC, clearh)
-GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC, clearl)
-GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC, clearq)
+GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
+GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
+GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
+GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
-GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC, clearb)
-GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC, clearh)
-GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC, clearl)
-GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC, clearq)
+GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
+GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
+GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
+GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
-#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP, CLEAR_FN) \
+#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vl = env->vl; \
- uint32_t esz = sizeof(ETYPE); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
+ for (i = env->vstart; i < vl; i++) { \
ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
- uint8_t carry = vext_elem_mask(v0, mlen, i); \
+ ETYPE carry = vext_elem_mask(v0, i); \
\
*((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
} \
- CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
+ env->vstart = 0; \
}
-GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC, clearb)
-GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC, clearh)
-GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC, clearl)
-GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC, clearq)
+GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
+GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
+GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
+GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
-GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC, clearb)
-GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC, clearh)
-GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC, clearl)
-GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC, clearq)
+GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
+GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
+GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
+GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
(__typeof(N))(N + M) < N)
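
DO_MADC above detects unsigned carry-out without widening: after a truncating add, a carry occurred iff the result is smaller than one addend (or not larger, when a carry-in was added). A hedged stand-alone version for 8-bit elements:

#include <stdbool.h>
#include <stdint.h>

static bool add_carries_out(uint8_t n, uint8_t m, bool cin)
{
    /* e.g. n = 200, m = 100: (uint8_t)300 = 44 < 200, so carry-out is set */
    return cin ? (uint8_t)(n + m + 1) <= n : (uint8_t)(n + m) < n;
}
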
@@ -1231,21 +1018,17 @@ GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC, clearq)
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vl = env->vl; \
- uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
+ uint32_t vm = vext_vm(desc); \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
+ for (i = env->vstart; i < vl; i++) { \
ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
- uint8_t carry = vext_elem_mask(v0, mlen, i); \
- \
- vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1, carry));\
- } \
- for (; i < vlmax; i++) { \
- vext_set_elem_mask(vd, mlen, i, 0); \
+ ETYPE carry = !vm && vext_elem_mask(v0, i); \
+ vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
} \
+ env->vstart = 0; \
}
GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
@@ -1262,21 +1045,17 @@ GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vl = env->vl; \
- uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
+ uint32_t vm = vext_vm(desc); \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
+ for (i = env->vstart; i < vl; i++) { \
ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
- uint8_t carry = vext_elem_mask(v0, mlen, i); \
- \
- vext_set_elem_mask(vd, mlen, i, \
+ ETYPE carry = !vm && vext_elem_mask(v0, i); \
+ vext_set_elem_mask(vd, i, \
DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
} \
- for (; i < vlmax; i++) { \
- vext_set_elem_mask(vd, mlen, i, 0); \
- } \
+ env->vstart = 0; \
}
GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
@@ -1302,18 +1081,18 @@ RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
-GEN_VEXT_VV(vand_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vand_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vand_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vand_vv_d, 8, 8, clearq)
-GEN_VEXT_VV(vor_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vor_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vor_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vor_vv_d, 8, 8, clearq)
-GEN_VEXT_VV(vxor_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vxor_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vxor_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vxor_vv_d, 8, 8, clearq)
+GEN_VEXT_VV(vand_vv_b, 1, 1)
+GEN_VEXT_VV(vand_vv_h, 2, 2)
+GEN_VEXT_VV(vand_vv_w, 4, 4)
+GEN_VEXT_VV(vand_vv_d, 8, 8)
+GEN_VEXT_VV(vor_vv_b, 1, 1)
+GEN_VEXT_VV(vor_vv_h, 2, 2)
+GEN_VEXT_VV(vor_vv_w, 4, 4)
+GEN_VEXT_VV(vor_vv_d, 8, 8)
+GEN_VEXT_VV(vxor_vv_b, 1, 1)
+GEN_VEXT_VV(vxor_vv_h, 2, 2)
+GEN_VEXT_VV(vxor_vv_w, 4, 4)
+GEN_VEXT_VV(vxor_vv_d, 8, 8)
RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
@@ -1327,111 +1106,105 @@ RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
-GEN_VEXT_VX(vand_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vand_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vand_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vand_vx_d, 8, 8, clearq)
-GEN_VEXT_VX(vor_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vor_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vor_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vor_vx_d, 8, 8, clearq)
-GEN_VEXT_VX(vxor_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vxor_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vxor_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vxor_vx_d, 8, 8, clearq)
+GEN_VEXT_VX(vand_vx_b, 1, 1)
+GEN_VEXT_VX(vand_vx_h, 2, 2)
+GEN_VEXT_VX(vand_vx_w, 4, 4)
+GEN_VEXT_VX(vand_vx_d, 8, 8)
+GEN_VEXT_VX(vor_vx_b, 1, 1)
+GEN_VEXT_VX(vor_vx_h, 2, 2)
+GEN_VEXT_VX(vor_vx_w, 4, 4)
+GEN_VEXT_VX(vor_vx_d, 8, 8)
+GEN_VEXT_VX(vxor_vx_b, 1, 1)
+GEN_VEXT_VX(vxor_vx_h, 2, 2)
+GEN_VEXT_VX(vxor_vx_w, 4, 4)
+GEN_VEXT_VX(vxor_vx_d, 8, 8)
/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M) (N << (M))
#define DO_SRL(N, M) (N >> (M))
/* generate the helpers for shift instructions with two vector operators */
-#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK, CLEAR_FN) \
+#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
- uint32_t esz = sizeof(TS1); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
+ for (i = env->vstart; i < vl; i++) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
continue; \
} \
TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
*((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
} \
- CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
+ env->vstart = 0; \
}
-GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7, clearb)
-GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf, clearh)
-GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f, clearl)
-GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f, clearq)
+GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
+GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
+GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
+GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
-GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb)
-GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh)
-GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl)
-GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq)
+GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
+GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
+GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
+GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
-GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb)
-GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh)
-GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl)
-GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq)
+GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
+GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
+GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
+GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
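
As with the scalar shifts, only the low log2(SEW) bits of the shift operand are used, which is what the MASK argument (0x7/0xf/0x1f/0x3f) enforces; vsra gets an arithmetic shift simply by instantiating DO_SRL with a signed TS2 type. A small illustration for SEW = 8 (hypothetical helper):

#include <stdint.h>

static uint8_t sll8(uint8_t s2, uint8_t s1)
{
    return s2 << (s1 & 0x7);     /* e.g. s1 = 9 shifts by 1, not 9 */
}
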
/* generate the helpers for shift instructions with one vector and one scalar */
-#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK, CLEAR_FN) \
-void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
- void *vs2, CPURISCVState *env, uint32_t desc) \
-{ \
- uint32_t mlen = vext_mlen(desc); \
- uint32_t vm = vext_vm(desc); \
- uint32_t vl = env->vl; \
- uint32_t esz = sizeof(TD); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
- uint32_t i; \
- \
- for (i = 0; i < vl; i++) { \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
- continue; \
- } \
- TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
- *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
- } \
- CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
-}
-
-GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7, clearb)
-GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf, clearh)
-GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f, clearl)
-GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f, clearq)
-
-GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb)
-GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh)
-GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl)
-GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq)
-
-GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb)
-GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh)
-GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl)
-GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq)
+#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
+ void *vs2, CPURISCVState *env, uint32_t desc) \
+{ \
+ uint32_t vm = vext_vm(desc); \
+ uint32_t vl = env->vl; \
+ uint32_t i; \
+ \
+ for (i = env->vstart; i < vl; i++) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
+ continue; \
+ } \
+ TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
+ *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
+ } \
+ env->vstart = 0; \
+}
+
+GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
+GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
+GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
+GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
+
+GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
+GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
+GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
+GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
+
+GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
+GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
+GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
+GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
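
For reference, a minimal standalone sketch (not QEMU code, simplified relative to the GEN_VEXT_SHIFT_VX macro above) of the loop shape the reworked helpers share: iteration starts at vstart, masked-off elements are skipped, processing stops at vl, and vstart is cleared once the instruction completes. The one-bit-per-element mask layout used here is an assumption that mirrors what vext_elem_mask() does for rvv-1.0.

#include <stdint.h>

/* Simplified, self-contained model of the new helper loop:
 * start at vstart, skip masked-off elements, stop at vl, then clear
 * vstart so a completed instruction always leaves vstart == 0. */
static void model_vsll_vx_u8(uint8_t *vd, const uint8_t *v0, uint8_t s1,
                             const uint8_t *vs2, uint32_t *vstart,
                             uint32_t vl, int vm)
{
    for (uint32_t i = *vstart; i < vl; i++) {
        if (!vm && !((v0[i / 8] >> (i % 8)) & 1)) {
            continue;                     /* element masked off */
        }
        vd[i] = (uint8_t)(vs2[i] << (s1 & 0x7));
    }
    *vstart = 0;                          /* vstart is consumed by the op */
}
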
/* Vector Narrowing Integer Right Shift Instructions */
-GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb)
-GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh)
-GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl)
-GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb)
-GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh)
-GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl)
-GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb)
-GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh)
-GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl)
-GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb)
-GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh)
-GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl)
+GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
+GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
+GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
+GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
+GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
+GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
+GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
+GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
+GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
+GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
+GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
+GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
@@ -1444,23 +1217,19 @@ GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl)
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
- uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
+ for (i = env->vstart; i < vl; i++) { \
ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
continue; \
} \
- vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1)); \
- } \
- for (; i < vlmax; i++) { \
- vext_set_elem_mask(vd, mlen, i, 0); \
+ vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
} \
+ env->vstart = 0; \
}
GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
@@ -1497,23 +1266,19 @@ GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
- uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
+ for (i = env->vstart; i < vl; i++) { \
ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
continue; \
} \
- vext_set_elem_mask(vd, mlen, i, \
+ vext_set_elem_mask(vd, i, \
DO_OP(s2, (ETYPE)(target_long)s1)); \
} \
- for (; i < vlmax; i++) { \
- vext_set_elem_mask(vd, mlen, i, 0); \
- } \
+ env->vstart = 0; \
}
GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
@@ -1573,22 +1338,22 @@ RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
-GEN_VEXT_VV(vminu_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vminu_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vminu_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vminu_vv_d, 8, 8, clearq)
-GEN_VEXT_VV(vmin_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vmin_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vmin_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vmin_vv_d, 8, 8, clearq)
-GEN_VEXT_VV(vmaxu_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vmaxu_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vmaxu_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vmaxu_vv_d, 8, 8, clearq)
-GEN_VEXT_VV(vmax_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vmax_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vmax_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vmax_vv_d, 8, 8, clearq)
+GEN_VEXT_VV(vminu_vv_b, 1, 1)
+GEN_VEXT_VV(vminu_vv_h, 2, 2)
+GEN_VEXT_VV(vminu_vv_w, 4, 4)
+GEN_VEXT_VV(vminu_vv_d, 8, 8)
+GEN_VEXT_VV(vmin_vv_b, 1, 1)
+GEN_VEXT_VV(vmin_vv_h, 2, 2)
+GEN_VEXT_VV(vmin_vv_w, 4, 4)
+GEN_VEXT_VV(vmin_vv_d, 8, 8)
+GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
+GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
+GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
+GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
+GEN_VEXT_VV(vmax_vv_b, 1, 1)
+GEN_VEXT_VV(vmax_vv_h, 2, 2)
+GEN_VEXT_VV(vmax_vv_w, 4, 4)
+GEN_VEXT_VV(vmax_vv_d, 8, 8)
RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
@@ -1606,22 +1371,22 @@ RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
-GEN_VEXT_VX(vminu_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vminu_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vminu_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vminu_vx_d, 8, 8, clearq)
-GEN_VEXT_VX(vmin_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vmin_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vmin_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vmin_vx_d, 8, 8, clearq)
-GEN_VEXT_VX(vmaxu_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vmaxu_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vmaxu_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vmaxu_vx_d, 8, 8, clearq)
-GEN_VEXT_VX(vmax_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vmax_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vmax_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vmax_vx_d, 8, 8, clearq)
+GEN_VEXT_VX(vminu_vx_b, 1, 1)
+GEN_VEXT_VX(vminu_vx_h, 2, 2)
+GEN_VEXT_VX(vminu_vx_w, 4, 4)
+GEN_VEXT_VX(vminu_vx_d, 8, 8)
+GEN_VEXT_VX(vmin_vx_b, 1, 1)
+GEN_VEXT_VX(vmin_vx_h, 2, 2)
+GEN_VEXT_VX(vmin_vx_w, 4, 4)
+GEN_VEXT_VX(vmin_vx_d, 8, 8)
+GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
+GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
+GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
+GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
+GEN_VEXT_VX(vmax_vx_b, 1, 1)
+GEN_VEXT_VX(vmax_vx_h, 2, 2)
+GEN_VEXT_VX(vmax_vx_w, 4, 4)
+GEN_VEXT_VX(vmax_vx_d, 8, 8)
/* Vector Single-Width Integer Multiply Instructions */
#define DO_MUL(N, M) (N * M)
@@ -1629,10 +1394,10 @@ RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
-GEN_VEXT_VV(vmul_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vmul_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vmul_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vmul_vv_d, 8, 8, clearq)
+GEN_VEXT_VV(vmul_vv_b, 1, 1)
+GEN_VEXT_VV(vmul_vv_h, 2, 2)
+GEN_VEXT_VV(vmul_vv_w, 4, 4)
+GEN_VEXT_VV(vmul_vv_d, 8, 8)
static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
@@ -1736,18 +1501,18 @@ RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
-GEN_VEXT_VV(vmulh_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vmulh_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vmulh_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vmulh_vv_d, 8, 8, clearq)
-GEN_VEXT_VV(vmulhu_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vmulhu_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vmulhu_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vmulhu_vv_d, 8, 8, clearq)
-GEN_VEXT_VV(vmulhsu_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vmulhsu_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vmulhsu_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vmulhsu_vv_d, 8, 8, clearq)
+GEN_VEXT_VV(vmulh_vv_b, 1, 1)
+GEN_VEXT_VV(vmulh_vv_h, 2, 2)
+GEN_VEXT_VV(vmulh_vv_w, 4, 4)
+GEN_VEXT_VV(vmulh_vv_d, 8, 8)
+GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
+GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
+GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
+GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
+GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
+GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
+GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
+GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
@@ -1765,22 +1530,22 @@ RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
-GEN_VEXT_VX(vmul_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vmul_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vmul_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vmul_vx_d, 8, 8, clearq)
-GEN_VEXT_VX(vmulh_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vmulh_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vmulh_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vmulh_vx_d, 8, 8, clearq)
-GEN_VEXT_VX(vmulhu_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vmulhu_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vmulhu_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vmulhu_vx_d, 8, 8, clearq)
-GEN_VEXT_VX(vmulhsu_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vmulhsu_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vmulhsu_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vmulhsu_vx_d, 8, 8, clearq)
+GEN_VEXT_VX(vmul_vx_b, 1, 1)
+GEN_VEXT_VX(vmul_vx_h, 2, 2)
+GEN_VEXT_VX(vmul_vx_w, 4, 4)
+GEN_VEXT_VX(vmul_vx_d, 8, 8)
+GEN_VEXT_VX(vmulh_vx_b, 1, 1)
+GEN_VEXT_VX(vmulh_vx_h, 2, 2)
+GEN_VEXT_VX(vmulh_vx_w, 4, 4)
+GEN_VEXT_VX(vmulh_vx_d, 8, 8)
+GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
+GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
+GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
+GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
+GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
+GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
+GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
+GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
/* Vector Integer Divide Instructions */
#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
@@ -1806,22 +1571,22 @@ RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
-GEN_VEXT_VV(vdivu_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vdivu_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vdivu_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vdivu_vv_d, 8, 8, clearq)
-GEN_VEXT_VV(vdiv_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vdiv_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vdiv_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vdiv_vv_d, 8, 8, clearq)
-GEN_VEXT_VV(vremu_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vremu_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vremu_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vremu_vv_d, 8, 8, clearq)
-GEN_VEXT_VV(vrem_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vrem_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vrem_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vrem_vv_d, 8, 8, clearq)
+GEN_VEXT_VV(vdivu_vv_b, 1, 1)
+GEN_VEXT_VV(vdivu_vv_h, 2, 2)
+GEN_VEXT_VV(vdivu_vv_w, 4, 4)
+GEN_VEXT_VV(vdivu_vv_d, 8, 8)
+GEN_VEXT_VV(vdiv_vv_b, 1, 1)
+GEN_VEXT_VV(vdiv_vv_h, 2, 2)
+GEN_VEXT_VV(vdiv_vv_w, 4, 4)
+GEN_VEXT_VV(vdiv_vv_d, 8, 8)
+GEN_VEXT_VV(vremu_vv_b, 1, 1)
+GEN_VEXT_VV(vremu_vv_h, 2, 2)
+GEN_VEXT_VV(vremu_vv_w, 4, 4)
+GEN_VEXT_VV(vremu_vv_d, 8, 8)
+GEN_VEXT_VV(vrem_vv_b, 1, 1)
+GEN_VEXT_VV(vrem_vv_h, 2, 2)
+GEN_VEXT_VV(vrem_vv_w, 4, 4)
+GEN_VEXT_VV(vrem_vv_d, 8, 8)
RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
@@ -1839,22 +1604,22 @@ RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
-GEN_VEXT_VX(vdivu_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vdivu_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vdivu_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vdivu_vx_d, 8, 8, clearq)
-GEN_VEXT_VX(vdiv_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vdiv_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vdiv_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vdiv_vx_d, 8, 8, clearq)
-GEN_VEXT_VX(vremu_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vremu_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vremu_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vremu_vx_d, 8, 8, clearq)
-GEN_VEXT_VX(vrem_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vrem_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vrem_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vrem_vx_d, 8, 8, clearq)
+GEN_VEXT_VX(vdivu_vx_b, 1, 1)
+GEN_VEXT_VX(vdivu_vx_h, 2, 2)
+GEN_VEXT_VX(vdivu_vx_w, 4, 4)
+GEN_VEXT_VX(vdivu_vx_d, 8, 8)
+GEN_VEXT_VX(vdiv_vx_b, 1, 1)
+GEN_VEXT_VX(vdiv_vx_h, 2, 2)
+GEN_VEXT_VX(vdiv_vx_w, 4, 4)
+GEN_VEXT_VX(vdiv_vx_d, 8, 8)
+GEN_VEXT_VX(vremu_vx_b, 1, 1)
+GEN_VEXT_VX(vremu_vx_h, 2, 2)
+GEN_VEXT_VX(vremu_vx_w, 4, 4)
+GEN_VEXT_VX(vremu_vx_d, 8, 8)
+GEN_VEXT_VX(vrem_vx_b, 1, 1)
+GEN_VEXT_VX(vrem_vx_h, 2, 2)
+GEN_VEXT_VX(vrem_vx_w, 4, 4)
+GEN_VEXT_VX(vrem_vx_d, 8, 8)
/* Vector Widening Integer Multiply Instructions */
RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
@@ -1866,15 +1631,15 @@ RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
-GEN_VEXT_VV(vwmul_vv_b, 1, 2, clearh)
-GEN_VEXT_VV(vwmul_vv_h, 2, 4, clearl)
-GEN_VEXT_VV(vwmul_vv_w, 4, 8, clearq)
-GEN_VEXT_VV(vwmulu_vv_b, 1, 2, clearh)
-GEN_VEXT_VV(vwmulu_vv_h, 2, 4, clearl)
-GEN_VEXT_VV(vwmulu_vv_w, 4, 8, clearq)
-GEN_VEXT_VV(vwmulsu_vv_b, 1, 2, clearh)
-GEN_VEXT_VV(vwmulsu_vv_h, 2, 4, clearl)
-GEN_VEXT_VV(vwmulsu_vv_w, 4, 8, clearq)
+GEN_VEXT_VV(vwmul_vv_b, 1, 2)
+GEN_VEXT_VV(vwmul_vv_h, 2, 4)
+GEN_VEXT_VV(vwmul_vv_w, 4, 8)
+GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
+GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
+GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
+GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
+GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
+GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
@@ -1885,15 +1650,15 @@ RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
-GEN_VEXT_VX(vwmul_vx_b, 1, 2, clearh)
-GEN_VEXT_VX(vwmul_vx_h, 2, 4, clearl)
-GEN_VEXT_VX(vwmul_vx_w, 4, 8, clearq)
-GEN_VEXT_VX(vwmulu_vx_b, 1, 2, clearh)
-GEN_VEXT_VX(vwmulu_vx_h, 2, 4, clearl)
-GEN_VEXT_VX(vwmulu_vx_w, 4, 8, clearq)
-GEN_VEXT_VX(vwmulsu_vx_b, 1, 2, clearh)
-GEN_VEXT_VX(vwmulsu_vx_h, 2, 4, clearl)
-GEN_VEXT_VX(vwmulsu_vx_w, 4, 8, clearq)
+GEN_VEXT_VX(vwmul_vx_b, 1, 2)
+GEN_VEXT_VX(vwmul_vx_h, 2, 4)
+GEN_VEXT_VX(vwmul_vx_w, 4, 8)
+GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
+GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
+GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
+GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
+GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
+GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
/* Vector Single-Width Integer Multiply-Add Instructions */
#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
@@ -1925,22 +1690,22 @@ RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
-GEN_VEXT_VV(vmacc_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vmacc_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vmacc_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vmacc_vv_d, 8, 8, clearq)
-GEN_VEXT_VV(vnmsac_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vnmsac_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vnmsac_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vnmsac_vv_d, 8, 8, clearq)
-GEN_VEXT_VV(vmadd_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vmadd_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vmadd_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vmadd_vv_d, 8, 8, clearq)
-GEN_VEXT_VV(vnmsub_vv_b, 1, 1, clearb)
-GEN_VEXT_VV(vnmsub_vv_h, 2, 2, clearh)
-GEN_VEXT_VV(vnmsub_vv_w, 4, 4, clearl)
-GEN_VEXT_VV(vnmsub_vv_d, 8, 8, clearq)
+GEN_VEXT_VV(vmacc_vv_b, 1, 1)
+GEN_VEXT_VV(vmacc_vv_h, 2, 2)
+GEN_VEXT_VV(vmacc_vv_w, 4, 4)
+GEN_VEXT_VV(vmacc_vv_d, 8, 8)
+GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
+GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
+GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
+GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
+GEN_VEXT_VV(vmadd_vv_b, 1, 1)
+GEN_VEXT_VV(vmadd_vv_h, 2, 2)
+GEN_VEXT_VV(vmadd_vv_w, 4, 4)
+GEN_VEXT_VV(vmadd_vv_d, 8, 8)
+GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
+GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
+GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
+GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
@@ -1966,22 +1731,22 @@ RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
-GEN_VEXT_VX(vmacc_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vmacc_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vmacc_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vmacc_vx_d, 8, 8, clearq)
-GEN_VEXT_VX(vnmsac_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vnmsac_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vnmsac_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vnmsac_vx_d, 8, 8, clearq)
-GEN_VEXT_VX(vmadd_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vmadd_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vmadd_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vmadd_vx_d, 8, 8, clearq)
-GEN_VEXT_VX(vnmsub_vx_b, 1, 1, clearb)
-GEN_VEXT_VX(vnmsub_vx_h, 2, 2, clearh)
-GEN_VEXT_VX(vnmsub_vx_w, 4, 4, clearl)
-GEN_VEXT_VX(vnmsub_vx_d, 8, 8, clearq)
+GEN_VEXT_VX(vmacc_vx_b, 1, 1)
+GEN_VEXT_VX(vmacc_vx_h, 2, 2)
+GEN_VEXT_VX(vmacc_vx_w, 4, 4)
+GEN_VEXT_VX(vmacc_vx_d, 8, 8)
+GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
+GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
+GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
+GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
+GEN_VEXT_VX(vmadd_vx_b, 1, 1)
+GEN_VEXT_VX(vmadd_vx_h, 2, 2)
+GEN_VEXT_VX(vmadd_vx_w, 4, 4)
+GEN_VEXT_VX(vmadd_vx_d, 8, 8)
+GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
+GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
+GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
+GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
/* Vector Widening Integer Multiply-Add Instructions */
RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
@@ -1993,15 +1758,15 @@ RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
-GEN_VEXT_VV(vwmaccu_vv_b, 1, 2, clearh)
-GEN_VEXT_VV(vwmaccu_vv_h, 2, 4, clearl)
-GEN_VEXT_VV(vwmaccu_vv_w, 4, 8, clearq)
-GEN_VEXT_VV(vwmacc_vv_b, 1, 2, clearh)
-GEN_VEXT_VV(vwmacc_vv_h, 2, 4, clearl)
-GEN_VEXT_VV(vwmacc_vv_w, 4, 8, clearq)
-GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2, clearh)
-GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4, clearl)
-GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8, clearq)
+GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
+GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
+GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
+GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
+GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
+GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
+GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
+GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
+GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
@@ -2015,106 +1780,96 @@ RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
-GEN_VEXT_VX(vwmaccu_vx_b, 1, 2, clearh)
-GEN_VEXT_VX(vwmaccu_vx_h, 2, 4, clearl)
-GEN_VEXT_VX(vwmaccu_vx_w, 4, 8, clearq)
-GEN_VEXT_VX(vwmacc_vx_b, 1, 2, clearh)
-GEN_VEXT_VX(vwmacc_vx_h, 2, 4, clearl)
-GEN_VEXT_VX(vwmacc_vx_w, 4, 8, clearq)
-GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2, clearh)
-GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4, clearl)
-GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8, clearq)
-GEN_VEXT_VX(vwmaccus_vx_b, 1, 2, clearh)
-GEN_VEXT_VX(vwmaccus_vx_h, 2, 4, clearl)
-GEN_VEXT_VX(vwmaccus_vx_w, 4, 8, clearq)
+GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
+GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
+GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
+GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
+GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
+GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
+GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
+GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
+GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
+GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
+GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
+GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
/* Vector Integer Merge and Move Instructions */
-#define GEN_VEXT_VMV_VV(NAME, ETYPE, H, CLEAR_FN) \
+#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
uint32_t desc) \
{ \
uint32_t vl = env->vl; \
- uint32_t esz = sizeof(ETYPE); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
+ for (i = env->vstart; i < vl; i++) { \
ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
*((ETYPE *)vd + H(i)) = s1; \
} \
- CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
+ env->vstart = 0; \
}
-GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1, clearb)
-GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2, clearh)
-GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4, clearl)
-GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8, clearq)
+GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
+GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
+GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
+GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
-#define GEN_VEXT_VMV_VX(NAME, ETYPE, H, CLEAR_FN) \
+#define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
uint32_t desc) \
{ \
uint32_t vl = env->vl; \
- uint32_t esz = sizeof(ETYPE); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
+ for (i = env->vstart; i < vl; i++) { \
*((ETYPE *)vd + H(i)) = (ETYPE)s1; \
} \
- CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
+ env->vstart = 0; \
}
-GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1, clearb)
-GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2, clearh)
-GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4, clearl)
-GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8, clearq)
+GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
+GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
+GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
+GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
-#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H, CLEAR_FN) \
+#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vl = env->vl; \
- uint32_t esz = sizeof(ETYPE); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
- ETYPE *vt = (!vext_elem_mask(v0, mlen, i) ? vs2 : vs1); \
+ for (i = env->vstart; i < vl; i++) { \
+ ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
*((ETYPE *)vd + H(i)) = *(vt + H(i)); \
} \
- CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
+ env->vstart = 0; \
}
-GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1, clearb)
-GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2, clearh)
-GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4, clearl)
-GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8, clearq)
+GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
+GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
+GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
+GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
-#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H, CLEAR_FN) \
+#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vl = env->vl; \
- uint32_t esz = sizeof(ETYPE); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
+ for (i = env->vstart; i < vl; i++) { \
ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
- ETYPE d = (!vext_elem_mask(v0, mlen, i) ? s2 : \
+ ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
(ETYPE)(target_long)s1); \
*((ETYPE *)vd + H(i)) = d; \
} \
- CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
+ env->vstart = 0; \
}
-GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1, clearb)
-GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2, clearh)
-GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4, clearl)
-GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8, clearq)
+GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
+GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
+GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
+GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
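
As a reading aid, a minimal standalone restatement (not QEMU code) of the per-element rule encoded by GEN_VEXT_VMERGE_VX above: where the mask bit is set the scalar is taken, otherwise the vs2 element is kept. The one-bit-per-element mask layout is again an assumption matching rvv-1.0.

#include <stdint.h>

/* Per-element rule of vmerge.vxm, modelled for 32-bit elements. */
static void model_vmerge_vxm_i32(int32_t *vd, const uint8_t *v0, int32_t s1,
                                 const int32_t *vs2, uint32_t vstart,
                                 uint32_t vl)
{
    for (uint32_t i = vstart; i < vl; i++) {
        int take_scalar = (v0[i / 8] >> (i % 8)) & 1;
        vd[i] = take_scalar ? s1 : vs2[i];   /* mask set -> scalar, clear -> vs2 */
    }
}
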
/*
*** Vector Fixed-Point Arithmetic Instructions
@@ -2142,57 +1897,54 @@ do_##NAME(void *vd, void *vs1, void *vs2, int i, \
static inline void
vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
CPURISCVState *env,
- uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm,
+ uint32_t vl, uint32_t vm, int vxrm,
opivv2_rm_fn *fn)
{
- for (uint32_t i = 0; i < vl; i++) {
- if (!vm && !vext_elem_mask(v0, mlen, i)) {
+ for (uint32_t i = env->vstart; i < vl; i++) {
+ if (!vm && !vext_elem_mask(v0, i)) {
continue;
}
fn(vd, vs1, vs2, i, env, vxrm);
}
+ env->vstart = 0;
}
static inline void
vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
CPURISCVState *env,
uint32_t desc, uint32_t esz, uint32_t dsz,
- opivv2_rm_fn *fn, clear_fn *clearfn)
+ opivv2_rm_fn *fn)
{
- uint32_t vlmax = vext_maxsz(desc) / esz;
- uint32_t mlen = vext_mlen(desc);
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
switch (env->vxrm) {
case 0: /* rnu */
vext_vv_rm_1(vd, v0, vs1, vs2,
- env, vl, vm, mlen, 0, fn);
+ env, vl, vm, 0, fn);
break;
case 1: /* rne */
vext_vv_rm_1(vd, v0, vs1, vs2,
- env, vl, vm, mlen, 1, fn);
+ env, vl, vm, 1, fn);
break;
case 2: /* rdn */
vext_vv_rm_1(vd, v0, vs1, vs2,
- env, vl, vm, mlen, 2, fn);
+ env, vl, vm, 2, fn);
break;
default: /* rod */
vext_vv_rm_1(vd, v0, vs1, vs2,
- env, vl, vm, mlen, 3, fn);
+ env, vl, vm, 3, fn);
break;
}
-
- clearfn(vd, vl, vl * dsz, vlmax * dsz);
}
/* generate helpers for fixed point instructions with OPIVV format */
-#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ, CLEAR_FN) \
+#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
- do_##NAME, CLEAR_FN); \
+ do_##NAME); \
}
static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
@@ -2242,10 +1994,10 @@ RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
-GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1, clearb)
-GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
+GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
+GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
+GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
CPURISCVState *env, int vxrm);
@@ -2262,67 +2014,64 @@ do_##NAME(void *vd, target_long s1, void *vs2, int i, \
static inline void
vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
CPURISCVState *env,
- uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm,
+ uint32_t vl, uint32_t vm, int vxrm,
opivx2_rm_fn *fn)
{
- for (uint32_t i = 0; i < vl; i++) {
- if (!vm && !vext_elem_mask(v0, mlen, i)) {
+ for (uint32_t i = env->vstart; i < vl; i++) {
+ if (!vm && !vext_elem_mask(v0, i)) {
continue;
}
fn(vd, s1, vs2, i, env, vxrm);
}
+ env->vstart = 0;
}
static inline void
vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
CPURISCVState *env,
uint32_t desc, uint32_t esz, uint32_t dsz,
- opivx2_rm_fn *fn, clear_fn *clearfn)
+ opivx2_rm_fn *fn)
{
- uint32_t vlmax = vext_maxsz(desc) / esz;
- uint32_t mlen = vext_mlen(desc);
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
switch (env->vxrm) {
case 0: /* rnu */
vext_vx_rm_1(vd, v0, s1, vs2,
- env, vl, vm, mlen, 0, fn);
+ env, vl, vm, 0, fn);
break;
case 1: /* rne */
vext_vx_rm_1(vd, v0, s1, vs2,
- env, vl, vm, mlen, 1, fn);
+ env, vl, vm, 1, fn);
break;
case 2: /* rdn */
vext_vx_rm_1(vd, v0, s1, vs2,
- env, vl, vm, mlen, 2, fn);
+ env, vl, vm, 2, fn);
break;
default: /* rod */
vext_vx_rm_1(vd, v0, s1, vs2,
- env, vl, vm, mlen, 3, fn);
+ env, vl, vm, 3, fn);
break;
}
-
- clearfn(vd, vl, vl * dsz, vlmax * dsz);
}
/* generate helpers for fixed point instructions with OPIVX format */
-#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ, CLEAR_FN) \
+#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \
- do_##NAME, CLEAR_FN); \
+ do_##NAME); \
}
RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
-GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1, clearb)
-GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2, clearh)
-GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4, clearl)
-GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8, clearq)
+GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
+GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
+GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
+GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
@@ -2368,19 +2117,19 @@ RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
-GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1, clearb)
-GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
+GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
+GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
+GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
-GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1, clearb)
-GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2, clearh)
-GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4, clearl)
-GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8, clearq)
+GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
+GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
+GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
+GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
{
@@ -2429,19 +2178,19 @@ RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
-GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1, clearb)
-GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
+GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
+GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
+GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
-GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1, clearb)
-GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2, clearh)
-GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4, clearl)
-GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8, clearq)
+GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
+GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
+GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
+GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
@@ -2487,19 +2236,19 @@ RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
-GEN_VEXT_VV_RM(vssub_vv_b, 1, 1, clearb)
-GEN_VEXT_VV_RM(vssub_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_RM(vssub_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_RM(vssub_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
+GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
+GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
+GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)
RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
-GEN_VEXT_VX_RM(vssub_vx_b, 1, 1, clearb)
-GEN_VEXT_VX_RM(vssub_vx_h, 2, 2, clearh)
-GEN_VEXT_VX_RM(vssub_vx_w, 4, 4, clearl)
-GEN_VEXT_VX_RM(vssub_vx_d, 8, 8, clearq)
+GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
+GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
+GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
+GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
/* Vector Single-Width Averaging Add and Subtract */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
@@ -2551,19 +2300,56 @@ RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
-GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1, clearb)
-GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
+GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
+GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
+GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)
RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
-GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1, clearb)
-GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2, clearh)
-GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4, clearl)
-GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8, clearq)
+GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
+GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
+GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
+GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
+
+static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
+ uint32_t a, uint32_t b)
+{
+ uint64_t res = (uint64_t)a + b;
+ uint8_t round = get_round(vxrm, res, 1);
+
+ return (res >> 1) + round;
+}
+
+static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
+ uint64_t a, uint64_t b)
+{
+ uint64_t res = a + b;
+ uint8_t round = get_round(vxrm, res, 1);
+ uint64_t over = (uint64_t)(res < a) << 63;
+
+ return ((res >> 1) | over) + round;
+}
+
+RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
+RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
+RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
+RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
+GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
+GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
+GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
+GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)
+
+RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
+RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
+RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
+RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
+GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
+GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
+GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
+GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
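
The averaging helpers above lean on get_round() for the vxrm-selected rounding increment. A hypothetical standalone check of the round-to-nearest-up case (vxrm == 0), with the increment written out inline rather than via get_round(), shows why the sum is formed in a wider type first: the intermediate cannot wrap before the shift, e.g. aaddu32(5, 6) gives (11 >> 1) + 1 = 6.

#include <assert.h>
#include <stdint.h>

/* Hypothetical standalone restatement of aaddu32's rnu case (vxrm == 0):
 * the rounding increment is simply the bit shifted out. */
static uint32_t avg_round_up_u32(uint32_t a, uint32_t b)
{
    uint64_t res = (uint64_t)a + b;   /* widen so the sum cannot wrap */
    uint64_t round = res & 1;         /* rnu: add back the discarded bit */
    return (uint32_t)((res >> 1) + round);
}

int main(void)
{
    assert(avg_round_up_u32(5, 6) == 6);                    /* 5.5 -> 6 */
    assert(avg_round_up_u32(UINT32_MAX, UINT32_MAX) == UINT32_MAX);
    return 0;
}
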
static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
@@ -2587,19 +2373,56 @@ RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
-GEN_VEXT_VV_RM(vasub_vv_b, 1, 1, clearb)
-GEN_VEXT_VV_RM(vasub_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_RM(vasub_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_RM(vasub_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
+GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
+GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
+GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)
RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
-GEN_VEXT_VX_RM(vasub_vx_b, 1, 1, clearb)
-GEN_VEXT_VX_RM(vasub_vx_h, 2, 2, clearh)
-GEN_VEXT_VX_RM(vasub_vx_w, 4, 4, clearl)
-GEN_VEXT_VX_RM(vasub_vx_d, 8, 8, clearq)
+GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
+GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
+GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
+GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
+
+static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
+ uint32_t a, uint32_t b)
+{
+ int64_t res = (int64_t)a - b;
+ uint8_t round = get_round(vxrm, res, 1);
+
+ return (res >> 1) + round;
+}
+
+static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
+ uint64_t a, uint64_t b)
+{
+ uint64_t res = (uint64_t)a - b;
+ uint8_t round = get_round(vxrm, res, 1);
+ uint64_t over = (uint64_t)(res > a) << 63;
+
+ return ((res >> 1) | over) + round;
+}
+
+RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
+RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
+RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
+RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
+GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
+GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
+GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
+GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)
+
+RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
+RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
+RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
+RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
+GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
+GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
+GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
+GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
@@ -2694,224 +2517,19 @@ RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
-GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1, clearb)
-GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
+GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
+GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
+GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)
RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
-GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1, clearb)
-GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2, clearh)
-GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4, clearl)
-GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8, clearq)
-
-/* Vector Widening Saturating Scaled Multiply-Add */
-static inline uint16_t
-vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b,
- uint16_t c)
-{
- uint8_t round;
- uint16_t res = (uint16_t)a * b;
-
- round = get_round(vxrm, res, 4);
- res = (res >> 4) + round;
- return saddu16(env, vxrm, c, res);
-}
-
-static inline uint32_t
-vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b,
- uint32_t c)
-{
- uint8_t round;
- uint32_t res = (uint32_t)a * b;
-
- round = get_round(vxrm, res, 8);
- res = (res >> 8) + round;
- return saddu32(env, vxrm, c, res);
-}
-
-static inline uint64_t
-vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b,
- uint64_t c)
-{
- uint8_t round;
- uint64_t res = (uint64_t)a * b;
-
- round = get_round(vxrm, res, 16);
- res = (res >> 16) + round;
- return saddu64(env, vxrm, c, res);
-}
-
-#define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
-static inline void \
-do_##NAME(void *vd, void *vs1, void *vs2, int i, \
- CPURISCVState *env, int vxrm) \
-{ \
- TX1 s1 = *((T1 *)vs1 + HS1(i)); \
- TX2 s2 = *((T2 *)vs2 + HS2(i)); \
- TD d = *((TD *)vd + HD(i)); \
- *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \
-}
-
-RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8)
-RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16)
-RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32)
-GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2, clearh)
-GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4, clearl)
-GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8, clearq)
-
-#define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
-static inline void \
-do_##NAME(void *vd, target_long s1, void *vs2, int i, \
- CPURISCVState *env, int vxrm) \
-{ \
- TX2 s2 = *((T2 *)vs2 + HS2(i)); \
- TD d = *((TD *)vd + HD(i)); \
- *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \
-}
-
-RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8)
-RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16)
-RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32)
-GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2, clearh)
-GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4, clearl)
-GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8, clearq)
-
-static inline int16_t
-vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c)
-{
- uint8_t round;
- int16_t res = (int16_t)a * b;
-
- round = get_round(vxrm, res, 4);
- res = (res >> 4) + round;
- return sadd16(env, vxrm, c, res);
-}
-
-static inline int32_t
-vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c)
-{
- uint8_t round;
- int32_t res = (int32_t)a * b;
-
- round = get_round(vxrm, res, 8);
- res = (res >> 8) + round;
- return sadd32(env, vxrm, c, res);
-
-}
-
-static inline int64_t
-vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c)
-{
- uint8_t round;
- int64_t res = (int64_t)a * b;
-
- round = get_round(vxrm, res, 16);
- res = (res >> 16) + round;
- return sadd64(env, vxrm, c, res);
-}
-
-RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8)
-RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16)
-RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32)
-GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2, clearh)
-GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4, clearl)
-GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8, clearq)
-RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8)
-RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16)
-RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32)
-GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2, clearh)
-GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4, clearl)
-GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8, clearq)
-
-static inline int16_t
-vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c)
-{
- uint8_t round;
- int16_t res = a * (int16_t)b;
-
- round = get_round(vxrm, res, 4);
- res = (res >> 4) + round;
- return ssub16(env, vxrm, c, res);
-}
-
-static inline int32_t
-vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c)
-{
- uint8_t round;
- int32_t res = a * (int32_t)b;
-
- round = get_round(vxrm, res, 8);
- res = (res >> 8) + round;
- return ssub32(env, vxrm, c, res);
-}
-
-static inline int64_t
-vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c)
-{
- uint8_t round;
- int64_t res = a * (int64_t)b;
-
- round = get_round(vxrm, res, 16);
- res = (res >> 16) + round;
- return ssub64(env, vxrm, c, res);
-}
-
-RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8)
-RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16)
-RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32)
-GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2, clearh)
-GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4, clearl)
-GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8, clearq)
-RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8)
-RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16)
-RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32)
-GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2, clearh)
-GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4, clearl)
-GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8, clearq)
-
-static inline int16_t
-vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c)
-{
- uint8_t round;
- int16_t res = (int16_t)a * b;
-
- round = get_round(vxrm, res, 4);
- res = (res >> 4) + round;
- return ssub16(env, vxrm, c, res);
-}
-
-static inline int32_t
-vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c)
-{
- uint8_t round;
- int32_t res = (int32_t)a * b;
-
- round = get_round(vxrm, res, 8);
- res = (res >> 8) + round;
- return ssub32(env, vxrm, c, res);
-}
-
-static inline int64_t
-vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c)
-{
- uint8_t round;
- int64_t res = (int64_t)a * b;
-
- round = get_round(vxrm, res, 16);
- res = (res >> 16) + round;
- return ssub64(env, vxrm, c, res);
-}
-
-RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8)
-RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16)
-RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32)
-GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2, clearh)
-GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4, clearl)
-GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8, clearq)
+GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
+GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
+GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
+GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
/* Vector Single-Width Scaling Shift Instructions */
static inline uint8_t
@@ -2958,19 +2576,19 @@ RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
-GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1, clearb)
-GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
+GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
+GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
+GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
-GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1, clearb)
-GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2, clearh)
-GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4, clearl)
-GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8, clearq)
+GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
+GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
+GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
+GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
static inline int8_t
vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
@@ -3017,19 +2635,19 @@ RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
-GEN_VEXT_VV_RM(vssra_vv_b, 1, 1, clearb)
-GEN_VEXT_VV_RM(vssra_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_RM(vssra_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_RM(vssra_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
+GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
+GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
+GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
-GEN_VEXT_VX_RM(vssra_vx_b, 1, 1, clearb)
-GEN_VEXT_VX_RM(vssra_vx_h, 2, 2, clearh)
-GEN_VEXT_VX_RM(vssra_vx_w, 4, 4, clearl)
-GEN_VEXT_VX_RM(vssra_vx_d, 8, 8, clearq)
+GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
+GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
+GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
+GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
/* Vector Narrowing Fixed-Point Clip Instructions */
static inline int8_t
@@ -3089,19 +2707,19 @@ vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
}
}
-RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
-RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
-RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
-GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1, clearb)
-GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4, clearl)
+RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
+RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
+RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
+GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
+GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
+GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)
-RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8)
-RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16)
-RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32)
-GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1, clearb)
-GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2, clearh)
-GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4, clearl)
+RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
+RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
+RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
+GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
+GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
+GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)
static inline uint8_t
vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
@@ -3139,7 +2757,7 @@ static inline uint32_t
vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
{
uint8_t round, shift = b & 0x3f;
- int64_t res;
+ uint64_t res;
round = get_round(vxrm, a, shift);
res = (a >> shift) + round;
@@ -3151,19 +2769,19 @@ vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
}
}
-RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
-RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
-RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
-GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1, clearb)
-GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4, clearl)
+RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
+RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
+RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
+GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
+GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
+GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)
-RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8)
-RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16)
-RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32)
-GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1, clearb)
-GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2, clearh)
-GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4, clearl)
+RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
+RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
+RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
+GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
+GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
+GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
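
A short standalone sketch (not QEMU code) of what the renamed vnclipu_wx helpers compute per element, shown for the rnu rounding mode only: shift the 2*SEW-wide source right by the scalar amount, add the rounding increment, then saturate to the destination width (the in-tree helpers also record saturation in vxsat).

#include <stdint.h>

/* Narrowing clip from a 64-bit source to a 32-bit destination,
 * rnu rounding only (vxrm == 0). */
static uint32_t model_vnclipu_64_to_32(uint64_t wide, unsigned shamt)
{
    shamt &= 0x3f;                                    /* 2*SEW = 64 -> 6 shift bits */
    uint64_t round = shamt ? (wide >> (shamt - 1)) & 1 : 0;
    uint64_t res = (wide >> shamt) + round;
    return res > UINT32_MAX ? UINT32_MAX : (uint32_t)res;   /* saturate */
}
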
/*
*** Vector Float Point Arithmetic Instructions
@@ -3178,32 +2796,30 @@ static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
*((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
}
-#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ, CLEAR_FN) \
+#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
void *vs2, CPURISCVState *env, \
uint32_t desc) \
{ \
- uint32_t vlmax = vext_maxsz(desc) / ESZ; \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
+ for (i = env->vstart; i < vl; i++) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
continue; \
} \
do_##NAME(vd, vs1, vs2, i, env); \
} \
- CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \
+ env->vstart = 0; \
}
RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
-GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
@@ -3213,45 +2829,43 @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
*((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
}
-#define GEN_VEXT_VF(NAME, ESZ, DSZ, CLEAR_FN) \
+#define GEN_VEXT_VF(NAME, ESZ, DSZ) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
void *vs2, CPURISCVState *env, \
uint32_t desc) \
{ \
- uint32_t vlmax = vext_maxsz(desc) / ESZ; \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
+ for (i = env->vstart; i < vl; i++) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
continue; \
} \
do_##NAME(vd, s1, vs2, i, env); \
} \
- CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \
+ env->vstart = 0; \
}
RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
-GEN_VEXT_VF(vfadd_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfadd_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfadd_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfadd_vf_h, 2, 2)
+GEN_VEXT_VF(vfadd_vf_w, 4, 4)
+GEN_VEXT_VF(vfadd_vf_d, 8, 8)
RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
-GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
-GEN_VEXT_VF(vfsub_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfsub_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfsub_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfsub_vf_h, 2, 2)
+GEN_VEXT_VF(vfsub_vf_w, 4, 4)
+GEN_VEXT_VF(vfsub_vf_d, 8, 8)
static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
{
@@ -3271,9 +2885,9 @@ static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
-GEN_VEXT_VF(vfrsub_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfrsub_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfrsub_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
+GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
+GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
/* Vector Widening Floating-Point Add/Subtract Instructions */
static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
@@ -3291,12 +2905,12 @@ static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
-GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4, clearl)
-GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8, clearq)
+GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
+GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
-GEN_VEXT_VF(vfwadd_vf_h, 2, 4, clearl)
-GEN_VEXT_VF(vfwadd_vf_w, 4, 8, clearq)
+GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
+GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
{
@@ -3313,12 +2927,12 @@ static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
-GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4, clearl)
-GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8, clearq)
+GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
+GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
-GEN_VEXT_VF(vfwsub_vf_h, 2, 4, clearl)
-GEN_VEXT_VF(vfwsub_vf_w, 4, 8, clearq)
+GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
+GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
{
@@ -3332,12 +2946,12 @@ static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
-GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4, clearl)
-GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8, clearq)
+GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
+GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
-GEN_VEXT_VF(vfwadd_wf_h, 2, 4, clearl)
-GEN_VEXT_VF(vfwadd_wf_w, 4, 8, clearq)
+GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
+GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
{
@@ -3351,39 +2965,39 @@ static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
-GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4, clearl)
-GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8, clearq)
+GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
+GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
-GEN_VEXT_VF(vfwsub_wf_h, 2, 4, clearl)
-GEN_VEXT_VF(vfwsub_wf_w, 4, 8, clearq)
+GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
+GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
-GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
-GEN_VEXT_VF(vfmul_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfmul_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfmul_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfmul_vf_h, 2, 2)
+GEN_VEXT_VF(vfmul_vf_w, 4, 4)
+GEN_VEXT_VF(vfmul_vf_d, 8, 8)
RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
-GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
-GEN_VEXT_VF(vfdiv_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfdiv_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfdiv_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
+GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
+GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
{
@@ -3403,9 +3017,9 @@ static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
-GEN_VEXT_VF(vfrdiv_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfrdiv_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfrdiv_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
+GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
+GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
/* Vector Widening Floating-Point Multiply */
static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
@@ -3422,12 +3036,12 @@ static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
}
RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
-GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4, clearl)
-GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8, clearq)
+GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
+GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
-GEN_VEXT_VF(vfwmul_vf_h, 2, 4, clearl)
-GEN_VEXT_VF(vfwmul_vf_w, 4, 8, clearq)
+GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
+GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
@@ -3458,9 +3072,9 @@ static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
-GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
@@ -3474,9 +3088,9 @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
-GEN_VEXT_VF(vfmacc_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfmacc_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfmacc_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
+GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
+GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -3499,15 +3113,15 @@ static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
-GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
-GEN_VEXT_VF(vfnmacc_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfnmacc_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfnmacc_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
+GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
+GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -3527,15 +3141,15 @@ static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
-GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
-GEN_VEXT_VF(vfmsac_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfmsac_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfmsac_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
+GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
+GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -3555,15 +3169,15 @@ static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
-GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
-GEN_VEXT_VF(vfnmsac_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfnmsac_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfnmsac_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
+GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
+GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -3583,15 +3197,15 @@ static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
-GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
-GEN_VEXT_VF(vfmadd_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfmadd_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfmadd_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
+GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
+GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -3614,15 +3228,15 @@ static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
-GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
-GEN_VEXT_VF(vfnmadd_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfnmadd_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfnmadd_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
+GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
+GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -3642,15 +3256,15 @@ static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
-GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
-GEN_VEXT_VF(vfmsub_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfmsub_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfmsub_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
+GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
+GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -3670,15 +3284,15 @@ static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
-GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
-GEN_VEXT_VF(vfnmsub_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfnmsub_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfnmsub_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
+GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
+GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
@@ -3695,12 +3309,12 @@ static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
-GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4, clearl)
-GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8, clearq)
+GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
+GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
-GEN_VEXT_VF(vfwmacc_vf_h, 2, 4, clearl)
-GEN_VEXT_VF(vfwmacc_vf_w, 4, 8, clearq)
+GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
+GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
@@ -3718,12 +3332,12 @@ static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
-GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4, clearl)
-GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8, clearq)
+GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
+GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
-GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4, clearl)
-GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8, clearq)
+GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
+GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
@@ -3741,12 +3355,12 @@ static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
-GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4, clearl)
-GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8, clearq)
+GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
+GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
-GEN_VEXT_VF(vfwmsac_vf_h, 2, 4, clearl)
-GEN_VEXT_VF(vfwmsac_vf_w, 4, 8, clearq)
+GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
+GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
@@ -3764,12 +3378,12 @@ static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
-GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4, clearl)
-GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8, clearq)
+GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
+GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
-GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4, clearl)
-GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8, clearq)
+GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
+GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
/* Vector Floating-Point Square-Root Instruction */
/* (TD, T2, TX2) */
@@ -3785,12 +3399,10 @@ static void do_##NAME(void *vd, void *vs2, int i, \
*((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
}
-#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ, CLEAR_FN) \
+#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t vlmax = vext_maxsz(desc) / ESZ; \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
uint32_t i; \
@@ -3798,48 +3410,421 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \
if (vl == 0) { \
return; \
} \
- for (i = 0; i < vl; i++) { \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
+ for (i = env->vstart; i < vl; i++) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
continue; \
} \
do_##NAME(vd, vs2, i, env); \
} \
- CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \
+ env->vstart = 0; \
}
RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
-GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2, clearh)
-GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4, clearl)
-GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8, clearq)
+GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
+GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
+GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
+
+/*
+ * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
+ *
+ * Adapted from riscv-v-spec recip.c:
+ * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
+ */
+static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
+{
+ uint64_t sign = extract64(f, frac_size + exp_size, 1);
+ uint64_t exp = extract64(f, frac_size, exp_size);
+ uint64_t frac = extract64(f, 0, frac_size);
+
+ const uint8_t lookup_table[] = {
+ 52, 51, 50, 48, 47, 46, 44, 43,
+ 42, 41, 40, 39, 38, 36, 35, 34,
+ 33, 32, 31, 30, 30, 29, 28, 27,
+ 26, 25, 24, 23, 23, 22, 21, 20,
+ 19, 19, 18, 17, 16, 16, 15, 14,
+ 14, 13, 12, 12, 11, 10, 10, 9,
+ 9, 8, 7, 7, 6, 6, 5, 4,
+ 4, 3, 3, 2, 2, 1, 1, 0,
+ 127, 125, 123, 121, 119, 118, 116, 114,
+ 113, 111, 109, 108, 106, 105, 103, 102,
+ 100, 99, 97, 96, 95, 93, 92, 91,
+ 90, 88, 87, 86, 85, 84, 83, 82,
+ 80, 79, 78, 77, 76, 75, 74, 73,
+ 72, 71, 70, 70, 69, 68, 67, 66,
+ 65, 64, 63, 63, 62, 61, 60, 59,
+ 59, 58, 57, 56, 56, 55, 54, 53
+ };
+ const int precision = 7;
+
+ if (exp == 0 && frac != 0) { /* subnormal */
+ /* Normalize the subnormal. */
+ while (extract64(frac, frac_size - 1, 1) == 0) {
+ exp--;
+ frac <<= 1;
+ }
+
+ frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
+ }
+
+ int idx = ((exp & 1) << (precision - 1)) |
+ (frac >> (frac_size - precision + 1));
+ uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
+ (frac_size - precision);
+ uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
+
+ uint64_t val = 0;
+ val = deposit64(val, 0, frac_size, out_frac);
+ val = deposit64(val, frac_size, exp_size, out_exp);
+ val = deposit64(val, frac_size + exp_size, 1, sign);
+ return val;
+}
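As an editorial sanity check of the bit manipulation above (assuming the binary16 parameters used by frsqrt7_h below, exp_size = 5 and frac_size = 10, hence bias 15), tracing frsqrt7() on 4.0 gives:

/*
 * f = 0x4400 (binary16 4.0): sign = 0, exp = 17, frac = 0
 *   idx      = ((17 & 1) << 6) | (0 >> 4)        = 64
 *   out_frac = lookup_table[64] << (10 - 7)      = 127 << 3 = 1016
 *   out_exp  = (3 * 15 + ~17) / 2                = (45 - 18) / 2 = 13
 *              (the uint64_t wrap of ~exp makes this (3 * bias - 1 - exp) / 2)
 *   result   = 2^(13 - 15) * (1 + 1016 / 1024)   = 0.498046875
 * which approximates 1 / sqrt(4.0) = 0.5 to the estimate's 7 bits.
 */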
+
+static float16 frsqrt7_h(float16 f, float_status *s)
+{
+ int exp_size = 5, frac_size = 10;
+ bool sign = float16_is_neg(f);
+
+ /*
+ * frsqrt7(sNaN) = canonical NaN
+ * frsqrt7(-inf) = canonical NaN
+ * frsqrt7(-normal) = canonical NaN
+ * frsqrt7(-subnormal) = canonical NaN
+ */
+ if (float16_is_signaling_nan(f, s) ||
+ (float16_is_infinity(f) && sign) ||
+ (float16_is_normal(f) && sign) ||
+ (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
+ s->float_exception_flags |= float_flag_invalid;
+ return float16_default_nan(s);
+ }
+
+ /* frsqrt7(qNaN) = canonical NaN */
+ if (float16_is_quiet_nan(f, s)) {
+ return float16_default_nan(s);
+ }
+
+ /* frsqrt7(+-0) = +-inf */
+ if (float16_is_zero(f)) {
+ s->float_exception_flags |= float_flag_divbyzero;
+ return float16_set_sign(float16_infinity, sign);
+ }
+
+ /* frsqrt7(+inf) = +0 */
+ if (float16_is_infinity(f) && !sign) {
+ return float16_set_sign(float16_zero, sign);
+ }
+
+ /* +normal, +subnormal */
+ uint64_t val = frsqrt7(f, exp_size, frac_size);
+ return make_float16(val);
+}
+
+static float32 frsqrt7_s(float32 f, float_status *s)
+{
+ int exp_size = 8, frac_size = 23;
+ bool sign = float32_is_neg(f);
+
+ /*
+ * frsqrt7(sNaN) = canonical NaN
+ * frsqrt7(-inf) = canonical NaN
+ * frsqrt7(-normal) = canonical NaN
+ * frsqrt7(-subnormal) = canonical NaN
+ */
+ if (float32_is_signaling_nan(f, s) ||
+ (float32_is_infinity(f) && sign) ||
+ (float32_is_normal(f) && sign) ||
+ (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
+ s->float_exception_flags |= float_flag_invalid;
+ return float32_default_nan(s);
+ }
+
+ /* frsqrt7(qNaN) = canonical NaN */
+ if (float32_is_quiet_nan(f, s)) {
+ return float32_default_nan(s);
+ }
+
+ /* frsqrt7(+-0) = +-inf */
+ if (float32_is_zero(f)) {
+ s->float_exception_flags |= float_flag_divbyzero;
+ return float32_set_sign(float32_infinity, sign);
+ }
+
+ /* frsqrt7(+inf) = +0 */
+ if (float32_is_infinity(f) && !sign) {
+ return float32_set_sign(float32_zero, sign);
+ }
+
+ /* +normal, +subnormal */
+ uint64_t val = frsqrt7(f, exp_size, frac_size);
+ return make_float32(val);
+}
+
+static float64 frsqrt7_d(float64 f, float_status *s)
+{
+ int exp_size = 11, frac_size = 52;
+ bool sign = float64_is_neg(f);
+
+ /*
+ * frsqrt7(sNaN) = canonical NaN
+ * frsqrt7(-inf) = canonical NaN
+ * frsqrt7(-normal) = canonical NaN
+ * frsqrt7(-subnormal) = canonical NaN
+ */
+ if (float64_is_signaling_nan(f, s) ||
+ (float64_is_infinity(f) && sign) ||
+ (float64_is_normal(f) && sign) ||
+ (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
+ s->float_exception_flags |= float_flag_invalid;
+ return float64_default_nan(s);
+ }
+
+ /* frsqrt7(qNaN) = canonical NaN */
+ if (float64_is_quiet_nan(f, s)) {
+ return float64_default_nan(s);
+ }
+
+ /* frsqrt7(+-0) = +-inf */
+ if (float64_is_zero(f)) {
+ s->float_exception_flags |= float_flag_divbyzero;
+ return float64_set_sign(float64_infinity, sign);
+ }
+
+ /* frsqrt7(+inf) = +0 */
+ if (float64_is_infinity(f) && !sign) {
+ return float64_set_sign(float64_zero, sign);
+ }
+
+ /* +normal, +subnormal */
+ uint64_t val = frsqrt7(f, exp_size, frac_size);
+ return make_float64(val);
+}
+
+RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
+RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
+RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
+GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2)
+GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4)
+GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8)
+
+/*
+ * Vector Floating-Point Reciprocal Estimate Instruction
+ *
+ * Adapted from riscv-v-spec recip.c:
+ * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
+ */
+static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
+ float_status *s)
+{
+ uint64_t sign = extract64(f, frac_size + exp_size, 1);
+ uint64_t exp = extract64(f, frac_size, exp_size);
+ uint64_t frac = extract64(f, 0, frac_size);
+
+ const uint8_t lookup_table[] = {
+ 127, 125, 123, 121, 119, 117, 116, 114,
+ 112, 110, 109, 107, 105, 104, 102, 100,
+ 99, 97, 96, 94, 93, 91, 90, 88,
+ 87, 85, 84, 83, 81, 80, 79, 77,
+ 76, 75, 74, 72, 71, 70, 69, 68,
+ 66, 65, 64, 63, 62, 61, 60, 59,
+ 58, 57, 56, 55, 54, 53, 52, 51,
+ 50, 49, 48, 47, 46, 45, 44, 43,
+ 42, 41, 40, 40, 39, 38, 37, 36,
+ 35, 35, 34, 33, 32, 31, 31, 30,
+ 29, 28, 28, 27, 26, 25, 25, 24,
+ 23, 23, 22, 21, 21, 20, 19, 19,
+ 18, 17, 17, 16, 15, 15, 14, 14,
+ 13, 12, 12, 11, 11, 10, 9, 9,
+ 8, 8, 7, 7, 6, 5, 5, 4,
+ 4, 3, 3, 2, 2, 1, 1, 0
+ };
+ const int precision = 7;
+
+ if (exp == 0 && frac != 0) { /* subnormal */
+ /* Normalize the subnormal. */
+ while (extract64(frac, frac_size - 1, 1) == 0) {
+ exp--;
+ frac <<= 1;
+ }
+
+ frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
+
+ if (exp != 0 && exp != UINT64_MAX) {
+ /*
+ * Overflow to inf or max value of same sign,
+ * depending on sign and rounding mode.
+ */
+ s->float_exception_flags |= (float_flag_inexact |
+ float_flag_overflow);
+
+ if ((s->float_rounding_mode == float_round_to_zero) ||
+ ((s->float_rounding_mode == float_round_down) && !sign) ||
+ ((s->float_rounding_mode == float_round_up) && sign)) {
+ /* Return greatest/negative finite value. */
+ return (sign << (exp_size + frac_size)) |
+ (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
+ } else {
+ /* Return +-inf. */
+ return (sign << (exp_size + frac_size)) |
+ MAKE_64BIT_MASK(frac_size, exp_size);
+ }
+ }
+ }
+
+ int idx = frac >> (frac_size - precision);
+ uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
+ (frac_size - precision);
+ uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
+
+ if (out_exp == 0 || out_exp == UINT64_MAX) {
+ /*
+ * The result is subnormal, but don't raise the underflow exception,
+ * because there's no additional loss of precision.
+ */
+ out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
+ if (out_exp == UINT64_MAX) {
+ out_frac >>= 1;
+ out_exp = 0;
+ }
+ }
+
+ uint64_t val = 0;
+ val = deposit64(val, 0, frac_size, out_frac);
+ val = deposit64(val, frac_size, exp_size, out_exp);
+ val = deposit64(val, frac_size + exp_size, 1, sign);
+ return val;
+}
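The reciprocal estimate above uses the same structure as frsqrt7(): because ~exp wraps around as a uint64_t, out_exp is effectively 2 * bias - 1 - exp, and the out_exp == 0 / all-ones branch right-shifts the fraction when the true reciprocal falls into the subnormal range. An editorial worked example in binary16 (assuming exp_size = 5, frac_size = 10, bias 15):

/*
 * f = 0x3c00 (binary16 1.0): sign = 0, exp = 15, frac = 0
 *   idx      = 0 >> (10 - 7)                     = 0
 *   out_frac = lookup_table[0] << 3              = 127 << 3 = 1016
 *   out_exp  = 2 * 15 + ~15                      = 30 - 16 = 14  (neither 0 nor all-ones)
 *   result   = 2^(14 - 15) * (1 + 1016 / 1024)   = 0.99609375  ~=  1 / 1.0
 */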
+
+static float16 frec7_h(float16 f, float_status *s)
+{
+ int exp_size = 5, frac_size = 10;
+ bool sign = float16_is_neg(f);
+
+ /* frec7(+-inf) = +-0 */
+ if (float16_is_infinity(f)) {
+ return float16_set_sign(float16_zero, sign);
+ }
+
+ /* frec7(+-0) = +-inf */
+ if (float16_is_zero(f)) {
+ s->float_exception_flags |= float_flag_divbyzero;
+ return float16_set_sign(float16_infinity, sign);
+ }
+
+ /* frec7(sNaN) = canonical NaN */
+ if (float16_is_signaling_nan(f, s)) {
+ s->float_exception_flags |= float_flag_invalid;
+ return float16_default_nan(s);
+ }
+
+ /* frec7(qNaN) = canonical NaN */
+ if (float16_is_quiet_nan(f, s)) {
+ return float16_default_nan(s);
+ }
+
+ /* +-normal, +-subnormal */
+ uint64_t val = frec7(f, exp_size, frac_size, s);
+ return make_float16(val);
+}
+
+static float32 frec7_s(float32 f, float_status *s)
+{
+ int exp_size = 8, frac_size = 23;
+ bool sign = float32_is_neg(f);
+
+ /* frec7(+-inf) = +-0 */
+ if (float32_is_infinity(f)) {
+ return float32_set_sign(float32_zero, sign);
+ }
+
+ /* frec7(+-0) = +-inf */
+ if (float32_is_zero(f)) {
+ s->float_exception_flags |= float_flag_divbyzero;
+ return float32_set_sign(float32_infinity, sign);
+ }
+
+ /* frec7(sNaN) = canonical NaN */
+ if (float32_is_signaling_nan(f, s)) {
+ s->float_exception_flags |= float_flag_invalid;
+ return float32_default_nan(s);
+ }
+
+ /* frec7(qNaN) = canonical NaN */
+ if (float32_is_quiet_nan(f, s)) {
+ return float32_default_nan(s);
+ }
+
+ /* +-normal, +-subnormal */
+ uint64_t val = frec7(f, exp_size, frac_size, s);
+ return make_float32(val);
+}
+
+static float64 frec7_d(float64 f, float_status *s)
+{
+ int exp_size = 11, frac_size = 52;
+ bool sign = float64_is_neg(f);
+
+ /* frec7(+-inf) = +-0 */
+ if (float64_is_infinity(f)) {
+ return float64_set_sign(float64_zero, sign);
+ }
+
+ /* frec7(+-0) = +-inf */
+ if (float64_is_zero(f)) {
+ s->float_exception_flags |= float_flag_divbyzero;
+ return float64_set_sign(float64_infinity, sign);
+ }
+
+ /* frec7(sNaN) = canonical NaN */
+ if (float64_is_signaling_nan(f, s)) {
+ s->float_exception_flags |= float_flag_invalid;
+ return float64_default_nan(s);
+ }
+
+ /* frec7(qNaN) = canonical NaN */
+ if (float64_is_quiet_nan(f, s)) {
+ return float64_default_nan(s);
+ }
+
+ /* +-normal, +-subnormal */
+ uint64_t val = frec7(f, exp_size, frac_size, s);
+ return make_float64(val);
+}
+
+RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
+RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
+RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
+GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2)
+GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4)
+GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8)
/* Vector Floating-Point MIN/MAX Instructions */
-RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum)
-RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum)
-RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum)
-GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8, clearq)
-RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum)
-RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum)
-RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum)
-GEN_VEXT_VF(vfmin_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfmin_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfmin_vf_d, 8, 8, clearq)
-
-RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum)
-RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum)
-RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum)
-GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8, clearq)
-RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum)
-RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum)
-RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum)
-GEN_VEXT_VF(vfmax_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfmax_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfmax_vf_d, 8, 8, clearq)
+RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
+RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
+RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
+GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
+RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
+RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
+RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
+GEN_VEXT_VF(vfmin_vf_h, 2, 2)
+GEN_VEXT_VF(vfmin_vf_w, 4, 4)
+GEN_VEXT_VF(vfmin_vf_d, 8, 8)
+
+RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
+RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
+RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
+GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
+RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
+RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
+RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
+GEN_VEXT_VF(vfmax_vf_h, 2, 2)
+GEN_VEXT_VF(vfmax_vf_w, 4, 4)
+GEN_VEXT_VF(vfmax_vf_d, 8, 8)
/* Vector Floating-Point Sign-Injection Instructions */
static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
@@ -3860,15 +3845,15 @@ static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
-GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
-GEN_VEXT_VF(vfsgnj_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfsgnj_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfsgnj_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
+GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
+GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
{
@@ -3888,15 +3873,15 @@ static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
-GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
-GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
+GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
+GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
{
@@ -3916,39 +3901,35 @@ static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
-GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2, clearh)
-GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4, clearl)
-GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8, clearq)
+GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
+GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
+GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
-GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2, clearh)
-GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4, clearl)
-GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8, clearq)
+GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
+GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
+GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
/* Vector Floating-Point Compare Instructions */
#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
- uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
+ for (i = env->vstart; i < vl; i++) { \
ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
continue; \
} \
- vext_set_elem_mask(vd, mlen, i, \
+ vext_set_elem_mask(vd, i, \
DO_OP(s2, s1, &env->fp_status)); \
} \
- for (; i < vlmax; i++) { \
- vext_set_elem_mask(vd, mlen, i, 0); \
- } \
+ env->vstart = 0; \
}
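[Editorial note, not from the patch.] Dropping the trailing vext_set_elem_mask(vd, ..., 0) loop changes the destination's tail handling: the compare now writes only the mask bits for elements vstart..vl-1, for example:

/*
 * vl = 3, vd previously 0b...1111: vmfeq.vv updates bits 0..2 only;
 * bit 3 and above keep whatever they held before, whereas the 0.7.1
 * code forced every bit up to vlmax to 0.
 */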
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
@@ -3959,23 +3940,19 @@ GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
- uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
+ for (i = env->vstart; i < vl; i++) { \
ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
continue; \
} \
- vext_set_elem_mask(vd, mlen, i, \
+ vext_set_elem_mask(vd, i, \
DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
} \
- for (; i < vlmax; i++) { \
- vext_set_elem_mask(vd, mlen, i, 0); \
- } \
+ env->vstart = 0; \
}
GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
@@ -4068,13 +4045,6 @@ GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
-GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet)
-GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet)
-GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet)
-GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet)
-GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet)
-GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet)
-
/* Vector Floating-Point Classify Instruction */
#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, void *vs2, int i) \
@@ -4083,23 +4053,21 @@ static void do_##NAME(void *vd, void *vs2, int i) \
*((TD *)vd + HD(i)) = OP(s2); \
}
-#define GEN_VEXT_V(NAME, ESZ, DSZ, CLEAR_FN) \
+#define GEN_VEXT_V(NAME, ESZ, DSZ) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t vlmax = vext_maxsz(desc) / ESZ; \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
+ for (i = env->vstart; i < vl; i++) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
continue; \
} \
do_##NAME(vd, vs2, i); \
} \
- CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \
+ env->vstart = 0; \
}
target_ulong fclass_h(uint64_t frs1)
@@ -4162,97 +4130,99 @@ target_ulong fclass_d(uint64_t frs1)
RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
-GEN_VEXT_V(vfclass_v_h, 2, 2, clearh)
-GEN_VEXT_V(vfclass_v_w, 4, 4, clearl)
-GEN_VEXT_V(vfclass_v_d, 8, 8, clearq)
+GEN_VEXT_V(vfclass_v_h, 2, 2)
+GEN_VEXT_V(vfclass_v_w, 4, 4)
+GEN_VEXT_V(vfclass_v_d, 8, 8)
/* Vector Floating-Point Merge Instruction */
-#define GEN_VFMERGE_VF(NAME, ETYPE, H, CLEAR_FN) \
+#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
- uint32_t esz = sizeof(ETYPE); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
+ for (i = env->vstart; i < vl; i++) { \
ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
*((ETYPE *)vd + H(i)) \
- = (!vm && !vext_elem_mask(v0, mlen, i) ? s2 : s1); \
+ = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
} \
- CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \
+ env->vstart = 0; \
}
-GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2, clearh)
-GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4, clearl)
-GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8, clearq)
+GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
+GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
+GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
/* Single-Width Floating-Point/Integer Type-Convert Instructions */
/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
-GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2, clearh)
-GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4, clearl)
-GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8, clearq)
+GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
+GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
+GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)
/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
-GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2, clearh)
-GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4, clearl)
-GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8, clearq)
+GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
+GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
+GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)
/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
-GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2, clearh)
-GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4, clearl)
-GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8, clearq)
+GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
+GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
+GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)
/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
-GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2, clearh)
-GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4, clearl)
-GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8, clearq)
+GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
+GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
+GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
/* Widening Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
+#define WOP_UU_B uint16_t, uint8_t, uint8_t
#define WOP_UU_H uint32_t, uint16_t, uint16_t
#define WOP_UU_W uint64_t, uint32_t, uint32_t
/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/
RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
-GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4, clearl)
-GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8, clearq)
+GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
+GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)
/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
-GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4, clearl)
-GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8, clearq)
+GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
+GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)
/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
+RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
-GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4, clearl)
-GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8, clearq)
+GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
+GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
+GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)
/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
+RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
-GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4, clearl)
-GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8, clearq)
+GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
+GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
+GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
/*
- * vfwcvt.f.f.v vd, vs2, vm #
+ * vfwcvt.f.f.v vd, vs2, vm
* Convert single-width float to double-width float.
*/
static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
@@ -4262,36 +4232,41 @@ static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
-GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4, clearl)
-GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8, clearq)
+GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
+GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
+#define NOP_UU_B uint8_t, uint16_t, uint32_t
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
-RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16)
-RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32)
-GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2, clearh)
-GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4, clearl)
+RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
+RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
+RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
+GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1)
+GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2)
+GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4)
/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
-RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16)
-RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32)
-GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2, clearh)
-GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4, clearl)
+RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
+RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
+RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
+GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1)
+GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2)
+GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4)
/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
-RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16)
-RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32)
-GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2, clearh)
-GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4, clearl)
+RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
+RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
+GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2)
+GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4)
/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
-RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16)
-RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32)
-GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2, clearh)
-GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4, clearl)
+RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
+RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
+GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2)
+GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4)
/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
static uint16_t vfncvtffv16(uint32_t a, float_status *s)
@@ -4299,179 +4274,171 @@ static uint16_t vfncvtffv16(uint32_t a, float_status *s)
return float32_to_float16(a, true, s);
}
-RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16)
-RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32)
-GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2, clearh)
-GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4, clearl)
+RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
+RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
+GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2)
+GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4)
/*
*** Vector Reduction Operations
*/
/* Vector Single-Width Integer Reduction Instructions */
-#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\
+#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
uint32_t i; \
- uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \
TD s1 = *((TD *)vs1 + HD(0)); \
\
- for (i = 0; i < vl; i++) { \
+ for (i = env->vstart; i < vl; i++) { \
TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
continue; \
} \
s1 = OP(s1, (TD)s2); \
} \
*((TD *)vd + HD(0)) = s1; \
- CLEAR_FN(vd, 1, sizeof(TD), tot); \
+ env->vstart = 0; \
}
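A small editorial example of a helper generated by GEN_VEXT_RED (unmasked, all elements active):

/*
 * vredsum.vs with vl = 4, vs1[0] = 10, vs2 = {1, 2, 3, 4}:
 *   s1: 10 -> 11 -> 13 -> 16 -> 20, so vd[0] = 20.
 * Elements of vd beyond index 0 are no longer zeroed (no CLEAR_FN);
 * they are treated as tail elements under rvv-1.0.
 */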
/* vd[0] = sum(vs1[0], vs2[*]) */
-GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD, clearb)
-GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD, clearh)
-GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD, clearl)
-GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD, clearq)
+GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
+GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
+GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
+GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
/* vd[0] = maxu(vs1[0], vs2[*]) */
-GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX, clearb)
-GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX, clearh)
-GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX, clearl)
-GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX, clearq)
+GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
+GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
+GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
+GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
/* vd[0] = max(vs1[0], vs2[*]) */
-GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX, clearb)
-GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX, clearh)
-GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX, clearl)
-GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX, clearq)
+GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
+GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
+GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
+GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
/* vd[0] = minu(vs1[0], vs2[*]) */
-GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN, clearb)
-GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN, clearh)
-GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN, clearl)
-GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN, clearq)
+GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
+GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
+GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
+GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
/* vd[0] = min(vs1[0], vs2[*]) */
-GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN, clearb)
-GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN, clearh)
-GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN, clearl)
-GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN, clearq)
+GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
+GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
+GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
+GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
/* vd[0] = and(vs1[0], vs2[*]) */
-GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND, clearb)
-GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND, clearh)
-GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND, clearl)
-GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND, clearq)
+GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
+GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
+GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
+GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
/* vd[0] = or(vs1[0], vs2[*]) */
-GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR, clearb)
-GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR, clearh)
-GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR, clearl)
-GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR, clearq)
+GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
+GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
+GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
+GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
/* vd[0] = xor(vs1[0], vs2[*]) */
-GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR, clearb)
-GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR, clearh)
-GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR, clearl)
-GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR, clearq)
+GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
+GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
+GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
+GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
/* Vector Widening Integer Reduction Instructions */
/* signed sum reduction into double-width accumulator */
-GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD, clearh)
-GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD, clearl)
-GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD, clearq)
+GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
+GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
+GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
/* Unsigned sum reduction into double-width accumulator */
-GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD, clearh)
-GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD, clearl)
-GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD, clearq)
+GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
+GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
+GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
/* Vector Single-Width Floating-Point Reduction Instructions */
-#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\
+#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
void *vs2, CPURISCVState *env, \
uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
uint32_t i; \
- uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \
TD s1 = *((TD *)vs1 + HD(0)); \
\
- for (i = 0; i < vl; i++) { \
+ for (i = env->vstart; i < vl; i++) { \
TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
continue; \
} \
s1 = OP(s1, (TD)s2, &env->fp_status); \
} \
*((TD *)vd + HD(0)) = s1; \
- CLEAR_FN(vd, 1, sizeof(TD), tot); \
+ env->vstart = 0; \
}
/* Unordered sum */
-GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add, clearh)
-GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add, clearl)
-GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add, clearq)
+GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
+GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
+GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
/* Maximum value */
-GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum, clearh)
-GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum, clearl)
-GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum, clearq)
+GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
+GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
+GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
/* Minimum value */
-GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum, clearh)
-GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum, clearl)
-GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum, clearq)
+GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
+GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
+GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
/* Vector Widening Floating-Point Reduction Instructions */
/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
void *vs2, CPURISCVState *env, uint32_t desc)
{
- uint32_t mlen = vext_mlen(desc);
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
uint32_t i;
- uint32_t tot = env_archcpu(env)->cfg.vlen / 8;
uint32_t s1 = *((uint32_t *)vs1 + H4(0));
- for (i = 0; i < vl; i++) {
+ for (i = env->vstart; i < vl; i++) {
uint16_t s2 = *((uint16_t *)vs2 + H2(i));
- if (!vm && !vext_elem_mask(v0, mlen, i)) {
+ if (!vm && !vext_elem_mask(v0, i)) {
continue;
}
s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
&env->fp_status);
}
*((uint32_t *)vd + H4(0)) = s1;
- clearl(vd, 1, sizeof(uint32_t), tot);
+ env->vstart = 0;
}
void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
void *vs2, CPURISCVState *env, uint32_t desc)
{
- uint32_t mlen = vext_mlen(desc);
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
uint32_t i;
- uint32_t tot = env_archcpu(env)->cfg.vlen / 8;
uint64_t s1 = *((uint64_t *)vs1);
- for (i = 0; i < vl; i++) {
+ for (i = env->vstart; i < vl; i++) {
uint32_t s2 = *((uint32_t *)vs2 + H4(i));
- if (!vm && !vext_elem_mask(v0, mlen, i)) {
+ if (!vm && !vext_elem_mask(v0, i)) {
continue;
}
s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
&env->fp_status);
}
*((uint64_t *)vd) = s1;
- clearq(vd, 1, sizeof(uint64_t), tot);
+ env->vstart = 0;
}
/*
@@ -4483,20 +4450,16 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
void *vs2, CPURISCVState *env, \
uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
- uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
uint32_t vl = env->vl; \
uint32_t i; \
int a, b; \
\
- for (i = 0; i < vl; i++) { \
- a = vext_elem_mask(vs1, mlen, i); \
- b = vext_elem_mask(vs2, mlen, i); \
- vext_set_elem_mask(vd, mlen, i, OP(b, a)); \
- } \
- for (; i < vlmax; i++) { \
- vext_set_elem_mask(vd, mlen, i, 0); \
+ for (i = env->vstart; i < vl; i++) { \
+ a = vext_elem_mask(vs1, i); \
+ b = vext_elem_mask(vs2, i); \
+ vext_set_elem_mask(vd, i, OP(b, a)); \
} \
+ env->vstart = 0; \
}
#define DO_NAND(N, M) (!(N & M))
@@ -4507,49 +4470,49 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
-GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT)
+GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
-GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT)
+GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
-/* Vector mask population count vmpopc */
-target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env,
- uint32_t desc)
+/* Vector count population in mask vcpop */
+target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
+ uint32_t desc)
{
target_ulong cnt = 0;
- uint32_t mlen = vext_mlen(desc);
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
int i;
- for (i = 0; i < vl; i++) {
- if (vm || vext_elem_mask(v0, mlen, i)) {
- if (vext_elem_mask(vs2, mlen, i)) {
+ for (i = env->vstart; i < vl; i++) {
+ if (vm || vext_elem_mask(v0, i)) {
+ if (vext_elem_mask(vs2, i)) {
cnt++;
}
}
}
+ env->vstart = 0;
return cnt;
}
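A scalar reference model of the renamed vcpop.m semantics, using the same illustrative one-byte-per-element mask layout (vcpop_ref is a hypothetical name, not QEMU API):

#include <stdint.h>

/* Count the elements of vs2 whose mask bit is set, restricted to the
 * active elements selected by v0 when vm == 0, starting at vstart. */
static long vcpop_ref(const uint8_t *v0, const uint8_t *vs2,
                      int vm, uint32_t vstart, uint32_t vl)
{
    long cnt = 0;
    for (uint32_t i = vstart; i < vl; i++) {
        if ((vm || v0[i]) && vs2[i]) {
            cnt++;
        }
    }
    return cnt;
}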
-/* vmfirst find-first-set mask bit*/
-target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env,
- uint32_t desc)
+/* vfirst find-first-set mask bit */
+target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
+ uint32_t desc)
{
- uint32_t mlen = vext_mlen(desc);
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
int i;
- for (i = 0; i < vl; i++) {
- if (vm || vext_elem_mask(v0, mlen, i)) {
- if (vext_elem_mask(vs2, mlen, i)) {
+ for (i = env->vstart; i < vl; i++) {
+ if (vm || vext_elem_mask(v0, i)) {
+ if (vext_elem_mask(vs2, i)) {
return i;
}
}
}
+ env->vstart = 0;
return -1LL;
}
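The renamed vfirst.m returns the index of the first active set bit, or -1 if there is none. A reference sketch under the same byte-per-element mask assumption:

#include <stdint.h>

/* Index of the first active element of vs2 with its bit set, else -1. */
static long vfirst_ref(const uint8_t *v0, const uint8_t *vs2,
                       int vm, uint32_t vstart, uint32_t vl)
{
    for (uint32_t i = vstart; i < vl; i++) {
        if ((vm || v0[i]) && vs2[i]) {
            return (long)i;
        }
    }
    return -1;
}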
@@ -4562,40 +4525,36 @@ enum set_mask_type {
static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
uint32_t desc, enum set_mask_type type)
{
- uint32_t mlen = vext_mlen(desc);
- uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
int i;
bool first_mask_bit = false;
- for (i = 0; i < vl; i++) {
- if (!vm && !vext_elem_mask(v0, mlen, i)) {
+ for (i = env->vstart; i < vl; i++) {
+ if (!vm && !vext_elem_mask(v0, i)) {
continue;
}
/* write a zero to all following active elements */
if (first_mask_bit) {
- vext_set_elem_mask(vd, mlen, i, 0);
+ vext_set_elem_mask(vd, i, 0);
continue;
}
- if (vext_elem_mask(vs2, mlen, i)) {
+ if (vext_elem_mask(vs2, i)) {
first_mask_bit = true;
if (type == BEFORE_FIRST) {
- vext_set_elem_mask(vd, mlen, i, 0);
+ vext_set_elem_mask(vd, i, 0);
} else {
- vext_set_elem_mask(vd, mlen, i, 1);
+ vext_set_elem_mask(vd, i, 1);
}
} else {
if (type == ONLY_FIRST) {
- vext_set_elem_mask(vd, mlen, i, 0);
+ vext_set_elem_mask(vd, i, 0);
} else {
- vext_set_elem_mask(vd, mlen, i, 1);
+ vext_set_elem_mask(vd, i, 1);
}
}
}
- for (; i < vlmax; i++) {
- vext_set_elem_mask(vd, mlen, i, 0);
- }
+ env->vstart = 0;
}
void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
@@ -4617,217 +4576,266 @@ void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
}
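The shared vmsetm() body above drives vmsbf.m, vmsif.m and vmsof.m. A minimal unmasked reference model, with a worked example in the comment (vmsetm_ref and REF_* are illustrative names only):

#include <stdint.h>

enum ref_set_type { REF_BEFORE_FIRST, REF_INCLUDE_FIRST, REF_ONLY_FIRST };

/* For vs2 = {0,0,1,0,1}: BEFORE -> {1,1,0,0,0}, INCLUDE -> {1,1,1,0,0},
 * ONLY -> {0,0,1,0,0}.  Masks are one byte per element here. */
static void vmsetm_ref(uint8_t *vd, const uint8_t *vs2,
                       uint32_t vl, enum ref_set_type type)
{
    int seen_first = 0;
    for (uint32_t i = 0; i < vl; i++) {
        if (seen_first) {
            vd[i] = 0;                                   /* after first set bit */
        } else if (vs2[i]) {
            seen_first = 1;
            vd[i] = (type == REF_BEFORE_FIRST) ? 0 : 1;  /* the first set bit */
        } else {
            vd[i] = (type == REF_ONLY_FIRST) ? 0 : 1;    /* before first set bit */
        }
    }
}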
/* Vector Iota Instruction */
-#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H, CLEAR_FN) \
+#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
- uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
uint32_t sum = 0; \
int i; \
\
- for (i = 0; i < vl; i++) { \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
+ for (i = env->vstart; i < vl; i++) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
continue; \
} \
*((ETYPE *)vd + H(i)) = sum; \
- if (vext_elem_mask(vs2, mlen, i)) { \
+ if (vext_elem_mask(vs2, i)) { \
sum++; \
} \
} \
- CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
+ env->vstart = 0; \
}
-GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1, clearb)
-GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2, clearh)
-GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4, clearl)
-GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8, clearq)
+GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
+GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
+GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
+GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
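viota.m writes into each active destination element the number of set mask bits of vs2 at lower indices, i.e. an exclusive prefix sum. A minimal unmasked sketch (viota_ref is illustrative, not QEMU code):

#include <stdint.h>

/* Exclusive prefix sum of the mask bits in vs2,
 * e.g. vs2 = {1,0,1,1} -> vd = {0,1,1,2}. */
static void viota_ref(uint32_t *vd, const uint8_t *vs2, uint32_t vl)
{
    uint32_t sum = 0;
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = sum;
        if (vs2[i]) {
            sum++;
        }
    }
}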
/* Vector Element Index Instruction */
-#define GEN_VEXT_VID_V(NAME, ETYPE, H, CLEAR_FN) \
+#define GEN_VEXT_VID_V(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
- uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
int i; \
\
- for (i = 0; i < vl; i++) { \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
+ for (i = env->vstart; i < vl; i++) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
continue; \
} \
*((ETYPE *)vd + H(i)) = i; \
} \
- CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
+ env->vstart = 0; \
}
-GEN_VEXT_VID_V(vid_v_b, uint8_t, H1, clearb)
-GEN_VEXT_VID_V(vid_v_h, uint16_t, H2, clearh)
-GEN_VEXT_VID_V(vid_v_w, uint32_t, H4, clearl)
-GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq)
+GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
+GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
+GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
+GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
/*
*** Vector Permutation Instructions
*/
/* Vector Slide Instructions */
-#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H, CLEAR_FN) \
+#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
- uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
- target_ulong offset = s1, i; \
+ target_ulong offset = s1, i_min, i; \
\
- for (i = offset; i < vl; i++) { \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
+ i_min = MAX(env->vstart, offset); \
+ for (i = i_min; i < vl; i++) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
continue; \
} \
*((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
} \
- CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
}
/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
-GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1, clearb)
-GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2, clearh)
-GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4, clearl)
-GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8, clearq)
+GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
+GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
+GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
+GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
-#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H, CLEAR_FN) \
+#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
- uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
+ uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
- target_ulong offset = s1, i; \
+ target_ulong i_max, i; \
\
- for (i = 0; i < vl; ++i) { \
- target_ulong j = i + offset; \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
- continue; \
+ i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \
+ for (i = env->vstart; i < i_max; ++i) { \
+ if (vm || vext_elem_mask(v0, i)) { \
+ *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
} \
- *((ETYPE *)vd + H(i)) = j >= vlmax ? 0 : *((ETYPE *)vs2 + H(j)); \
} \
- CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
-}
-
-/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
-GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1, clearb)
-GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2, clearh)
-GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4, clearl)
-GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8, clearq)
-
-#define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H, CLEAR_FN) \
-void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
- CPURISCVState *env, uint32_t desc) \
-{ \
- uint32_t mlen = vext_mlen(desc); \
- uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
- uint32_t vm = vext_vm(desc); \
- uint32_t vl = env->vl; \
- uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
- continue; \
- } \
- if (i == 0) { \
- *((ETYPE *)vd + H(i)) = s1; \
- } else { \
- *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
+ for (i = i_max; i < vl; ++i) { \
+ if (vm || vext_elem_mask(v0, i)) { \
+ *((ETYPE *)vd + H(i)) = 0; \
} \
} \
- CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
+ \
+ env->vstart = 0; \
}
-/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
-GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1, clearb)
-GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2, clearh)
-GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4, clearl)
-GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8, clearq)
+/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
+GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
+GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
+GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
+GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
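The rewritten vslidedown helper splits the loop at i_max = min(vlmax - offset, vl), clamped below by vstart: below i_max the source index i + offset is still in range and the element is copied, while at or above i_max the source index would be >= vlmax, so active elements are written with zero. An unmasked reference model for 32-bit elements (vslidedown_ref is a hypothetical name):

#include <stdint.h>

/* vd[i] = (i + offset < vlmax) ? vs2[i + offset] : 0, for 0 <= i < vl */
static void vslidedown_ref(uint32_t *vd, const uint32_t *vs2,
                           uint64_t offset, uint32_t vl, uint32_t vlmax)
{
    uint32_t i_max = (offset < vlmax) ? (uint32_t)(vlmax - offset) : 0;
    if (i_max > vl) {
        i_max = vl;
    }
    for (uint32_t i = 0; i < i_max; i++) {
        vd[i] = vs2[i + offset];    /* in-range source elements */
    }
    for (uint32_t i = i_max; i < vl; i++) {
        vd[i] = 0;                  /* sources past vlmax read as zero */
    }
}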
+
+#define GEN_VEXT_VSLIE1UP(ESZ, H) \
+static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
+ CPURISCVState *env, uint32_t desc) \
+{ \
+ typedef uint##ESZ##_t ETYPE; \
+ uint32_t vm = vext_vm(desc); \
+ uint32_t vl = env->vl; \
+ uint32_t i; \
+ \
+ for (i = env->vstart; i < vl; i++) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
+ continue; \
+ } \
+ if (i == 0) { \
+ *((ETYPE *)vd + H(i)) = s1; \
+ } else { \
+ *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
+ } \
+ } \
+ env->vstart = 0; \
+}
+
+GEN_VEXT_VSLIE1UP(8, H1)
+GEN_VEXT_VSLIE1UP(16, H2)
+GEN_VEXT_VSLIE1UP(32, H4)
+GEN_VEXT_VSLIE1UP(64, H8)
+
+#define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
+ CPURISCVState *env, uint32_t desc) \
+{ \
+ vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \
+}
-#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H, CLEAR_FN) \
-void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
- CPURISCVState *env, uint32_t desc) \
-{ \
- uint32_t mlen = vext_mlen(desc); \
- uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
- uint32_t vm = vext_vm(desc); \
- uint32_t vl = env->vl; \
- uint32_t i; \
- \
- for (i = 0; i < vl; i++) { \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
- continue; \
- } \
- if (i == vl - 1) { \
- *((ETYPE *)vd + H(i)) = s1; \
- } else { \
- *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
- } \
- } \
- CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
+/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
+GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
+GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
+GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
+GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
+
+#define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \
+static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
+ CPURISCVState *env, uint32_t desc) \
+{ \
+ typedef uint##ESZ##_t ETYPE; \
+ uint32_t vm = vext_vm(desc); \
+ uint32_t vl = env->vl; \
+ uint32_t i; \
+ \
+ for (i = env->vstart; i < vl; i++) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
+ continue; \
+ } \
+ if (i == vl - 1) { \
+ *((ETYPE *)vd + H(i)) = s1; \
+ } else { \
+ *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
+ } \
+ } \
+ env->vstart = 0; \
+}
+
+GEN_VEXT_VSLIDE1DOWN(8, H1)
+GEN_VEXT_VSLIDE1DOWN(16, H2)
+GEN_VEXT_VSLIDE1DOWN(32, H4)
+GEN_VEXT_VSLIDE1DOWN(64, H8)
+
+#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
+ CPURISCVState *env, uint32_t desc) \
+{ \
+ vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \
}
/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
-GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1, clearb)
-GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2, clearh)
-GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4, clearl)
-GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8, clearq)
+GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
+GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
+GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
+GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
+
+/* Vector Floating-Point Slide Instructions */
+#define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \
+void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
+ CPURISCVState *env, uint32_t desc) \
+{ \
+ vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \
+}
+
+/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
+GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
+GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
+GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
+
+#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \
+void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
+ CPURISCVState *env, uint32_t desc) \
+{ \
+ vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \
+}
+
+/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
+GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
+GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
+GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
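The new vfslide1up.vf/vfslide1down.vf helpers simply forward to the integer slide1 bodies: f[rs1] arrives as a uint64_t of raw bits and is only ever stored into an element, never interpreted arithmetically, so nothing is lost. A minimal sketch of that kind of bit transport (float_to_raw_bits is illustrative only; NaN-boxing of the upper half is ignored here):

#include <stdint.h>
#include <string.h>

/* Move a float's bit pattern into a wider integer without reinterpretation. */
static uint64_t float_to_raw_bits(float f)
{
    uint32_t u32;
    memcpy(&u32, &f, sizeof(u32));
    return u32;
}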
/* Vector Register Gather Instruction */
-#define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H, CLEAR_FN) \
+#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
- uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
+ uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
uint64_t index; \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
+ for (i = env->vstart; i < vl; i++) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
continue; \
} \
- index = *((ETYPE *)vs1 + H(i)); \
+ index = *((TS1 *)vs1 + HS1(i)); \
if (index >= vlmax) { \
- *((ETYPE *)vd + H(i)) = 0; \
+ *((TS2 *)vd + HS2(i)) = 0; \
} else { \
- *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
+ *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
} \
} \
- CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
+ env->vstart = 0; \
}
/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
-GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, H1, clearb)
-GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2, clearh)
-GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4, clearl)
-GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8, clearq)
+GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
+GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
+GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
+GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
+
+GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
+GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
+GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
+GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
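The extra TS1/HS1 parameters let the same macro cover vrgatherei16.vv, which always reads its index operand as 16-bit elements while the data width follows TS2/HS2. An unmasked reference model for SEW=32 data with EEW=16 indices (vrgatherei16_ref is a hypothetical name):

#include <stdint.h>

/* vd[i] = (vs1[i] >= vlmax) ? 0 : vs2[vs1[i]] */
static void vrgatherei16_ref(uint32_t *vd, const uint16_t *vs1,
                             const uint32_t *vs2, uint32_t vl, uint32_t vlmax)
{
    for (uint32_t i = 0; i < vl; i++) {
        uint32_t index = vs1[i];
        vd[i] = (index >= vlmax) ? 0 : vs2[index];
    }
}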
-#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H, CLEAR_FN) \
+#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
- uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
+ uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
uint64_t index = s1; \
uint32_t i; \
\
- for (i = 0; i < vl; i++) { \
- if (!vm && !vext_elem_mask(v0, mlen, i)) { \
+ for (i = env->vstart; i < vl; i++) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
continue; \
} \
if (index >= vlmax) { \
@@ -4836,37 +4844,88 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
*((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
} \
} \
- CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
+ env->vstart = 0; \
}
/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
-GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1, clearb)
-GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2, clearh)
-GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4, clearl)
-GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8, clearq)
+GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
+GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
+GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
+GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
/* Vector Compress Instruction */
-#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H, CLEAR_FN) \
+#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t mlen = vext_mlen(desc); \
- uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \
uint32_t vl = env->vl; \
uint32_t num = 0, i; \
\
- for (i = 0; i < vl; i++) { \
- if (!vext_elem_mask(vs1, mlen, i)) { \
+ for (i = env->vstart; i < vl; i++) { \
+ if (!vext_elem_mask(vs1, i)) { \
continue; \
} \
*((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
num++; \
} \
- CLEAR_FN(vd, num, num * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \
+ env->vstart = 0; \
}
/* Compress into vd elements of vs2 where vs1 is enabled */
-GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1, clearb)
-GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2, clearh)
-GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4, clearl)
-GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8, clearq)
+GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
+GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
+GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
+GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
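vcompress.vm has no v0 operand; vs1 itself is the selection mask, and the selected elements of vs2 are packed into the low elements of vd. A reference model for 32-bit elements (vcompress_ref is illustrative, not QEMU code):

#include <stdint.h>

/* vs2 = {10,20,30,40}, vs1 = {1,0,1,1}  ->  vd[0..2] = {10,30,40} */
static uint32_t vcompress_ref(uint32_t *vd, const uint8_t *vs1,
                              const uint32_t *vs2, uint32_t vl)
{
    uint32_t num = 0;
    for (uint32_t i = 0; i < vl; i++) {
        if (vs1[i]) {
            vd[num++] = vs2[i];
        }
    }
    return num;   /* elements written; the rest of vd is left untouched */
}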
+
+/* Vector Whole Register Move */
+#define GEN_VEXT_VMV_WHOLE(NAME, LEN) \
+void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \
+ uint32_t desc) \
+{ \
+ /* EEW = 8 */ \
+ uint32_t maxsz = simd_maxsz(desc); \
+ uint32_t i = env->vstart; \
+ \
+ memcpy((uint8_t *)vd + H1(i), \
+ (uint8_t *)vs2 + H1(i), \
+ maxsz - env->vstart); \
+ \
+ env->vstart = 0; \
+}
+
+GEN_VEXT_VMV_WHOLE(vmv1r_v, 1)
+GEN_VEXT_VMV_WHOLE(vmv2r_v, 2)
+GEN_VEXT_VMV_WHOLE(vmv4r_v, 4)
+GEN_VEXT_VMV_WHOLE(vmv8r_v, 8)
+
+/* Vector Integer Extension */
+#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
+void HELPER(NAME)(void *vd, void *v0, void *vs2, \
+ CPURISCVState *env, uint32_t desc) \
+{ \
+ uint32_t vl = env->vl; \
+ uint32_t vm = vext_vm(desc); \
+ uint32_t i; \
+ \
+ for (i = env->vstart; i < vl; i++) { \
+ if (!vm && !vext_elem_mask(v0, i)) { \
+ continue; \
+ } \
+ *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
+ } \
+ env->vstart = 0; \
+}
+
+GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
+GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
+GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
+GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
+GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
+GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
+
+GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
+GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
+GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
+GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
+GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
+GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
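The vzext/vsext helpers widen each source element to 2x, 4x or 8x its width; the signedness comes entirely from the ETYPE/DTYPE pair passed to the macro, so the element copy relies on C's integer conversion rules. A reference model for the vf4 variants at SEW=32 (the _ref names are illustrative):

#include <stdint.h>

/* vsext.vf4, SEW=32: int8_t -> int32_t, sign extension via conversion. */
static void vsext_vf4_ref(int32_t *vd, const int8_t *vs2, uint32_t vl)
{
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = vs2[i];
    }
}

/* vzext.vf4, SEW=32: uint8_t -> uint32_t, zero extension the same way. */
static void vzext_vf4_ref(uint32_t *vd, const uint8_t *vs2, uint32_t vl)
{
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = vs2[i];
    }
}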