summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- target/arm/neon-dp.decode 43
-rw-r--r-- target/arm/translate-neon.inc.c 104
-rw-r--r-- target/arm/translate.c 16
3 files changed, 151 insertions, 12 deletions
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index bd1b0e13f7..144a527ee6 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -397,3 +397,46 @@ VCVT_FU_2sh 1111 001 1 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt
# So we have a single decode line and check the cmode/op in the
# trans function.
Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
+
+######################################################################
+# Within the "two registers, or three registers of different lengths"
+# grouping ([23,4]=0b10), bits [21:20] are either part of the opcode
+# decode: 0b11 for VEXT, two-reg-misc, VTBL, and duplicate-scalar;
+# or they are a size field for the three-reg-different-lengths and
+# two-reg-and-scalar insn groups (where size cannot be 0b11). This
+# is slightly awkward for decodetree: we handle it with this
+# non-exclusive group which contains within it two exclusive groups:
+# one for the size=0b11 patterns, and one for the size-not-0b11
+# patterns. This allows us to check that none of the insns within
+# each subgroup accidentally overlap each other. Note that all the
+# trans functions for the size-not-0b11 patterns must check and
+# return false for size==3.
+######################################################################
+{
+ # 0b11 subgroup will go here
+
+ # Subgroup for size != 0b11
+ [
+ ##################################################################
+ # 3-reg-different-length grouping:
+ # 1111 001 U 1 D sz!=11 Vn:4 Vd:4 opc:4 N 0 M 0 Vm:4
+ ##################################################################
+
+ &3diff vm vn vd size
+
+ @3diff .... ... . . . size:2 .... .... .... . . . . .... \
+ &3diff vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+ VADDL_S_3d 1111 001 0 1 . .. .... .... 0000 . 0 . 0 .... @3diff
+ VADDL_U_3d 1111 001 1 1 . .. .... .... 0000 . 0 . 0 .... @3diff
+
+ VADDW_S_3d 1111 001 0 1 . .. .... .... 0001 . 0 . 0 .... @3diff
+ VADDW_U_3d 1111 001 1 1 . .. .... .... 0001 . 0 . 0 .... @3diff
+
+ VSUBL_S_3d 1111 001 0 1 . .. .... .... 0010 . 0 . 0 .... @3diff
+ VSUBL_U_3d 1111 001 1 1 . .. .... .... 0010 . 0 . 0 .... @3diff
+
+ VSUBW_S_3d 1111 001 0 1 . .. .... .... 0011 . 0 . 0 .... @3diff
+ VSUBW_U_3d 1111 001 1 1 . .. .... .... 0011 . 0 . 0 .... @3diff
+ ]
+}
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 299a61f067..9b9d411107 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -1828,3 +1828,107 @@ static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
}
return do_1reg_imm(s, a, fn);
}
+
+static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
+ NeonGenWidenFn *widenfn,
+ NeonGenTwo64OpFn *opfn,
+ bool src1_wide)
+{
+ /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
+ TCGv_i64 rn0_64, rn1_64, rm_64;
+ TCGv_i32 rm;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!widenfn || !opfn) {
+ /* size == 3 case, which is an entirely different insn group */
+ return false;
+ }
+
+ if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
+ return false; /* vd (and vn when wide) are used as reg and reg + 1 below */
+ }
+
+ if (!vfp_access_check(s)) {
+ return true; /* same return value as the success path; no operation emitted */
+ }
+
+ rn0_64 = tcg_temp_new_i64();
+ rn1_64 = tcg_temp_new_i64();
+ rm_64 = tcg_temp_new_i64();
+
+ if (src1_wide) {
+ neon_load_reg64(rn0_64, a->vn); /* already 64 bits: widenfn not applied */
+ } else {
+ TCGv_i32 tmp = neon_load_reg(a->vn, 0);
+ widenfn(rn0_64, tmp);
+ tcg_temp_free_i32(tmp);
+ }
+ rm = neon_load_reg(a->vm, 0);
+
+ widenfn(rm_64, rm);
+ tcg_temp_free_i32(rm);
+ opfn(rn0_64, rn0_64, rm_64);
+
+ /*
+ * Load second pass inputs before storing the first pass result, to
+ * avoid incorrect results if a narrow input overlaps with the result.
+ */
+ if (src1_wide) {
+ neon_load_reg64(rn1_64, a->vn + 1);
+ } else {
+ TCGv_i32 tmp = neon_load_reg(a->vn, 1);
+ widenfn(rn1_64, tmp);
+ tcg_temp_free_i32(tmp);
+ }
+ rm = neon_load_reg(a->vm, 1);
+
+ neon_store_reg64(rn0_64, a->vd); /* first pass result */
+
+ widenfn(rm_64, rm);
+ tcg_temp_free_i32(rm);
+ opfn(rn1_64, rn1_64, rm_64);
+ neon_store_reg64(rn1_64, a->vd + 1); /* second pass result */
+
+ tcg_temp_free_i64(rn0_64);
+ tcg_temp_free_i64(rn1_64);
+ tcg_temp_free_i64(rm_64);
+
+ return true;
+}
+
+#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
+ { \
+ static NeonGenWidenFn * const widenfn[] = { /* indexed by a->size */ \
+ gen_helper_neon_widen_##S##8, \
+ gen_helper_neon_widen_##S##16, \
+ tcg_gen_##EXT##_i32_i64, \
+ NULL, /* size == 3: do_prewiden_3d() returns false */ \
+ }; \
+ static NeonGenTwo64OpFn * const addfn[] = { /* add or sub, per OP */ \
+ gen_helper_neon_##OP##l_u16, \
+ gen_helper_neon_##OP##l_u32, \
+ tcg_gen_##OP##_i64, \
+ NULL, /* size == 3: do_prewiden_3d() returns false */ \
+ }; \
+ return do_prewiden_3d(s, a, widenfn[a->size], \
+ addfn[a->size], SRC1WIDE); \
+ }
+
+DO_PREWIDEN(VADDL_S, s, ext, add, false)
+DO_PREWIDEN(VADDL_U, u, extu, add, false)
+DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
+DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
+DO_PREWIDEN(VADDW_S, s, ext, add, true)
+DO_PREWIDEN(VADDW_U, u, extu, add, true)
+DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
+DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index bcdfec34d2..9376534441 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -5241,7 +5241,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
/* Three registers of different lengths. */
int src1_wide;
int src2_wide;
- int prewiden;
/* undefreq: bit 0 : UNDEF if size == 0
* bit 1 : UNDEF if size == 1
* bit 2 : UNDEF if size == 2
@@ -5251,10 +5250,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
int undefreq;
/* prewiden, src1_wide, src2_wide, undefreq */
static const int neon_3reg_wide[16][4] = {
- {1, 0, 0, 0}, /* VADDL */
- {1, 1, 0, 0}, /* VADDW */
- {1, 0, 0, 0}, /* VSUBL */
- {1, 1, 0, 0}, /* VSUBW */
+ {0, 0, 0, 7}, /* VADDL: handled by decodetree */
+ {0, 0, 0, 7}, /* VADDW: handled by decodetree */
+ {0, 0, 0, 7}, /* VSUBL: handled by decodetree */
+ {0, 0, 0, 7}, /* VSUBW: handled by decodetree */
{0, 1, 1, 0}, /* VADDHN */
{0, 0, 0, 0}, /* VABAL */
{0, 1, 1, 0}, /* VSUBHN */
@@ -5269,7 +5268,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
{0, 0, 0, 7}, /* Reserved: always UNDEF */
};
- prewiden = neon_3reg_wide[op][0];
src1_wide = neon_3reg_wide[op][1];
src2_wide = neon_3reg_wide[op][2];
undefreq = neon_3reg_wide[op][3];
@@ -5322,9 +5320,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
} else {
tmp = neon_load_reg(rn, pass);
}
- if (prewiden) {
- gen_neon_widen(cpu_V0, tmp, size, u);
- }
}
if (src2_wide) {
neon_load_reg64(cpu_V1, rm + pass);
@@ -5335,9 +5330,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
} else {
tmp2 = neon_load_reg(rm, pass);
}
- if (prewiden) {
- gen_neon_widen(cpu_V1, tmp2, size, u);
- }
}
switch (op) {
case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */