summaryrefslogtreecommitdiff
path: root/target
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-06-16 13:36:31 +0100
committerPeter Maydell <peter.maydell@linaro.org>2020-06-16 13:36:31 +0100
commitcb8278cd997f4776b5a38fce7859bbe3b2d8d139 (patch)
treecf41c46b445f6ca9bfeb4c078a14d2ffbd98b159 /target
parent6675a653d2e57ab09c32c0ea7b44a1d6c40a7f58 (diff)
parent64b397417a26509bcdff44ab94356a35c7901c79 (diff)
downloadqemu-cb8278cd997f4776b5a38fce7859bbe3b2d8d139.zip
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200616' into staging
* hw: arm: Set vendor property for IMX SDHCI emulations * sd: sdhci: Implement basic vendor specific register support * hw/net/imx_fec: Convert debug fprintf() to trace events * target/arm/cpu: adjust virtual time for all KVM arm cpus * Implement configurable descriptor size in ftgmac100 * hw/misc/imx6ul_ccm: Implement non writable bits in CCM registers * target/arm: More Neon decodetree conversion work # gpg: Signature made Tue 16 Jun 2020 10:56:10 BST # gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE # gpg: issuer "peter.maydell@linaro.org" # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate] # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * remotes/pmaydell/tags/pull-target-arm-20200616: (23 commits) hw: arm: Set vendor property for IMX SDHCI emulations sd: sdhci: Implement basic vendor specific register support hw/net/imx_fec: Convert debug fprintf() to trace events target/arm/cpu: adjust virtual time for all KVM arm cpus Implement configurable descriptor size in ftgmac100 hw/misc/imx6ul_ccm: Implement non writable bits in CCM registers target/arm: Convert Neon VDUP (scalar) to decodetree target/arm: Convert Neon VTBL, VTBX to decodetree target/arm: Convert Neon VEXT to decodetree target/arm: Convert Neon 2-reg-scalar long multiplies to decodetree target/arm: Convert Neon 2-reg-scalar VQRDMLAH, VQRDMLSH to decodetree target/arm: Convert Neon 2-reg-scalar VQDMULH, VQRDMULH to decodetree target/arm: Convert Neon 2-reg-scalar float multiplies to decodetree target/arm: Convert Neon 2-reg-scalar integer multiplies to decodetree target/arm: Add missing TCG temp free in do_2shift_env_64() target/arm: Add 'static' and 'const' annotations to VSHLL function arrays target/arm: Convert Neon 3-reg-diff polynomial VMULL target/arm: Convert Neon 3-reg-diff saturating doubling multiplies target/arm: Convert Neon 3-reg-diff long multiplies target/arm: Convert Neon 3-reg-diff VABAL, VABDL to decodetree ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org> # Conflicts: # hw/arm/fsl-imx25.c # hw/arm/fsl-imx6.c # hw/arm/fsl-imx6ul.c # hw/arm/fsl-imx7.c
Diffstat (limited to 'target')
-rw-r--r--target/arm/cpu.c6
-rw-r--r--target/arm/cpu64.c1
-rw-r--r--target/arm/kvm.c21
-rw-r--r--target/arm/neon-dp.decode130
-rw-r--r--target/arm/translate-neon.inc.c1148
-rw-r--r--target/arm/translate.c684
-rw-r--r--target/arm/translate.h1
7 files changed, 1302 insertions, 689 deletions
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 32bec156f2..5b7a36b5d7 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1245,6 +1245,10 @@ void arm_cpu_post_init(Object *obj)
if (arm_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER)) {
qdev_property_add_static(DEVICE(cpu), &arm_cpu_gt_cntfrq_property);
}
+
+ if (kvm_enabled()) {
+ kvm_arm_add_vcpu_properties(obj);
+ }
}
static void arm_cpu_finalizefn(Object *obj)
@@ -2029,7 +2033,6 @@ static void arm_max_initfn(Object *obj)
if (kvm_enabled()) {
kvm_arm_set_cpu_features_from_host(cpu);
- kvm_arm_add_vcpu_properties(obj);
} else {
cortex_a15_initfn(obj);
@@ -2183,7 +2186,6 @@ static void arm_host_initfn(Object *obj)
if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
aarch64_add_sve_properties(obj);
}
- kvm_arm_add_vcpu_properties(obj);
arm_cpu_post_init(obj);
}
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index cbc5c3868f..778cecc2e6 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -592,7 +592,6 @@ static void aarch64_max_initfn(Object *obj)
if (kvm_enabled()) {
kvm_arm_set_cpu_features_from_host(cpu);
- kvm_arm_add_vcpu_properties(obj);
} else {
uint64_t t;
uint32_t u;
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 4bdbe6dcac..eef3bbd1cc 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -194,17 +194,18 @@ static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp)
/* KVM VCPU properties should be prefixed with "kvm-". */
void kvm_arm_add_vcpu_properties(Object *obj)
{
- if (!kvm_enabled()) {
- return;
- }
+ ARMCPU *cpu = ARM_CPU(obj);
+ CPUARMState *env = &cpu->env;
- ARM_CPU(obj)->kvm_adjvtime = true;
- object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get,
- kvm_no_adjvtime_set);
- object_property_set_description(obj, "kvm-no-adjvtime",
- "Set on to disable the adjustment of "
- "the virtual counter. VM stopped time "
- "will be counted.");
+ if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) {
+ cpu->kvm_adjvtime = true;
+ object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get,
+ kvm_no_adjvtime_set);
+ object_property_set_description(obj, "kvm-no-adjvtime",
+ "Set on to disable the adjustment of "
+ "the virtual counter. VM stopped time "
+ "will be counted.");
+ }
}
bool kvm_arm_pmu_supported(CPUState *cpu)
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index bd1b0e13f7..6d890b2161 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -397,3 +397,133 @@ VCVT_FU_2sh 1111 001 1 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt
# So we have a single decode line and check the cmode/op in the
# trans function.
Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
+
+######################################################################
+# Within the "two registers, or three registers of different lengths"
+# grouping ([23,4]=0b10), bits [21:20] are either part of the opcode
+# decode: 0b11 for VEXT, two-reg-misc, VTBL, and duplicate-scalar;
+# or they are a size field for the three-reg-different-lengths and
+# two-reg-and-scalar insn groups (where size cannot be 0b11). This
+# is slightly awkward for decodetree: we handle it with this
+# non-exclusive group which contains within it two exclusive groups:
+# one for the size=0b11 patterns, and one for the size-not-0b11
+# patterns. This allows us to check that none of the insns within
+# each subgroup accidentally overlap each other. Note that all the
+# trans functions for the size-not-0b11 patterns must check and
+# return false for size==3.
+######################################################################
+{
+ [
+ ##################################################################
+ # Miscellaneous size=0b11 insns
+ ##################################################################
+ VEXT 1111 001 0 1 . 11 .... .... imm:4 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+ VTBL 1111 001 1 1 . 11 .... .... 10 len:2 . op:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+ VDUP_scalar 1111 001 1 1 . 11 index:3 1 .... 11 000 q:1 . 0 .... \
+ vm=%vm_dp vd=%vd_dp size=0
+ VDUP_scalar 1111 001 1 1 . 11 index:2 10 .... 11 000 q:1 . 0 .... \
+ vm=%vm_dp vd=%vd_dp size=1
+ VDUP_scalar 1111 001 1 1 . 11 index:1 100 .... 11 000 q:1 . 0 .... \
+ vm=%vm_dp vd=%vd_dp size=2
+ ]
+
+ # Subgroup for size != 0b11
+ [
+ ##################################################################
+ # 3-reg-different-length grouping:
+ # 1111 001 U 1 D sz!=11 Vn:4 Vd:4 opc:4 N 0 M 0 Vm:4
+ ##################################################################
+
+ &3diff vm vn vd size
+
+ @3diff .... ... . . . size:2 .... .... .... . . . . .... \
+ &3diff vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+ VADDL_S_3d 1111 001 0 1 . .. .... .... 0000 . 0 . 0 .... @3diff
+ VADDL_U_3d 1111 001 1 1 . .. .... .... 0000 . 0 . 0 .... @3diff
+
+ VADDW_S_3d 1111 001 0 1 . .. .... .... 0001 . 0 . 0 .... @3diff
+ VADDW_U_3d 1111 001 1 1 . .. .... .... 0001 . 0 . 0 .... @3diff
+
+ VSUBL_S_3d 1111 001 0 1 . .. .... .... 0010 . 0 . 0 .... @3diff
+ VSUBL_U_3d 1111 001 1 1 . .. .... .... 0010 . 0 . 0 .... @3diff
+
+ VSUBW_S_3d 1111 001 0 1 . .. .... .... 0011 . 0 . 0 .... @3diff
+ VSUBW_U_3d 1111 001 1 1 . .. .... .... 0011 . 0 . 0 .... @3diff
+
+ VADDHN_3d 1111 001 0 1 . .. .... .... 0100 . 0 . 0 .... @3diff
+ VRADDHN_3d 1111 001 1 1 . .. .... .... 0100 . 0 . 0 .... @3diff
+
+ VABAL_S_3d 1111 001 0 1 . .. .... .... 0101 . 0 . 0 .... @3diff
+ VABAL_U_3d 1111 001 1 1 . .. .... .... 0101 . 0 . 0 .... @3diff
+
+ VSUBHN_3d 1111 001 0 1 . .. .... .... 0110 . 0 . 0 .... @3diff
+ VRSUBHN_3d 1111 001 1 1 . .. .... .... 0110 . 0 . 0 .... @3diff
+
+ VABDL_S_3d 1111 001 0 1 . .. .... .... 0111 . 0 . 0 .... @3diff
+ VABDL_U_3d 1111 001 1 1 . .. .... .... 0111 . 0 . 0 .... @3diff
+
+ VMLAL_S_3d 1111 001 0 1 . .. .... .... 1000 . 0 . 0 .... @3diff
+ VMLAL_U_3d 1111 001 1 1 . .. .... .... 1000 . 0 . 0 .... @3diff
+
+ VQDMLAL_3d 1111 001 0 1 . .. .... .... 1001 . 0 . 0 .... @3diff
+
+ VMLSL_S_3d 1111 001 0 1 . .. .... .... 1010 . 0 . 0 .... @3diff
+ VMLSL_U_3d 1111 001 1 1 . .. .... .... 1010 . 0 . 0 .... @3diff
+
+ VQDMLSL_3d 1111 001 0 1 . .. .... .... 1011 . 0 . 0 .... @3diff
+
+ VMULL_S_3d 1111 001 0 1 . .. .... .... 1100 . 0 . 0 .... @3diff
+ VMULL_U_3d 1111 001 1 1 . .. .... .... 1100 . 0 . 0 .... @3diff
+
+ VQDMULL_3d 1111 001 0 1 . .. .... .... 1101 . 0 . 0 .... @3diff
+
+ VMULL_P_3d 1111 001 0 1 . .. .... .... 1110 . 0 . 0 .... @3diff
+
+ ##################################################################
+ # 2-regs-plus-scalar grouping:
+ # 1111 001 Q 1 D sz!=11 Vn:4 Vd:4 opc:4 N 1 M 0 Vm:4
+ ##################################################################
+ &2scalar vm vn vd size q
+
+ @2scalar .... ... q:1 . . size:2 .... .... .... . . . . .... \
+ &2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp
+ # For the 'long' ops the Q bit is part of insn decode
+ @2scalar_q0 .... ... . . . size:2 .... .... .... . . . . .... \
+ &2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
+
+ VMLA_2sc 1111 001 . 1 . .. .... .... 0000 . 1 . 0 .... @2scalar
+ VMLA_F_2sc 1111 001 . 1 . .. .... .... 0001 . 1 . 0 .... @2scalar
+
+ VMLAL_S_2sc 1111 001 0 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0
+ VMLAL_U_2sc 1111 001 1 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0
+
+ VQDMLAL_2sc 1111 001 0 1 . .. .... .... 0011 . 1 . 0 .... @2scalar_q0
+
+ VMLS_2sc 1111 001 . 1 . .. .... .... 0100 . 1 . 0 .... @2scalar
+ VMLS_F_2sc 1111 001 . 1 . .. .... .... 0101 . 1 . 0 .... @2scalar
+
+ VMLSL_S_2sc 1111 001 0 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0
+ VMLSL_U_2sc 1111 001 1 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0
+
+ VQDMLSL_2sc 1111 001 0 1 . .. .... .... 0111 . 1 . 0 .... @2scalar_q0
+
+ VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
+ VMUL_F_2sc 1111 001 . 1 . .. .... .... 1001 . 1 . 0 .... @2scalar
+
+ VMULL_S_2sc 1111 001 0 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0
+ VMULL_U_2sc 1111 001 1 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0
+
+ VQDMULL_2sc 1111 001 0 1 . .. .... .... 1011 . 1 . 0 .... @2scalar_q0
+
+ VQDMULH_2sc 1111 001 . 1 . .. .... .... 1100 . 1 . 0 .... @2scalar
+ VQRDMULH_2sc 1111 001 . 1 . .. .... .... 1101 . 1 . 0 .... @2scalar
+
+ VQRDMLAH_2sc 1111 001 . 1 . .. .... .... 1110 . 1 . 0 .... @2scalar
+ VQRDMLSH_2sc 1111 001 . 1 . .. .... .... 1111 . 1 . 0 .... @2scalar
+ ]
+}
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 664d361260..a5aa56bbde 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -1329,6 +1329,7 @@ static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
neon_load_reg64(tmp, a->vm + pass);
fn(tmp, cpu_env, tmp, constimm);
neon_store_reg64(tmp, a->vd + pass);
+ tcg_temp_free_i64(tmp);
}
tcg_temp_free_i64(constimm);
return true;
@@ -1624,6 +1625,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
tmp = tcg_temp_new_i64();
widenfn(tmp, rm0);
+ tcg_temp_free_i32(rm0);
if (a->shift != 0) {
tcg_gen_shli_i64(tmp, tmp, a->shift);
tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
@@ -1631,6 +1633,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
neon_store_reg64(tmp, a->vd);
widenfn(tmp, rm1);
+ tcg_temp_free_i32(rm1);
if (a->shift != 0) {
tcg_gen_shli_i64(tmp, tmp, a->shift);
tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
@@ -1642,7 +1645,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
- NeonGenWidenFn *widenfn[] = {
+ static NeonGenWidenFn * const widenfn[] = {
gen_helper_neon_widen_s8,
gen_helper_neon_widen_s16,
tcg_gen_ext_i32_i64,
@@ -1652,7 +1655,7 @@ static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
- NeonGenWidenFn *widenfn[] = {
+ static NeonGenWidenFn * const widenfn[] = {
gen_helper_neon_widen_u8,
gen_helper_neon_widen_u16,
tcg_gen_extu_i32_i64,
@@ -1826,3 +1829,1144 @@ static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
}
return do_1reg_imm(s, a, fn);
}
+
+static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
+ NeonGenWidenFn *widenfn,
+ NeonGenTwo64OpFn *opfn,
+ bool src1_wide)
+{
+ /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */
+ TCGv_i64 rn0_64, rn1_64, rm_64;
+ TCGv_i32 rm;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!widenfn || !opfn) {
+ /* size == 3 case, which is an entirely different insn group */
+ return false;
+ }
+
+ if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ rn0_64 = tcg_temp_new_i64();
+ rn1_64 = tcg_temp_new_i64();
+ rm_64 = tcg_temp_new_i64();
+
+ if (src1_wide) {
+ neon_load_reg64(rn0_64, a->vn);
+ } else {
+ TCGv_i32 tmp = neon_load_reg(a->vn, 0);
+ widenfn(rn0_64, tmp);
+ tcg_temp_free_i32(tmp);
+ }
+ rm = neon_load_reg(a->vm, 0);
+
+ widenfn(rm_64, rm);
+ tcg_temp_free_i32(rm);
+ opfn(rn0_64, rn0_64, rm_64);
+
+ /*
+ * Load second pass inputs before storing the first pass result, to
+ * avoid incorrect results if a narrow input overlaps with the result.
+ */
+ if (src1_wide) {
+ neon_load_reg64(rn1_64, a->vn + 1);
+ } else {
+ TCGv_i32 tmp = neon_load_reg(a->vn, 1);
+ widenfn(rn1_64, tmp);
+ tcg_temp_free_i32(tmp);
+ }
+ rm = neon_load_reg(a->vm, 1);
+
+ neon_store_reg64(rn0_64, a->vd);
+
+ widenfn(rm_64, rm);
+ tcg_temp_free_i32(rm);
+ opfn(rn1_64, rn1_64, rm_64);
+ neon_store_reg64(rn1_64, a->vd + 1);
+
+ tcg_temp_free_i64(rn0_64);
+ tcg_temp_free_i64(rn1_64);
+ tcg_temp_free_i64(rm_64);
+
+ return true;
+}
+
+#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
+ { \
+ static NeonGenWidenFn * const widenfn[] = { \
+ gen_helper_neon_widen_##S##8, \
+ gen_helper_neon_widen_##S##16, \
+ tcg_gen_##EXT##_i32_i64, \
+ NULL, \
+ }; \
+ static NeonGenTwo64OpFn * const addfn[] = { \
+ gen_helper_neon_##OP##l_u16, \
+ gen_helper_neon_##OP##l_u32, \
+ tcg_gen_##OP##_i64, \
+ NULL, \
+ }; \
+ return do_prewiden_3d(s, a, widenfn[a->size], \
+ addfn[a->size], SRC1WIDE); \
+ }
+
+DO_PREWIDEN(VADDL_S, s, ext, add, false)
+DO_PREWIDEN(VADDL_U, u, extu, add, false)
+DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
+DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
+DO_PREWIDEN(VADDW_S, s, ext, add, true)
+DO_PREWIDEN(VADDW_U, u, extu, add, true)
+DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
+DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
+
+static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
+ NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
+{
+ /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
+ TCGv_i64 rn_64, rm_64;
+ TCGv_i32 rd0, rd1;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn || !narrowfn) {
+ /* size == 3 case, which is an entirely different insn group */
+ return false;
+ }
+
+ if ((a->vn | a->vm) & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ rn_64 = tcg_temp_new_i64();
+ rm_64 = tcg_temp_new_i64();
+ rd0 = tcg_temp_new_i32();
+ rd1 = tcg_temp_new_i32();
+
+ neon_load_reg64(rn_64, a->vn);
+ neon_load_reg64(rm_64, a->vm);
+
+ opfn(rn_64, rn_64, rm_64);
+
+ narrowfn(rd0, rn_64);
+
+ neon_load_reg64(rn_64, a->vn + 1);
+ neon_load_reg64(rm_64, a->vm + 1);
+
+ opfn(rn_64, rn_64, rm_64);
+
+ narrowfn(rd1, rn_64);
+
+ neon_store_reg(a->vd, 0, rd0);
+ neon_store_reg(a->vd, 1, rd1);
+
+ tcg_temp_free_i64(rn_64);
+ tcg_temp_free_i64(rm_64);
+
+ return true;
+}
+
+#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
+ { \
+ static NeonGenTwo64OpFn * const addfn[] = { \
+ gen_helper_neon_##OP##l_u16, \
+ gen_helper_neon_##OP##l_u32, \
+ tcg_gen_##OP##_i64, \
+ NULL, \
+ }; \
+ static NeonGenNarrowFn * const narrowfn[] = { \
+ gen_helper_neon_##NARROWTYPE##_high_u8, \
+ gen_helper_neon_##NARROWTYPE##_high_u16, \
+ EXTOP, \
+ NULL, \
+ }; \
+ return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \
+ }
+
+static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
+{
+ tcg_gen_addi_i64(rn, rn, 1u << 31);
+ tcg_gen_extrh_i64_i32(rd, rn);
+}
+
+DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
+DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
+DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
+DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
+
+static bool do_long_3d(DisasContext *s, arg_3diff *a,
+ NeonGenTwoOpWidenFn *opfn,
+ NeonGenTwo64OpFn *accfn)
+{
+ /*
+ * 3-regs different lengths, long operations.
+ * These perform an operation on two inputs that returns a double-width
+ * result, and then possibly perform an accumulation operation of
+ * that result into the double-width destination.
+ */
+ TCGv_i64 rd0, rd1, tmp;
+ TCGv_i32 rn, rm;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn) {
+ /* size == 3 case, which is an entirely different insn group */
+ return false;
+ }
+
+ if (a->vd & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ rd0 = tcg_temp_new_i64();
+ rd1 = tcg_temp_new_i64();
+
+ rn = neon_load_reg(a->vn, 0);
+ rm = neon_load_reg(a->vm, 0);
+ opfn(rd0, rn, rm);
+ tcg_temp_free_i32(rn);
+ tcg_temp_free_i32(rm);
+
+ rn = neon_load_reg(a->vn, 1);
+ rm = neon_load_reg(a->vm, 1);
+ opfn(rd1, rn, rm);
+ tcg_temp_free_i32(rn);
+ tcg_temp_free_i32(rm);
+
+ /* Don't store results until after all loads: they might overlap */
+ if (accfn) {
+ tmp = tcg_temp_new_i64();
+ neon_load_reg64(tmp, a->vd);
+ accfn(tmp, tmp, rd0);
+ neon_store_reg64(tmp, a->vd);
+ neon_load_reg64(tmp, a->vd + 1);
+ accfn(tmp, tmp, rd1);
+ neon_store_reg64(tmp, a->vd + 1);
+ tcg_temp_free_i64(tmp);
+ } else {
+ neon_store_reg64(rd0, a->vd);
+ neon_store_reg64(rd1, a->vd + 1);
+ }
+
+ tcg_temp_free_i64(rd0);
+ tcg_temp_free_i64(rd1);
+
+ return true;
+}
+
+static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_abdl_s16,
+ gen_helper_neon_abdl_s32,
+ gen_helper_neon_abdl_s64,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_abdl_u16,
+ gen_helper_neon_abdl_u32,
+ gen_helper_neon_abdl_u64,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_abdl_s16,
+ gen_helper_neon_abdl_s32,
+ gen_helper_neon_abdl_s64,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const addfn[] = {
+ gen_helper_neon_addl_u16,
+ gen_helper_neon_addl_u32,
+ tcg_gen_add_i64,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
+}
+
+static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_abdl_u16,
+ gen_helper_neon_abdl_u32,
+ gen_helper_neon_abdl_u64,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const addfn[] = {
+ gen_helper_neon_addl_u16,
+ gen_helper_neon_addl_u32,
+ tcg_gen_add_i64,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
+}
+
+static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
+{
+ TCGv_i32 lo = tcg_temp_new_i32();
+ TCGv_i32 hi = tcg_temp_new_i32();
+
+ tcg_gen_muls2_i32(lo, hi, rn, rm);
+ tcg_gen_concat_i32_i64(rd, lo, hi);
+
+ tcg_temp_free_i32(lo);
+ tcg_temp_free_i32(hi);
+}
+
+static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
+{
+ TCGv_i32 lo = tcg_temp_new_i32();
+ TCGv_i32 hi = tcg_temp_new_i32();
+
+ tcg_gen_mulu2_i32(lo, hi, rn, rm);
+ tcg_gen_concat_i32_i64(rd, lo, hi);
+
+ tcg_temp_free_i32(lo);
+ tcg_temp_free_i32(hi);
+}
+
+static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_mull_s8,
+ gen_helper_neon_mull_s16,
+ gen_mull_s32,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_mull_u8,
+ gen_helper_neon_mull_u16,
+ gen_mull_u32,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+#define DO_VMLAL(INSN,MULL,ACC) \
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
+ { \
+ static NeonGenTwoOpWidenFn * const opfn[] = { \
+ gen_helper_neon_##MULL##8, \
+ gen_helper_neon_##MULL##16, \
+ gen_##MULL##32, \
+ NULL, \
+ }; \
+ static NeonGenTwo64OpFn * const accfn[] = { \
+ gen_helper_neon_##ACC##l_u16, \
+ gen_helper_neon_##ACC##l_u32, \
+ tcg_gen_##ACC##_i64, \
+ NULL, \
+ }; \
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \
+ }
+
+DO_VMLAL(VMLAL_S,mull_s,add)
+DO_VMLAL(VMLAL_U,mull_u,add)
+DO_VMLAL(VMLSL_S,mull_s,sub)
+DO_VMLAL(VMLSL_U,mull_u,sub)
+
+static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
+{
+ gen_helper_neon_mull_s16(rd, rn, rm);
+ gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
+}
+
+static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
+{
+ gen_mull_s32(rd, rn, rm);
+ gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
+}
+
+static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
+}
+
+static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
+}
+
+static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const accfn[] = {
+ NULL,
+ gen_VQDMLAL_acc_16,
+ gen_VQDMLAL_acc_32,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ gen_helper_neon_negl_u32(rm, rm);
+ gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
+}
+
+static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ tcg_gen_neg_i64(rm, rm);
+ gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
+}
+
+static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const accfn[] = {
+ NULL,
+ gen_VQDMLSL_acc_16,
+ gen_VQDMLSL_acc_32,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
+{
+ gen_helper_gvec_3 *fn_gvec;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (a->vd & 1) {
+ return false;
+ }
+
+ switch (a->size) {
+ case 0:
+ fn_gvec = gen_helper_neon_pmull_h;
+ break;
+ case 2:
+ if (!dc_isar_feature(aa32_pmull, s)) {
+ return false;
+ }
+ fn_gvec = gen_helper_gvec_pmull_q;
+ break;
+ default:
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0),
+ neon_reg_offset(a->vn, 0),
+ neon_reg_offset(a->vm, 0),
+ 16, 16, 0, fn_gvec);
+ return true;
+}
+
+static void gen_neon_dup_low16(TCGv_i32 var)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_ext16u_i32(var, var);
+ tcg_gen_shli_i32(tmp, var, 16);
+ tcg_gen_or_i32(var, var, tmp);
+ tcg_temp_free_i32(tmp);
+}
+
+static void gen_neon_dup_high16(TCGv_i32 var)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_andi_i32(var, var, 0xffff0000);
+ tcg_gen_shri_i32(tmp, var, 16);
+ tcg_gen_or_i32(var, var, tmp);
+ tcg_temp_free_i32(tmp);
+}
+
+static inline TCGv_i32 neon_get_scalar(int size, int reg)
+{
+ TCGv_i32 tmp;
+ if (size == 1) {
+ tmp = neon_load_reg(reg & 7, reg >> 4);
+ if (reg & 8) {
+ gen_neon_dup_high16(tmp);
+ } else {
+ gen_neon_dup_low16(tmp);
+ }
+ } else {
+ tmp = neon_load_reg(reg & 15, reg >> 4);
+ }
+ return tmp;
+}
+
+static bool do_2scalar(DisasContext *s, arg_2scalar *a,
+ NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
+{
+ /*
+ * Two registers and a scalar: perform an operation between
+ * the input elements and the scalar, and then possibly
+ * perform an accumulation operation of that result into the
+ * destination.
+ */
+ TCGv_i32 scalar;
+ int pass;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn) {
+ /* Bad size (including size == 3, which is a different insn group) */
+ return false;
+ }
+
+ if (a->q && ((a->vd | a->vn) & 1)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ scalar = neon_get_scalar(a->size, a->vm);
+
+ for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+ TCGv_i32 tmp = neon_load_reg(a->vn, pass);
+ opfn(tmp, tmp, scalar);
+ if (accfn) {
+ TCGv_i32 rd = neon_load_reg(a->vd, pass);
+ accfn(tmp, rd, tmp);
+ tcg_temp_free_i32(rd);
+ }
+ neon_store_reg(a->vd, pass, tmp);
+ }
+ tcg_temp_free_i32(scalar);
+ return true;
+}
+
+static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ gen_helper_neon_mul_u16,
+ tcg_gen_mul_i32,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ gen_helper_neon_mul_u16,
+ tcg_gen_mul_i32,
+ NULL,
+ };
+ static NeonGenTwoOpFn * const accfn[] = {
+ NULL,
+ gen_helper_neon_add_u16,
+ tcg_gen_add_i32,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ gen_helper_neon_mul_u16,
+ tcg_gen_mul_i32,
+ NULL,
+ };
+ static NeonGenTwoOpFn * const accfn[] = {
+ NULL,
+ gen_helper_neon_sub_u16,
+ tcg_gen_sub_i32,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
+}
+
+/*
+ * Rather than have a float-specific version of do_2scalar just for
+ * three insns, we wrap a NeonGenTwoSingleOpFn to turn it into
+ * a NeonGenTwoOpFn.
+ */
+#define WRAP_FP_FN(WRAPNAME, FUNC) \
+ static void WRAPNAME(TCGv_i32 rd, TCGv_i32 rn, TCGv_i32 rm) \
+ { \
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1); \
+ FUNC(rd, rn, rm, fpstatus); \
+ tcg_temp_free_ptr(fpstatus); \
+ }
+
+WRAP_FP_FN(gen_VMUL_F_mul, gen_helper_vfp_muls)
+WRAP_FP_FN(gen_VMUL_F_add, gen_helper_vfp_adds)
+WRAP_FP_FN(gen_VMUL_F_sub, gen_helper_vfp_subs)
+
+static bool trans_VMUL_F_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ NULL, /* TODO: fp16 support */
+ gen_VMUL_F_mul,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VMLA_F_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ NULL, /* TODO: fp16 support */
+ gen_VMUL_F_mul,
+ NULL,
+ };
+ static NeonGenTwoOpFn * const accfn[] = {
+ NULL,
+ NULL, /* TODO: fp16 support */
+ gen_VMUL_F_add,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ NULL, /* TODO: fp16 support */
+ gen_VMUL_F_mul,
+ NULL,
+ };
+ static NeonGenTwoOpFn * const accfn[] = {
+ NULL,
+ NULL, /* TODO: fp16 support */
+ gen_VMUL_F_sub,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
+}
+
+WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
+WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
+WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
+WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
+
+static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ gen_VQDMULH_16,
+ gen_VQDMULH_32,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ gen_VQRDMULH_16,
+ gen_VQRDMULH_32,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], NULL);
+}
+
+static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
+ NeonGenThreeOpEnvFn *opfn)
+{
+ /*
+ * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
+ * performs a kind of fused op-then-accumulate using a helper
+ * function that takes all of rd, rn and the scalar at once.
+ */
+ TCGv_i32 scalar;
+ int pass;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ if (!dc_isar_feature(aa32_rdm, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn) {
+ /* Bad size (including size == 3, which is a different insn group) */
+ return false;
+ }
+
+ if (a->q && ((a->vd | a->vn) & 1)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ scalar = neon_get_scalar(a->size, a->vm);
+
+ for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+ TCGv_i32 rn = neon_load_reg(a->vn, pass);
+ TCGv_i32 rd = neon_load_reg(a->vd, pass);
+ opfn(rd, cpu_env, rn, scalar, rd);
+ tcg_temp_free_i32(rn);
+ neon_store_reg(a->vd, pass, rd);
+ }
+ tcg_temp_free_i32(scalar);
+
+ return true;
+}
+
+static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenThreeOpEnvFn *opfn[] = {
+ NULL,
+ gen_helper_neon_qrdmlah_s16,
+ gen_helper_neon_qrdmlah_s32,
+ NULL,
+ };
+ return do_vqrdmlah_2sc(s, a, opfn[a->size]);
+}
+
+static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenThreeOpEnvFn *opfn[] = {
+ NULL,
+ gen_helper_neon_qrdmlsh_s16,
+ gen_helper_neon_qrdmlsh_s32,
+ NULL,
+ };
+ return do_vqrdmlah_2sc(s, a, opfn[a->size]);
+}
+
+static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
+ NeonGenTwoOpWidenFn *opfn,
+ NeonGenTwo64OpFn *accfn)
+{
+ /*
+ * Two registers and a scalar, long operations: perform an
+ * operation on the input elements and the scalar which produces
+ * a double-width result, and then possibly perform an accumulation
+ * operation of that result into the destination.
+ */
+ TCGv_i32 scalar, rn;
+ TCGv_i64 rn0_64, rn1_64;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn) {
+ /* Bad size (including size == 3, which is a different insn group) */
+ return false;
+ }
+
+ if (a->vd & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ scalar = neon_get_scalar(a->size, a->vm);
+
+ /* Load all inputs before writing any outputs, in case of overlap */
+ rn = neon_load_reg(a->vn, 0);
+ rn0_64 = tcg_temp_new_i64();
+ opfn(rn0_64, rn, scalar);
+ tcg_temp_free_i32(rn);
+
+ rn = neon_load_reg(a->vn, 1);
+ rn1_64 = tcg_temp_new_i64();
+ opfn(rn1_64, rn, scalar);
+ tcg_temp_free_i32(rn);
+ tcg_temp_free_i32(scalar);
+
+ if (accfn) {
+ TCGv_i64 t64 = tcg_temp_new_i64();
+ neon_load_reg64(t64, a->vd);
+ accfn(t64, t64, rn0_64);
+ neon_store_reg64(t64, a->vd);
+ neon_load_reg64(t64, a->vd + 1);
+ accfn(t64, t64, rn1_64);
+ neon_store_reg64(t64, a->vd + 1);
+ tcg_temp_free_i64(t64);
+ } else {
+ neon_store_reg64(rn0_64, a->vd);
+ neon_store_reg64(rn1_64, a->vd + 1);
+ }
+ tcg_temp_free_i64(rn0_64);
+ tcg_temp_free_i64(rn1_64);
+ return true;
+}
+
+static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_helper_neon_mull_s16,
+ gen_mull_s32,
+ NULL,
+ };
+
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_helper_neon_mull_u16,
+ gen_mull_u32,
+ NULL,
+ };
+
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
+}
+
+#define DO_VMLAL_2SC(INSN, MULL, ACC) \
+ static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \
+ { \
+ static NeonGenTwoOpWidenFn * const opfn[] = { \
+ NULL, \
+ gen_helper_neon_##MULL##16, \
+ gen_##MULL##32, \
+ NULL, \
+ }; \
+ static NeonGenTwo64OpFn * const accfn[] = { \
+ NULL, \
+ gen_helper_neon_##ACC##l_u32, \
+ tcg_gen_##ACC##_i64, \
+ NULL, \
+ }; \
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \
+ }
+
+DO_VMLAL_2SC(VMLAL_S, mull_s, add)
+DO_VMLAL_2SC(VMLAL_U, mull_u, add)
+DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
+DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
+
+static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const accfn[] = {
+ NULL,
+ gen_VQDMLAL_acc_16,
+ gen_VQDMLAL_acc_32,
+ NULL,
+ };
+
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const accfn[] = {
+ NULL,
+ gen_VQDMLSL_acc_16,
+ gen_VQDMLSL_acc_32,
+ NULL,
+ };
+
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (a->imm > 7 && !a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (!a->q) {
+ /* Extract 64 bits from <Vm:Vn> */
+ TCGv_i64 left, right, dest;
+
+ left = tcg_temp_new_i64();
+ right = tcg_temp_new_i64();
+ dest = tcg_temp_new_i64();
+
+ neon_load_reg64(right, a->vn);
+ neon_load_reg64(left, a->vm);
+ tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
+ neon_store_reg64(dest, a->vd);
+
+ tcg_temp_free_i64(left);
+ tcg_temp_free_i64(right);
+ tcg_temp_free_i64(dest);
+ } else {
+ /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
+ TCGv_i64 left, middle, right, destleft, destright;
+
+ left = tcg_temp_new_i64();
+ middle = tcg_temp_new_i64();
+ right = tcg_temp_new_i64();
+ destleft = tcg_temp_new_i64();
+ destright = tcg_temp_new_i64();
+
+ if (a->imm < 8) {
+ neon_load_reg64(right, a->vn);
+ neon_load_reg64(middle, a->vn + 1);
+ tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
+ neon_load_reg64(left, a->vm);
+ tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
+ } else {
+ neon_load_reg64(right, a->vn + 1);
+ neon_load_reg64(middle, a->vm);
+ tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
+ neon_load_reg64(left, a->vm + 1);
+ tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
+ }
+
+ neon_store_reg64(destright, a->vd);
+ neon_store_reg64(destleft, a->vd + 1);
+
+ tcg_temp_free_i64(destright);
+ tcg_temp_free_i64(destleft);
+ tcg_temp_free_i64(right);
+ tcg_temp_free_i64(middle);
+ tcg_temp_free_i64(left);
+ }
+ return true;
+}
+
+static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
+{
+ int n;
+ TCGv_i32 tmp, tmp2, tmp3, tmp4;
+ TCGv_ptr ptr1;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ n = a->len + 1;
+ if ((a->vn + n) > 32) {
+ /*
+ * This is UNPREDICTABLE; we choose to UNDEF to avoid the
+ * helper function running off the end of the register file.
+ */
+ return false;
+ }
+ n <<= 3;
+ if (a->op) {
+ tmp = neon_load_reg(a->vd, 0);
+ } else {
+ tmp = tcg_temp_new_i32();
+ tcg_gen_movi_i32(tmp, 0);
+ }
+ tmp2 = neon_load_reg(a->vm, 0);
+ ptr1 = vfp_reg_ptr(true, a->vn);
+ tmp4 = tcg_const_i32(n);
+ gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp4);
+ tcg_temp_free_i32(tmp);
+ if (a->op) {
+ tmp = neon_load_reg(a->vd, 1);
+ } else {
+ tmp = tcg_temp_new_i32();
+ tcg_gen_movi_i32(tmp, 0);
+ }
+ tmp3 = neon_load_reg(a->vm, 1);
+ gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp4);
+ tcg_temp_free_i32(tmp4);
+ tcg_temp_free_ptr(ptr1);
+ neon_store_reg(a->vd, 0, tmp2);
+ neon_store_reg(a->vd, 1, tmp3);
+ tcg_temp_free_i32(tmp);
+ return true;
+}
+
+static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (a->vd & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0),
+ neon_element_offset(a->vm, a->index, a->size),
+ a->q ? 16 : 8, a->q ? 16 : 8);
+ return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index bcdfec34d2..6d18892ade 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -377,43 +377,6 @@ static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
tcg_gen_ext16s_i32(dest, var);
}
-/* 32x32->64 multiply. Marks inputs as dead. */
-static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 lo = tcg_temp_new_i32();
- TCGv_i32 hi = tcg_temp_new_i32();
- TCGv_i64 ret;
-
- tcg_gen_mulu2_i32(lo, hi, a, b);
- tcg_temp_free_i32(a);
- tcg_temp_free_i32(b);
-
- ret = tcg_temp_new_i64();
- tcg_gen_concat_i32_i64(ret, lo, hi);
- tcg_temp_free_i32(lo);
- tcg_temp_free_i32(hi);
-
- return ret;
-}
-
-static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 lo = tcg_temp_new_i32();
- TCGv_i32 hi = tcg_temp_new_i32();
- TCGv_i64 ret;
-
- tcg_gen_muls2_i32(lo, hi, a, b);
- tcg_temp_free_i32(a);
- tcg_temp_free_i32(b);
-
- ret = tcg_temp_new_i64();
- tcg_gen_concat_i32_i64(ret, lo, hi);
- tcg_temp_free_i32(lo);
- tcg_temp_free_i32(hi);
-
- return ret;
-}
-
/* Swap low and high halfwords. */
static void gen_swap_half(TCGv_i32 var)
{
@@ -2624,24 +2587,6 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
-static void gen_neon_dup_low16(TCGv_i32 var)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_ext16u_i32(var, var);
- tcg_gen_shli_i32(tmp, var, 16);
- tcg_gen_or_i32(var, var, tmp);
- tcg_temp_free_i32(tmp);
-}
-
-static void gen_neon_dup_high16(TCGv_i32 var)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_andi_i32(var, var, 0xffff0000);
- tcg_gen_shri_i32(tmp, var, 16);
- tcg_gen_or_i32(var, var, tmp);
- tcg_temp_free_i32(tmp);
-}
-
static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
{
#ifndef CONFIG_USER_ONLY
@@ -2991,55 +2936,6 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
-static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
-{
- switch (size) {
- case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
- case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
- case 2: tcg_gen_add_i32(t0, t0, t1); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
-{
- switch (size) {
- case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
- case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
- case 2: tcg_gen_sub_i32(t0, t1, t0); break;
- default: return;
- }
-}
-
-static TCGv_i32 neon_load_scratch(int scratch)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
- return tmp;
-}
-
-static void neon_store_scratch(int scratch, TCGv_i32 var)
-{
- tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
- tcg_temp_free_i32(var);
-}
-
-static inline TCGv_i32 neon_get_scalar(int size, int reg)
-{
- TCGv_i32 tmp;
- if (size == 1) {
- tmp = neon_load_reg(reg & 7, reg >> 4);
- if (reg & 8) {
- gen_neon_dup_high16(tmp);
- } else {
- gen_neon_dup_low16(tmp);
- }
- } else {
- tmp = neon_load_reg(reg & 15, reg >> 4);
- }
- return tmp;
-}
-
static int gen_neon_unzip(int rd, int rm, int size, int q)
{
TCGv_ptr pd, pm;
@@ -3231,68 +3127,6 @@ static inline void gen_neon_addl(int size)
}
}
-static inline void gen_neon_subl(int size)
-{
- switch (size) {
- case 0: gen_helper_neon_subl_u16(CPU_V001); break;
- case 1: gen_helper_neon_subl_u32(CPU_V001); break;
- case 2: tcg_gen_sub_i64(CPU_V001); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_negl(TCGv_i64 var, int size)
-{
- switch (size) {
- case 0: gen_helper_neon_negl_u16(var, var); break;
- case 1: gen_helper_neon_negl_u32(var, var); break;
- case 2:
- tcg_gen_neg_i64(var, var);
- break;
- default: abort();
- }
-}
-
-static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
-{
- switch (size) {
- case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
- case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
- int size, int u)
-{
- TCGv_i64 tmp;
-
- switch ((size << 1) | u) {
- case 0: gen_helper_neon_mull_s8(dest, a, b); break;
- case 1: gen_helper_neon_mull_u8(dest, a, b); break;
- case 2: gen_helper_neon_mull_s16(dest, a, b); break;
- case 3: gen_helper_neon_mull_u16(dest, a, b); break;
- case 4:
- tmp = gen_muls_i64_i32(a, b);
- tcg_gen_mov_i64(dest, tmp);
- tcg_temp_free_i64(tmp);
- break;
- case 5:
- tmp = gen_mulu_i64_i32(a, b);
- tcg_gen_mov_i64(dest, tmp);
- tcg_temp_free_i64(tmp);
- break;
- default: abort();
- }
-
- /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
- Don't forget to clean them now. */
- if (size < 2) {
- tcg_temp_free_i32(a);
- tcg_temp_free_i32(b);
- }
-}
-
static void gen_neon_narrow_op(int op, int u, int size,
TCGv_i32 dest, TCGv_i64 src)
{
@@ -5191,15 +5025,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
{
int op;
int q;
- int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
+ int rd, rm, rd_ofs, rm_ofs;
int size;
int pass;
int u;
int vec_size;
- uint32_t imm;
- TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
- TCGv_ptr ptr1;
- TCGv_i64 tmp64;
+ TCGv_i32 tmp, tmp2, tmp3;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return 1;
@@ -5220,12 +5051,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
q = (insn & (1 << 6)) != 0;
u = (insn >> 24) & 1;
VFP_DREG_D(rd, insn);
- VFP_DREG_N(rn, insn);
VFP_DREG_M(rm, insn);
size = (insn >> 20) & 3;
vec_size = q ? 16 : 8;
rd_ofs = neon_reg_offset(rd, 0);
- rn_ofs = neon_reg_offset(rn, 0);
rm_ofs = neon_reg_offset(rm, 0);
if ((insn & (1 << 23)) == 0) {
@@ -5236,454 +5065,15 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
return 1;
} else { /* (insn & 0x00800010 == 0x00800000) */
if (size != 3) {
- op = (insn >> 8) & 0xf;
- if ((insn & (1 << 6)) == 0) {
- /* Three registers of different lengths. */
- int src1_wide;
- int src2_wide;
- int prewiden;
- /* undefreq: bit 0 : UNDEF if size == 0
- * bit 1 : UNDEF if size == 1
- * bit 2 : UNDEF if size == 2
- * bit 3 : UNDEF if U == 1
- * Note that [2:0] set implies 'always UNDEF'
- */
- int undefreq;
- /* prewiden, src1_wide, src2_wide, undefreq */
- static const int neon_3reg_wide[16][4] = {
- {1, 0, 0, 0}, /* VADDL */
- {1, 1, 0, 0}, /* VADDW */
- {1, 0, 0, 0}, /* VSUBL */
- {1, 1, 0, 0}, /* VSUBW */
- {0, 1, 1, 0}, /* VADDHN */
- {0, 0, 0, 0}, /* VABAL */
- {0, 1, 1, 0}, /* VSUBHN */
- {0, 0, 0, 0}, /* VABDL */
- {0, 0, 0, 0}, /* VMLAL */
- {0, 0, 0, 9}, /* VQDMLAL */
- {0, 0, 0, 0}, /* VMLSL */
- {0, 0, 0, 9}, /* VQDMLSL */
- {0, 0, 0, 0}, /* Integer VMULL */
- {0, 0, 0, 9}, /* VQDMULL */
- {0, 0, 0, 0xa}, /* Polynomial VMULL */
- {0, 0, 0, 7}, /* Reserved: always UNDEF */
- };
-
- prewiden = neon_3reg_wide[op][0];
- src1_wide = neon_3reg_wide[op][1];
- src2_wide = neon_3reg_wide[op][2];
- undefreq = neon_3reg_wide[op][3];
-
- if ((undefreq & (1 << size)) ||
- ((undefreq & 8) && u)) {
- return 1;
- }
- if ((src1_wide && (rn & 1)) ||
- (src2_wide && (rm & 1)) ||
- (!src2_wide && (rd & 1))) {
- return 1;
- }
-
- /* Handle polynomial VMULL in a single pass. */
- if (op == 14) {
- if (size == 0) {
- /* VMULL.P8 */
- tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
- 0, gen_helper_neon_pmull_h);
- } else {
- /* VMULL.P64 */
- if (!dc_isar_feature(aa32_pmull, s)) {
- return 1;
- }
- tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
- 0, gen_helper_gvec_pmull_q);
- }
- return 0;
- }
-
- /* Avoid overlapping operands. Wide source operands are
- always aligned so will never overlap with wide
- destinations in problematic ways. */
- if (rd == rm && !src2_wide) {
- tmp = neon_load_reg(rm, 1);
- neon_store_scratch(2, tmp);
- } else if (rd == rn && !src1_wide) {
- tmp = neon_load_reg(rn, 1);
- neon_store_scratch(2, tmp);
- }
- tmp3 = NULL;
- for (pass = 0; pass < 2; pass++) {
- if (src1_wide) {
- neon_load_reg64(cpu_V0, rn + pass);
- tmp = NULL;
- } else {
- if (pass == 1 && rd == rn) {
- tmp = neon_load_scratch(2);
- } else {
- tmp = neon_load_reg(rn, pass);
- }
- if (prewiden) {
- gen_neon_widen(cpu_V0, tmp, size, u);
- }
- }
- if (src2_wide) {
- neon_load_reg64(cpu_V1, rm + pass);
- tmp2 = NULL;
- } else {
- if (pass == 1 && rd == rm) {
- tmp2 = neon_load_scratch(2);
- } else {
- tmp2 = neon_load_reg(rm, pass);
- }
- if (prewiden) {
- gen_neon_widen(cpu_V1, tmp2, size, u);
- }
- }
- switch (op) {
- case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
- gen_neon_addl(size);
- break;
- case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
- gen_neon_subl(size);
- break;
- case 5: case 7: /* VABAL, VABDL */
- switch ((size << 1) | u) {
- case 0:
- gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
- break;
- case 1:
- gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
- break;
- case 2:
- gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
- break;
- case 3:
- gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
- break;
- case 4:
- gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
- break;
- case 5:
- gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
- break;
- default: abort();
- }
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp);
- break;
- case 8: case 9: case 10: case 11: case 12: case 13:
- /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
- gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
- break;
- default: /* 15 is RESERVED: caught earlier */
- abort();
- }
- if (op == 13) {
- /* VQDMULL */
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- neon_store_reg64(cpu_V0, rd + pass);
- } else if (op == 5 || (op >= 8 && op <= 11)) {
- /* Accumulate. */
- neon_load_reg64(cpu_V1, rd + pass);
- switch (op) {
- case 10: /* VMLSL */
- gen_neon_negl(cpu_V0, size);
- /* Fall through */
- case 5: case 8: /* VABAL, VMLAL */
- gen_neon_addl(size);
- break;
- case 9: case 11: /* VQDMLAL, VQDMLSL */
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- if (op == 11) {
- gen_neon_negl(cpu_V0, size);
- }
- gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
- break;
- default:
- abort();
- }
- neon_store_reg64(cpu_V0, rd + pass);
- } else if (op == 4 || op == 6) {
- /* Narrowing operation. */
- tmp = tcg_temp_new_i32();
- if (!u) {
- switch (size) {
- case 0:
- gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
- break;
- case 1:
- gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
- break;
- case 2:
- tcg_gen_extrh_i64_i32(tmp, cpu_V0);
- break;
- default: abort();
- }
- } else {
- switch (size) {
- case 0:
- gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
- break;
- case 1:
- gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
- break;
- case 2:
- tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
- tcg_gen_extrh_i64_i32(tmp, cpu_V0);
- break;
- default: abort();
- }
- }
- if (pass == 0) {
- tmp3 = tmp;
- } else {
- neon_store_reg(rd, 0, tmp3);
- neon_store_reg(rd, 1, tmp);
- }
- } else {
- /* Write back the result. */
- neon_store_reg64(cpu_V0, rd + pass);
- }
- }
- } else {
- /* Two registers and a scalar. NB that for ops of this form
- * the ARM ARM labels bit 24 as Q, but it is in our variable
- * 'u', not 'q'.
- */
- if (size == 0) {
- return 1;
- }
- switch (op) {
- case 1: /* Float VMLA scalar */
- case 5: /* Floating point VMLS scalar */
- case 9: /* Floating point VMUL scalar */
- if (size == 1) {
- return 1;
- }
- /* fall through */
- case 0: /* Integer VMLA scalar */
- case 4: /* Integer VMLS scalar */
- case 8: /* Integer VMUL scalar */
- case 12: /* VQDMULH scalar */
- case 13: /* VQRDMULH scalar */
- if (u && ((rd | rn) & 1)) {
- return 1;
- }
- tmp = neon_get_scalar(size, rm);
- neon_store_scratch(0, tmp);
- for (pass = 0; pass < (u ? 4 : 2); pass++) {
- tmp = neon_load_scratch(0);
- tmp2 = neon_load_reg(rn, pass);
- if (op == 12) {
- if (size == 1) {
- gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
- } else {
- gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
- }
- } else if (op == 13) {
- if (size == 1) {
- gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
- } else {
- gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
- }
- } else if (op & 1) {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- } else {
- switch (size) {
- case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
- case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
- default: abort();
- }
- }
- tcg_temp_free_i32(tmp2);
- if (op < 8) {
- /* Accumulate. */
- tmp2 = neon_load_reg(rd, pass);
- switch (op) {
- case 0:
- gen_neon_add(size, tmp, tmp2);
- break;
- case 1:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case 4:
- gen_neon_rsb(size, tmp, tmp2);
- break;
- case 5:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- default:
- abort();
- }
- tcg_temp_free_i32(tmp2);
- }
- neon_store_reg(rd, pass, tmp);
- }
- break;
- case 3: /* VQDMLAL scalar */
- case 7: /* VQDMLSL scalar */
- case 11: /* VQDMULL scalar */
- if (u == 1) {
- return 1;
- }
- /* fall through */
- case 2: /* VMLAL sclar */
- case 6: /* VMLSL scalar */
- case 10: /* VMULL scalar */
- if (rd & 1) {
- return 1;
- }
- tmp2 = neon_get_scalar(size, rm);
- /* We need a copy of tmp2 because gen_neon_mull
- * deletes it during pass 0. */
- tmp4 = tcg_temp_new_i32();
- tcg_gen_mov_i32(tmp4, tmp2);
- tmp3 = neon_load_reg(rn, 1);
-
- for (pass = 0; pass < 2; pass++) {
- if (pass == 0) {
- tmp = neon_load_reg(rn, 0);
- } else {
- tmp = tmp3;
- tmp2 = tmp4;
- }
- gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
- if (op != 11) {
- neon_load_reg64(cpu_V1, rd + pass);
- }
- switch (op) {
- case 6:
- gen_neon_negl(cpu_V0, size);
- /* Fall through */
- case 2:
- gen_neon_addl(size);
- break;
- case 3: case 7:
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- if (op == 7) {
- gen_neon_negl(cpu_V0, size);
- }
- gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
- break;
- case 10:
- /* no-op */
- break;
- case 11:
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- break;
- default:
- abort();
- }
- neon_store_reg64(cpu_V0, rd + pass);
- }
- break;
- case 14: /* VQRDMLAH scalar */
- case 15: /* VQRDMLSH scalar */
- {
- NeonGenThreeOpEnvFn *fn;
-
- if (!dc_isar_feature(aa32_rdm, s)) {
- return 1;
- }
- if (u && ((rd | rn) & 1)) {
- return 1;
- }
- if (op == 14) {
- if (size == 1) {
- fn = gen_helper_neon_qrdmlah_s16;
- } else {
- fn = gen_helper_neon_qrdmlah_s32;
- }
- } else {
- if (size == 1) {
- fn = gen_helper_neon_qrdmlsh_s16;
- } else {
- fn = gen_helper_neon_qrdmlsh_s32;
- }
- }
-
- tmp2 = neon_get_scalar(size, rm);
- for (pass = 0; pass < (u ? 4 : 2); pass++) {
- tmp = neon_load_reg(rn, pass);
- tmp3 = neon_load_reg(rd, pass);
- fn(tmp, cpu_env, tmp, tmp2, tmp3);
- tcg_temp_free_i32(tmp3);
- neon_store_reg(rd, pass, tmp);
- }
- tcg_temp_free_i32(tmp2);
- }
- break;
- default:
- g_assert_not_reached();
- }
- }
+ /*
+ * Three registers of different lengths, or two registers and
+ * a scalar: handled by decodetree
+ */
+ return 1;
} else { /* size == 3 */
if (!u) {
- /* Extract. */
- imm = (insn >> 8) & 0xf;
-
- if (imm > 7 && !q)
- return 1;
-
- if (q && ((rd | rn | rm) & 1)) {
- return 1;
- }
-
- if (imm == 0) {
- neon_load_reg64(cpu_V0, rn);
- if (q) {
- neon_load_reg64(cpu_V1, rn + 1);
- }
- } else if (imm == 8) {
- neon_load_reg64(cpu_V0, rn + 1);
- if (q) {
- neon_load_reg64(cpu_V1, rm);
- }
- } else if (q) {
- tmp64 = tcg_temp_new_i64();
- if (imm < 8) {
- neon_load_reg64(cpu_V0, rn);
- neon_load_reg64(tmp64, rn + 1);
- } else {
- neon_load_reg64(cpu_V0, rn + 1);
- neon_load_reg64(tmp64, rm);
- }
- tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
- tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
- if (imm < 8) {
- neon_load_reg64(cpu_V1, rm);
- } else {
- neon_load_reg64(cpu_V1, rm + 1);
- imm -= 8;
- }
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
- tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
- tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
- tcg_temp_free_i64(tmp64);
- } else {
- /* BUGFIX */
- neon_load_reg64(cpu_V0, rn);
- tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
- neon_load_reg64(cpu_V1, rm);
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
- }
- neon_store_reg64(cpu_V0, rd);
- if (q) {
- neon_store_reg64(cpu_V1, rd + 1);
- }
+ /* Extract: handled by decodetree */
+ return 1;
} else if ((insn & (1 << 11)) == 0) {
/* Two register misc. */
op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
@@ -6184,62 +5574,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
}
break;
}
- } else if ((insn & (1 << 10)) == 0) {
- /* VTBL, VTBX. */
- int n = ((insn >> 8) & 3) + 1;
- if ((rn + n) > 32) {
- /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
- * helper function running off the end of the register file.
- */
- return 1;
- }
- n <<= 3;
- if (insn & (1 << 6)) {
- tmp = neon_load_reg(rd, 0);
- } else {
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, 0);
- }
- tmp2 = neon_load_reg(rm, 0);
- ptr1 = vfp_reg_ptr(true, rn);
- tmp5 = tcg_const_i32(n);
- gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
- tcg_temp_free_i32(tmp);
- if (insn & (1 << 6)) {
- tmp = neon_load_reg(rd, 1);
- } else {
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, 0);
- }
- tmp3 = neon_load_reg(rm, 1);
- gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
- tcg_temp_free_i32(tmp5);
- tcg_temp_free_ptr(ptr1);
- neon_store_reg(rd, 0, tmp2);
- neon_store_reg(rd, 1, tmp3);
- tcg_temp_free_i32(tmp);
- } else if ((insn & 0x380) == 0) {
- /* VDUP */
- int element;
- MemOp size;
-
- if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
- return 1;
- }
- if (insn & (1 << 16)) {
- size = MO_8;
- element = (insn >> 17) & 7;
- } else if (insn & (1 << 17)) {
- size = MO_16;
- element = (insn >> 18) & 3;
- } else {
- size = MO_32;
- element = (insn >> 19) & 1;
- }
- tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
- neon_element_offset(rm, element, size),
- q ? 16 : 8, q ? 16 : 8);
} else {
+ /* VTBL, VTBX, VDUP: handled by decodetree */
return 1;
}
}
diff --git a/target/arm/translate.h b/target/arm/translate.h
index c937dfe9bf..62ed5c4780 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -371,6 +371,7 @@ typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
+typedef void NeonGenTwoOpWidenFn(TCGv_i64, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);