summaryrefslogtreecommitdiff
path: root/tcg/tcg-op-gvec.c
diff options
context:
space:
mode:
Diffstat (limited to 'tcg/tcg-op-gvec.c')
-rw-r--r--tcg/tcg-op-gvec.c195
1 files changed, 195 insertions, 0 deletions
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 35ebc5a201..061ef329f1 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -2555,6 +2555,201 @@ void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
}
}
+/*
+ * Expand D = A << (B % element bits)
+ *
+ * Unlike scalar shifts, where it is easy for the target front end
+ * to include the modulo as part of the expansion. If the target
+ * naturally includes the modulo as part of the operation, great!
+ * If the target has some other behaviour from out-of-range shifts,
+ * then it could not use this function anyway, and would need to
+ * do it's own expansion with custom functions.
+ */
+static void tcg_gen_shlv_mod_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
+ tcg_gen_and_vec(vece, t, t, b);
+ tcg_gen_shlv_vec(vece, d, a, t);
+ tcg_temp_free_vec(t);
+}
+
+static void tcg_gen_shl_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_andi_i32(t, b, 31);
+ tcg_gen_shl_i32(d, a, t);
+ tcg_temp_free_i32(t);
+}
+
+static void tcg_gen_shl_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_andi_i64(t, b, 63);
+ tcg_gen_shl_i64(d, a, t);
+ tcg_temp_free_i64(t);
+}
+
+void tcg_gen_gvec_shlv(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shlv_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_shlv_mod_vec,
+ .fno = gen_helper_gvec_shl8v,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_shlv_mod_vec,
+ .fno = gen_helper_gvec_shl16v,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_shl_mod_i32,
+ .fniv = tcg_gen_shlv_mod_vec,
+ .fno = gen_helper_gvec_shl32v,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_shl_mod_i64,
+ .fniv = tcg_gen_shlv_mod_vec,
+ .fno = gen_helper_gvec_shl64v,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+/*
+ * Similarly for logical right shifts.
+ */
+
+static void tcg_gen_shrv_mod_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
+ tcg_gen_and_vec(vece, t, t, b);
+ tcg_gen_shrv_vec(vece, d, a, t);
+ tcg_temp_free_vec(t);
+}
+
+static void tcg_gen_shr_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_andi_i32(t, b, 31);
+ tcg_gen_shr_i32(d, a, t);
+ tcg_temp_free_i32(t);
+}
+
+static void tcg_gen_shr_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_andi_i64(t, b, 63);
+ tcg_gen_shr_i64(d, a, t);
+ tcg_temp_free_i64(t);
+}
+
+void tcg_gen_gvec_shrv(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shrv_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_shrv_mod_vec,
+ .fno = gen_helper_gvec_shr8v,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_shrv_mod_vec,
+ .fno = gen_helper_gvec_shr16v,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_shr_mod_i32,
+ .fniv = tcg_gen_shrv_mod_vec,
+ .fno = gen_helper_gvec_shr32v,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_shr_mod_i64,
+ .fniv = tcg_gen_shrv_mod_vec,
+ .fno = gen_helper_gvec_shr64v,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+/*
+ * Similarly for arithmetic right shifts.
+ */
+
+static void tcg_gen_sarv_mod_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
+ tcg_gen_and_vec(vece, t, t, b);
+ tcg_gen_sarv_vec(vece, d, a, t);
+ tcg_temp_free_vec(t);
+}
+
+static void tcg_gen_sar_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_andi_i32(t, b, 31);
+ tcg_gen_sar_i32(d, a, t);
+ tcg_temp_free_i32(t);
+}
+
+static void tcg_gen_sar_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_andi_i64(t, b, 63);
+ tcg_gen_sar_i64(d, a, t);
+ tcg_temp_free_i64(t);
+}
+
+void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_sarv_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_sarv_mod_vec,
+ .fno = gen_helper_gvec_sar8v,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_sarv_mod_vec,
+ .fno = gen_helper_gvec_sar16v,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_sar_mod_i32,
+ .fniv = tcg_gen_sarv_mod_vec,
+ .fno = gen_helper_gvec_sar32v,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_sar_mod_i64,
+ .fniv = tcg_gen_sarv_mod_vec,
+ .fno = gen_helper_gvec_sar64v,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
/* Expand OPSZ bytes worth of three-operand operations using i32 elements. */
static void expand_cmp_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
uint32_t oprsz, TCGCond cond)