diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2019-03-17 01:55:22 +0000 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2019-05-13 22:52:08 +0000 |
commit | 37ee55a081b7863ffab2151068dd1b2f11376914 (patch) | |
tree | fb44f76c0e0b814f5d408d5007213c0a7605cbff /tcg/tcg-op-gvec.c | |
parent | f23e5e15edfd49d5dd72cab2ed2d85ac354b2eeb (diff) | |
download | qemu-37ee55a081b7863ffab2151068dd1b2f11376914.zip |
tcg: Add INDEX_op_dupm_vec
Allow the backend to expand dup from memory directly, instead of
forcing the value into a temp first. This is especially important
if integer/vector register moves do not exist.
Note that officially tcg_out_dupm_vec is allowed to fail.
If it did, we could fix this up relatively easily:
VECE == 32/64:
Load the value into a vector register, then dup.
Both of these must work.
VECE == 8/16:
If the value happens to be at an offset such that an aligned
load would place the desired value in the least significant
end of the register, go ahead and load w/garbage in high bits.
Load the value w/INDEX_op_ld{8,16}_i32.
Attempt a move directly to vector reg, which may fail.
Store the value into the backing store for OTS.
Load the value into the vector reg w/TCG_TYPE_I32, which must work.
Duplicate from the vector reg into itself, which must work.
All of which is well and good, except that all supported
hosts can support dupm for all vece, so all of the failure
paths would be dead code and untestable.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'tcg/tcg-op-gvec.c')
-rw-r--r-- | tcg/tcg-op-gvec.c | 89 |
1 files changed, 48 insertions, 41 deletions
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index 3fcb2352d9..35ebc5a201 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -395,6 +395,41 @@ static TCGType choose_vector_type(const TCGOpcode *list, unsigned vece, return 0; } +static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz, + uint32_t maxsz, TCGv_vec t_vec) +{ + uint32_t i = 0; + + switch (type) { + case TCG_TYPE_V256: + /* + * Recall that ARM SVE allows vector sizes that are not a + * power of 2, but always a multiple of 16. The intent is + * that e.g. size == 80 would be expanded with 2x32 + 1x16. + */ + for (; i + 32 <= oprsz; i += 32) { + tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256); + } + /* fallthru */ + case TCG_TYPE_V128: + for (; i + 16 <= oprsz; i += 16) { + tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128); + } + break; + case TCG_TYPE_V64: + for (; i < oprsz; i += 8) { + tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64); + } + break; + default: + g_assert_not_reached(); + } + + if (oprsz < maxsz) { + expand_clr(dofs + oprsz, maxsz - oprsz); + } +} + /* Set OPRSZ bytes at DOFS to replications of IN_32, IN_64 or IN_C. * Only one of IN_32 or IN_64 may be set; * IN_C is used if IN_32 and IN_64 are unset. @@ -434,49 +469,11 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, } else if (in_64) { tcg_gen_dup_i64_vec(vece, t_vec, in_64); } else { - switch (vece) { - case MO_8: - tcg_gen_dup8i_vec(t_vec, in_c); - break; - case MO_16: - tcg_gen_dup16i_vec(t_vec, in_c); - break; - case MO_32: - tcg_gen_dup32i_vec(t_vec, in_c); - break; - default: - tcg_gen_dup64i_vec(t_vec, in_c); - break; - } - } - - i = 0; - switch (type) { - case TCG_TYPE_V256: - /* Recall that ARM SVE allows vector sizes that are not a - * power of 2, but always a multiple of 16. The intent is - * that e.g. size == 80 would be expanded with 2x32 + 1x16. - */ - for (; i + 32 <= oprsz; i += 32) { - tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256); - } - /* fallthru */ - case TCG_TYPE_V128: - for (; i + 16 <= oprsz; i += 16) { - tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128); - } - break; - case TCG_TYPE_V64: - for (; i < oprsz; i += 8) { - tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64); - } - break; - default: - g_assert_not_reached(); + tcg_gen_dupi_vec(vece, t_vec, in_c); } - + do_dup_store(type, dofs, oprsz, maxsz, t_vec); tcg_temp_free_vec(t_vec); - goto done; + return; } /* Otherwise, inline with an integer type, unless "large". */ @@ -1449,6 +1446,16 @@ void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t oprsz, void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz) { + if (vece <= MO_64) { + TCGType type = choose_vector_type(0, vece, oprsz, 0); + if (type != 0) { + TCGv_vec t_vec = tcg_temp_new_vec(type); + tcg_gen_dup_mem_vec(vece, t_vec, cpu_env, aofs); + do_dup_store(type, dofs, oprsz, maxsz, t_vec); + tcg_temp_free_vec(t_vec); + return; + } + } if (vece <= MO_32) { TCGv_i32 in = tcg_temp_new_i32(); switch (vece) { |