diff options
-rw-r--r-- | target/arm/translate-neon.c.inc | 37 | ||||
-rw-r--r-- | target/arm/translate.c | 15 | ||||
-rw-r--r-- | target/arm/translate.h | 1 |
3 files changed, 44 insertions, 9 deletions
diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc index 18d9042130..9c2b076027 100644 --- a/target/arm/translate-neon.c.inc +++ b/target/arm/translate-neon.c.inc @@ -522,6 +522,7 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) int size = a->size; int nregs = a->n + 1; TCGv_i32 addr, tmp; + MemOp mop, align; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { return false; @@ -532,18 +533,33 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) return false; } + align = 0; if (size == 3) { if (nregs != 4 || a->a == 0) { return false; } /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */ - size = 2; - } - if (nregs == 1 && a->a == 1 && size == 0) { - return false; - } - if (nregs == 3 && a->a == 1) { - return false; + size = MO_32; + align = MO_ALIGN_16; + } else if (a->a) { + switch (nregs) { + case 1: + if (size == 0) { + return false; + } + align = MO_ALIGN; + break; + case 2: + align = pow2_align(size + 1); + break; + case 3: + return false; + case 4: + align = pow2_align(size + 2); + break; + default: + g_assert_not_reached(); + } } if (!vfp_access_check(s)) { @@ -556,12 +572,12 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) */ stride = a->t ? 2 : 1; vec_size = nregs == 1 ? stride * 8 : 8; - + mop = size | align; tmp = tcg_temp_new_i32(); addr = tcg_temp_new_i32(); load_reg_var(s, addr, a->rn); for (reg = 0; reg < nregs; reg++) { - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), size); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop); if ((vd & 1) && vec_size == 16) { /* * We cannot write 16 bytes at once because the @@ -577,6 +593,9 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) } tcg_gen_addi_i32(addr, addr, 1 << size); vd += stride; + + /* Subsequent memory operations inherit alignment */ + mop &= ~MO_AMASK; } tcg_temp_free_i32(tmp); tcg_temp_free_i32(addr); diff --git a/target/arm/translate.c b/target/arm/translate.c index 3b071012ca..43ff0d4b8a 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -908,6 +908,21 @@ static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var) #define IS_USER_ONLY 0 #endif +MemOp pow2_align(unsigned i) +{ + static const MemOp mop_align[] = { + 0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16, + /* + * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such + * that 256-bit alignment (MO_ALIGN_32) cannot be supported: + * see get_alignment_bits(). Enforce only 128-bit alignment for now. + */ + MO_ALIGN_16 + }; + g_assert(i < ARRAY_SIZE(mop_align)); + return mop_align[i]; +} + /* * Abstractions of "generate code to do a guest load/store for * AArch32", where a vaddr is always 32 bits (and is zero diff --git a/target/arm/translate.h b/target/arm/translate.h index 0c60b83b3d..ccf60c96d8 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -204,6 +204,7 @@ void arm_test_cc(DisasCompare *cmp, int cc); void arm_free_cc(DisasCompare *cmp); void arm_jump_cc(DisasCompare *cmp, TCGLabel *label); void arm_gen_test_cc(int cc, TCGLabel *label); +MemOp pow2_align(unsigned i); /* Return state of Alternate Half-precision flag, caller frees result */ static inline TCGv_i32 get_ahp_flag(void) |