From: Richard Henderson
Subject: Re: [Qemu-devel] [PATCH 32/52] target-m68k: bitfield ops
Date: Fri, 6 May 2016 09:11:03 -1000
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.0

On 05/04/2016 10:12 AM, Laurent Vivier wrote:
Signed-off-by: Laurent Vivier <address@hidden>
---
 target-m68k/helper.c    |  13 ++
 target-m68k/helper.h    |   4 +
 target-m68k/op_helper.c |  68 ++++++
 target-m68k/translate.c | 560 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 645 insertions(+)

diff --git a/target-m68k/helper.c b/target-m68k/helper.c
index e9e7cee..76dda44 100644
--- a/target-m68k/helper.c
+++ b/target-m68k/helper.c
@@ -267,6 +267,19 @@ uint32_t HELPER(ff1)(uint32_t x)
     return n;
 }

+uint32_t HELPER(bfffo)(uint32_t arg, uint32_t width)
+{
+    int n;
+    uint32_t mask;
+    mask = 0x80000000;
+    for (n = 0; n < width; n++) {
+       if (arg & mask)
+           break;
+       mask >>= 1;
+    }
+    return n;

  n = clz32(arg);
  return n < width ? n : width;

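The whole helper then collapses to something like this (a sketch; clz32() is from "qemu/host-utils.h" and already returns 32 for a zero argument, so only the clamp to width is needed):

  uint32_t HELPER(bfffo)(uint32_t arg, uint32_t width)
  {
      uint32_t n = clz32(arg);
      return n < width ? n : width;
  }
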
+DEF_HELPER_2(bfffo, i32, i32, i32)

Use DEF_HELPER_FLAGS_* here.
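
Since bfffo neither touches globals nor has side effects, presumably something like:

  DEF_HELPER_FLAGS_2(bfffo, TCG_CALL_NO_RWG_SE, i32, i32, i32)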

+DEF_HELPER_4(bitfield_load, i64, env, i32, i32, i32)
+DEF_HELPER_5(bitfield_store, void, env, i32, i32, i32, i64)

Likewise.

+        bitfield = cpu_ldub_data(env, addr);

Switch to using cpu_ldub_data_ra etc. That will avoid having to store PC or update_cc before calling these helpers.
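
In other words (just a fragment to show the pattern; the rest of the helper body stays as it is):

  uintptr_t ra = GETPC();
  ...
  bitfield = cpu_ldub_data_ra(env, addr, ra);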

+static inline void bcd_add(TCGv src, TCGv dest)

Did the bcd patch get squashed into this one by mistake?

Anyway, it might be nice to take off the inline marker for these, since they're fairly large functions.

+static inline void bcd_neg(TCGv dest, TCGv src)

Likewise wrt inline.

+    /* compute the 10's complement
+     *
+     *    bcd_add(0xff99 - (src + X), 0x0001)
+     *
+     *        t1 = 0xF9999999 - src - X)
+     *        t2 = t1 + 0x06666666
+     *        t3 = t2 + 0x00000001
+     *        t4 = t2 ^ 0x00000001
+     *        t5 = t3 ^ t4
+     *        t6 = ~t5 & 0x11111110
+     *        t7 = (t6 >> 2) | (t6 >> 3)
+     *        return t3 - t7
+     *
+     * reduced in:
+     *
+     *        t2 = 0xFFFFFFFF + (-src)
+     *        t3 = (-src)
+     *        t4 = t2  ^ (X ^ 1)
+     *        t5 = (t3 - X) ^ t4
+     *        t6 = ~t5 & 0x11111110
+     *        t7 = (t6 >> 2) | (t6 >> 3)
+     *        return (t3 - X) - t7

Is there a reason that you're computing the ten's complement to 7 digits when you're only going to use 3 of them? Restricting to 3 means that these constants become smaller and therefore easier to build on a non-x86 host.
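
For reference, a minimal sketch of the reduced sequence kept to three nibbles, so the only wide constant left is 0x110 (my own illustration, not code from the patch):

  /* src is a packed 2-digit BCD byte, x the incoming X flag (0 or 1) */
  static uint32_t bcd_neg_3digit(uint32_t src, uint32_t x)
  {
      uint32_t t3 = -src;
      uint32_t t4 = (t3 - 1) ^ (x ^ 1);
      uint32_t t5 = (t3 - x) ^ t4;
      uint32_t t6 = ~t5 & 0x110;            /* carry out of each digit */
      uint32_t t7 = (t6 >> 2) | (t6 >> 3);  /* each carry becomes a 6 correction */
      return (t3 - x) - t7;                 /* low byte holds the BCD result */
  }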

+DISAS_INSN(abcd_mem)
+{
+    TCGv src;
+    TCGv addr_src;
+    TCGv dest;
+    TCGv addr_dest;
+
+    gen_flush_flags(s); /* !Z is sticky */
+
+    addr_src = AREG(insn, 0);
+    tcg_gen_subi_i32(addr_src, addr_src, opsize_bytes(OS_BYTE));
+    src = gen_load(s, OS_BYTE, addr_src, 0);
+
+    addr_dest = AREG(insn, 9);
+    tcg_gen_subi_i32(addr_dest, addr_dest, opsize_bytes(OS_BYTE));
+    dest = gen_load(s, OS_BYTE, addr_dest, 0);
+
+    bcd_add(src, dest);
+
+    gen_store(s, OS_BYTE, addr_dest, dest);

Surely the write-back to the address registers should only happen after any possible exception due to the loads; otherwise you can't restart the insn after a page fault.
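
One way to arrange that (a sketch only, reusing the same helpers as the patch) is to compute the predecremented addresses in temporaries, perform both loads, and only then commit the new values to the address registers:

  addr_src = tcg_temp_new();
  tcg_gen_subi_i32(addr_src, AREG(insn, 0), opsize_bytes(OS_BYTE));
  src = gen_load(s, OS_BYTE, addr_src, 0);

  addr_dest = tcg_temp_new();
  tcg_gen_subi_i32(addr_dest, AREG(insn, 9), opsize_bytes(OS_BYTE));
  dest = gen_load(s, OS_BYTE, addr_dest, 0);

  /* both loads have happened; now it is safe to write back */
  /* (the Ax == Ay case needs extra care not shown here) */
  tcg_gen_mov_i32(AREG(insn, 0), addr_src);
  tcg_gen_mov_i32(AREG(insn, 9), addr_dest);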

+    set_cc_op(s, CC_OP_FLAGS);

This is redundant with gen_flush_flags.

+static void bitfield_param(uint16_t ext, TCGv *offset, TCGv *width, TCGv *mask)
+{
+    TCGv tmp;
+
+    /* offset */
+
+    if (ext & 0x0800) {
+        *offset = tcg_temp_new_i32();
+        tcg_gen_mov_i32(*offset, DREG(ext, 6));
+    } else {
+        *offset = tcg_temp_new_i32();
+        tcg_gen_movi_i32(*offset, (ext >> 6) & 31);
+    }

No need to have two copies of the *offset allocation.
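
I.e. hoist the allocation (sketch):

  *offset = tcg_temp_new_i32();
  if (ext & 0x0800) {
      tcg_gen_mov_i32(*offset, DREG(ext, 6));
  } else {
      tcg_gen_movi_i32(*offset, (ext >> 6) & 31);
  }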

+    tmp = tcg_temp_new_i32();
+    tcg_gen_sub_i32(tmp, tcg_const_i32(32), *width);
+    *mask = tcg_temp_new_i32();
+    tcg_gen_shl_i32(*mask, tcg_const_i32(0xffffffff), tmp);

Fewer garbage temporaries if you do:

  tmp = tcg_const_i32(32);
  tcg_gen_sub_i32(tmp, tmp, *width);
  *mask = tcg_const_i32(-1);
  tcg_gen_shl_i32(*mask, *mask, tmp);
  tcg_temp_free_i32(tmp);

+    if (ext & 0x0800)
+        tcg_gen_andi_i32(offset, offset, 31);

Watch the coding style.
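
QEMU's CODING_STYLE wants braces even around a single-statement body, i.e.:

  if (ext & 0x0800) {
      tcg_gen_andi_i32(offset, offset, 31);
  }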


+    if (op == 7) {
+        TCGv tmp2;
+
+        tmp2 = tcg_temp_new_i32();
+        tcg_gen_sub_i32(tmp2, tcg_const_i32(32), width);
+        tcg_gen_shl_i32(tmp2, reg2, tmp2);
+        tcg_gen_and_i32(tmp2, tmp2, mask);
+        gen_logic_cc(s, tmp2, OS_LONG);

A rotate right by the width is an easier way to put the field into the most significant bits. Also, you need to do this AND before you rotate the mask.

A comment here that we're computing the flags for BFINS wouldn't go amiss.

+        tcg_temp_free_i32(tmp1);

This should be tmp2, not tmp1.

+    } else {
+        gen_logic_cc(s, tmp1, OS_LONG);
+    }

I also question the logic of doing

        mask = mask >>> offset;
        tmp = reg & mask
        tmp = tmp <<< offset

when just

        tmp = reg <<< offset;
        tmp = tmp & mask

would do.

Let's assume we make this change, so that we align the field at the MSB for both reg1 and reg2. That gives us:

    TCGv tmp = tcg_temp_new();
    bitfield_param(ext, &offset, &width, &mask);

    if (op != 7) { /* All except BFINS */
        tcg_gen_rotl_i32(tmp, reg, offset);
        tcg_gen_and_i32(tmp, tmp, mask);
        gen_logic_cc(s, tmp, OS_LONG);
    }

    switch (op) {
    case 0: /* bftst */
        break;
    case 1: /* bfextu */
        tcg_gen_rotl_i32(reg2, QREG_CC_N, width);
        break;
    case 2: /* bfchg */
        tcg_gen_rotr_i32(mask, mask, offset);
        tcg_gen_xor_i32(reg, reg, mask);
        break;
    case 3: /* bfexts */
        tcg_gen_subfi_i32(width, 32, width);
        tcg_gen_sar_i32(reg2, QREG_CC_N, width);
        break;
    case 4: /* bfclr */
        tcg_gen_rotr_i32(mask, mask, offset);
        tcg_gen_andc_i32(reg, reg, mask);
        break;
    case 5: /* bfffo */
        /* Set all bits outside of mask, so that we find one
           when searching via clz32.  */
        tcg_gen_orc_i32(tmp, QREG_CC_N, mask);
        tcg_gen_shr_i32(tmp, tmp, offset);
        gen_helper_clz(reg2, tmp);
        break;
    case 6: /* bfset */
        tcg_gen_rotr_i32(mask, mask, offset);
        tcg_gen_or_i32(reg, reg, mask);
        break;
    case 7: /* bfins */
        tcg_gen_rotr_i32(tmp, reg2, width);
        tcg_gen_and_i32(tmp, tmp, mask);
        gen_logic_cc(s, tmp, OS_LONG);
        /* ??? If constant offset and constant width,
           and offset + width <= 32, then we can use
           tcg_gen_deposit_i32 here.  */
        tcg_gen_rotr_i32(mask, mask, offset);
        tcg_gen_rotr_i32(tmp, tmp, offset);
        tcg_gen_andc_i32(reg, reg, mask);
        tcg_gen_or_i32(reg, reg, tmp);
        break;
    }
    tcg_temp_free(tmp);

+    /* tmp = (bitfield << offset) >> 32 */
+
+    tcg_gen_shri_i64(tmp64, tmp64, 32ULL);
+    dest = tcg_temp_new_i32();
+    tcg_gen_extrl_i64_i32(dest, tmp64);

Use tcg_gen_extrh_i64_i32 here.
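
Assuming the shifted tmp64 value isn't needed again afterwards, that reduces the quoted lines to something like:

  dest = tcg_temp_new_i32();
  tcg_gen_extrh_i64_i32(dest, tmp64);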

+static TCGv_i64 gen_bitfield_mask(TCGv offset, TCGv width)
+{
+    TCGv tmp;
+    TCGv_i64 mask;
+    TCGv_i64 shift;
+
+    mask = tcg_temp_new_i64();
+
+    /* mask = (1u << width) - 1; */
+
+    tcg_gen_extu_i32_i64(mask, width);
+    tcg_gen_shl_i64(mask, tcg_const_i64(1), mask);
+    tcg_gen_subi_i64(mask, mask, 1);
+
+    /* shift = 64 - (width + offset); */
+
+    tmp = tcg_temp_new_i32();
+    tcg_gen_add_i32(tmp, offset, width);
+    tcg_gen_sub_i32(tmp, tcg_const_i32(64), tmp);
+    shift = tcg_temp_new_i64();
+    tcg_gen_extu_i32_i64(shift, tmp);
+
+    /* mask <<= shift */
+
+    tcg_gen_shl_i64(mask, mask, shift);

Two fewer instructions with:

  mask = -1 << width;
  mask <<= offset;
  mask >>= offset;

That said, similar comments apply to bitfield_mem re keeping the field at the 
MSB.

+    gen_helper_bitfield_load(bitfield, cpu_env, src, offset, width);
...
+        gen_logic_cc(s, val, OS_LONG);
...
+        gen_helper_bitfield_store(cpu_env, src, offset, width, bitfield);

You risk clobbering CC before the exception if the page isn't writable.

One way to fix this is to have bitfield_load perform a probe_write (or two, if the field spans a page boundary). That way when we come to bitfield_store we know that any exception will have already been taken.

Note that you can also make the helpers perform the rotates into and out of the MSB. That off-loads a little of the required tcg code.


r~


