avr-gcc-list
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [avr-gcc-list] Bug in *rotlsi3 insns?


From: Georg-Johann Lay
Subject: Re: [avr-gcc-list] Bug in *rotlsi3 insns?
Date: Mon, 30 Mar 2009 17:30:28 +0200
User-agent: Thunderbird 2.0.0.21 (Windows/20090302)

Anatoly Sokolov schrieb:
Hi.

Consider this test case:

unsigned long rotl (int dummy, unsigned long x)
{
    return (x << 8) | (x >> 24);
}

Compile with, e.g.
   avr-gcc -mmcu=atmega8 -S -Os -fno-split-wide-types

This will map 0x33221100 to 0x33110033 instead of the expected 0x22110033.



Please test the patch.

Anatoly.

This patch should fix all problems of rotlsi3 by 8, 16, 24.

Please test the patch.

Georg-Johann


Index: avr-protos.h
===================================================================
--- avr-protos.h        (Revision 145289)
+++ avr-protos.h        (Arbeitskopie)
@@ -90,6 +90,7 @@ extern const char *lshrsi3_out (rtx insn
 extern void expand_prologue (void);
 extern void expand_epilogue (void);
 extern int avr_epilogue_uses (int regno);
+extern void avr_split_rotlsi3 (rtx operands[], int offset);
 
 extern void avr_output_bld (rtx operands[], int bit_nr);
 extern void avr_output_addr_vec_elt (FILE *stream, int value);
Index: avr.md
===================================================================
--- avr.md      (Revision 145289)
+++ avr.md      (Arbeitskopie)
@@ -1478,19 +1478,14 @@
        (rotate:SI (match_operand:SI 1 "register_operand" "r")
                   (const_int 16)))]
   ""
-  "{mov __tmp_reg__,%A0\;mov %A0,%D0\;mov %D0, __tmp_reg__\;mov 
__tmp_reg__,%B0\;mov %B0,%C0\;mov %C0, __tmp_reg__|movw __tmp_reg__,%A0\;movw 
%A0,%C0\;movw %C0, __tmp_reg__\;clr __zero_reg__}"
+  "{mov __tmp_reg__,%A0\;mov %A0,%C0\;mov %C0, __tmp_reg__\;mov 
__tmp_reg__,%B0\;mov %B0,%D0\;mov %D0, __tmp_reg__|movw __tmp_reg__,%A0\;movw 
%A0,%C0\;movw %C0, __tmp_reg__\;clr __zero_reg__}"
   "reload_completed
    && REGNO (operands[0]) != REGNO (operands[1])"
-  [(set (match_dup 2) (match_dup 5))
-   (set (match_dup 3) (match_dup 4))]
-  "unsigned int si_lo_off = subreg_lowpart_offset (HImode, SImode);
-   unsigned int si_hi_off = subreg_highpart_offset (HImode, SImode);
-
-   operands[2] = simplify_gen_subreg (HImode, operands[0], SImode, si_lo_off);
-   operands[3] = simplify_gen_subreg (HImode, operands[0], SImode, si_hi_off);
-
-   operands[4] = simplify_gen_subreg (HImode, operands[1], SImode, si_lo_off);
-   operands[5] = simplify_gen_subreg (HImode, operands[1], SImode, si_hi_off);"
+  [(clobber (const_int 0))]
+  {
+    avr_split_rotlsi3 (operands, 16);
+    DONE;
+  }
   [(set (attr "length") (if_then_else (eq_attr "mcu_have_movw" "yes")
                                      (const_int 4)
                                      (const_int 6)))
@@ -1510,28 +1505,11 @@
        mov %A0, __tmp_reg__"
   "reload_completed
    && REGNO (operands[0]) != REGNO (operands[1])"
-  [(set (match_dup 2) (match_dup 9))
-   (set (match_dup 3) (match_dup 6))
-   (set (match_dup 4) (match_dup 7))
-   (set (match_dup 5) (match_dup 8))]
-  "unsigned int si_lo_off = subreg_lowpart_offset (HImode, SImode);
-   unsigned int si_hi_off = subreg_highpart_offset (HImode, SImode);
-   unsigned int hi_lo_off = subreg_lowpart_offset (QImode, HImode);
-   unsigned int hi_hi_off = subreg_highpart_offset (QImode, HImode);
-
-   operands[2] = simplify_gen_subreg (HImode, operands[0], SImode, si_lo_off);
-   operands[4] = simplify_gen_subreg (HImode, operands[0], SImode, si_hi_off);
-   operands[3] = simplify_gen_subreg (QImode, operands[2], HImode, hi_hi_off);
-   operands[2] = simplify_gen_subreg (QImode, operands[2], HImode, hi_lo_off);
-   operands[5] = simplify_gen_subreg (QImode, operands[4], HImode, hi_hi_off);
-   operands[4] = simplify_gen_subreg (QImode, operands[4], HImode, hi_lo_off);
-
-   operands[6] = simplify_gen_subreg (HImode, operands[1], SImode, si_lo_off);
-   operands[8] = simplify_gen_subreg (HImode, operands[1], SImode, si_hi_off);
-   operands[7] = simplify_gen_subreg (QImode, operands[6], HImode, hi_hi_off);
-   operands[6] = simplify_gen_subreg (QImode, operands[6], HImode, hi_lo_off);
-   operands[9] = simplify_gen_subreg (QImode, operands[8], HImode, hi_hi_off);
-   operands[8] = simplify_gen_subreg (QImode, operands[8], HImode, hi_lo_off);"
+  [(clobber (const_int 0))]
+  {
+    avr_split_rotlsi3 (operands, 8);
+    DONE;
+  }
    [(set_attr "length" "5")
    (set_attr "cc" "none")])
 
@@ -1547,28 +1525,11 @@
        mov %D0, __tmp_reg__"
   "reload_completed
    && REGNO (operands[0]) != REGNO (operands[1])"
-  [(set (match_dup 2) (match_dup 7))
-   (set (match_dup 3) (match_dup 8))
-   (set (match_dup 4) (match_dup 9))
-   (set (match_dup 5) (match_dup 6))]
-  "unsigned int si_lo_off = subreg_lowpart_offset (HImode, SImode);
-   unsigned int si_hi_off = subreg_highpart_offset (HImode, SImode);
-   unsigned int hi_lo_off = subreg_lowpart_offset (QImode, HImode);
-   unsigned int hi_hi_off = subreg_highpart_offset (QImode, HImode);
-
-   operands[2] = simplify_gen_subreg (HImode, operands[0], SImode, si_lo_off);
-   operands[4] = simplify_gen_subreg (HImode, operands[0], SImode, si_hi_off);
-   operands[3] = simplify_gen_subreg (QImode, operands[2], HImode, hi_hi_off);
-   operands[2] = simplify_gen_subreg (QImode, operands[2], HImode, hi_lo_off);
-   operands[5] = simplify_gen_subreg (QImode, operands[4], HImode, hi_hi_off);
-   operands[4] = simplify_gen_subreg (QImode, operands[4], HImode, hi_lo_off);
-
-   operands[6] = simplify_gen_subreg (HImode, operands[1], SImode, si_lo_off);
-   operands[8] = simplify_gen_subreg (HImode, operands[1], SImode, si_hi_off);
-   operands[7] = simplify_gen_subreg (QImode, operands[6], HImode, hi_hi_off);
-   operands[6] = simplify_gen_subreg (QImode, operands[6], HImode, hi_lo_off);
-   operands[9] = simplify_gen_subreg (QImode, operands[8], HImode, hi_hi_off);
-   operands[8] = simplify_gen_subreg (QImode, operands[8], HImode, hi_lo_off);"
+  [(clobber (const_int 0))]
+  {
+    avr_split_rotlsi3 (operands, 24);
+    DONE;
+  }
    [(set_attr "length" "5")
    (set_attr "cc" "none")])
 
Index: avr.c
===================================================================
--- avr.c       (Revision 145291)
+++ avr.c       (Arbeitskopie)
@@ -4317,6 +4317,120 @@ lshrsi3_out (rtx insn, rtx operands[], i
   return "";
 }
 
+/* Emit insns to split a left rotate of SImode operands[1] by OFFSET bits
+   (8, 16 or 24) into operands[0], using only HImode/QImode subreg moves.
+   Source and destination may overlap if their register numbers differ by
+   exactly 2; the moves are then ordered so that no source part is
+   overwritten before it is read.  Any other overlap is unreachable.  */
+
+void
+avr_split_rotlsi3 (rtx operands[], int offset)
+{
+  int i;
+
+  switch (offset)
+    {
+    case 16:
+      {
+        rtx dest[2];
+        rtx src[2];
+
+        for (i = 0; i < 2; i++)
+          {
+            dest[i] = simplify_gen_subreg (HImode, operands[0], SImode, i*GET_MODE_SIZE(HImode));
+            src[i] = simplify_gen_subreg (HImode, operands[1], SImode, i*GET_MODE_SIZE(HImode));
+          }
+
+        if (!reg_overlap_mentioned_p (operands[0], operands[1]))
+          {
+            emit_move_insn (dest[0], src[1]);
+            emit_move_insn (dest[1], src[0]);
+          }
+        else if (REGNO (operands[0]) == 2 + REGNO(operands[1]))
+          {
+            /* We already have dest[0] = src[1] */
+            emit_move_insn (dest[1], src[0]);
+          }
+        else if (REGNO (operands[0]) + 2 == REGNO(operands[1]))
+          {
+            /* We already have dest[1] = src[0] */
+            emit_move_insn (dest[0], src[1]);
+          }
+        else
+          gcc_unreachable ();
+        break;
+      }
+
+    case 8:
+      {
+        rtx dest[4];
+        rtx src[4];
+
+        for (i = 0; i < 4; i++)
+          {
+            dest[i] = simplify_gen_subreg (QImode, operands[0], SImode, i*GET_MODE_SIZE(QImode));
+            src[i] = simplify_gen_subreg (QImode, operands[1], SImode, i*GET_MODE_SIZE(QImode));
+          }
+
+        /* Rotate left by one byte: dest[(i + 1) % 4] = src[i].  */
+        if (!reg_overlap_mentioned_p (operands[0], operands[1])
+            || REGNO (operands[0]) == 2 + REGNO(operands[1]))
+          {
+            emit_move_insn (dest[3], src[2]);
+            emit_move_insn (dest[0], src[3]);
+            emit_move_insn (dest[1], src[0]);
+            emit_move_insn (dest[2], src[1]);
+          }
+        else if (REGNO (operands[0]) + 2 == REGNO(operands[1]))
+          {
+            emit_move_insn (dest[0], src[3]);
+            emit_move_insn (dest[1], src[0]);
+            emit_move_insn (dest[2], src[1]);
+            emit_move_insn (dest[3], src[2]);
+          }
+        else
+          gcc_unreachable ();
+        break;
+      }
+
+    case 24:
+      {
+        rtx dest[4];
+        rtx src[4];
+
+        for (i = 0; i < 4; i++)
+          {
+            dest[i] = simplify_gen_subreg (QImode, operands[0], SImode, i*GET_MODE_SIZE(QImode));
+            src[i] = simplify_gen_subreg (QImode, operands[1], SImode, i*GET_MODE_SIZE(QImode));
+          }
+
+        /* Rotate left by three bytes: dest[i] = src[(i + 1) % 4].  */
+        if (!reg_overlap_mentioned_p (operands[0], operands[1])
+            || REGNO (operands[0]) == 2 + REGNO(operands[1]))
+          {
+            /* src[2]/src[3] may alias dest[0]/dest[1]: read them first.  */
+            emit_move_insn (dest[3], src[0]);
+            emit_move_insn (dest[2], src[3]);
+            emit_move_insn (dest[1], src[2]);
+            emit_move_insn (dest[0], src[1]);
+          }
+        else if (REGNO (operands[0]) + 2 == REGNO(operands[1]))
+          {
+            emit_move_insn (dest[0], src[1]);
+            emit_move_insn (dest[3], src[0]);
+            emit_move_insn (dest[2], src[3]);
+            emit_move_insn (dest[1], src[2]);
+          }
+        else
+          gcc_unreachable ();
+        break;
+      }
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* Modifies the length assigned to instruction INSN
  LEN is the initially computed length of the insn.  */
 

reply via email to

[Prev in Thread] Current Thread [Next in Thread]