[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[avr-gcc-list] [patch,avr-gcc] optimizing mov SI/SF
From: |
Georg-Johann Lay |
Subject: |
[avr-gcc-list] [patch,avr-gcc] optimizing mov SI/SF |
Date: |
Tue, 24 Mar 2009 22:01:41 +0100 |
User-agent: |
Thunderbird 2.0.0.21 (Windows/20090302) |
Hi,
this patch is a minor optimization when moving a compile time const
SI/SF to a reg that is not in LD_REGS. The patch tracks the value in the
scratch register and saves some moves here and there.
A small test program is attached to see the effect. Compile, e.g. with
-Os -dp -S
-Os -dp -S -fno-split-wide-types
and look out for the movsf/reload_insi patterns.
The drawback is that the constant's value is obfuscated in the asm output.
The same could be done for cmpsi. Things with cmpsi would be easier with the
adjust_insn_len skeleton, so that work can wait until some later time.
For SI, there are some masks like 0xffff0000 and 0x0000ffff in libgcc where
the patch will take effect.
Greets,
Georg-Johann
Index: /local/gcc.gnu.org/trunk/gcc/config/avr/avr.c
===================================================================
--- /local/gcc.gnu.org/trunk/gcc/config/avr/avr.c (Revision 145005)
+++ /local/gcc.gnu.org/trunk/gcc/config/avr/avr.c (Arbeitskopie)
@@ -2472,6 +2472,19 @@ out_movsi_mr_r (rtx insn, rtx op[], int
return "";
}
+/* Internal helper: either emit STR or only account for its length.
+ LEN == NULL: pass STR and OPERANDS on to output_asm_insn.
+ LEN != NULL: emit nothing, just add INC to *LEN (length computation). */
+
+static inline void
+output_asm_insn_len (const char *str, rtx *operands, int *len, int inc)
+{
+ if (len)
+ (*len) += inc;
+ else
+ output_asm_insn (str, operands);
+}
+
const char *
output_movsisf(rtx insn, rtx operands[], int *l)
{
@@ -2589,17 +2602,51 @@ output_movsisf(rtx insn, rtx operands[],
}
/* Last resort, better than loading from memory. */
- *l = 10;
- return (AS2 (mov,__tmp_reg__,r31) CR_TAB
- AS2 (ldi,r31,lo8(%1)) CR_TAB
- AS2 (mov,%A0,r31) CR_TAB
- AS2 (ldi,r31,hi8(%1)) CR_TAB
- AS2 (mov,%B0,r31) CR_TAB
- AS2 (ldi,r31,hlo8(%1)) CR_TAB
- AS2 (mov,%C0,r31) CR_TAB
- AS2 (ldi,r31,hhi8(%1)) CR_TAB
- AS2 (mov,%D0,r31) CR_TAB
- AS2 (mov,r31,__tmp_reg__));
+ /* FIXME: Probably worse than defining reload_insf
+ because in most cases a LD_REGS scratch might
+ be available, so then there would be no need to
+ save/restore the scratch by hand. But if no LD_REGS
+ was available, some stuff will be spilled... */
+
+ {
+ int nbytes = GET_MODE_SIZE (GET_MODE (dest));
+ int dest_regno = true_regnum (dest);
+ int tmp_regno = dest_regno + nbytes-1;
+ rtx tmp_reg;
+ rtx op[3];
+
+ /* Even though dest is not in class "d", some regs of it
+ may be in "d". This happens if dest lives in R14.
+ In that special case we don't need to bother Z+1. */
+
+ if (!TEST_HARD_REG_CLASS (LD_REGS, tmp_regno))
+ tmp_regno = REG_Z+1;
+
+ tmp_reg = gen_rtx_REG (QImode, tmp_regno);
+
+ op[0] = dest;
+ op[1] = operands[1];
+ op[2] = tmp_reg;
+
+ /* save tmp_reg if needed. Adjust len is at restoring */
+ if (!real_l
+ && REG_Z+1 == tmp_regno)
+ {
+ output_asm_insn (AS2 (mov,__tmp_reg__,%2), op);
+ }
+
+ /* output the very move: dispatch to reload_in */
+ output_reload_insisf (insn, op, real_l);
+
+ /* restore tmp_reg if needed */
+ if (REG_Z+1 == tmp_regno)
+ {
+ /* +2 because save from above */
+ output_asm_insn_len (AS2 (mov,%2,__tmp_reg__), op, real_l, 2);
+ }
+
+ return "";
+ }
}
else if (GET_CODE (src) == MEM)
return out_movsi_r_mr (insn, operands, real_l); /* mov r,m */
@@ -5908,50 +5955,81 @@ output_reload_inhi (rtx insn ATTRIBUTE_U
const char *
output_reload_insisf (rtx insn ATTRIBUTE_UNUSED, rtx *operands, int *len)
{
+ rtx op[3];
+ rtx dest = operands[0];
rtx src = operands[1];
- int cnst = (GET_CODE (src) == CONST_INT);
-
- if (len)
- {
- if (cnst)
- *len = 4 + ((INTVAL (src) & 0xff) != 0)
- + ((INTVAL (src) & 0xff00) != 0)
- + ((INTVAL (src) & 0xff0000) != 0)
- + ((INTVAL (src) & 0xff000000) != 0);
- else
- *len = 8;
- return "";
- }
+ unsigned HOST_WIDE_INT ival;
+ HOST_WIDE_INT tmp_val = 0x100; /* byte held in the scratch reg; 0x100 = none yet */
+ int byte_no;
- if (cnst && ((INTVAL (src) & 0xff) == 0))
- output_asm_insn (AS2 (mov, %A0, __zero_reg__), operands);
- else
+ if (CONST_INT == GET_CODE (src))
{
- output_asm_insn (AS2 (ldi, %2, lo8(%1)), operands);
- output_asm_insn (AS2 (mov, %A0, %2), operands);
+ ival = INTVAL (src);
}
- if (cnst && ((INTVAL (src) & 0xff00) == 0))
- output_asm_insn (AS2 (mov, %B0, __zero_reg__), operands);
- else
+ else if (CONST_DOUBLE == GET_CODE (src))
{
- output_asm_insn (AS2 (ldi, %2, hi8(%1)), operands);
- output_asm_insn (AS2 (mov, %B0, %2), operands);
+ REAL_VALUE_TYPE rv;
+ gcc_assert (SFmode == GET_MODE (src));
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, src);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, ival);
}
- if (cnst && ((INTVAL (src) & 0xff0000) == 0))
- output_asm_insn (AS2 (mov, %C0, __zero_reg__), operands);
else
{
- output_asm_insn (AS2 (ldi, %2, hlo8(%1)), operands);
- output_asm_insn (AS2 (mov, %C0, %2), operands);
+ gcc_unreachable();
}
- if (cnst && ((INTVAL (src) & 0xff000000) == 0))
- output_asm_insn (AS2 (mov, %D0, __zero_reg__), operands);
- else
+
+ if (len)
+ *len = 0;
+
+ /* Emit the bytes one at a time, least significant byte first: when
+ called from output_movsisf the scratch reg op[2] may be the most
+ significant byte of dest, so that byte has to be written last.
+ Some bytes of dest may be in LD_REGS even though dest is in NO_LD_REGS */
+
+ op[2] = operands[2];
+
+ for (byte_no = 0; byte_no < GET_MODE_SIZE (SImode); byte_no++)
{
- output_asm_insn (AS2 (ldi, %2, hhi8(%1)), operands);
- output_asm_insn (AS2 (mov, %D0, %2), operands);
+ int val8 = ival & 0xff;
+ int regno = true_regnum (dest) + byte_no;
+
+ op[0] = gen_rtx_REG (QImode, regno);
+
+ if (0 == val8)
+ {
+ /* Zero byte: plain clr, no LD_REGS needed. NOTE(review): unlike mov, clr alters SREG -- confirm cc. */
+ output_asm_insn_len (AS1 (clr, %0), op, len, 1);
+ }
+ else
+ {
+ /* A non-zero byte is loaded with ldi, which needs an LD_REGS register */
+ op[1] = GEN_INT (val8);
+
+ if (test_hard_reg_class (LD_REGS, op[0]))
+ {
+ /* This byte of dest is itself in LD_REGS: load it directly */
+ output_asm_insn_len (AS2 (ldi,%0,%1), op, len, 1);
+ }
+ else
+ {
+ /* Otherwise go through the scratch reg op[2] */
+ if (val8 != tmp_val)
+ {
+ /* Scratch does not hold val8 yet: load it and remember that
+ in tmp_val, so an equal following byte skips the ldi */
+ tmp_val = val8;
+
+ output_asm_insn_len (AS2 (ldi,%2,%1), op, len, 1);
+ }
+
+ output_asm_insn_len (AS2 (mov,%0,%2), op, len, 1);
+ }
+ }
+
+ ival >>= 8;
}
+
return "";
}
/* Declared only; no definition is visible here.  Called by every test
   function in this program between the conditional assignment and the
   return.  */
void foo ();
/* Global flag tested by every test function in this program; it is never
   assigned here.  */
char x;
/* Test case for moving an integer constant: if x is non-zero, i is
   overwritten with the constant 0xffff00ff; foo is then called and i is
   returned, converted to int.
   NOTE(review): a and b are unused; presumably they only steer i into
   particular argument registers -- confirm against the AVR calling
   convention.  */
int movsi_1 (long long a, long long b, long i)
{
if (x)
i = 0xffff00ff;
foo();
return i;
}
/* Test case for moving an integer constant: same body as movsi_1, but
   the second argument b is long instead of long long.
   NOTE(review): a and b are unused; presumably the narrower b places i in
   different registers than in movsi_1 -- confirm against the AVR calling
   convention.  */
int movsi_2 (long long a, long b, long i)
{
if (x)
i = 0xffff00ff;
foo();
return i;
}
/* Test case for moving a float constant: if x is non-zero, i is
   overwritten with 3.0f; foo is then called and i is returned.
   NOTE(review): a and b are unused; presumably they only steer i into
   particular argument registers -- confirm against the AVR calling
   convention.  */
float movsf_1 (long long a, long long b, float i)
{
if (x)
i = 3.0f;
foo();
return i;
}
/* Test case for moving a float constant: same body as movsf_1, but the
   second argument b is long instead of long long.
   NOTE(review): a and b are unused; presumably the narrower b places i in
   different registers than in movsf_1 -- confirm against the AVR calling
   convention.  */
float movsf_2 (long long a, long b, float i)
{
if (x)
i = 3.0f;
foo();
return i;
}
- [avr-gcc-list] [patch,avr-gcc] optimizing mov SI/SF,
Georg-Johann Lay <=