avr-gcc-list
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [avr-gcc-list] Need help compiling testcase with gcc 3.3.5 or 3.4.3


From: Andy Hutchinson
Subject: Re: [avr-gcc-list] Need help compiling testcase with gcc 3.3.5 or 3.4.3
Date: Fri, 28 Jan 2005 20:41:20 -0500
User-agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.4) Gecko/20030624 Netscape/7.1 (ax)

Many thanks. To summarize: 3.4.3 gives the same results as WinAVR 3.3.1.

I've filed bug report on 4.0 for lack of loop reversal - but it will be treated as missed optimisation unless other ports take a big hit.

gcc-avr has a problem with pattern matching. This goes back at least as far as 3.3.1.

The AVR.MD RTL peephole turns sign status tests (Rx<=0) + jump into (RX & -32768) +jump. Then "*sbrx_and_branchhi" turns this into SBRS. Unfortunately this occurs before the potentially more efficient code peephole ((Rx=Rx-1)<=0)+jump is tried.

Somehow I have to figure out how to catch things before the 1st peephole grabs the sign test. Probably by adding another RTL peephole and deleting the old code ones.


address@hidden wrote:
Andy Hutchinson wrote:

I have been testing GCC 4.0 for avr wrt loop optimisations. I need to compare results with earlier release to see if this is new or old limitation.

Attached is short makefile and source. Only important thing is to keep optimisation options the same.

Can somebody help me out here? I've got way too many versions of gcc live right now to deal with more. The comments explain the effects in 4.0.


3.3.1 or 3.3.5 plus 3.4.3 would be great.

Attached are the results for GCC 3.4.3. Hot off the press. ;-)

HTH
Eric


------------------------------------------------------------------------

/* Assembler preamble for testcase.c, targeting atmega169. */
    .file   "testcase.c"
    .arch atmega169
/* Symbolic constants used by the generated code; __zero_reg__ appears as the
 * second operand of cpc in the count-up loops further down. */
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0
__zero_reg__ = 1
    .global __do_copy_data
    .global __do_clear_bss
    .text
.global testloop1
    .type   testloop1, @function
/* testloop1: calls foo ten times. The 16-bit counter in r29:r28 starts at 9
 * and is decremented after each call until it becomes negative. */
testloop1:
/* prologue: frame size=0 */
    push r28
    push r29
/* prologue end (size=2) */
    ldi r28,lo8(9)      /* r29:r28 = 9 (loop counter) */
    ldi r29,hi8(9)
.L5:
    call foo
    sbiw r28,1          /* counter -= 1 */
    sbrs r29,7          /* skip the rjmp once bit 7 of the high byte (sign bit) is set */
    rjmp .L5            /* counter still >= 0: run the body again */
/* epilogue: frame size=0 */
    pop r29
    pop r28
    ret
/* epilogue end (size=3) */
/* function testloop1 size 12 (7) */
    .size   testloop1, .-testloop1
.global testloop2
    .type   testloop2, @function
/* testloop2: ten iterations, each polling value until it is non-zero and then
 * calling foo. The 16-bit counter in r29:r28 counts up from 0 and is compared
 * against 10 (no decrement-and-test form here). */
testloop2:
/* prologue: frame size=0 */
    push r28
    push r29
/* prologue end (size=2) */
    ldi r28,lo8(0)      /* r29:r28 = 0 (loop counter) */
    ldi r29,hi8(0)
.L20:
    lds r24,value       /* read value from memory */
    tst r24
    breq .L20           /* value == 0: keep polling */
    call foo
    adiw r28,1          /* counter += 1 */
    cpi r28,10          /* 16-bit compare of counter with 10: low byte ... */
    cpc r29,__zero_reg__    /* ... then high byte with carry */
    brlt .L20           /* signed counter < 10: next iteration */
/* epilogue: frame size=0 */
    pop r29
    pop r28
    ret
/* epilogue end (size=3) */
/* function testloop2 size 17 (12) */
    .size   testloop2, .-testloop2
.global testloop3
    .type   testloop3, @function
/* testloop3: ten iterations, each calling foo repeatedly for as long as value
 * reads as zero. The 16-bit counter in r29:r28 starts at 9 and is decremented
 * until it becomes negative. */
testloop3:
/* prologue: frame size=0 */
    push r28
    push r29
/* prologue end (size=2) */
    ldi r28,lo8(9)      /* r29:r28 = 9 (loop counter) */
    ldi r29,hi8(9)
.L33:
    lds r24,value       /* read value from memory */
    tst r24
    brne .L32           /* value != 0: leave the inner loop */
    call foo
    rjmp .L33           /* re-test value */
.L32:
    sbiw r28,1          /* counter -= 1 */
    sbrs r29,7          /* skip the rjmp once the sign bit of the counter is set */
    rjmp .L33           /* counter still >= 0: next outer iteration */
/* epilogue: frame size=0 */
    pop r29
    pop r28
    ret
/* epilogue end (size=3) */
/* function testloop3 size 17 (12) */
    .size   testloop3, .-testloop3
.global testloop4
    .type   testloop4, @function
/* testloop4: ten iterations, each calling foo at least once and repeating the
 * call while value reads as zero. The 16-bit counter in r29:r28 counts up from
 * 0 and is compared against 10. */
testloop4:
/* prologue: frame size=0 */
    push r28
    push r29
/* prologue end (size=2) */
    ldi r28,lo8(0)      /* r29:r28 = 0 (loop counter) */
    ldi r29,hi8(0)
.L47:
    call foo
    lds r24,value       /* read value from memory */
    tst r24
    breq .L47           /* value == 0: call foo again */
    adiw r28,1          /* counter += 1 */
    cpi r28,10          /* 16-bit compare of counter with 10: low byte ... */
    cpc r29,__zero_reg__    /* ... then high byte with carry */
    brlt .L47           /* signed counter < 10: next iteration */
/* epilogue: frame size=0 */
    pop r29
    pop r28
    ret
/* epilogue end (size=3) */
/* function testloop4 size 17 (12) */
    .size   testloop4, .-testloop4
.global testloop5
    .type   testloop5, @function
/* testloop5: ten iterations, each calling foo only if value reads as zero.
 * The 16-bit counter in r29:r28 starts at 9 and is decremented until it
 * becomes negative. */
testloop5:
/* prologue: frame size=0 */
    push r28
    push r29
/* prologue end (size=2) */
    ldi r28,lo8(9)      /* r29:r28 = 9 (loop counter) */
    ldi r29,hi8(9)
.L53:
    lds r24,value       /* read value from memory */
    tst r24
    brne .L51           /* value != 0: skip the call */
    call foo
.L51:
    sbiw r28,1          /* counter -= 1 */
    sbrs r29,7          /* skip the rjmp once the sign bit of the counter is set */
    rjmp .L53           /* counter still >= 0: next iteration */
/* epilogue: frame size=0 */
    pop r29
    pop r28
    ret
/* epilogue end (size=3) */
/* function testloop5 size 16 (11) */
    .size   testloop5, .-testloop5
/* Storage for the C variable value: common symbol, 1 byte, alignment 1. */
    .comm value,1,1
/* File "testcase.c": code   79 = 0x004f (  54), prologues  10, epilogues  15 */


------------------------------------------------------------------------

# Builds the loop-optimisation testcase to assembly (testcase.s) only.
# Keep OPT and the other flags identical across compiler versions so that the
# generated assembly can be compared.
OPT = s
MCU = atmega169
CC = m:/WinAVR/bin/avr-gcc

# One logical line: the continuation needs the trailing backslash, otherwise
# make treats the second line as a separate (invalid) statement.
CFLAGS = -mmcu=$(MCU) -W  -Wall  -v -funsigned-char -funsigned-bitfields \
         -fpack-struct -fshort-enums  -O$(OPT)

# Neither target produces a file of its own name.
.PHONY: all clean

# Suffix rule: compile a C file to assembly. Recipe lines must start with a tab.
.c.s:
	$(CC) $(CFLAGS) -c  -S $<

all:    testcase.s
clean:
	rm -f *.o
	rm -f testcase.s



------------------------------------------------------------------------

/* External function called from every test loop; only declared here. */
extern int foo(void);
/* Flag read by the inner loop / if conditions of testloop2..testloop5.
 * It is volatile, so it is re-read from memory on every test. */
volatile char  value;
/* Test ability of gcc to optimise simple loops
* Expectations - all loops should reduce to simple decrement, test and branch
* Assembler code being
*
* Looptop:
* <body>
* sbiw Rx,1;
* brlt looptop (or similar)
*/

/* TESTCASE 1 - the simple loop.
*
* Calls foo() ten times; the counter i is not used inside the loop body.
*
* loop reversed (i=10....1) - OK
*/
void testloop1(void)
{
   int i;
   /* a simple loop that's ok */
   for (i=0;i<10;i++)
   {
       foo();
   }
   }
/* TESTCASE 2 - add inner loop - directed at itself (neither forward nor
* backward)
*
* Ten iterations; each one busy-waits until value is non-zero, then calls
* foo(). The counter i is not used inside the loop body.
*
* doloop - loop reversed  (i=9....0) OK
* decrement uses *addhi pattern with -1 as subtract - OK
* uses sbrs - not OK!
* Intention was for peephole (L 2347 avr.md) to combine subtract, test and
* branch (3 insn) by finding compare NE 65535 after subtract. This pattern no
* longer applies.
* We now have GE 0 - which no longer matches!
* RTL ends up with "*sbrx_and_branchhi" pattern as equivalent x & 32768 sign
* test (2 insn)
* Hack - add ANOTHER peephole to use GE 0
* Better maybe - define decrement, test and jump insn and let combiner figure
* it out.
* Also - why is gcc inconsistent in loop reversal bounds????
* Need more info to design a robust fix!!
*/

void testloop2(void)
{
   int i;
   for (i=0;i<10;i++)
   {
       while (!value);
       foo();
   }
}
/* TESTCASE 3 - add inner loop - forward jump 'while'
*
* Ten iterations; each one calls foo() repeatedly for as long as value reads
* as zero. The counter i is not used inside the loop body.
*
* doloop not reversed - not OK
* Cause is forward jump inside loop setting maybe_multiple flag that stops
* optimisation. Very poor: the loop counter is never used, and there is no way
* it can get set multiple times regardless of jumps inside loop. Need to file
* bug report
*
* Loop increment using *addhi pattern OK
* Test and jump using compare OK
*/
void testloop3(void)
{
   int i;
   for (i=0;i<10;i++)
   {
       while (!value)
       {
       foo();
       }
   }
   }
/* TESTCASE 4
*
* Same as above but with inner do-while loop: each of the ten iterations calls
* foo() at least once and repeats the call while value reads as zero.
*
* Loop reversed (i=9.....0) - OK
* Same code pattern matching problem as testcase 2
*/
void testloop4(void)
{
   int i;
   for (i=0;i<10;i++)
   {
       do
       {
           foo();
       } while (!value);
   }
}

/* TESTCASE 5
*
* Same as above but no inner loop - just 'if': each of the ten iterations
* calls foo() once, and only when value reads as zero.
*
* Loop reversed - (i=10.....1) - OK
* Again why is gcc using 10...1 as opposed to 9....0.
*/
void testloop5(void)
{
   int i;
   for (i=0;i<10;i++)
   {
       if (!value)
       {
           foo();
       }
   }
}


------------------------------------------------------------------------

m:/WinAVR/bin/avr-gcc -mmcu=atmega169 -W  -Wall  -v -funsigned-char 
-funsigned-bitfields -fpack-struct -fshort-enums  -Os -c  -S testcase.c
Reading specs from m:/WinAVR/bin/../lib/gcc/avr/3.4.3/specs
Configured with: ../gcc-3.4.3/configure --prefix=m:/WinAVR --build=mingw32 
--host=mingw32 --target=avr --enable-languages=c,c++ --with-dwarf2
Thread model: single
gcc version 3.4.3
 m:/WinAVR/bin/../libexec/gcc/avr/3.4.3/cc1.exe -quiet -v -iprefix 
m:\WinAVR\bin\../lib/gcc/avr/3.4.3/ testcase.c -quiet -dumpbase testcase.c 
-mmcu=atmega169 -auxbase testcase -Os -W -Wall -version -funsigned-char 
-funsigned-bitfields -fpack-struct -fshort-enums -o testcase.s
ignoring nonexistent directory 
"m:/WinAVR/bin/../lib/gcc/avr/3.4.3/../../../../avr/sys-include"
ignoring nonexistent directory 
"m:/WinAVR/lib/gcc/avr/3.4.3/../../../../avr/sys-include"
#include "..." search starts here:
#include <...> search starts here:
 m:/WinAVR/bin/../lib/gcc/avr/3.4.3/include
 m:/WinAVR/bin/../lib/gcc/avr/3.4.3/../../../../avr/include
 m:/WinAVR/lib/gcc/avr/3.4.3/include
 m:/WinAVR/lib/gcc/avr/3.4.3/../../../../avr/include
End of search list.
GNU C version 3.4.3 (avr)
    compiled by GNU C version 3.3.3 (cygwin special).
GGC heuristics: --param ggc-min-expand=46 --param ggc-min-heapsize=31613



reply via email to

[Prev in Thread] Current Thread [Next in Thread]