avr-gcc-list
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[avr-gcc-list] Missed optimization or am *I* missing something?


From: Johannes Bauer
Subject: [avr-gcc-list] Missed optimization or am *I* missing something?
Date: Thu, 23 Sep 2010 13:04:56 +0200
User-agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.12) Gecko/20100915 Thunderbird/3.0.8

Hello list,

I've encountered a strange problem, and I do not know whether I am causing it myself or whether avr-gcc is missing an optimization - this is why I am turning to you for help. Here is a complete example that demonstrates the issue:

#include <stdint.h>
#include <avr/io.h>

//#define WEIRD                         0
#define FOOBUFSIZE              8
uint16_t fooBoundedBuffer[FOOBUFSIZE];
/*
 * Minimal reproduction case: returns the sum (in uint16_t arithmetic) of
 * four entries of fooBoundedBuffer, at indices offset+0 .. offset+3 taken
 * modulo FOOBUFSIZE.
 *
 * WEIRD is not defined in this file (the #define above is commented out);
 * it has to be supplied by the build, e.g. -DWEIRD=0 or -DWEIRD=1.
 * It is subtracted once when computing `offset` and added back when
 * indexing. For the two values used in this report (0 and 1) the resulting
 * index is the same, because `offset` wraps modulo 256 and 256 is a
 * multiple of FOOBUFSIZE (8). Only the generated code is expected to differ.
 */
uint16_t FOO(void) {
        uint16_t boundedBufferValueSum;
        unsigned char offset;
        unsigned char i;
        /* Value read from DMA.CH0.TRFCNT is truncated to 8 bits on assignment. */
        offset = DMA.CH0.TRFCNT - WEIRD;
        boundedBufferValueSum = 0;
        for (i = 0; i < 4; i++) {
/* offset, i and WEIRD are added as int (integer promotion) before the modulo. */
boundedBufferValueSum += fooBoundedBuffer[(offset + i + WEIRD) % FOOBUFSIZE];
        }
        return boundedBufferValueSum;
}

/* Empty entry point; FOO is never called. Present so the example builds as a complete program. */
int main() {
        return 0;
}

If this is compiled with

avr-gcc -mmcu=atxmega128a1 -Os -DWEIRD=0 -o foo example.c && avr-nm foo -S | grep FOO

it shows

00000244 0000006e T FOO

i.e. FOO is 110 bytes long. Compile with

avr-gcc -mmcu=atxmega128a1 -Os -DWEIRD=1 -o foo example.c && avr-nm foo -S | grep FOO

and it shows

00000244 00000042 T FOO

i.e. FOO is only 66 bytes long (44 bytes shorter). The resulting assembly in the short case is

00000244 <FOO>:
 244:   80 91 14 01     lds     r24, 0x0114
 248:   90 91 15 01     lds     r25, 0x0115
 24c:   81 50           subi    r24, 0x01       ; 1
 24e:   40 e0           ldi     r20, 0x00       ; 0
 250:   50 e0           ldi     r21, 0x00       ; 0
 252:   20 e0           ldi     r18, 0x00       ; 0
 254:   30 e0           ldi     r19, 0x00       ; 0
 256:   90 e0           ldi     r25, 0x00       ; 0
 258:   bc 01           movw    r22, r24
 25a:   6f 5f           subi    r22, 0xFF       ; 255
 25c:   7f 4f           sbci    r23, 0xFF       ; 255
 25e:   fb 01           movw    r30, r22
 260:   e2 0f           add     r30, r18
 262:   f3 1f           adc     r31, r19
 264:   e7 70           andi    r30, 0x07       ; 7
 266:   f0 70           andi    r31, 0x00       ; 0
 268:   ee 0f           add     r30, r30
 26a:   ff 1f           adc     r31, r31
 26c:   e0 50           subi    r30, 0x00       ; 0
 26e:   f0 4e           sbci    r31, 0xE0       ; 224
 270:   80 81           ld      r24, Z
 272:   91 81           ldd     r25, Z+1        ; 0x01
 274:   48 0f           add     r20, r24
 276:   59 1f           adc     r21, r25
 278:   2f 5f           subi    r18, 0xFF       ; 255
 27a:   3f 4f           sbci    r19, 0xFF       ; 255
 27c:   24 30           cpi     r18, 0x04       ; 4
 27e:   31 05           cpc     r19, r1
 280:   71 f7           brne    .-36            ; 0x25e <FOO+0x1a>
 282:   ca 01           movw    r24, r20
 284:   08 95           ret

and in the long case

00000244 <FOO>:
 244:   e0 e0           ldi     r30, 0x00       ; 0
 246:   f1 e0           ldi     r31, 0x01       ; 1
 248:   44 89           ldd     r20, Z+20       ; 0x14
 24a:   55 89           ldd     r21, Z+21       ; 0x15
 24c:   50 e0           ldi     r21, 0x00       ; 0
 24e:   da 01           movw    r26, r20
 250:   12 96           adiw    r26, 0x02       ; 2
 252:   a7 70           andi    r26, 0x07       ; 7
 254:   b0 70           andi    r27, 0x00       ; 0
 256:   60 e0           ldi     r22, 0x00       ; 0
 258:   70 e2           ldi     r23, 0x20       ; 32
 25a:   aa 0f           add     r26, r26
 25c:   bb 1f           adc     r27, r27
 25e:   a6 0f           add     r26, r22
 260:   b7 1f           adc     r27, r23
 262:   fa 01           movw    r30, r20
 264:   31 96           adiw    r30, 0x01       ; 1
 266:   e7 70           andi    r30, 0x07       ; 7
 268:   f0 70           andi    r31, 0x00       ; 0
 26a:   ee 0f           add     r30, r30
 26c:   ff 1f           adc     r31, r31
 26e:   e6 0f           add     r30, r22
 270:   f7 1f           adc     r31, r23
 272:   2d 91           ld      r18, X+
 274:   3c 91           ld      r19, X
 276:   80 81           ld      r24, Z
 278:   91 81           ldd     r25, Z+1        ; 0x01
 27a:   28 0f           add     r18, r24
 27c:   39 1f           adc     r19, r25
 27e:   fa 01           movw    r30, r20
 280:   e7 70           andi    r30, 0x07       ; 7
 282:   f0 70           andi    r31, 0x00       ; 0
 284:   ee 0f           add     r30, r30
 286:   ff 1f           adc     r31, r31
 288:   e6 0f           add     r30, r22
 28a:   f7 1f           adc     r31, r23
 28c:   80 81           ld      r24, Z
 28e:   91 81           ldd     r25, Z+1        ; 0x01
 290:   28 0f           add     r18, r24
 292:   39 1f           adc     r19, r25
 294:   4d 5f           subi    r20, 0xFD       ; 253
 296:   5f 4f           sbci    r21, 0xFF       ; 255
 298:   47 70           andi    r20, 0x07       ; 7
 29a:   50 70           andi    r21, 0x00       ; 0
 29c:   44 0f           add     r20, r20
 29e:   55 1f           adc     r21, r21
 2a0:   64 0f           add     r22, r20
 2a2:   75 1f           adc     r23, r21
 2a4:   fb 01           movw    r30, r22
 2a6:   80 81           ld      r24, Z
 2a8:   91 81           ldd     r25, Z+1        ; 0x01
 2aa:   28 0f           add     r18, r24
 2ac:   39 1f           adc     r19, r25
 2ae:   c9 01           movw    r24, r18
 2b0:   08 95           ret

The compiler used is avr-gcc 4.3.4 with the CVS patches from the BSD repository.

Can this be explained somehow?

Kind regards,
Johannes



reply via email to

[Prev in Thread] Current Thread [Next in Thread]