[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] x86: cvtsi2s{s,d} etc. array access
From: Blue Swirl
Subject: Re: [Qemu-devel] x86: cvtsi2s{s,d} etc. array access
Date: Tue, 15 May 2012 17:08:40 +0000
On Mon, May 14, 2012 at 9:05 PM, Blue Swirl <address@hidden> wrote:
> Hi,
>
> While working on the AREG0 patches, I noticed strange code in
> target-i386/translate.c.
>
> We have this table of function pointers:
> static void *sse_op_table3[4 * 3] = {
> gen_helper_cvtsi2ss,
> gen_helper_cvtsi2sd,
> X86_64_ONLY(gen_helper_cvtsq2ss),
> X86_64_ONLY(gen_helper_cvtsq2sd),
>
> gen_helper_cvttss2si,
> gen_helper_cvttsd2si,
> X86_64_ONLY(gen_helper_cvttss2sq),
> X86_64_ONLY(gen_helper_cvttsd2sq),
>
> gen_helper_cvtss2si,
> gen_helper_cvtsd2si,
> X86_64_ONLY(gen_helper_cvtss2sq),
> X86_64_ONLY(gen_helper_cvtsd2sq),
> };
>
> It's accessed like this (line 3537):
> sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)];
>
> b >> 8 can only be either 0 or 1. I don't see how this can work: won't
> the array index become negative for s->dflag != 2?
>
> The other access is as follows (line 3594):
> sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
> (b & 1) * 4];
>
> This looks better because of + 4 but I think some array values are not
> accessible (max. 1 * 2 + (1 - 2) + 4 + 1 * 4 == 9).
I still don't understand the arithmetic, but it looks like the correct
helpers are called:
$ cat cvtsi2ss.c
int main(void)
{
asm("cvtsi2ss %eax, %xmm0;");
asm("cvtsi2sd %eax, %xmm0;");
#ifdef __amd64__
asm("cvtsi2ss %rax, %xmm0;");
asm("cvtsi2sd %rax, %xmm0;");
#endif
asm("cvttss2si %xmm0, %eax;");
asm("cvttsd2si %xmm0, %eax;");
#ifdef __amd64__
asm("cvttss2si %xmm0, %rax;");
asm("cvttsd2si %xmm0, %rax;");
#endif
asm("cvtss2si %xmm0, %eax;");
asm("cvtsd2si %xmm0, %eax;");
#ifdef __amd64__
asm("cvtss2si %xmm0, %rax;");
asm("cvtsd2si %xmm0, %rax;");
#endif
return 0;
}
$ gcc -o cvtsi2ss cvtsi2ss.c
$ gcc -m32 -o cvtsi2ss.i386 cvtsi2ss.c
$ qemu-x86_64 -d in_asm,op_opt ./cvtsi2ss
IN: main
0x0000000000400494: push %rbp
0x0000000000400495: mov %rsp,%rbp
0x0000000000400498: cvtsi2ss %eax,%xmm0
0x000000000040049c: cvtsi2sd %eax,%xmm0
0x00000000004004a0: cvtsi2ssq %rax,%xmm0
0x00000000004004a5: cvtsi2sdq %rax,%xmm0
0x00000000004004aa: cvttss2si %xmm0,%eax
0x00000000004004ae: cvttsd2si %xmm0,%eax
0x00000000004004b2: cvttss2siq %xmm0,%rax
0x00000000004004b7: cvttsd2siq %xmm0,%rax
0x00000000004004bc: cvtss2si %xmm0,%eax
0x00000000004004c0: cvtsd2si %xmm0,%eax
0x00000000004004c4: cvtss2siq %xmm0,%rax
0x00000000004004c9: cvtsd2siq %xmm0,%rax
0x00000000004004ce: mov $0x0,%eax
0x00000000004004d3: leaveq
0x00000000004004d4: retq
OP after liveness analysis:
mov_i64 tmp0,rbp
mov_i64 tmp2,rsp
movi_i64 tmp12,$0xfffffffffffffff8
add_i64 tmp2,tmp2,tmp12
qemu_st64 tmp0,tmp2,$0xffffffffffffffff
mov_i64 rsp,tmp2
mov_i64 tmp0,rsp
mov_i64 rbp,tmp0
mov_i64 tmp0,rax
movi_i64 tmp12,$0x2a8
add_i64 tmp10,env,tmp12
mov_i32 tmp6,tmp0
movi_i64 tmp12,$cvtsi2ss
call tmp12,$0x0,$0,tmp10,tmp6
mov_i64 tmp0,rax
movi_i64 tmp12,$0x2a8
add_i64 tmp10,env,tmp12
mov_i32 tmp6,tmp0
movi_i64 tmp12,$cvtsi2sd
call tmp12,$0x0,$0,tmp10,tmp6
mov_i64 tmp0,rax
movi_i64 tmp12,$0x2a8
add_i64 tmp10,env,tmp12
movi_i64 tmp12,$cvtsq2ss
call tmp12,$0x0,$0,tmp10,tmp0
mov_i64 tmp0,rax
movi_i64 tmp12,$0x2a8
add_i64 tmp10,env,tmp12
movi_i64 tmp12,$cvtsq2sd
call tmp12,$0x0,$0,tmp10,tmp0
movi_i64 tmp12,$0x2a8
add_i64 tmp10,env,tmp12
movi_i64 tmp12,$cvttss2si
call tmp12,$0x0,$1,tmp6,tmp10
ext32u_i64 tmp0,tmp6
ext32u_i64 rax,tmp0
movi_i64 tmp12,$0x2a8
add_i64 tmp10,env,tmp12
movi_i64 tmp12,$cvttsd2si
call tmp12,$0x0,$1,tmp6,tmp10
ext32u_i64 tmp0,tmp6
ext32u_i64 rax,tmp0
movi_i64 tmp12,$0x2a8
add_i64 tmp10,env,tmp12
movi_i64 tmp12,$cvttss2sq
call tmp12,$0x0,$1,tmp0,tmp10
mov_i64 rax,tmp0
movi_i64 tmp12,$0x2a8
add_i64 tmp10,env,tmp12
movi_i64 tmp12,$cvttsd2sq
call tmp12,$0x0,$1,tmp0,tmp10
mov_i64 rax,tmp0
movi_i64 tmp12,$0x2a8
add_i64 tmp10,env,tmp12
movi_i64 tmp12,$cvtss2si
call tmp12,$0x0,$1,tmp6,tmp10
ext32u_i64 tmp0,tmp6
ext32u_i64 rax,tmp0
movi_i64 tmp12,$0x2a8
add_i64 tmp10,env,tmp12
movi_i64 tmp12,$cvtsd2si
call tmp12,$0x0,$1,tmp6,tmp10
ext32u_i64 tmp0,tmp6
ext32u_i64 rax,tmp0
movi_i64 tmp12,$0x2a8
add_i64 tmp10,env,tmp12
movi_i64 tmp12,$cvtss2sq
call tmp12,$0x0,$1,tmp0,tmp10
mov_i64 rax,tmp0
movi_i64 tmp12,$0x2a8
add_i64 tmp10,env,tmp12
movi_i64 tmp12,$cvtsd2sq
$ qemu-i386 -d in_asm,op_opt ./cvtsi2ss.i386
$ grep -B3 -A29 cvtsi2ss /tmp/qemu.log
IN: main
0x08048394: push %ebp
0x08048395: mov %esp,%ebp
0x08048397: cvtsi2ss %eax,%xmm0
0x0804839b: cvtsi2sd %eax,%xmm0
0x0804839f: cvttss2si %xmm0,%eax
0x080483a3: cvttsd2si %xmm0,%eax
0x080483a7: cvtss2si %xmm0,%eax
0x080483ab: cvtsd2si %xmm0,%eax
0x080483af: mov $0x0,%eax
0x080483b4: pop %ebp
0x080483b5: ret
OP after liveness analysis:
mov_i32 tmp0,ebp
mov_i32 tmp2,esp
movi_i32 tmp12,$0xfffffffc
add_i32 tmp2,tmp2,tmp12
qemu_st32 tmp0,tmp2,$0xffffffffffffffff
mov_i32 esp,tmp2
mov_i32 tmp0,esp
mov_i32 ebp,tmp0
mov_i32 tmp0,eax
movi_i64 tmp13,$0x1d8
add_i64 tmp10,env,tmp13
mov_i32 tmp6,tmp0
movi_i64 tmp13,$cvtsi2ss
call tmp13,$0x0,$0,tmp10,tmp6
mov_i32 tmp0,eax
movi_i64 tmp13,$0x1d8
add_i64 tmp10,env,tmp13
mov_i32 tmp6,tmp0
movi_i64 tmp13,$cvtsi2sd
call tmp13,$0x0,$0,tmp10,tmp6
movi_i64 tmp13,$0x1d8
add_i64 tmp10,env,tmp13
movi_i64 tmp13,$cvttss2si
call tmp13,$0x0,$1,tmp6,tmp10
nopn $0x2,$0x2
mov_i32 eax,tmp6
movi_i64 tmp13,$0x1d8
add_i64 tmp10,env,tmp13
movi_i64 tmp13,$cvttsd2si
call tmp13,$0x0,$1,tmp6,tmp10
nopn $0x2,$0x2
mov_i32 eax,tmp6
movi_i64 tmp13,$0x1d8
add_i64 tmp10,env,tmp13
movi_i64 tmp13,$cvtss2si
call tmp13,$0x0,$1,tmp6,tmp10
nopn $0x2,$0x2
mov_i32 eax,tmp6
movi_i64 tmp13,$0x1d8
add_i64 tmp10,env,tmp13
movi_i64 tmp13,$cvtsd2si
call tmp13,$0x0,$1,tmp6,tmp10