lightning
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Lightning register liveliness


From: Paulo César Pereira de Andrade
Subject: Re: Lightning register liveliness
Date: Mon, 10 Feb 2020 15:09:28 -0300

Em seg., 27 de jan. de 2020 às 17:12, Paul Cercueil
<address@hidden> escreveu:
>
> Hi Paulo,

  Hi Paul,
  Sorry for the long delay before the reply.
  I will look at the armv7hl issue next; that must be a bug.

> Here is a new example program that shows incorrect behaviour with the
> latest master.
> My JIT_R1 (== r10) is set at the beginning of the program and read back
> at the end, but it's used in between as a temporary register by
> Lightning.

  With the correction of the bug in git master, the example should be
correct.
  Checking only on Linux for now, it did not reuse the register. But if
you are sure the functions called with jit_callr(JIT_R2) do not modify
JIT_R1, it is required to add a jit_live(JIT_R1), because otherwise
non-callee-saved registers are considered dead when returning from a
function.

  The jit_live(JIT_V4) should not be required. If it is, it is a bug. I
will recheck it on the Windows environments to make 100% sure there
isn't any bug.

> Thanks,
> -Paul
>
>
> C code:
> -------------------------------
> #include <lightning.h>
>
> #define ADDR_REG JIT_V(JIT_V_NUM - 1)
> #define CYCLE_REG JIT_V(JIT_V_NUM - 2)
>
> int main(int argc, char **argv)
> {
>  jit_state_t *_jit;
>  jit_node_t *node1, *node2, *node3;
>
>  init_jit(argv[0]);
>
>  _jit = jit_new_state();
>
>  jit_prolog();
>  jit_tramp(256);
>
>  jit_extr_i(JIT_V1, JIT_V1);
>  jit_addr(JIT_R1, JIT_V4, JIT_V1); // <-- JIT_R1 is written here
>
>  jit_extr_us(JIT_V1, JIT_V3);
>  jit_addr(JIT_R2, JIT_V4, JIT_V1);
>  jit_rshi_u(JIT_V1, JIT_V3, 0x10);
>  jit_andi(JIT_V1, JIT_V1, 0x7fff);
>  jit_lshi(JIT_V1, JIT_V1, 2);
>
>  jit_ldxi_i(JIT_V4, ADDR_REG, 0x30);
>  jit_extr_i(JIT_V1, JIT_V1);
>  jit_stxi_i(0x3c, ADDR_REG, JIT_V0);
>  jit_addr(JIT_V0, JIT_V4, JIT_V1);
>  jit_stxi_i(0x7c, ADDR_REG, JIT_V1);
>
>  jit_andi(JIT_V4, JIT_R0, 0x10000000);
>  jit_rshi_u(JIT_V4, JIT_V4, 6);
>  jit_ori(JIT_V4, JIT_V4, 0x1f9fffff);
>  jit_andi(JIT_V1, JIT_R0, JIT_V4);
>  jit_movi(JIT_V4, 0x30000000);
>  jit_addr(JIT_V1, JIT_V1, JIT_V4);
>  jit_ldxi_us(JIT_V1, JIT_V1, 0);
>
>  jit_addi(JIT_V4, JIT_R0, 2);
>  jit_stxi_i(0x48, ADDR_REG, JIT_R2);
>  jit_andi(JIT_R2, JIT_V4, 0x10000000);
>  jit_rshi_u(JIT_R2, JIT_R2, 6);
>  jit_ori(JIT_R2, JIT_R2, 0x1f9fffff);
>  jit_andi(JIT_V4, JIT_V4, JIT_R2);
>  jit_movi(JIT_R2, 0x30000000);
>  jit_addr(JIT_V4, JIT_V4, JIT_R2);
>  jit_ldxi_us(JIT_V4, JIT_V4, 0);
>
>  jit_rshi_u(JIT_V3, JIT_V1, 0x8);
>
>  jit_extr_i(JIT_V3, JIT_V3);
>  jit_ldxi_i(JIT_R2, ADDR_REG, 0x8);
>  jit_addr(JIT_V3, JIT_V3, JIT_R2);
>
>  jit_extr_i(JIT_V3, JIT_V3);
>  jit_lshi(JIT_V3, JIT_V3, 0x12);
>
>  jit_extr_uc(JIT_V2, JIT_V1);
>
>  jit_ldxi_i(JIT_R2, ADDR_REG, 0x4);
>  jit_addr(JIT_V2, JIT_V2, JIT_R2);
>
>  jit_extr_i(JIT_V2, JIT_V2);
>  jit_lshi(JIT_V2, JIT_V2, 2);
>
>  jit_extr_us(JIT_V2, JIT_V2);
>
>  jit_extr_i(JIT_V3, JIT_V3);
>  jit_orr(JIT_V3, JIT_V3, JIT_V2);
>
>  jit_stxi_i(0x40, ADDR_REG, JIT_R0);
>  jit_ldxi(JIT_R2, ADDR_REG, 0xf8);
>  jit_stxi_i(0x78, ADDR_REG, JIT_V3);
>  jit_movi(JIT_R0, 0x489e0000);
>  jit_callr(JIT_R2);
>
>  jit_live(JIT_R1);
>  jit_live(JIT_V4);
>
>  jit_extr_uc(JIT_V2, JIT_V4);
>  jit_stxi_i(0x4c, ADDR_REG, JIT_V4);
>  jit_ldxi_i(JIT_V4, ADDR_REG, 0xc);
>  jit_addr(JIT_V2, JIT_V2, JIT_V4);
>
>  jit_extr_i(JIT_V2, JIT_V2);
>  jit_lshi(JIT_V2, JIT_V2, 2);
>
>  jit_ldxi(JIT_R2, ADDR_REG, 0xf8);
>  jit_stxi_i(0x74, ADDR_REG, JIT_V2);
>  jit_movi(JIT_R0, 0x489d0800);
>  jit_callr(JIT_R2);
>
>  jit_live(JIT_R1);
>
>  jit_andi(JIT_R2, JIT_R1, 0x10000000); // <--- JIT_R1 is read back here
>
>
>  jit_ret();
>  jit_epilog();
>
>  jit_emit();
>  jit_disassemble();
>  jit_clear_state();
>  jit_destroy_state();
>
>  return 0;
> }
> ------------------------------
>
>
> Generated code:
> ------------------------------
>          0x1350000 movsxd rdi,edi
>          0x1350003 lea r10,[r13+rdi*1+0x0]       <---- r10 is set here
>          0x1350008 movzx rdi,r12w
>          0x135000c lea r11,[r13+rdi*1+0x0]
>          0x1350011 mov rdi,r12
>          0x1350014 shr rdi,0x10
>          0x1350018 and rdi,0x7fff
>          0x135001f lea rdi,[rdi*4+0x0]
>          0x1350027 movsxd r13,DWORD PTR [r15+0x30]
>          0x135002b movsxd rdi,edi
>          0x135002e mov DWORD PTR [r15+0x3c],ebx
>          0x1350032 lea rbx,[r13+rdi*1+0x0]
>          0x1350037 mov DWORD PTR [r15+0x7c],edi
>          0x135003b mov r13d,0x10000000
>          0x1350041 and r13,rax
>          0x1350044 shr r13,0x6
>          0x1350048 or r13,0x1f9fffff
>          0x135004f mov edi,0x7
>          0x1350054 and rdi,rax
>          0x1350057 mov r13d,0x30000000
>          0x135005d add rdi,r13
>          0x1350060 movzx rdi,WORD PTR [rdi]
>          0x1350064 lea r13,[rax+0x2]
>          0x1350068 mov DWORD PTR [r15+0x48],r11d
>          0x135006c mov r11d,0x10000000
>          0x1350072 and r11,r13
>          0x1350075 shr r11,0x6
>          0x1350079 or r11,0x1f9fffff
>          0x1350080 and r13,0x2
>          0x1350084 mov r11d,0x30000000
>          0x135008a add r13,r11
>          0x135008d movzx r13,WORD PTR [r13+0x0]
>          0x1350092 mov r12,rdi
>          0x1350095 shr r12,0x8
>          0x1350099 movsxd r12,r12d
>          0x135009c movsxd r11,DWORD PTR [r15+0x8]
>          0x13500a0 add r12,r11
>          0x13500a3 movsxd r12,r12d
>          0x13500a6 shl r12,0x12
>          0x13500aa mov r10,rdi        <------------ r10 is overwritten
> here!!!
>          0x13500ad movzx rsi,r10b
>          0x13500b1 movsxd r11,DWORD PTR [r15+0x4]
>          0x13500b5 add rsi,r11
>          0x13500b8 movsxd rsi,esi
>          0x13500bb lea rsi,[rsi*4+0x0]
>          0x13500c3 movzx rsi,si
>          0x13500c7 movsxd r12,r12d
>          0x13500ca or r12,rsi
>          0x13500cd mov DWORD PTR [r15+0x40],eax
>          0x13500d1 mov r11,QWORD PTR [r15+0xf8]
>          0x13500d8 mov DWORD PTR [r15+0x78],r12d
>          0x13500dc mov eax,0x489e0000
>          0x13500e1 call r11
>          0x13500e4 mov rax,r13
>          0x13500e7 movzx rsi,al
>          0x13500eb mov DWORD PTR [r15+0x4c],r13d
>          0x13500ef movsxd r13,DWORD PTR [r15+0xc]
>          0x13500f3 add rsi,r13
>          0x13500f6 movsxd rsi,esi
>          0x13500f9 lea rsi,[rsi*4+0x0]
>          0x1350101 mov r11,QWORD PTR [r15+0xf8]
>          0x1350108 mov DWORD PTR [r15+0x74],esi
>          0x135010c mov eax,0x489d0800
>          0x1350111 call r11
>          0x1350114 mov r11d,0x10000000
>          0x135011a and r11,r10           <--------- r10 is read back
> here
> ----------------------

[snip]

Thanks,
Paulo



reply via email to

[Prev in Thread] Current Thread [Next in Thread]