lightning
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Idea : load/stores with pre-decrement / post-increment


From: Paulo César Pereira de Andrade
Subject: Re: Idea : load/stores with pre-decrement / post-increment
Date: Tue, 19 Dec 2023 18:07:47 -0300

[snip]

  Still very early work in progress, only in "it compiles" state, but you
might have different ideas or extra suggestions for the concept/design.

  Sample session:

$ cat ldstxbar.tst
.disasm
.data 32
.code
    prolog
    ldxbr_c %r0 %r1 1
    ldxar_c %r0 %r1 1
    ldxbr_uc %r0 %r1 1
    ldxar_uc %r0 %r1 1
    ldxbr_s %r0 %r1 2
    ldxar_s %r0 %r1 2
    ldxbr_us %r0 %r1 2
    ldxar_us %r0 %r1 2
    ldxbr_i %r0 %r1 4
    ldxar_i %r0 %r1 4
#if __WORDSIZE == 64
    ldxbr_ui %r0 %r1 4
    ldxar_ui %r0 %r1 4
    ldxbr_l %r0 %r1 8
    ldxar_l %r0 %r1 8
#endif
    ldxbr_f %f0 %r1 4
    ldxar_f %f0 %r1 4
    ldxbr_d %f0 %r1 8
    ldxar_d %f0 %r1 8
    stxbr_c %r0 %r1 1
    stxar_c %r0 %r1 1
    stxbr_s %r0 %r1 2
    stxar_s %r0 %r1 2
    stxbr_i %r0 %r1 4
    stxar_i %r0 %r1 4
#if __WORDSIZE == 64
    stxbr_l %r0 %r1 8
    stxar_l %r0 %r1 8
#endif
    stxbr_f %r0 %f0 4
    stxar_f %r0 %f0 4
    stxbr_d %r0 %f0 8
    stxar_d %r0 %f0 8
    ret
    epilog

$ ./lightning ldstxbar.tst
L0: %rbx %r13 %r14 %r15 %r12 /* prolog */
    ldxbr_c %rax %r10 0x1
    ldxar_c %rax %r10 0x1
    ldxbr_uc %rax %r10 0x1
    ldxar_uc %rax %r10 0x1
    ldxbr_s %rax %r10 0x2
    ldxar_s %rax %r10 0x2
    ldxbr_us %rax %r10 0x2
    ldxar_us %rax %r10 0x2
    ldxbr_i %rax %r10 0x4
    ldxar_i %rax %r10 0x4
    ldxbr_ui %rax %r10 0x4
    ldxar_ui %rax %r10 0x4
    ldxbr_l %rax %r10 0x8
    ldxar_l %rax %r10 0x8
    ldxbr_f %xmm8 %r10 0x4
    ldxar_f %xmm8 %r10 0x4
    ldxbr_d %xmm8 %r10 0x8
    ldxar_d %xmm8 %r10 0x8
    stxbr_c %rax %r10 0x1
    stxar_c %rax %r10 0x1
    stxbr_s %rax %r10 0x2
    stxar_s %rax %r10 0x2
    stxbr_i %rax %r10 0x4
    stxar_i %rax %r10 0x4
    stxbr_l %rax %r10 0x8
    stxar_l %rax %r10 0x8
    stxbr_f %rax %xmm8 0x4
    stxar_f %rax %xmm8 0x4
    stxbr_d %rax %xmm8 0x8
    stxar_d %rax %xmm8 0x8
    prepare
    pushargi_l 0x61c388
     \__ movi %rdi 0x61c388
    finishi 0x7f744c699390
     \__ calli 0x7f744c699390
L1: %rbx %r13 %r14 %r15 %r12
    ret
L2: %rax %xmm0 /* epilog */
  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
L0: %rbx %r13 %r14 %r15 %r12 /* prolog */
    0x7f744cc95000    sub    $0x18,%rsp
    0x7f744cc95004    mov    %rbp,(%rsp)
    0x7f744cc95008    mov    %rsp,%rbp
    ldxbr_c %rax %r10 0x1
    0x7f744cc9500b    add    $0x1,%r10
    0x7f744cc9500f    movsbq (%r10),%rax
    ldxar_c %rax %r10 0x1
    0x7f744cc95013    movsbq (%r10),%rax
    0x7f744cc95017    add    $0x1,%r10
    ldxbr_uc %rax %r10 0x1
    0x7f744cc9501b    add    $0x1,%r10
    0x7f744cc9501f    movzbq (%r10),%rax
    ldxar_uc %rax %r10 0x1
    0x7f744cc95023    movzbq (%r10),%rax
    0x7f744cc95027    add    $0x1,%r10
    ldxbr_s %rax %r10 0x2
    0x7f744cc9502b    add    $0x2,%r10
    0x7f744cc9502f    movswq (%r10),%rax
    ldxar_s %rax %r10 0x2
    0x7f744cc95033    movswq (%r10),%rax
    0x7f744cc95037    add    $0x2,%r10
    ldxbr_us %rax %r10 0x2
    0x7f744cc9503b    add    $0x2,%r10
    0x7f744cc9503f    movzwq (%r10),%rax
    ldxar_us %rax %r10 0x2
    0x7f744cc95043    movzwq (%r10),%rax
    0x7f744cc95047    add    $0x2,%r10
    ldxbr_i %rax %r10 0x4
    0x7f744cc9504b    add    $0x4,%r10
    0x7f744cc9504f    movslq (%r10),%rax
    ldxar_i %rax %r10 0x4
    0x7f744cc95052    movslq (%r10),%rax
    0x7f744cc95055    add    $0x4,%r10
    ldxbr_ui %rax %r10 0x4
    0x7f744cc95059    add    $0x4,%r10
    0x7f744cc9505d    movsxd (%r10),%eax
    ldxar_ui %rax %r10 0x4
    0x7f744cc95060    movsxd (%r10),%eax
    0x7f744cc95063    add    $0x4,%r10
    ldxbr_l %rax %r10 0x8
    0x7f744cc95067    add    $0x8,%r10
    0x7f744cc9506b    mov    (%r10),%rax
    ldxar_l %rax %r10 0x8
    0x7f744cc9506e    mov    (%r10),%rax
    0x7f744cc95071    add    $0x8,%r10
    ldxbr_f %xmm8 %r10 0x4
    0x7f744cc95075    add    $0x4,%r10
    0x7f744cc95079    movss  (%r10),%xmm8
    ldxar_f %xmm8 %r10 0x4
    0x7f744cc9507e    movss  (%r10),%xmm8
    0x7f744cc95083    add    $0x4,%r10
    ldxbr_d %xmm8 %r10 0x8
    0x7f744cc95087    add    $0x8,%r10
    0x7f744cc9508b    movsd  (%r10),%xmm8
    ldxar_d %xmm8 %r10 0x8
    0x7f744cc95090    movsd  (%r10),%xmm8
    0x7f744cc95095    add    $0x8,%r10
    stxbr_c %rax %r10 0x1
    0x7f744cc95099    add    $0x1,%rax
    0x7f744cc9509d    mov    %r10b,(%rax)
    stxar_c %rax %r10 0x1
    0x7f744cc950a0    mov    %r10b,(%rax)
    0x7f744cc950a3    add    $0x1,%rax
    stxbr_s %rax %r10 0x2
    0x7f744cc950a7    add    $0x2,%rax
    0x7f744cc950ab    mov    %r10w,(%rax)
    stxar_s %rax %r10 0x2
    0x7f744cc950af    mov    %r10w,(%rax)
    0x7f744cc950b3    add    $0x2,%rax
    stxbr_i %rax %r10 0x4
    0x7f744cc950b7    add    $0x4,%rax
    0x7f744cc950bb    mov    %r10d,(%rax)
    stxar_i %rax %r10 0x4
    0x7f744cc950be    mov    %r10d,(%rax)
    0x7f744cc950c1    add    $0x4,%rax
    stxbr_l %rax %r10 0x8
    0x7f744cc950c5    add    $0x8,%rax
    0x7f744cc950c9    mov    %r10,(%rax)
    stxar_l %rax %r10 0x8
    0x7f744cc950cc    mov    %r10,(%rax)
    0x7f744cc950cf    add    $0x8,%rax
    stxbr_f %rax %xmm8 0x4
    0x7f744cc950d3    add    $0x4,%rax
    0x7f744cc950d7    movsd  %xmm8,(%rax)
    stxar_f %rax %xmm8 0x4
    0x7f744cc950dc    movsd  %xmm8,(%rax)
    0x7f744cc950e1    add    $0x4,%rax
    stxbr_d %rax %xmm8 0x8
    0x7f744cc950e5    add    $0x8,%rax
    0x7f744cc950e9    movsd  %xmm8,(%rax)
    stxar_d %rax %xmm8 0x8
    0x7f744cc950ee    movsd  %xmm8,(%rax)
    0x7f744cc950f3    add    $0x8,%rax
    prepare
    pushargi_l 0x61c388
     \__ movi %rdi 0x61c388
    0x7f744cc950f7    mov    $0x61c388,%edi
    finishi 0x7f744c699390
     \__ calli 0x7f744c699390
    0x7f744cc950fc    call   0x7f744c699390
L1: %rbx %r13 %r14 %r15 %r12
    ret
L2: %rax %xmm0 /* epilog */
    0x7f744cc95101    mov    %rbp,%rsp
    0x7f744cc95104    mov    (%rsp),%rbp
    0x7f744cc95108    add    $0x18,%rsp
    0x7f744cc9510c    ret

  So, the idea is the pattern:

jit_ldxbr_T(R0, R1, DISP), jit_ldxar_T(R0, R1, DISP)
jit_stxbr_T(R0, R1, DISP) and jit_stxar_T(R0, R1, DISP)

where the fallback/generic version does an addi of DISP to the base
register (b)efore or (a)fter the load/store, and the memory access itself
is otherwise a normal jit_ldr_T or jit_str_T.

Thanks!
Paulo



reply via email to

[Prev in Thread] Current Thread [Next in Thread]