[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: Idea : load/stores with pre-decrement / post-increment
From: Paulo César Pereira de Andrade
Subject: Re: Idea : load/stores with pre-decrement / post-increment
Date: Tue, 19 Dec 2023 18:07:47 -0300
[snip]
Still very early work in progress, only in an "it compiles" state, but
you might have different ideas or extra suggestions for the concept/design.
Sample session:
$ cat ldstxbar.tst
.disasm
.data 32
.code
prolog
ldxbr_c %r0 %r1 1
ldxar_c %r0 %r1 1
ldxbr_uc %r0 %r1 1
ldxar_uc %r0 %r1 1
ldxbr_s %r0 %r1 2
ldxar_s %r0 %r1 2
ldxbr_us %r0 %r1 2
ldxar_us %r0 %r1 2
ldxbr_i %r0 %r1 4
ldxar_i %r0 %r1 4
#if __WORDSIZE == 64
ldxbr_ui %r0 %r1 4
ldxar_ui %r0 %r1 4
ldxbr_l %r0 %r1 8
ldxar_l %r0 %r1 8
#endif
ldxbr_f %f0 %r1 4
ldxar_f %f0 %r1 4
ldxbr_d %f0 %r1 8
ldxar_d %f0 %r1 8
stxbr_c %r0 %r1 1
stxar_c %r0 %r1 1
stxbr_s %r0 %r1 2
stxar_s %r0 %r1 2
stxbr_i %r0 %r1 4
stxar_i %r0 %r1 4
#if __WORDSIZE == 64
stxbr_l %r0 %r1 8
stxar_l %r0 %r1 8
#endif
stxbr_f %r0 %f0 4
stxar_f %r0 %f0 4
stxbr_d %r0 %f0 8
stxar_d %r0 %f0 8
ret
epilog
./lightning ldstxbar.tst
L0: %rbx %r13 %r14 %r15 %r12 /* prolog */
ldxbr_c %rax %r10 0x1
ldxar_c %rax %r10 0x1
ldxbr_uc %rax %r10 0x1
ldxar_uc %rax %r10 0x1
ldxbr_s %rax %r10 0x2
ldxar_s %rax %r10 0x2
ldxbr_us %rax %r10 0x2
ldxar_us %rax %r10 0x2
ldxbr_i %rax %r10 0x4
ldxar_i %rax %r10 0x4
ldxbr_ui %rax %r10 0x4
ldxar_ui %rax %r10 0x4
ldxbr_l %rax %r10 0x8
ldxar_l %rax %r10 0x8
ldxbr_f %xmm8 %r10 0x4
ldxar_f %xmm8 %r10 0x4
ldxbr_d %xmm8 %r10 0x8
ldxar_d %xmm8 %r10 0x8
stxbr_c %rax %r10 0x1
stxar_c %rax %r10 0x1
stxbr_s %rax %r10 0x2
stxar_s %rax %r10 0x2
stxbr_i %rax %r10 0x4
stxar_i %rax %r10 0x4
stxbr_l %rax %r10 0x8
stxar_l %rax %r10 0x8
stxbr_f %rax %xmm8 0x4
stxar_f %rax %xmm8 0x4
stxbr_d %rax %xmm8 0x8
stxar_d %rax %xmm8 0x8
prepare
pushargi_l 0x61c388
\__ movi %rdi 0x61c388
finishi 0x7f744c699390
\__ calli 0x7f744c699390
L1: %rbx %r13 %r14 %r15 %r12
ret
L2: %rax %xmm0 /* epilog */
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
L0: %rbx %r13 %r14 %r15 %r12 /* prolog */
0x7f744cc95000 sub $0x18,%rsp
0x7f744cc95004 mov %rbp,(%rsp)
0x7f744cc95008 mov %rsp,%rbp
ldxbr_c %rax %r10 0x1
0x7f744cc9500b add $0x1,%r10
0x7f744cc9500f movsbq (%r10),%rax
ldxar_c %rax %r10 0x1
0x7f744cc95013 movsbq (%r10),%rax
0x7f744cc95017 add $0x1,%r10
ldxbr_uc %rax %r10 0x1
0x7f744cc9501b add $0x1,%r10
0x7f744cc9501f movzbq (%r10),%rax
ldxar_uc %rax %r10 0x1
0x7f744cc95023 movzbq (%r10),%rax
0x7f744cc95027 add $0x1,%r10
ldxbr_s %rax %r10 0x2
0x7f744cc9502b add $0x2,%r10
0x7f744cc9502f movswq (%r10),%rax
ldxar_s %rax %r10 0x2
0x7f744cc95033 movswq (%r10),%rax
0x7f744cc95037 add $0x2,%r10
ldxbr_us %rax %r10 0x2
0x7f744cc9503b add $0x2,%r10
0x7f744cc9503f movzwq (%r10),%rax
ldxar_us %rax %r10 0x2
0x7f744cc95043 movzwq (%r10),%rax
0x7f744cc95047 add $0x2,%r10
ldxbr_i %rax %r10 0x4
0x7f744cc9504b add $0x4,%r10
0x7f744cc9504f movslq (%r10),%rax
ldxar_i %rax %r10 0x4
0x7f744cc95052 movslq (%r10),%rax
0x7f744cc95055 add $0x4,%r10
ldxbr_ui %rax %r10 0x4
0x7f744cc95059 add $0x4,%r10
0x7f744cc9505d movsxd (%r10),%eax
ldxar_ui %rax %r10 0x4
0x7f744cc95060 movsxd (%r10),%eax
0x7f744cc95063 add $0x4,%r10
ldxbr_l %rax %r10 0x8
0x7f744cc95067 add $0x8,%r10
0x7f744cc9506b mov (%r10),%rax
ldxar_l %rax %r10 0x8
0x7f744cc9506e mov (%r10),%rax
0x7f744cc95071 add $0x8,%r10
ldxbr_f %xmm8 %r10 0x4
0x7f744cc95075 add $0x4,%r10
0x7f744cc95079 movss (%r10),%xmm8
ldxar_f %xmm8 %r10 0x4
0x7f744cc9507e movss (%r10),%xmm8
0x7f744cc95083 add $0x4,%r10
ldxbr_d %xmm8 %r10 0x8
0x7f744cc95087 add $0x8,%r10
0x7f744cc9508b movsd (%r10),%xmm8
ldxar_d %xmm8 %r10 0x8
0x7f744cc95090 movsd (%r10),%xmm8
0x7f744cc95095 add $0x8,%r10
stxbr_c %rax %r10 0x1
0x7f744cc95099 add $0x1,%rax
0x7f744cc9509d mov %r10b,(%rax)
stxar_c %rax %r10 0x1
0x7f744cc950a0 mov %r10b,(%rax)
0x7f744cc950a3 add $0x1,%rax
stxbr_s %rax %r10 0x2
0x7f744cc950a7 add $0x2,%rax
0x7f744cc950ab mov %r10w,(%rax)
stxar_s %rax %r10 0x2
0x7f744cc950af mov %r10w,(%rax)
0x7f744cc950b3 add $0x2,%rax
stxbr_i %rax %r10 0x4
0x7f744cc950b7 add $0x4,%rax
0x7f744cc950bb mov %r10d,(%rax)
stxar_i %rax %r10 0x4
0x7f744cc950be mov %r10d,(%rax)
0x7f744cc950c1 add $0x4,%rax
stxbr_l %rax %r10 0x8
0x7f744cc950c5 add $0x8,%rax
0x7f744cc950c9 mov %r10,(%rax)
stxar_l %rax %r10 0x8
0x7f744cc950cc mov %r10,(%rax)
0x7f744cc950cf add $0x8,%rax
stxbr_f %rax %xmm8 0x4
0x7f744cc950d3 add $0x4,%rax
0x7f744cc950d7 movsd %xmm8,(%rax)
stxar_f %rax %xmm8 0x4
0x7f744cc950dc movsd %xmm8,(%rax)
0x7f744cc950e1 add $0x4,%rax
stxbr_d %rax %xmm8 0x8
0x7f744cc950e5 add $0x8,%rax
0x7f744cc950e9 movsd %xmm8,(%rax)
stxar_d %rax %xmm8 0x8
0x7f744cc950ee movsd %xmm8,(%rax)
0x7f744cc950f3 add $0x8,%rax
prepare
pushargi_l 0x61c388
\__ movi %rdi 0x61c388
0x7f744cc950f7 mov $0x61c388,%edi
finishi 0x7f744c699390
\__ calli 0x7f744c699390
0x7f744cc950fc call 0x7f744c699390
L1: %rbx %r13 %r14 %r15 %r12
ret
L2: %rax %xmm0 /* epilog */
0x7f744cc95101 mov %rbp,%rsp
0x7f744cc95104 mov (%rsp),%rbp
0x7f744cc95108 add $0x18,%rsp
0x7f744cc9510c ret
So, the idea is the pattern:
jit_ldxbr_T(R0, R1, DISP), jit_ldxar_T(R0, R1, DISP)
jit_stxbr_T(R0, R1, DISP) and jit_stxar_T(R0, R1, DISP)
where the fallback/generic version does an addi of DISP to the base
register (b)efore or (a)fter the load or store, and otherwise is a normal
jit_ldr_T or jit_str_T.
Thanks!
Paulo
- Idea : load/stores with pre-decrement / post-increment, Paul Cercueil, 2023/12/18
- Re: Idea : load/stores with pre-decrement / post-increment, Paulo César Pereira de Andrade, 2023/12/18
- Re: Idea : load/stores with pre-decrement / post-increment, Marc Nieper-Wißkirchen, 2023/12/18
- Re: Idea : load/stores with pre-decrement / post-increment, Paulo César Pereira de Andrade, 2023/12/18
- Re: Idea : load/stores with pre-decrement / post-increment, Paul Cercueil, 2023/12/18
- Re: Idea : load/stores with pre-decrement / post-increment, Paulo César Pereira de Andrade, 2023/12/18
- Re: Idea : load/stores with pre-decrement / post-increment,
Paulo César Pereira de Andrade <=
- Re: Idea : load/stores with pre-decrement / post-increment, Paul Cercueil, 2023/12/21
- Re: Idea : load/stores with pre-decrement / post-increment, Paulo César Pereira de Andrade, 2023/12/21
- Re: Idea : load/stores with pre-decrement / post-increment, Paul Cercueil, 2023/12/21
- Re: Idea : load/stores with pre-decrement / post-increment, Paulo César Pereira de Andrade, 2023/12/21
- Re: Idea : load/stores with pre-decrement / post-increment, Paulo César Pereira de Andrade, 2023/12/22
Re: Idea : load/stores with pre-decrement / post-increment, Paul Cercueil, 2023/12/18