guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 07/07: Merge remote-tracking branch 'ekaitz/main' into wip-lightening-riscv


From: Ludovic Courtès
Subject: [Guile-commits] 07/07: Merge remote-tracking branch 'ekaitz/main' into wip-lightening-riscv
Date: Tue, 31 Jan 2023 10:13:07 -0500 (EST)

civodul pushed a commit to branch wip-lightening-riscv
in repository guile.

commit a88ebcc1e69d8c8edb197ae83536040f0a253084
Merge: 5b42f8c15 b17ba7b6b
Author: Ludovic Courtès <ludo@gnu.org>
AuthorDate: Mon Jan 30 18:14:08 2023 +0100

    Merge remote-tracking branch 'ekaitz/main' into wip-lightening-riscv

 libguile/lightening/.editorconfig           |    8 +
 libguile/lightening/.gitignore              |    1 +
 libguile/lightening/.gitlab-ci.yml          |   42 +-
 libguile/lightening/lightening.am           |    6 +-
 libguile/lightening/lightening.h            |    2 +
 libguile/lightening/lightening/endian.h     |    2 +
 libguile/lightening/lightening/lightening.c |   33 +
 libguile/lightening/lightening/riscv-cpu.c  | 2464 +++++++++++++++++++++++++++
 libguile/lightening/lightening/riscv-fpu.c  |  858 ++++++++++
 libguile/lightening/lightening/riscv.c      |  327 ++++
 libguile/lightening/lightening/riscv.h      |  194 +++
 libguile/lightening/tests/Makefile          |   20 +-
 12 files changed, 3939 insertions(+), 18 deletions(-)

diff --cc libguile/lightening/.editorconfig
index 000000000,d673aaa2d..d673aaa2d
mode 000000,100644..100644
--- a/libguile/lightening/.editorconfig
+++ b/libguile/lightening/.editorconfig
diff --cc libguile/lightening/lightening/lightening.c
index 0ba609f55,000000000..dfe7a7d2a
mode 100644,000000..100644
--- a/libguile/lightening/lightening/lightening.c
+++ b/libguile/lightening/lightening/lightening.c
@@@ -1,1485 -1,0 +1,1518 @@@
 +/*
 + * Copyright (C) 2012-2020  Free Software Foundation, Inc.
 + *
 + * This file is part of GNU lightning.
 + *
 + * GNU lightning is free software; you can redistribute it and/or modify it
 + * under the terms of the GNU Lesser General Public License as published
 + * by the Free Software Foundation; either version 3, or (at your option)
 + * any later version.
 + *
 + * GNU lightning is distributed in the hope that it will be useful, but
 + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 + * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 + * License for more details.
 + *
 + * Authors:
 + *      Paulo Cesar Pereira de Andrade
 + */
 +
 +#if HAVE_CONFIG_H
 +# include "config.h"
 +#endif
 +
 +#include <assert.h>
 +#include <fcntl.h>
 +#include <limits.h>
 +#include <stdio.h>
 +
 +#include "../lightening.h"
 +
 +#define ASSERT(x) do { if (!(x)) abort(); } while (0)
 +
 +#if defined(__GNUC__)
 +# define maybe_unused           __attribute__ ((unused))
 +# define UNLIKELY(exprn) __builtin_expect(exprn, 0)
 +#else
 +# define maybe_unused           /**/
 +# define UNLIKELY(exprn) exprn
 +#endif
 +
 +union jit_pc  // One code-buffer position, usable as a typed pointer or as an integer.
 +{
 +  uint8_t *uc;   // byte-granular view
 +  uint16_t *us;  // 16-bit view
 +  uint32_t *ui;  // 32-bit view
 +  uint64_t *ul;  // 64-bit view
 +  intptr_t w;    // the position as a signed integer
 +  uintptr_t uw;  // the position as an unsigned integer
 +};
 +
 +#ifdef JIT_NEEDS_LITERAL_POOL
 +struct jit_literal_pool_entry  // One pool slot: a relocation paired with a word-sized value.
 +{
 +  jit_reloc_t reloc;
 +  uintptr_t value;
 +};
 +
 +struct jit_literal_pool  // Header followed by a variable number of entries.
 +{
 +  uint32_t deadline;  // compared against pc as a byte offset from _jit->start
 +  uint32_t size;      // nonzero means entries are pending (checked before emitting the pool)
 +  uint32_t capacity;
 +  struct jit_literal_pool_entry entries[];  // flexible array member
 +};
 +#endif // JIT_NEEDS_LITERAL_POOL
 +
 +struct jit_state  // All mutable state of one code emitter.
 +{
 +  union jit_pc pc;                  // next write position; the emit_* helpers store here and advance
 +  uint8_t *start;                   // buffer base set by jit_begin(); NULL while idle
 +  uint8_t *last_instruction_start;
 +  uint8_t *limit;                   // buf + length, i.e. one past the last writable byte
 +  uint8_t temp_gpr_saved;           // how many temporary GPRs are currently handed out
 +  uint8_t temp_fpr_saved;           // how many temporary FPRs are currently handed out
 +  uint8_t overflow;                 // set to 1 when an emit would pass `limit'
 +  uint8_t emitting_data;            // 1 between jit_begin_data() and jit_end_data()
 +  int frame_size; // Used to know when to align stack.
 +#ifdef JIT_NEEDS_LITERAL_POOL
 +  struct jit_literal_pool *pool;
 +#endif
 +  void* (*alloc)(size_t);           // allocator passed to (or defaulted by) jit_new_state()
 +  void (*free)(void*);              // matching deallocator
 +};
 +
 +static jit_bool_t jit_get_cpu(void);
 +static jit_bool_t jit_init(jit_state_t *);
 +static void jit_flush(void *fptr, void *tptr);
 +static void jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc,
 +                            jit_pointer_t addr);
 +static void* bless_function_pointer(void *ptr);
 +
 +struct abi_arg_iterator;
 +
 +#ifdef JIT_NEEDS_LITERAL_POOL
 +static struct jit_literal_pool* alloc_literal_pool(jit_state_t *_jit,
 +                                                   size_t capacity);
 +static void reset_literal_pool(jit_state_t *_jit,
 +                               struct jit_literal_pool *pool);
 +static jit_bool_t add_pending_literal(jit_state_t *_jit, jit_reloc_t src,
 +                                      uint8_t max_offset_bits);
 +static void remove_pending_literal(jit_state_t *_jit, jit_reloc_t src);
 +static void patch_pending_literal(jit_state_t *_jit, jit_reloc_t src,
 +                                  uintptr_t value);
 +enum guard_pool { GUARD_NEEDED, NO_GUARD_NEEDED };
 +static void emit_literal_pool(jit_state_t *_jit, enum guard_pool guard);
 +
 +static int32_t read_jmp_offset(uint32_t *loc);
 +static int offset_in_jmp_range(ptrdiff_t offset, int flags);
 +static void patch_jmp_offset(uint32_t *loc, ptrdiff_t offset);
 +static void patch_veneer_jmp_offset(uint32_t *loc, ptrdiff_t offset);
 +static int32_t read_jcc_offset(uint32_t *loc);
 +static int offset_in_jcc_range(ptrdiff_t offset, int flags);
 +static void patch_jcc_offset(uint32_t *loc, ptrdiff_t offset);
 +static void patch_veneer_jcc_offset(uint32_t *loc, ptrdiff_t offset);
 +static void patch_veneer(uint32_t *loc, jit_pointer_t addr);
 +static int32_t read_load_from_pool_offset(uint32_t *loc);
 +#endif
 +
 +static jit_bool_t is_fpr_arg(enum jit_operand_abi arg);
 +static jit_bool_t is_gpr_arg(enum jit_operand_abi arg);
 +static void reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc,
 +                                   const jit_operand_t *args);
 +static void next_abi_arg(struct abi_arg_iterator *iter,
 +                         jit_operand_t *arg);
 +
 +/* Library entry point: forwards the verdict of jit_get_cpu().  */
 +jit_bool_t
 +init_jit(void)
 +{
 +  jit_bool_t cpu_ok = jit_get_cpu ();
 +  return cpu_ok;
 +}
 +
 +/* Allocate and zero a jit_state_t.  ALLOC_FN and FREE_FN fall back to
 +   malloc and free when NULL.  Aborts if the allocation fails; returns
 +   NULL, after releasing the state, if jit_init() rejects it.  */
 +jit_state_t *
 +jit_new_state(void* (*alloc_fn)(size_t), void (*free_fn)(void*))
 +{
 +  if (alloc_fn == NULL)
 +    alloc_fn = malloc;
 +  if (free_fn == NULL)
 +    free_fn = free;
 +
 +  jit_state_t *state = alloc_fn (sizeof (*state));
 +  if (state == NULL)
 +    abort ();
 +
 +  memset(state, 0, sizeof (*state));
 +  state->alloc = alloc_fn;
 +  state->free = free_fn;
 +
 +  if (jit_init (state)) {
 +#ifdef JIT_NEEDS_LITERAL_POOL
 +    state->pool = alloc_literal_pool(state, 0);
 +#endif
 +    return state;
 +  }
 +
 +  // Initialization failed: release what exists and report failure.
 +#ifdef JIT_NEEDS_LITERAL_POOL
 +  free_fn (state->pool);
 +#endif
 +  free_fn (state);
 +  return NULL;
 +}
 +
 +/* Release the literal pool (on targets that have one) and then the state
 +   itself, both through the deallocator stored in the state.  */
 +void
 +jit_destroy_state(jit_state_t *_jit)
 +{
 +  void (*release)(void*) = _jit->free;
 +#ifdef JIT_NEEDS_LITERAL_POOL
 +  release (_jit->pool);
 +#endif
 +  release (_jit);
 +}
 +
 +/* Return the current emission address.  While emitting data the raw
 +   pointer is returned; otherwise it goes through
 +   jit_address_to_function_pointer().  Requires an emission in progress.  */
 +jit_pointer_t
 +jit_address(jit_state_t *_jit)
 +{
 +  ASSERT (_jit->start);
 +  jit_pointer_t here = _jit->pc.uc;
 +  if (_jit->emitting_data)
 +    return here;
 +  return jit_address_to_function_pointer (here);
 +}
 +
 +/* Start emitting into BUF, which holds LENGTH bytes.  The state must be
 +   idle (no emission in progress).  Clears the overflow flag, the frame
 +   size and the data-emission flag.  */
 +void
 +jit_begin(jit_state_t *_jit, uint8_t* buf, size_t length)
 +{
 +  ASSERT (!_jit->start);
 +
 +  _jit->pc.uc = _jit->start = buf;
 +  _jit->limit = buf + length;
 +  _jit->overflow = 0;
 +  _jit->frame_size = 0;
 +  _jit->emitting_data = 0;
 +  // Tested with #ifdef, like every other literal-pool guard in this file,
 +  // so the pool is set up whenever the macro is defined at all.
 +#ifdef JIT_NEEDS_LITERAL_POOL
 +  ASSERT(_jit->pool->size == 0);
 +  _jit->pool->deadline = length;
 +#endif
 +}
 +
 +/* Report whether any emit since jit_begin() ran past the buffer limit.
 +   Requires an emission in progress.  */
 +jit_bool_t
 +jit_has_overflow(jit_state_t *_jit)
 +{
 +  ASSERT (_jit->start);
 +  jit_bool_t overflowed = _jit->overflow;
 +  return overflowed;
 +}
 +
 +/* Abandon the emission in progress and return the state to idle, so that
 +   jit_begin() may be called again.  */
 +void
 +jit_reset(jit_state_t *_jit)
 +{
 +  ASSERT (_jit->start);
 +  _jit->limit = NULL;
 +  _jit->start = NULL;
 +  _jit->pc.uc = NULL;
 +  _jit->emitting_data = 0;
 +  _jit->frame_size = 0;
 +  _jit->overflow = 0;
 +#ifdef JIT_NEEDS_LITERAL_POOL
 +  reset_literal_pool(_jit, _jit->pool);
 +#endif
 +}
 +
 +/* Convert a code address into a callable pointer by way of
 +   bless_function_pointer().  */
 +jit_function_pointer_t
 +jit_address_to_function_pointer(jit_pointer_t p)
 +{
 +  jit_function_pointer_t fn = bless_function_pointer(p);
 +  return fn;
 +}
 +
 +/* Finish the emission in progress.  Emits any pending literal pool first.
 +   On overflow returns NULL without resetting the state.  Otherwise
 +   flushes the emitted range with jit_flush(), stores its byte length in
 +   *LENGTH (when LENGTH is non-NULL), returns the state to idle and yields
 +   the start of the code as a function pointer.  */
 +void*
 +jit_end(jit_state_t *_jit, size_t *length)
 +{
 +#ifdef JIT_NEEDS_LITERAL_POOL
 +  if (_jit->pool->size != 0)
 +    emit_literal_pool(_jit, NO_GUARD_NEEDED);
 +#endif
 +
 +  if (_jit->overflow)
 +    return NULL;
 +
 +  uint8_t *code_begin = _jit->start;
 +  uint8_t *code_end = _jit->pc.uc;
 +
 +  ASSERT(code_begin);
 +  ASSERT(code_begin <= code_end);
 +  ASSERT(code_end <= _jit->limit);
 +  ASSERT(!_jit->emitting_data);
 +
 +  jit_flush (code_begin, code_end);
 +
 +  if (length != NULL)
 +    *length = code_end - code_begin;
 +
 +  // Back to idle so that jit_begin() can be called again.
 +  _jit->limit = NULL;
 +  _jit->start = NULL;
 +  _jit->pc.uc = NULL;
 +  _jit->frame_size = 0;
 +  _jit->overflow = 0;
 +#ifdef JIT_NEEDS_LITERAL_POOL
 +  reset_literal_pool(_jit, _jit->pool);
 +#endif
 +
 +  return jit_address_to_function_pointer(code_begin);
 +}
 +
 +/* Nonzero iff X has exactly one bit set.  */
 +static int
 +is_power_of_two (unsigned x)
 +{
 +  if (x == 0)
 +    return 0;
 +  return (x & (x - 1)) == 0;
 +}
 +
 +/* Hand out the next unused temporary GPR and bump the in-use count.
 +   Aborts once the target's JIT_TMPn registers are exhausted.  Pair each
 +   call with unget_temp_gpr().  */
 +static jit_gpr_t
 +get_temp_gpr(jit_state_t *_jit)
 +{
 +  uint8_t slot = _jit->temp_gpr_saved++;
 +  switch(slot)
 +    {
 +    case 0:
 +      return JIT_TMP0;
 +#ifdef JIT_TMP1
 +    case 1:
 +      return JIT_TMP1;
++#endif
++#ifdef JIT_TMP2
++    case 2:
++      return JIT_TMP2;
++#endif
++#ifdef JIT_TMP3
++    case 3:
++      return JIT_TMP3;
++#endif
++#ifdef JIT_TMP4
++    case 4:
++      return JIT_TMP4;
++#endif
++#ifdef JIT_TMP5
++    case 5:
++      return JIT_TMP5;
 +#endif
 +    default:
 +      abort();
 +    }
 +}
 +
 +/* Hand out the single temporary FPR, JIT_FTMP, and bump the in-use
 +   count.  Aborts if it is already taken.  */
 +static jit_fpr_t
 +get_temp_fpr(jit_state_t *_jit)
 +{
 +  if (_jit->temp_fpr_saved++ != 0)
 +    abort();
 +  return JIT_FTMP;
 +}
 +
 +/* Give back the temporary FPR most recently taken with get_temp_fpr().  */
 +static void
 +unget_temp_fpr(jit_state_t *_jit)
 +{
 +  ASSERT(_jit->temp_fpr_saved != 0);
 +  _jit->temp_fpr_saved -= 1;
 +}
 +
 +/* Give back the temporary GPR most recently taken with get_temp_gpr().  */
 +static void
 +unget_temp_gpr(jit_state_t *_jit)
 +{
 +  ASSERT(_jit->temp_gpr_saved != 0);
 +  _jit->temp_gpr_saved -= 1;
 +}
 +
 +/* Append one byte at pc, or set the overflow flag (writing nothing) if
 +   that would pass the buffer limit.  */
 +static inline void emit_u8(jit_state_t *_jit, uint8_t u8) {
 +  if (UNLIKELY(_jit->pc.uc + 1 > _jit->limit)) {
 +    _jit->overflow = 1;
 +    return;
 +  }
 +  *_jit->pc.uc = u8;
 +  _jit->pc.uc++;
 +}
 +
 +/* Append a 16-bit value at pc, or set the overflow flag (writing
 +   nothing) if that would pass the buffer limit.  */
 +static inline void emit_u16(jit_state_t *_jit, uint16_t u16) {
 +  if (UNLIKELY(_jit->pc.us + 1 > (uint16_t*)_jit->limit)) {
 +    _jit->overflow = 1;
 +    return;
 +  }
 +  *_jit->pc.us = u16;
 +  _jit->pc.us++;
 +}
 +
 +/* Append a 32-bit value at pc, or set the overflow flag (writing
 +   nothing) if that would pass the buffer limit.  */
 +static inline void emit_u32(jit_state_t *_jit, uint32_t u32) {
 +  if (UNLIKELY(_jit->pc.ui + 1 > (uint32_t*)_jit->limit)) {
 +    _jit->overflow = 1;
 +    return;
 +  }
 +  *_jit->pc.ui = u32;
 +  _jit->pc.ui++;
 +}
 +
 +#ifdef JIT_NEEDS_LITERAL_POOL
 +/* emit_u16(), then emit the literal pool (with a guard) once pc has
 +   reached the pool deadline.  */
 +static inline void emit_u16_with_pool(jit_state_t *_jit, uint16_t u16) {
 +  emit_u16(_jit, u16);
 +  uint8_t *pool_deadline = _jit->start + _jit->pool->deadline;
 +  if (UNLIKELY(_jit->pc.uc >= pool_deadline))
 +    emit_literal_pool(_jit, GUARD_NEEDED);
 +}
 +
 +/* emit_u32(), then emit the literal pool (with a guard) once pc has
 +   reached the pool deadline.  */
 +static inline void emit_u32_with_pool(jit_state_t *_jit, uint32_t u32) {
 +  emit_u32(_jit, u32);
 +  uint8_t *pool_deadline = _jit->start + _jit->pool->deadline;
 +  if (UNLIKELY(_jit->pc.uc >= pool_deadline))
 +    emit_literal_pool(_jit, GUARD_NEEDED);
 +}
 +#endif
 +
 +/* Append a 64-bit value at pc, or set the overflow flag (writing
 +   nothing) if that would pass the buffer limit.  */
 +static inline void emit_u64(jit_state_t *_jit, uint64_t u64) {
 +  if (UNLIKELY(_jit->pc.ul + 1 > (uint64_t*)_jit->limit)) {
 +    _jit->overflow = 1;
 +    return;
 +  }
 +  *_jit->pc.ul = u64;
 +  _jit->pc.ul++;
 +}
 +
 +/* Append a pointer-sized value: 32 bits when uintptr_t is four bytes
 +   wide, 64 bits otherwise.  */
 +static inline void emit_uintptr(jit_state_t *_jit, uintptr_t u) {
 +  if (sizeof(u) != 4)
 +    emit_u64 (_jit, u);
 +  else
 +    emit_u32 (_jit, u);
 +}
 +
 +/* Build a relocation record for the field at LOC.  INST_START_OFFSET is
 +   how far LOC lies past the start of its instruction, PC_BASE is the
 +   address displacements are measured from (stored as a distance from the
 +   instruction start, which must be in 0..255), and RSH is the right
 +   shift applied to displacements when the relocation is patched.  */
 +static inline jit_reloc_t
 +jit_reloc(jit_state_t *_jit, enum jit_reloc_kind kind,
 +          uint8_t inst_start_offset, uint8_t *loc, uint8_t *pc_base,
 +          uint8_t rsh)
 +{
 +  uint8_t *inst_start = loc - inst_start_offset;
 +
 +  ASSERT(rsh < __WORDSIZE);
 +  ASSERT(pc_base >= inst_start);
 +  ASSERT(pc_base - inst_start < 256);
 +
 +  jit_reloc_t reloc;
 +  reloc.kind = kind;
 +  reloc.inst_start_offset = inst_start_offset;
 +  reloc.pc_base_offset = pc_base - inst_start;
 +  reloc.rsh = rsh;
 +  reloc.offset = loc - _jit->start;
 +
 +  return reloc;
 +}
 +
 +/* Reserve a zeroed pointer-sized slot at pc and return an absolute
 +   relocation that refers to it.  */
 +static inline jit_reloc_t
 +emit_abs_reloc (jit_state_t *_jit, uint8_t inst_start)
 +{
 +  uint8_t *slot = _jit->pc.uc;
 +  emit_uintptr (_jit, 0);
 +  uint8_t *after_slot = _jit->pc.uc;
 +  return jit_reloc(_jit, JIT_RELOC_ABSOLUTE, inst_start, slot, after_slot, 0);
 +}
 +
 +/* Resolve RELOC to the current emission address.  */
 +void
 +jit_patch_here(jit_state_t *_jit, jit_reloc_t reloc)
 +{
 +  jit_pointer_t here = jit_address (_jit);
 +  jit_patch_there (_jit, reloc, here);
 +}
 +
 +/* Resolve RELOC so that it refers to ADDR.  Does nothing once the buffer
 +   has overflowed.  Absolute and REL8/16/32/64 relocations are written in
 +   place; the literal-pool kinds either patch the branch, update the
 +   pending pool entry, or patch an already emitted veneer/pool slot.  If
 +   the patched field is the last thing emitted, jit_try_shorten() is
 +   given a chance to run.  */
 +void
 +jit_patch_there(jit_state_t* _jit, jit_reloc_t reloc, jit_pointer_t addr)
 +{
 +  if (_jit->overflow)
 +    return;
 +  union jit_pc loc;
 +  uint8_t *end;
 +  loc.uc = _jit->start + reloc.offset;
 +  uint8_t *pc_base = loc.uc - reloc.inst_start_offset + reloc.pc_base_offset;
 +  ptrdiff_t diff = (uint8_t*)addr - pc_base;
 +  // Build the low-bit mask in uintptr_t: jit_reloc() admits rsh up to
 +  // __WORDSIZE - 1, and shifting the int constant 1 that far is undefined.
 +  ASSERT(((uintptr_t)diff & (((uintptr_t)1 << reloc.rsh) - 1)) == 0);
 +  diff >>= reloc.rsh;
 +#ifdef JIT_NEEDS_LITERAL_POOL
 +  int flags = reloc.kind & ~JIT_RELOC_MASK;
 +#endif
 +
 +  switch (reloc.kind & JIT_RELOC_MASK)
 +    {
 +    case JIT_RELOC_ABSOLUTE:
 +      if (sizeof(diff) == 4)
 +        *loc.ui = (uintptr_t)addr;
 +      else
 +        *loc.ul = (uintptr_t)addr;
 +      end = loc.uc + sizeof(diff);
 +      break;
 +    case JIT_RELOC_REL8:
 +      ASSERT (INT8_MIN <= diff && diff <= INT8_MAX);
 +      *loc.uc = diff;
 +      end = loc.uc + 1;
 +      break;
 +    case JIT_RELOC_REL16:
 +      ASSERT (INT16_MIN <= diff && diff <= INT16_MAX);
 +      *loc.us = diff;
 +      end = loc.uc + 2;
 +      break;
 +#ifdef JIT_NEEDS_LITERAL_POOL
 +    case JIT_RELOC_JMP_WITH_VENEER: {
 +      int32_t voff = read_jmp_offset(loc.ui);
 +      uint8_t *target = pc_base + (voff << reloc.rsh);
 +      if (target == loc.uc) {
 +        // PC still in range to reify direct branch.
 +        if (offset_in_jmp_range(diff, flags)) {
 +          // Target also in range: reify direct branch.
 +          patch_jmp_offset(loc.ui, diff);
 +          remove_pending_literal(_jit, reloc);
 +        } else {
 +          // Target out of range; branch to veneer.
 +          patch_pending_literal(_jit, reloc, (uintptr_t) addr);
 +        }
 +      } else {
 +        // Already emitted a veneer.  In this case, patch the veneer
 +        // directly.
 +        patch_veneer((uint32_t *) target, addr);
 +      }
 +      return;
 +    }
 +    case JIT_RELOC_JCC_WITH_VENEER: {
 +      int32_t voff = read_jcc_offset(loc.ui);
 +      uint8_t *target = pc_base + (voff << reloc.rsh);
 +      if (target == loc.uc) {
 +        if (offset_in_jcc_range(diff, flags)) {
 +          patch_jcc_offset(loc.ui, diff);
 +          remove_pending_literal(_jit, reloc);
 +        } else {
 +          patch_pending_literal(_jit, reloc, (uintptr_t) addr);
 +        }
 +      } else {
 +        patch_veneer((uint32_t *) target, addr);
 +      }
 +      return;
 +    }
 +    case JIT_RELOC_LOAD_FROM_POOL: {
 +      int32_t voff = read_load_from_pool_offset(loc.ui);
 +      uint8_t *target = pc_base + (voff << reloc.rsh);
 +      if (target == loc.uc) {
 +        patch_pending_literal(_jit, reloc, (uintptr_t) addr);
 +      } else {
 +        *(uintptr_t *) target = (uintptr_t) addr;
 +      }
 +      return;
 +    }
 +#endif
 +    case JIT_RELOC_REL32:
 +      ASSERT (INT32_MIN <= diff && diff <= INT32_MAX);
 +      *loc.ui = diff;
 +      end = loc.uc + 4;
 +      break;
 +    case JIT_RELOC_REL64:
 +      *loc.ul = diff;
 +      end = loc.uc + 8;
 +      break;
 +    default:
 +      abort ();
 +    }
 +
 +  // Only a field that ends exactly at pc is eligible for shortening.
 +  if (end == _jit->pc.uc)
 +    jit_try_shorten (_jit, reloc, addr);
 +}
 +
 +/* Enter data-emission mode.  MAX_SIZE_OR_ZERO bounds the bytes of data
 +   about to be emitted; zero means no bound is given.  Must not already
 +   be emitting data.  */
 +void
 +jit_begin_data(jit_state_t *j, size_t max_size_or_zero)
 +{
 +#ifdef JIT_NEEDS_LITERAL_POOL
 +  if (j->pool->size != 0) {
 +    // Data never adds pool entries, so the only question is whether the
 +    // data could run past the pool deadline.  With no bound given, emit
 +    // the pool unconditionally.
 +    uint8_t *pool_deadline = j->start + j->pool->deadline;
 +    int must_flush = max_size_or_zero == 0
 +      || j->pc.uc + max_size_or_zero >= pool_deadline;
 +    if (must_flush)
 +      emit_literal_pool(j, NO_GUARD_NEEDED);
 +  }
 +#endif
 +
 +  ASSERT(!j->emitting_data);
 +  j->emitting_data = 1;
 +}
 +
 +void
 +jit_end_data(jit_state_t *j)  // Leave data-emission mode.
 +{
 +  ASSERT(j->emitting_data);  // must follow a jit_begin_data()
 +  j->emitting_data = 0;
 +}
 +
 +void
 +jit_emit_u8(jit_state_t *j, uint8_t u8)  // Public data emitter: append one byte.
 +{
 +  ASSERT(j->emitting_data);  // only valid while in data-emission mode
 +  emit_u8(j, u8);            // sets j->overflow instead of writing past the limit
 +}
 +
 +void
 +jit_emit_u16(jit_state_t *j, uint16_t u16)  // Public data emitter: append a 16-bit value.
 +{
 +  ASSERT(j->emitting_data);  // only valid while in data-emission mode
 +  emit_u16(j, u16);          // sets j->overflow instead of writing past the limit
 +}
 +
 +void
 +jit_emit_u32(jit_state_t *j, uint32_t u32)  // Public data emitter: append a 32-bit value.
 +{
 +  ASSERT(j->emitting_data);  // only valid while in data-emission mode
 +  emit_u32(j, u32);          // sets j->overflow instead of writing past the limit
 +}
 +
 +void
 +jit_emit_u64(jit_state_t *j, uint64_t u64)  // Public data emitter: append a 64-bit value.
 +{
 +  ASSERT(j->emitting_data);  // only valid while in data-emission mode
 +  emit_u64(j, u64);          // sets j->overflow instead of writing past the limit
 +}
 +
 +/* Reserve a pointer-sized data slot and return an absolute relocation
 +   for it, to be resolved later with jit_patch_here()/jit_patch_there().
 +   Only valid while in data-emission mode.  */
 +jit_reloc_t
 +jit_emit_addr(jit_state_t *j)
 +{
 +  ASSERT(j->emitting_data);
 +  // A data slot is not part of an instruction, so its start offset is 0.
 +  return emit_abs_reloc(j, 0);
 +}
 +
 +#if defined(__i386__) || defined(__x86_64__)
 +# include "x86.c"
 +#elif defined(__mips__)
 +# include "mips.c"
 +#elif defined(__arm__)
 +# include "arm.c"
 +#elif defined(__ppc__) || defined(__powerpc__)
 +# include "ppc.c"
 +#elif defined(__aarch64__)
 +# include "aarch64.c"
 +#elif defined(__s390__) || defined(__s390x__)
 +# include "s390.c"
++#elif defined(__riscv__) || defined(__riscv)
++# include "riscv.c"
 +#endif
 +
 +#define JIT_IMPL_0(stem, ret) \
 +  ret jit_##stem (jit_state_t* _jit) \
 +  {                                  \
 +    return stem(_jit);            \
 +  }
 +#define JIT_IMPL_1(stem, ret, ta)                 \
 +  ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a) \
 +  {                                               \
 +    return stem(_jit, unwrap_##ta(a));         \
 +  }
 +#define JIT_IMPL_2(stem, ret, ta, tb)                             \
 +  ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b) \
 +  {                                                               \
 +    return stem(_jit, unwrap_##ta(a), unwrap_##tb(b));         \
 +  }
 +#define JIT_IMPL_3(stem, ret, ta, tb, tc)                               \
 +  ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, 
jit_##tc##_t c) \
 +  {                                                                     \
 +    return stem(_jit, unwrap_##ta(a), unwrap_##tb(b), unwrap_##tc(c)); \
 +  }
 +#define JIT_IMPL_4(stem, ret, ta, tb, tc, td)                           \
 +  ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, 
jit_##tc##_t c, jit_##td##_t d) \
 +  {                                                                     \
 +    return stem(_jit, unwrap_##ta(a), unwrap_##tb(b), unwrap_##tc(c), 
unwrap_##td(d)); \
 +  }
 +
 +#define JIT_IMPL_RFF__(stem) JIT_IMPL_2(stem, jit_reloc_t, fpr, fpr)  /* kind = return code (R: jit_reloc_t, _: void) followed by up to four operand codes */
 +#define JIT_IMPL_RGG__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, gpr)
 +#define JIT_IMPL_RG___(stem) JIT_IMPL_1(stem, jit_reloc_t, gpr)
 +#define JIT_IMPL_RGi__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, imm)
 +#define JIT_IMPL_RGu__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, uimm)
 +#define JIT_IMPL_R____(stem) JIT_IMPL_0(stem, jit_reloc_t)
 +#define JIT_IMPL__FFF_(stem) JIT_IMPL_3(stem, void, fpr, fpr, fpr)
 +#define JIT_IMPL__FF__(stem) JIT_IMPL_2(stem, void, fpr, fpr)
 +#define JIT_IMPL__FGG_(stem) JIT_IMPL_3(stem, void, fpr, gpr, gpr)
 +#define JIT_IMPL__FG__(stem) JIT_IMPL_2(stem, void, fpr, gpr)
 +#define JIT_IMPL__FGo_(stem) JIT_IMPL_3(stem, void, fpr, gpr, off)
 +#define JIT_IMPL__F___(stem) JIT_IMPL_1(stem, void, fpr)
 +#define JIT_IMPL__Fd__(stem) JIT_IMPL_2(stem, void, fpr, float64)
 +#define JIT_IMPL__Ff__(stem) JIT_IMPL_2(stem, void, fpr, float32)
 +#define JIT_IMPL__Fp__(stem) JIT_IMPL_2(stem, void, fpr, pointer)
 +#define JIT_IMPL__GF__(stem) JIT_IMPL_2(stem, void, gpr, fpr)
 +#define JIT_IMPL__GGF_(stem) JIT_IMPL_3(stem, void, gpr, gpr, fpr)
 +#define JIT_IMPL__GGGG(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, gpr)
 +#define JIT_IMPL__GGG_(stem) JIT_IMPL_3(stem, void, gpr, gpr, gpr)
 +#define JIT_IMPL__GGGi(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, imm)
 +#define JIT_IMPL__GGGu(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, uimm)
 +#define JIT_IMPL__GG__(stem) JIT_IMPL_2(stem, void, gpr, gpr)
 +#define JIT_IMPL__GGi_(stem) JIT_IMPL_3(stem, void, gpr, gpr, imm)
 +#define JIT_IMPL__GGo_(stem) JIT_IMPL_3(stem, void, gpr, gpr, off)
 +#define JIT_IMPL__GGu_(stem) JIT_IMPL_3(stem, void, gpr, gpr, uimm)
 +#define JIT_IMPL__G___(stem) JIT_IMPL_1(stem, void, gpr)
 +#define JIT_IMPL__Gi__(stem) JIT_IMPL_2(stem, void, gpr, imm)
 +#define JIT_IMPL__Gp__(stem) JIT_IMPL_2(stem, void, gpr, pointer)
 +#define JIT_IMPL______(stem) JIT_IMPL_0(stem, void)
 +#define JIT_IMPL__i___(stem) JIT_IMPL_1(stem, void, imm)
 +#define JIT_IMPL__oGF_(stem) JIT_IMPL_3(stem, void, off, gpr, fpr)
 +#define JIT_IMPL__oGG_(stem) JIT_IMPL_3(stem, void, off, gpr, gpr)
 +#define JIT_IMPL__pF__(stem) JIT_IMPL_2(stem, void, pointer, fpr)
 +#define JIT_IMPL__pG__(stem) JIT_IMPL_2(stem, void, pointer, gpr)
 +#define JIT_IMPL__p___(stem) JIT_IMPL_1(stem, void, pointer)
 +
 +#define unwrap_gpr(r) jit_gpr_regno(r)  /* unwrap_<type>: converts a public operand into what the backend function receives */
 +#define unwrap_fpr(r) jit_fpr_regno(r)
 +#define unwrap_imm(i) i
 +#define unwrap_uimm(u) u
 +#define unwrap_off(o) o
 +#define unwrap_pointer(p) ((uintptr_t) p)
 +#define unwrap_float32(f) f
 +#define unwrap_float64(d) d
 +
 +#define IMPL_INSTRUCTION(kind, stem) JIT_IMPL_##kind(stem)  /* applied to every (kind, stem) pair listed by FOR_EACH_INSTRUCTION */
 +FOR_EACH_INSTRUCTION(IMPL_INSTRUCTION)
 +#undef IMPL_INSTRUCTION
 +
 +/* Pad with nops until the emission address is a multiple of ALIGN, which
 +   must be a power of two.  */
 +void
 +jit_align(jit_state_t *_jit, unsigned align)
 +{
 +  ASSERT (is_power_of_two (align));
 +  uintptr_t here = _jit->pc.uw;
 +  // Widen ALIGN before complementing it.  ~(align - 1) evaluated in
 +  // `unsigned' zero-extends when converted to uintptr_t, which would
 +  // clear the upper address bits wherever pointers are wider than
 +  // unsigned.
 +  uintptr_t mask = (uintptr_t) align - 1;
 +  uintptr_t there = (here + mask) & ~mask;
 +  if (there - here)
 +    nop(_jit, there - here);
 +}
 +
 +/* Nonzero iff an operand of ABI type ARG is floating-point (FLOAT or
 +   DOUBLE).  Aborts on an ABI value that is not listed.  */
 +static jit_bool_t
 +is_fpr_arg(enum jit_operand_abi arg)
 +{
 +  switch (arg)
 +    {
 +    case JIT_OPERAND_ABI_FLOAT:
 +    case JIT_OPERAND_ABI_DOUBLE:
 +      return 1;
 +    case JIT_OPERAND_ABI_POINTER:
 +    case JIT_OPERAND_ABI_INT64:
 +    case JIT_OPERAND_ABI_UINT64:
 +    case JIT_OPERAND_ABI_INT32:
 +    case JIT_OPERAND_ABI_UINT32:
 +    case JIT_OPERAND_ABI_INT16:
 +    case JIT_OPERAND_ABI_UINT16:
 +    case JIT_OPERAND_ABI_INT8:
 +    case JIT_OPERAND_ABI_UINT8:
 +      return 0;
 +    default:
 +      abort();
 +    }
 +}
 +
 +/* Complement of is_fpr_arg(): nonzero for every integer or pointer ABI.  */
 +static jit_bool_t
 +is_gpr_arg(enum jit_operand_abi arg)
 +{
 +  return is_fpr_arg(arg) ? 0 : 1;
 +}
 +
 +/* Load the immediate IMM into DST after checking that it fits the range
 +   of the operand type ABI.  Aborts on a range violation or on an ABI not
 +   handled for this word size.  */
 +static void
 +abi_imm_to_gpr(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t dst,
 +               intptr_t imm)
 +{
 +  switch (abi) {
 +  case JIT_OPERAND_ABI_UINT8:
 +    ASSERT(0 <= imm && imm <= UINT8_MAX);
 +    break;
 +  case JIT_OPERAND_ABI_INT8:
 +    ASSERT(INT8_MIN <= imm && imm <= INT8_MAX);
 +    break;
 +  case JIT_OPERAND_ABI_UINT16:
 +    ASSERT(0 <= imm && imm <= UINT16_MAX);
 +    break;
 +  case JIT_OPERAND_ABI_INT16:
 +    ASSERT(INT16_MIN <= imm && imm <= INT16_MAX);
 +    break;
 +  case JIT_OPERAND_ABI_UINT32:
 +    ASSERT(0 <= imm && imm <= UINT32_MAX);
 +    break;
 +  case JIT_OPERAND_ABI_INT32:
 +    ASSERT(INT32_MIN <= imm && imm <= INT32_MAX);
 +    break;
 +  // Word-sized values need no range check.
 +#if __WORDSIZE > 32
 +  case JIT_OPERAND_ABI_UINT64:
 +  case JIT_OPERAND_ABI_INT64:
 +#endif
 +  case JIT_OPERAND_ABI_POINTER:
 +    break;
 +  default:
 +    abort();
 +  }
 +  jit_movi (_jit, dst, imm);
 +}
 +
 +/* Store the GPR SRC, holding an operand of type ABI, to BASE + OFFSET.
 +   Sub-word operands are written with the same jit_stxi store as 32-bit
 +   ones.  Aborts on an ABI not handled for this word size.  */
 +static void
 +abi_gpr_to_mem(jit_state_t *_jit, enum jit_operand_abi abi,
 +               jit_gpr_t base, ptrdiff_t offset, jit_gpr_t src)
 +{
 +  // Invariant: GPR memory destination operand sizes are rounded up to words.
 +  // True for ARM, AArch64, IA32, and X86-64.  Some ABIs expect to be able to
 +  // load operands from the stack via a full-word read, so we need to make
 +  // sure we don't leave garbage in the high bytes of (for example) the stack
 +  // slot for a uint8_t arg.
 +  switch (abi) {
 +  case JIT_OPERAND_ABI_UINT8:
 +  case JIT_OPERAND_ABI_INT8:
 +    jit_stxi(_jit, offset, base, src);
 +    break;
 +  case JIT_OPERAND_ABI_UINT16:
 +  case JIT_OPERAND_ABI_INT16:
 +    jit_stxi(_jit, offset, base, src);
 +    break;
 +  case JIT_OPERAND_ABI_UINT32:
 +  case JIT_OPERAND_ABI_INT32:
 +#if __WORDSIZE == 32
 +  case JIT_OPERAND_ABI_POINTER:
 +#endif
 +    jit_stxi(_jit, offset, base, src);
 +    break;
 +#if __WORDSIZE == 64
 +  case JIT_OPERAND_ABI_UINT64:
 +  case JIT_OPERAND_ABI_INT64:
 +  case JIT_OPERAND_ABI_POINTER:
 +    jit_stxi_l(_jit, offset, base, src);
 +    break;
 +#endif
 +  default:
 +    abort();
 +  }
 +}
 +
 +/* Store the FPR SRC to BASE + OFFSET as a float or a double according to
 +   ABI; aborts for any other ABI.  */
 +static void
 +abi_fpr_to_mem(jit_state_t *_jit, enum jit_operand_abi abi,
 +               jit_gpr_t base, ptrdiff_t offset, jit_fpr_t src)
 +{
 +  if (abi == JIT_OPERAND_ABI_FLOAT)
 +    jit_stxi_f(_jit, offset, base, src);
 +  else if (abi == JIT_OPERAND_ABI_DOUBLE)
 +    jit_stxi_d(_jit, offset, base, src);
 +  else
 +    abort();
 +}
 +
 +static void
 +abi_mem_to_gpr(jit_state_t *_jit, enum jit_operand_abi abi,
 +               jit_gpr_t dst, jit_gpr_t base, ptrdiff_t offset)  // Load an ABI-typed operand from base+offset into dst.
 +{
 +  switch (abi) {  // one load flavor per operand ABI
 +  case JIT_OPERAND_ABI_UINT8:
 +    jit_ldxi_uc(_jit, dst, base, offset);
 +    break;
 +  case JIT_OPERAND_ABI_INT8:
 +    jit_ldxi_c(_jit, dst, base, offset);
 +    break;
 +  case JIT_OPERAND_ABI_UINT16:
 +    jit_ldxi_us(_jit, dst, base, offset);
 +    break;
 +  case JIT_OPERAND_ABI_INT16:
 +    jit_ldxi_s(_jit, dst, base, offset);
 +    break;
 +#if __WORDSIZE == 32
 +  case JIT_OPERAND_ABI_UINT32:  // on 32-bit words these share the INT32 load below
 +  case JIT_OPERAND_ABI_POINTER:
 +#endif
 +  case JIT_OPERAND_ABI_INT32:
 +    jit_ldxi_i(_jit, dst, base, offset);
 +    break;
 +#if __WORDSIZE == 64
 +  case JIT_OPERAND_ABI_UINT32:  // on 64-bit words UINT32 gets its own jit_ldxi_ui load
 +    jit_ldxi_ui(_jit, dst, base, offset);
 +    break;
 +  case JIT_OPERAND_ABI_UINT64:
 +  case JIT_OPERAND_ABI_POINTER:
 +  case JIT_OPERAND_ABI_INT64:
 +    jit_ldxi_l(_jit, dst, base, offset);
 +    break;
 +#endif
 +  default:
 +    abort();  // any ABI without a case above for this word size
 +  }
 +}
 +
 +/* Load a float or a double, according to ABI, from BASE + OFFSET into
 +   the FPR DST; aborts for any other ABI.  */
 +static void
 +abi_mem_to_fpr(jit_state_t *_jit, enum jit_operand_abi abi,
 +               jit_fpr_t dst, jit_gpr_t base, ptrdiff_t offset)
 +{
 +  if (abi == JIT_OPERAND_ABI_FLOAT)
 +    jit_ldxi_f(_jit, dst, base, offset);
 +  else if (abi == JIT_OPERAND_ABI_DOUBLE)
 +    jit_ldxi_d(_jit, dst, base, offset);
 +  else
 +    abort();
 +}
 +
 +/* Store the immediate IMM, of integer or pointer type ABI, to
 +   BASE + OFFSET by staging it in a temporary GPR.  */
 +static void
 +abi_imm_to_mem(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t base,
 +               ptrdiff_t offset, jit_imm_t imm)
 +{
 +  ASSERT(is_gpr_arg(abi));
 +
 +  jit_gpr_t scratch = get_temp_gpr(_jit);
 +  abi_imm_to_gpr(_jit, abi, scratch, imm);
 +  abi_gpr_to_mem(_jit, abi, base, offset, scratch);
 +  unget_temp_gpr(_jit);
 +}
 +
 +/* Copy an operand of type ABI from SRC_BASE + SRC_OFFSET to
 +   BASE + OFFSET, staging it in a temporary register of the matching
 +   class.  */
 +static void
 +abi_mem_to_mem(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t base,
 +               ptrdiff_t offset, jit_gpr_t src_base, ptrdiff_t src_offset)
 +{
 +  if (is_fpr_arg (abi)) {
 +    jit_fpr_t scratch = get_temp_fpr(_jit);
 +    abi_mem_to_fpr(_jit, abi, scratch, src_base, src_offset);
 +    abi_fpr_to_mem(_jit, abi, base, offset, scratch);
 +    unget_temp_fpr(_jit);
 +    return;
 +  }
 +
 +  jit_gpr_t scratch = get_temp_gpr(_jit);
 +  abi_mem_to_gpr(_jit, abi, scratch, src_base, src_offset);
 +  abi_gpr_to_mem(_jit, abi, base, offset, scratch);
 +  unget_temp_gpr(_jit);
 +}
 +
 +#define MOVE_KIND(a, b) ((((int) a) << 4) | ((int) b))  /* packs two operand kinds into one int: A above bit 4, B in the low bits */
 +
 +#define MOVE_KIND_ENUM(a, b) \
 +  MOVE_##a##_TO_##b = MOVE_KIND(JIT_OPERAND_KIND_##a, JIT_OPERAND_KIND_##b)
 +enum move_kind {  /* the source-kind/destination-kind pairs that move_operand() handles */
 +  MOVE_KIND_ENUM(IMM, GPR),
 +  MOVE_KIND_ENUM(GPR, GPR),
 +  MOVE_KIND_ENUM(MEM, GPR),
 +  MOVE_KIND_ENUM(FPR, FPR),
 +  MOVE_KIND_ENUM(MEM, FPR),
 +  MOVE_KIND_ENUM(IMM, MEM),
 +  MOVE_KIND_ENUM(GPR, MEM),
 +  MOVE_KIND_ENUM(FPR, MEM),
 +  MOVE_KIND_ENUM(MEM, MEM)
 +};
 +#undef MOVE_KIND_ENUM
 +
 +/* Emit the code that copies operand SRC into operand DST, dispatching on
 +   the (source kind, destination kind) pair.  Any addend on SRC is not
 +   applied here.  Aborts for a pair that is not listed in enum move_kind.
 +
 +   Each case calls its helper and then breaks: ISO C does not permit
 +   `return expr;' in a function returning void, even when EXPR is itself
 +   of type void.  */
 +static void
 +move_operand(jit_state_t *_jit, jit_operand_t dst, jit_operand_t src)
 +{
 +  switch (MOVE_KIND (src.kind, dst.kind)) {
 +  case MOVE_IMM_TO_GPR:
 +    abi_imm_to_gpr(_jit, src.abi, dst.loc.gpr.gpr, src.loc.imm);
 +    break;
 +
 +  case MOVE_GPR_TO_GPR:
 +    jit_movr(_jit, dst.loc.gpr.gpr, src.loc.gpr.gpr);
 +    break;
 +
 +  case MOVE_MEM_TO_GPR:
 +    abi_mem_to_gpr(_jit, src.abi, dst.loc.gpr.gpr, src.loc.mem.base,
 +                   src.loc.mem.offset);
 +    break;
 +
 +  case MOVE_FPR_TO_FPR:
 +    ASSERT(src.abi == dst.abi);
 +    if (src.abi == JIT_OPERAND_ABI_DOUBLE)
 +      jit_movr_d(_jit, dst.loc.fpr, src.loc.fpr);
 +    else
 +      jit_movr_f(_jit, dst.loc.fpr, src.loc.fpr);
 +    break;
 +
 +  case MOVE_MEM_TO_FPR:
 +    abi_mem_to_fpr(_jit, src.abi, dst.loc.fpr, src.loc.mem.base,
 +                   src.loc.mem.offset);
 +    break;
 +
 +  case MOVE_IMM_TO_MEM:
 +    abi_imm_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
 +                   src.loc.imm);
 +    break;
 +
 +  case MOVE_GPR_TO_MEM:
 +    abi_gpr_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
 +                   src.loc.gpr.gpr);
 +    break;
 +
 +  case MOVE_FPR_TO_MEM:
 +    abi_fpr_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
 +                   src.loc.fpr);
 +    break;
 +
 +  case MOVE_MEM_TO_MEM:
 +    abi_mem_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
 +                   src.loc.mem.base, src.loc.mem.offset);
 +    break;
 +
 +  default:
 +    abort();
 +  }
 +}
 +
 +// A direct transliteration of "Tilting at windmills with Coq: formal
 +// verification of a compilation algorithm for parallel moves" by
 +// Laurence Rideau, Bernard Paul Serpette, and Xavier Leroy:
 +// https://xavierleroy.org/publi/parallel-move.pdf
 +
 +enum move_status { TO_MOVE, BEING_MOVED, MOVED };
 +
 +/* Nonzero iff SRC and DST name the same location: the same GPR, the same
 +   FPR, or the same base register and offset in memory.  Operands of
 +   different kinds never match.  */
 +static inline int
 +already_in_place(jit_operand_t src, jit_operand_t dst)
 +{
 +  int pair = MOVE_KIND(src.kind, dst.kind);
 +
 +  if (pair == MOVE_GPR_TO_GPR)
 +    return jit_same_gprs (src.loc.gpr.gpr, dst.loc.gpr.gpr);
 +  if (pair == MOVE_FPR_TO_FPR)
 +    return jit_same_fprs (src.loc.fpr, dst.loc.fpr);
 +  if (pair == MOVE_MEM_TO_MEM)
 +    return jit_same_gprs (src.loc.mem.base, dst.loc.mem.base) &&
 +      src.loc.mem.offset == dst.loc.mem.offset;
 +  return 0;
 +}
 +
 +/* Nonzero iff writing DST would destroy something SRC still needs:
 +   either they are the same location, or SRC is a memory operand whose
 +   base register is the GPR DST.  */
 +static inline int
 +write_would_clobber(jit_operand_t src, jit_operand_t dst)
 +{
 +  if (already_in_place (src, dst))
 +    return 1;
 +
 +  if (MOVE_KIND(src.kind, dst.kind) != MOVE_MEM_TO_GPR)
 +    return 0;
 +
 +  return jit_same_gprs(src.loc.mem.base, dst.loc.gpr.gpr);
 +}
 +
 +/* Return the addend carried by a GPR or MEM operand; aborts for any
 +   other operand kind.  */
 +static inline ptrdiff_t
 +operand_addend(jit_operand_t op)
 +{
 +  if (op.kind == JIT_OPERAND_KIND_GPR)
 +    return op.loc.gpr.addend;
 +  if (op.kind == JIT_OPERAND_KIND_MEM)
 +    return op.loc.mem.addend;
 +  abort();
 +}
 +
 +/* Perform move I of the parallel move DST[k] <- SRC[k], k < ARGC, first
 +   dealing with every source that writing DST[I] would clobber: a source
 +   whose move has not started is moved first (recursively); a source
 +   whose move is in progress, i.e. a cycle, is saved to a temporary
 +   register and SRC[j] is redirected to that temporary.  STATUS tracks
 +   the state of each move.  */
 +static void
 +move_one(jit_state_t *_jit, jit_operand_t *dst, jit_operand_t *src,
 +         size_t argc, enum move_status *status, size_t i)
 +{
 +  int tmp_gpr = 0, tmp_fpr = 0;
 +
 +  if (already_in_place(src[i], dst[i]))
 +    return;
 +
 +  status[i] = BEING_MOVED;
 +  for (size_t j = 0; j < argc; j++) {
 +    if (write_would_clobber(src[j], dst[i])) {
 +      switch (status[j]) {
 +      case TO_MOVE:
 +        move_one(_jit, dst, src, argc, status, j);
 +        break;
 +      case BEING_MOVED: {
 +        jit_operand_t tmp;
 +        /* Choose the temporary's register class from the operand's ABI
 +           type.  is_fpr_arg() switches on enum jit_operand_abi, so it
 +           must be given src[j].abi, not the operand kind.  */
 +        if (is_fpr_arg (src[j].abi)) {
 +          tmp_fpr = 1;
 +          tmp = jit_operand_fpr(src[j].abi, get_temp_fpr(_jit));
 +        } else {
 +          tmp_gpr = 1;
 +          /* Preserve addend, if any, from source operand, to be applied
 +             at the end.  */
 +          tmp = jit_operand_gpr_with_addend(src[j].abi, get_temp_gpr(_jit),
 +                                            operand_addend(src[j]));
 +        }
 +        move_operand (_jit, tmp, src[j]);
 +        src[j] = tmp;
 +        break;
 +      }
 +      case MOVED:
 +        break;
 +      default:
 +        abort ();
 +      }
 +    }
 +  }
 +
 +  move_operand (_jit, dst[i], src[i]);
 +  status[i] = MOVED;
 +  if (tmp_gpr)
 +    unget_temp_gpr(_jit);
 +  else if (tmp_fpr)
 +    unget_temp_fpr(_jit);
 +}
 +
 +/* After SRC has been moved into DST, add SRC's addend (if nonzero) to
 +   the value now held in DST.  A GPR destination is adjusted in place; a
 +   memory destination is loaded into a temporary GPR, adjusted and stored
 +   back.  Other source/destination pairs are left alone.  */
 +static void
 +apply_addend(jit_state_t *_jit, jit_operand_t dst, jit_operand_t src)
 +{
 +  switch (MOVE_KIND(src.kind, dst.kind)) {
 +  case MOVE_GPR_TO_GPR:
 +  case MOVE_MEM_TO_GPR:
 +    if (operand_addend(src))
 +      jit_addi(_jit, dst.loc.gpr.gpr, dst.loc.gpr.gpr, operand_addend(src));
 +    break;
 +  case MOVE_GPR_TO_MEM:
 +  case MOVE_MEM_TO_MEM:
 +    if (operand_addend(src)) {
 +      jit_gpr_t tmp = get_temp_gpr(_jit);
 +      abi_mem_to_gpr(_jit, dst.abi, tmp, dst.loc.mem.base, dst.loc.mem.offset);
 +      jit_addi(_jit, tmp, tmp, operand_addend(src));
 +      abi_gpr_to_mem(_jit, dst.abi, dst.loc.mem.base, dst.loc.mem.offset, tmp);
 +      unget_temp_gpr(_jit);
 +    }
 +    break;
 +  default:
 +    break;
 +  }
 +}
 +
 +/* Emit code that moves each SRC[i] into DST[i] as a parallel move: the
 +   moves are ordered (and temporaries used) so that no source is
 +   overwritten before it has been read.  Addends carried by source
 +   operands are applied once all moves have been emitted.  Entries of
 +   SRC may be rewritten to refer to temporary registers.
 +
 +   Preconditions: No dest operand is IMM.  No dest operand aliases
 +   another dest operand.  No dest MEM operand uses a base register which
 +   is used as a dest GPR.  No dst operand has an addend.  The registers
 +   returned by get_temp_gpr and get_temp_fpr do not appear in source or
 +   dest args.  */
 +void
 +jit_move_operands(jit_state_t *_jit, jit_operand_t *dst, jit_operand_t *src,
 +                  size_t argc)
 +{
 +  // Nothing to move.  Returning here also avoids declaring a
 +  // zero-length variable-length array below, which C does not permit.
 +  if (argc == 0)
 +    return;
 +
 +  // Check preconditions, except the condition about tmp registers.
 +  {
 +    uint64_t src_gprs = 0;
 +    uint64_t dst_gprs = 0;
 +    uint64_t dst_fprs = 0;
 +    uint64_t dst_mem_base_gprs = 0;
 +    for (size_t i = 0; i < argc; i++) {
 +      switch (src[i].kind) {
 +      case JIT_OPERAND_KIND_GPR:
 +        src_gprs |= 1ULL << jit_gpr_regno(src[i].loc.gpr.gpr);
 +        break;
 +      case JIT_OPERAND_KIND_FPR:
 +      case JIT_OPERAND_KIND_IMM:
 +      case JIT_OPERAND_KIND_MEM:
 +        break;
 +      default:
 +        abort();
 +      }
 +      switch (dst[i].kind) {
 +      case JIT_OPERAND_KIND_GPR: {
 +        ASSERT(dst[i].loc.gpr.addend == 0);
 +        uint64_t bit = 1ULL << jit_gpr_regno(dst[i].loc.gpr.gpr);
 +        // Each destination GPR may appear only once.
 +        ASSERT((dst_gprs & bit) == 0);
 +        dst_gprs |= bit;
 +        break;
 +      }
 +      case JIT_OPERAND_KIND_FPR: {
 +        uint64_t bit = 1ULL << jit_fpr_regno(dst[i].loc.fpr);
 +        // Each destination FPR may appear only once.
 +        ASSERT((dst_fprs & bit) == 0);
 +        dst_fprs |= bit;
 +        break;
 +      }
 +      case JIT_OPERAND_KIND_MEM: {
 +        ASSERT(dst[i].loc.mem.addend == 0);
 +        uint64_t bit = 1ULL << jit_gpr_regno(dst[i].loc.mem.base);
 +        dst_mem_base_gprs |= bit;
 +        break;
 +      }
 +      case JIT_OPERAND_KIND_IMM:
 +      default:
 +        abort();
 +        break;
 +      }
 +    }
 +    // A base register of a memory destination must not also be a
 +    // source GPR or a destination GPR.
 +    ASSERT(((src_gprs | dst_gprs) & dst_mem_base_gprs) == 0);
 +  }
 +
 +  enum move_status status[argc];
 +  for (size_t i = 0; i < argc; i++)
 +    status[i] = TO_MOVE;
 +  for (size_t i = 0; i < argc; i++)
 +    if (status[i] == TO_MOVE)
 +      move_one(_jit, dst, src, argc, status, i);
 +
 +  // Apply addends at the end.  We could do it earlier in some cases but
 +  // at least at the end we know that an in-place increment of one
 +  // operand won't alias another.
 +  for (size_t i = 0; i < argc; i++)
 +    apply_addend(_jit, dst[i], src[i]);
 +}
 +
 +/* Grow the current frame by at least EXPAND bytes, rounding the new
 +   frame size up to a multiple of jit_stack_alignment ().  Emits the
 +   subtraction from SP when the adjustment is non-zero, records the new
 +   frame size, and returns the number of bytes subtracted from SP.  */
 +size_t
 +jit_align_stack(jit_state_t *_jit, size_t expand)
 +{
 +  const size_t old_size = _jit->frame_size;
 +  const size_t wanted = old_size + expand;
 +  // Rounding with a mask assumes the alignment is a power of two.  This
 +  // isn't really a principle but it does work for AArch32, AArch64 and
 +  // x86-64, where the stack is aligned to double-word boundaries.
 +  const size_t mask = jit_stack_alignment () - 1;
 +  const size_t rounded = (wanted + mask) & ~mask;
 +  const size_t delta = rounded - old_size;
 +
 +  if (delta != 0)
 +    jit_subi (_jit, JIT_SP, JIT_SP, delta);
 +
 +  _jit->frame_size = rounded;
 +  return delta;
 +}
 +
 +/* Give back DIFF bytes of stack: emit an addition of DIFF to SP and
 +   reduce the recorded frame size accordingly.  A DIFF of zero emits
 +   nothing and leaves the frame size as it is.  */
 +void
 +jit_shrink_stack(jit_state_t *_jit, size_t diff)
 +{
 +  if (diff == 0)
 +    return;
 +
 +  jit_addi (_jit, JIT_SP, JIT_SP, diff);
 +  _jit->frame_size -= diff;
 +}
 +
 +/* GPRs listed by the backend's JIT_PLATFORM_CALLEE_SAVE_GPRS macro.
 +   jit_enter_jit_abi stores every one of them and jit_leave_jit_abi
 +   reloads them, regardless of how many user registers are requested.  */
 +static const jit_gpr_t platform_callee_save_gprs[] = {
 +  JIT_PLATFORM_CALLEE_SAVE_GPRS
 +};
 +
 +/* The JIT_Vn GPRs, in order.  JIT_V0..JIT_V2 are listed unconditionally;
 +   JIT_V3 and above are included only when the backend defines them.
 +   jit_enter_jit_abi and jit_leave_jit_abi save and restore a prefix of
 +   this table.  */
 +static const jit_gpr_t user_callee_save_gprs[] = {
 +  JIT_V0, JIT_V1, JIT_V2
 +#ifdef JIT_V3
 +  , JIT_V3
 +#endif
 +#ifdef JIT_V4
 +  , JIT_V4
 +#endif
 +#ifdef JIT_V5
 +  , JIT_V5
 +#endif
 +#ifdef JIT_V6
 +  , JIT_V6
 +#endif
 +#ifdef JIT_V7
 +  , JIT_V7
 +#endif
 +#ifdef JIT_V8
 +  , JIT_V8
 +#endif
 +#ifdef JIT_V9
 +  , JIT_V9
++#endif
++#ifdef JIT_V10
++  , JIT_V10
 +#endif
 + };
 +
 +/* The JIT_VFn FPRs, in order; each entry is included only when the
 +   backend defines the corresponding macro.  jit_enter_jit_abi and
 +   jit_leave_jit_abi save and restore a prefix of this table.
 +   NOTE(review): every entry after JIT_VF0 starts with a comma, so the
 +   initializer is only well-formed when JIT_VF0 is defined whenever any
 +   later JIT_VFn is; and when none is defined the initializer is empty.
 +   Confirm that every backend satisfies this.  */
 +static const jit_fpr_t user_callee_save_fprs[] = {
 +#ifdef JIT_VF0
 +  JIT_VF0
 +#endif
 +#ifdef JIT_VF1
 +  , JIT_VF1
 +#endif
 +#ifdef JIT_VF2
 +  , JIT_VF2
 +#endif
 +#ifdef JIT_VF3
 +  , JIT_VF3
 +#endif
 +#ifdef JIT_VF4
 +  , JIT_VF4
 +#endif
 +#ifdef JIT_VF5
 +  , JIT_VF5
 +#endif
 +#ifdef JIT_VF6
 +  , JIT_VF6
 +#endif
 +#ifdef JIT_VF7
 +  , JIT_VF7
 +#endif
++#ifdef JIT_VF8
++  , JIT_VF8
++#endif
++#ifdef JIT_VF9
++  , JIT_VF9
++#endif
++#ifdef JIT_VF10
++  , JIT_VF10
++#endif
++#ifdef JIT_VF11
++  , JIT_VF11
++#endif
 +};
 +
 +/* Number of elements of the array X.  X must be an actual array, not a
 +   pointer.  */
 +#define ARRAY_SIZE(X) (sizeof (X)/sizeof ((X)[0]))
 +/* Element counts of the three callee-save register tables above.  */
 +static const size_t pv_count = ARRAY_SIZE(platform_callee_save_gprs);
 +static const size_t v_count = ARRAY_SIZE(user_callee_save_gprs);
 +static const size_t vf_count = ARRAY_SIZE(user_callee_save_fprs);
 +
 +/* Emit the prologue that enters JIT code: reserve an aligned save area
 +   on the stack and store into it, from offset 0 upwards, the first VF
 +   user FPRs (8 bytes each), the first V user GPRs, and then all of the
 +   platform callee-save GPRs (one machine word each).  The frame must be
 +   empty on entry.  Returns the number of bytes reserved, which is what
 +   jit_leave_jit_abi expects as its FRAME_SIZE.  The FRAME_SIZE
 +   parameter is not used by this function.  */
 +size_t
 +jit_enter_jit_abi(jit_state_t *_jit, size_t v, size_t vf, size_t frame_size)
 +{
 +  const size_t word_bytes = __WORDSIZE / 8;
 +  const size_t fpr_bytes = 8;
 +
 +  ASSERT(v <= v_count);
 +  ASSERT(vf <= vf_count);
 +  ASSERT(_jit->frame_size == 0);
 +
 +  _jit->frame_size = jit_initial_frame_size();
 +
 +  const size_t save_area = (pv_count + v) * word_bytes + vf * fpr_bytes;
 +  const size_t reserved = jit_align_stack(_jit, save_area);
 +
 +  size_t offset = 0;
 +  size_t n;
 +
 +  for (n = 0; n < vf; n++) {
 +    jit_stxi_d(_jit, offset, JIT_SP, user_callee_save_fprs[n]);
 +    offset += fpr_bytes;
 +  }
 +  for (n = 0; n < v; n++) {
 +    jit_stxi(_jit, offset, JIT_SP, user_callee_save_gprs[n]);
 +    offset += word_bytes;
 +  }
 +  for (n = 0; n < pv_count; n++) {
 +    jit_stxi(_jit, offset, JIT_SP, platform_callee_save_gprs[n]);
 +    offset += word_bytes;
 +  }
 +  ASSERT(offset <= reserved);
 +
 +  return reserved;
 +}
 +
 +/* Emit the epilogue matching jit_enter_jit_abi: reload the first VF
 +   user FPRs, the first V user GPRs and all platform callee-save GPRs
 +   from the save area at SP, in the same layout in which they were
 +   stored, and then release FRAME_SIZE bytes of stack.  FRAME_SIZE must
 +   be large enough to hold the save area.  */
 +void
 +jit_leave_jit_abi(jit_state_t *_jit, size_t v, size_t vf, size_t frame_size)
 +{
 +  const size_t word_bytes = __WORDSIZE / 8;
 +  const size_t fpr_bytes = 8;
 +
 +  ASSERT(v <= v_count);
 +  ASSERT(vf <= vf_count);
 +  ASSERT((pv_count + v) * word_bytes + vf * fpr_bytes <= frame_size);
 +
 +  size_t offset = 0;
 +  size_t n;
 +
 +  for (n = 0; n < vf; n++) {
 +    jit_ldxi_d(_jit, user_callee_save_fprs[n], JIT_SP, offset);
 +    offset += fpr_bytes;
 +  }
 +  for (n = 0; n < v; n++) {
 +    jit_ldxi(_jit, user_callee_save_gprs[n], JIT_SP, offset);
 +    offset += word_bytes;
 +  }
 +  for (n = 0; n < pv_count; n++) {
 +    jit_ldxi(_jit, platform_callee_save_gprs[n], JIT_SP, offset);
 +    offset += word_bytes;
 +  }
 +  ASSERT(offset <= frame_size);
 +
 +  jit_shrink_stack(_jit, frame_size);
 +}
 +
 +// Emit the code that places the ARGC operands in ARGS where the ABI
 +// expects call arguments: compute each argument's destination, reserve
 +// (aligned) stack space for the arguments passed on the stack, and
 +// shuffle the operands into place.  SP-relative entries of ARGS are
 +// adjusted in place to account for the stack space just reserved.
 +// Returns the number of bytes by which the stack was expanded; the
 +// caller releases them after the call.
 +// Precondition: stack is already aligned.
 +static size_t
 +prepare_call_args(jit_state_t *_jit, size_t argc, jit_operand_t args[])
 +{
 +  // A variable-length array must have at least one element, so reserve
 +  // a dummy slot for calls without arguments.
 +  jit_operand_t dst[argc ? argc : 1];
 +  struct abi_arg_iterator iter;
 +
 +  // Compute shuffle destinations and space for spilled arguments.
 +  reset_abi_arg_iterator(&iter, argc, args);
 +  for (size_t i = 0; i < argc; i++)
 +    next_abi_arg(&iter, &dst[i]);
 +
 +  // Reserve space for spilled arguments and ensure stack alignment.
 +  size_t stack_size = jit_align_stack(_jit, iter.stack_size);
 +
 +  // Fix up SP-relative operands.
 +  for (size_t i = 0; i < argc; i++) {
 +    switch(args[i].kind) {
 +    case JIT_OPERAND_KIND_GPR:
 +      if (jit_same_gprs (args[i].loc.gpr.gpr, JIT_SP))
 +        args[i].loc.gpr.addend += stack_size;
 +      break;
 +    case JIT_OPERAND_KIND_MEM:
 +      if (jit_same_gprs (args[i].loc.mem.base, JIT_SP))
 +        args[i].loc.mem.offset += stack_size;
 +      break;
 +    default:
 +      break;
 +    }
 +  }
 +
 +  jit_move_operands(_jit, dst, args, argc);
 +
 +  return stack_size;
 +}
 +
 +/* Emit a call to the function at address F, passing the ARGC operands
 +   in ARGS: the arguments are first moved to their ABI locations, then
 +   the call is emitted, and finally the stack space reserved for the
 +   arguments is released.  Entries of ARGS may be modified.  */
 +void
 +jit_calli(jit_state_t *_jit, jit_pointer_t f, size_t argc,
 +          jit_operand_t args[])
 +{
 +  size_t stack_bytes = prepare_call_args(_jit, argc, args);
 +
 +  calli(_jit, (jit_word_t)f);
 +
 +  jit_shrink_stack(_jit, stack_bytes);
 +}
 +
 +/* Emit a call through the register F, passing the ARGC operands in
 +   ARGS: move the arguments to their ABI locations, emit the call, then
 +   release the stack space reserved for the arguments.  Entries of ARGS
 +   may be modified.  */
 +void
 +jit_callr(jit_state_t *_jit, jit_gpr_t f, size_t argc, jit_operand_t args[])
 +{
 +  const size_t reserved = prepare_call_args(_jit, argc, args);
 +  callr(_jit, jit_gpr_regno(f));
 +  jit_shrink_stack(_jit, reserved);
 +}
 +
 +/* Overwrite each of the ARGC entries of ARGS with the location that the
 +   ABI argument iterator assigns to it.  The iterator's stack size is
 +   first advanced by the current frame size.  */
 +void
 +jit_locate_args(jit_state_t *_jit, size_t argc, jit_operand_t args[])
 +{
 +  struct abi_arg_iterator it;
 +  size_t n = 0;
 +
 +  reset_abi_arg_iterator(&it, argc, args);
 +  it.stack_size += _jit->frame_size;
 +
 +  while (n < argc) {
 +    next_abi_arg(&it, &args[n]);
 +    n++;
 +  }
 +}
 +
 +/* Emit code that moves the incoming arguments from the locations
 +   computed by jit_locate_args into the operands given in ARGS.  ARGS
 +   itself is not modified; the lookup is done on a private copy.
 +
 +   Precondition: args are distinct locations of type GPR or FPR.  All
 +   addends of arg operands are zero.  No GPR arg is SP.  */
 +void
 +jit_load_args(jit_state_t *_jit, size_t argc, jit_operand_t args[])
 +{
 +  // Nothing to load.  Returning here also avoids declaring a
 +  // zero-length variable-length array below, which C does not permit.
 +  if (argc == 0)
 +    return;
 +
 +  jit_operand_t src[argc];
 +
 +  memcpy(src, args, sizeof(src[0]) * argc);
 +
 +  jit_locate_args(_jit, argc, src);
 +  jit_move_operands(_jit, args, src, argc);
 +}
 +
 +#ifdef JIT_NEEDS_LITERAL_POOL
 +/* Upper bound, in bytes, on the space needed to emit POOL as it
 +   currently stands.  */
 +static uint32_t
 +literal_pool_byte_size(struct jit_literal_pool *pool)
 +{
 +  // We might need a uint32_t to branch over the table...
 +  const size_t guard_bytes = sizeof(uint32_t);
 +  // ...up to 7 bytes to align the table...
 +  const size_t align_bytes = 7;
 +  // ...and we assume no entry takes more than two words.
 +  const size_t entry_bytes = sizeof(uintptr_t) * 2;
 +
 +  return guard_bytes + align_bytes + pool->size * entry_bytes;
 +}
 +
 +/* Empty POOL: zero the entries that were in use, set the entry count to
 +   zero, and set the deadline to the size of the code buffer
 +   (limit - start).  */
 +static void
 +reset_literal_pool(jit_state_t *_jit, struct jit_literal_pool *pool)
 +{
 +  const size_t used_bytes = sizeof(pool->entries[0]) * pool->size;
 +
 +  memset(pool->entries, 0, used_bytes);
 +  pool->size = 0;
 +  pool->deadline = _jit->limit - _jit->start;
 +}
 +
 +/* Capacity used when alloc_literal_pool is asked for a capacity of 0.  */
 +#define INITIAL_LITERAL_POOL_CAPACITY 12
 +/* Allocate, with the state's allocator, an empty literal pool with room
 +   for CAPACITY entries (INITIAL_LITERAL_POOL_CAPACITY when CAPACITY is
 +   0).  Allocation failure is caught by ASSERT.  */
 +static struct jit_literal_pool*
 +alloc_literal_pool(jit_state_t *_jit, size_t capacity)
 +{
 +  if (capacity == 0) capacity = INITIAL_LITERAL_POOL_CAPACITY;
 +
 +  struct jit_literal_pool *ret =
 +    _jit->alloc (sizeof (struct jit_literal_pool) +
 +                 sizeof (struct jit_literal_pool_entry) * capacity);
 +  ASSERT (ret);
 +  ret->capacity = capacity;
 +  // reset_literal_pool clears `size' entries, so the count must be
 +  // valid before it runs; freshly allocated memory is not assumed to be
 +  // zeroed.
 +  ret->size = 0;
 +  reset_literal_pool(_jit, ret);
 +  return ret;
 +}
 +
 +/* Replace the state's literal pool with one of twice the capacity that
 +   holds the same entries and keeps the same deadline, and free the old
 +   pool.  */
 +static void
 +grow_literal_pool(jit_state_t *_jit)
 +{
 +  struct jit_literal_pool *old_pool = _jit->pool;
 +  struct jit_literal_pool *bigger =
 +    alloc_literal_pool(_jit, old_pool->capacity * 2);
 +
 +  // The fresh pool starts out empty, so entries land at the same index.
 +  for (size_t n = 0; n < old_pool->size; n++)
 +    bigger->entries[n] = old_pool->entries[n];
 +  bigger->size = old_pool->size;
 +  bigger->deadline = old_pool->deadline;
 +
 +  _jit->pool = bigger;
 +  _jit->free (old_pool);
 +}
 +
 +/* Record ENTRY in the state's literal pool.  MAX_OFFSET is the furthest
 +   the pool may be placed from the referencing instruction's PC base.
 +   Returns 1 when the entry was recorded, or when the state has already
 +   overflowed (in which case nothing is done).  Returns 0 when the pool
 +   as it stands could already be too large for MAX_OFFSET: the pool is
 +   then emitted and ENTRY is not recorded.  */
 +static jit_bool_t
 +add_literal_pool_entry(jit_state_t *_jit, struct jit_literal_pool_entry entry,
 +                       uint32_t max_offset)
 +{
 +  if (_jit->overflow)
 +    return 1;
 +
 +  if (literal_pool_byte_size(_jit->pool) >= max_offset) {
 +    emit_literal_pool(_jit, GUARD_NEEDED);
 +    return 0;
 +  }
 +
 +  if (_jit->pool->capacity == _jit->pool->size)
 +    grow_literal_pool (_jit);
 +
 +  // Offset, from the start of the code buffer, of the PC base that the
 +  // referencing instruction's displacement is relative to.
 +  uint32_t here = _jit->pc.uc - _jit->start;
 +  uint32_t inst_start = here - entry.reloc.inst_start_offset;
 +  uint32_t pc_base = inst_start + entry.reloc.pc_base_offset;
 +
 +  // Offset by which the pool must have been emitted for this entry, and
 +  // hence for the pool as a whole if it is the earliest one so far.
 +  uint32_t slack = max_offset - literal_pool_byte_size(_jit->pool);
 +  uint32_t deadline = pc_base + slack;
 +  if (_jit->pool->deadline > deadline)
 +    _jit->pool->deadline = deadline;
 +
 +  _jit->pool->entries[_jit->pool->size++] = entry;
 +
 +  return 1;
 +}
 +
 +/* Record a pool entry, with a value of 0 for now, for the relocation
 +   SRC.  MAX_OFFSET_BITS is the width of the instruction's displacement
 +   field; together with SRC.rsh it gives the reach of the instruction,
 +   from which the size of one instruction is subtracted.  Returns what
 +   add_literal_pool_entry returns.  */
 +static jit_bool_t
 +add_pending_literal(jit_state_t *_jit, jit_reloc_t src,
 +                    uint8_t max_offset_bits)
 +{
 +  struct jit_literal_pool_entry entry = { src, 0 };
 +  uint32_t max_inst_size = sizeof(uint32_t);
 +  // Shift an unsigned value: shifting a signed 1 into the sign bit is
 +  // undefined.
 +  uint32_t reach = (uint32_t)1 << (max_offset_bits + src.rsh);
 +  uint32_t max_offset = reach - max_inst_size;
 +  return add_literal_pool_entry(_jit, entry, max_offset);
 +}
 +
 +/* Remove from the state's literal pool the most recently added entry
 +   whose relocation offset equals SRC.offset, closing the gap it leaves.
 +   Aborts when there is no such entry.  */
 +static void
 +remove_pending_literal(jit_state_t *_jit, jit_reloc_t src)
 +{
 +  struct jit_literal_pool *pool = _jit->pool;
 +  size_t idx = pool->size;
 +
 +  // Search from the newest entry towards the oldest.
 +  while (idx > 0) {
 +    idx--;
 +    if (pool->entries[idx].reloc.offset != src.offset)
 +      continue;
 +
 +    // Shift the later entries down by one slot.
 +    for (size_t k = idx; k + 1 < pool->size; k++)
 +      pool->entries[k] = pool->entries[k + 1];
 +    pool->size--;
 +    return;
 +  }
 +
 +  abort();
 +}
 +
 +/* Set to VALUE the value of the most recently added pool entry whose
 +   relocation offset equals SRC.offset.  The entry must not have been
 +   given a value yet.  Aborts when there is no such entry.  */
 +static void
 +patch_pending_literal(jit_state_t *_jit, jit_reloc_t src, uintptr_t value)
 +{
 +  struct jit_literal_pool *pool = _jit->pool;
 +  size_t idx = pool->size;
 +
 +  // Search from the newest entry towards the oldest.
 +  while (idx > 0) {
 +    idx--;
 +    if (pool->entries[idx].reloc.offset != src.offset)
 +      continue;
 +
 +    ASSERT(pool->entries[idx].value == 0);
 +    pool->entries[idx].value = value;
 +    return;
 +  }
 +
 +  abort();
 +}
 +
 +/* Emit every entry of the state's literal pool at the current position
 +   and patch the instructions that refer to them, then empty the pool.
 +   When GUARD is GUARD_NEEDED, a jump is emitted before the entries and
 +   patched once they have been emitted (presumably so that execution
 +   branches over the table).  Does nothing when the state has
 +   overflowed or the pool is empty, and stops as soon as an overflow is
 +   detected.  */
 +static void
 +emit_literal_pool(jit_state_t *_jit, enum guard_pool guard)
 +{
 +  if (_jit->overflow)
 +    return;
 +
 +  if (!_jit->pool->size)
 +    return;
 +
 +  uint32_t *patch_loc = NULL;
 +  if (guard == GUARD_NEEDED)
 +    patch_loc = jmp_without_veneer(_jit);
 +
 +  // FIXME: Could de-duplicate constants.
 +  for (size_t i = 0; i < _jit->pool->size; i++) {
 +    // Align to word boundary without emitting pool.
 +    if (_jit->pc.w & 1) emit_u8(_jit, 0);
 +    if (_jit->pc.w & 2) emit_u16(_jit, 0);
 +    if (sizeof(uintptr_t) > 4 && (_jit->pc.w & 4))
 +      emit_u32(_jit, 0);
 +    ASSERT((_jit->pc.w & (sizeof(uintptr_t) - 1)) == 0);
 +    struct jit_literal_pool_entry *entry = &_jit->pool->entries[i];
 +    // Address of the referencing location, and of the PC base that its
 +    // displacement is measured from.
 +    uint8_t *loc = _jit->start + entry->reloc.offset;
 +    uint8_t *pc_base =
 +      loc - entry->reloc.inst_start_offset + entry->reloc.pc_base_offset;
 +    // Distance from the PC base to where this entry is about to be
 +    // emitted, scaled down by the relocation's shift.
 +    ptrdiff_t diff = _jit->pc.uc - pc_base;
 +    diff >>= entry->reloc.rsh;
 +
 +    // The padding above may have overflowed the buffer.
 +    if (_jit->overflow)
 +      return;
 +
 +    // Patch the referencing instruction with DIFF, then emit the entry
 +    // itself: a veneer for jumps, the literal word for pool loads.
 +    switch (entry->reloc.kind & JIT_RELOC_MASK) {
 +    case JIT_RELOC_JMP_WITH_VENEER:
 +      patch_veneer_jmp_offset((uint32_t*) loc, diff);
 +      emit_veneer(_jit, (void*) entry->value);
 +      break;
 +    case JIT_RELOC_JCC_WITH_VENEER:
 +      patch_veneer_jcc_offset((uint32_t*) loc, diff);
 +      emit_veneer(_jit, (void*) entry->value);
 +      break;
 +    case JIT_RELOC_LOAD_FROM_POOL:
 +      patch_load_from_pool_offset((uint32_t*) loc, diff);
 +      emit_uintptr(_jit, entry->value);
 +      break;
 +    default:
 +      abort();
 +    }
 +  }
 +
 +  if (_jit->overflow)
 +    return;
 +
 +  // Complete the guard jump emitted before the entries.
 +  if (guard == GUARD_NEEDED)
 +    patch_jmp_without_veneer(_jit, patch_loc);
 +
 +  reset_literal_pool(_jit, _jit->pool);
 +}
 +#endif
diff --cc libguile/lightening/lightening/riscv-cpu.c
index 000000000,37c252a78..37c252a78
mode 000000,100644..100644
--- a/libguile/lightening/lightening/riscv-cpu.c
+++ b/libguile/lightening/lightening/riscv-cpu.c
diff --cc libguile/lightening/lightening/riscv-fpu.c
index 000000000,315ed8d14..315ed8d14
mode 000000,100644..100644
--- a/libguile/lightening/lightening/riscv-fpu.c
+++ b/libguile/lightening/lightening/riscv-fpu.c
diff --cc libguile/lightening/lightening/riscv.c
index 000000000,808192fae..808192fae
mode 000000,100644..100644
--- a/libguile/lightening/lightening/riscv.c
+++ b/libguile/lightening/lightening/riscv.c
diff --cc libguile/lightening/lightening/riscv.h
index 000000000,653d74bf9..653d74bf9
mode 000000,100644..100644
--- a/libguile/lightening/lightening/riscv.h
+++ b/libguile/lightening/lightening/riscv.h



reply via email to

[Prev in Thread] Current Thread [Next in Thread]