qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH] Porting TCG to alpha platform


From: identifier scorpio
Subject: [Qemu-devel] [PATCH] Porting TCG to alpha platform
Date: Tue, 19 Jan 2010 16:47:12 +0800 (CST)

Hello.

I ported TCG to the Alpha platform. The patch is currently based on the
stable-0.10 branch, and it can now run the linux-0.2.img test image on my
Alpha XP1000 workstation. However, it still can't run MS Windows, and I hope
someone, especially those working on target-alpha, can help me find the bugs.

From 0ee33ea1e43298e6045e16dfcf07cb7a530dfd56 Mon Sep 17 00:00:00 2001
From: Dong Weiyu <address@hidden>
Date: Tue, 19 Jan 2010 16:22:54 +0800
Subject: [PATCH] porting TCG to alpha platform.

---
 cpu-all.h              |    2 +-
 tcg/alpha/tcg-target.c | 1335 ++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/alpha/tcg-target.h |   70 +++
 3 files changed, 1406 insertions(+), 1 deletions(-)
 create mode 100644 tcg/alpha/tcg-target.c
 create mode 100644 tcg/alpha/tcg-target.h

diff --git a/cpu-all.h b/cpu-all.h
index e0c3efd..bdf6fb2 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -22,7 +22,7 @@
 
 #include "qemu-common.h"
 
-#if defined(__arm__) || defined(__sparc__) || defined(__mips__) || defined(__hppa__)
+#if defined(__arm__) || defined(__sparc__) || defined(__mips__) || defined(__hppa__) || defined(__alpha__)
 #define WORDS_ALIGNED
 #endif
 
diff --git a/tcg/alpha/tcg-target.c b/tcg/alpha/tcg-target.c
new file mode 100644
index 0000000..6bbd69f
--- /dev/null
+++ b/tcg/alpha/tcg-target.c
@@ -0,0 +1,1335 @@
+/*

+ * Tiny Code Generator for QEMU on ALPHA platform
+*/

+

+#ifndef NDEBUG
+/*
+ * Printable names of the host integer registers, indexed by register
+ * number.  Only built when NDEBUG is not defined.
+ */
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+    "$0",  "$1",  "$2",  "$3",
+    "$4",  "$5",  "$6",  "$7",
+    "$8",  "$9",  "$10", "$11",
+    "$12", "$13", "$14", "$15",
+    "$16", "$17", "$18", "$19",
+    "$20", "$21", "$22", "$23",
+    "$24", "$25", "$26", "$27",
+    "$28", "$29", "$30", "$31",
+};
+#endif

+

+/*
+ * Order in which host registers are offered to the register allocator:
+ * $1-$8 and $22 first, then $9-$14, and the argument registers $16-$21
+ * last.
+ *
+ * Not listed here: $0 (function return value), $15 (cpu_env pointer),
+ * $23-$25, and the special/reserved registers $26-$31.
+ * NOTE(review): the original remark said $25 is reserved for jcc and that
+ * $0 and $15 are only allocated "later"/"at last"; neither $0 nor $15
+ * appears in this table at all -- confirm which is intended.
+ */
+static const int tcg_target_reg_alloc_order[] = {
+    TCG_REG_1,  TCG_REG_2,  TCG_REG_3,  TCG_REG_4,
+    TCG_REG_5,  TCG_REG_6,  TCG_REG_7,  TCG_REG_8,
+    TCG_REG_22,
+    TCG_REG_9,  TCG_REG_10, TCG_REG_11,
+    TCG_REG_12, TCG_REG_13, TCG_REG_14,
+    TCG_REG_16, TCG_REG_17, TCG_REG_18,
+    TCG_REG_19, TCG_REG_20, TCG_REG_21
+};

+

+/*
+ * Registers that carry the first six integer arguments of a call
+ * ($16..$21), in argument order.  According to the Alpha calling
+ * convention, any further arguments are passed on the stack.
+ */

+static const int tcg_target_call_iarg_regs[6] = { 

+    TCG_REG_16, TCG_REG_17, TCG_REG_18, TCG_REG_19, TCG_REG_20, TCG_REG_21

+};

+

+/*
+ * Register that carries a function's return value: according to the
+ * Alpha calling convention this is $0.
+ */

+static const int tcg_target_call_oarg_regs[1] = { TCG_REG_0 };

+

+/*
+ * Address of the translation block epilogue.  INDEX_op_exit_tb loads it
+ * into a scratch register and jumps to it to leave generated code.
+ */

+static uint8_t *tb_ret_addr;

+

+/*
+ * Alpha primary opcodes (OP_*) and function codes (FUNC_*) used by the
+ * instruction emitters below.
+ */
+
+/* jump group: JMP/CALL/RET share one opcode, selected by function code */
+#define OP_CALL         0x01A
+#define OP_RET          0x01A
+#define OP_JMP          0x01A
+#define FUNC_JMP        0x00
+#define FUNC_CALL       0x01
+#define FUNC_RET        0x02
+
+/* branches */
+#define OP_BR           0x30
+#define OP_BEQ          0x39
+#define OP_BNE          0x3D
+#define OP_BLBC         0x38
+#define OP_BLBS         0x3C
+
+/* integer add / subtract / compare */
+#define OP_ADDSUBCMP    0x10
+#define FUNC_ADDL       0x00
+#define FUNC_SUBL       0x09
+#define FUNC_ADDQ       0x20
+#define FUNC_SUBQ       0x29
+#define FUNC_CMPEQ      0x2D
+#define FUNC_CMPLT      0x4D
+#define FUNC_CMPLE      0x6D
+#define FUNC_CMPULT     0x1D
+#define FUNC_CMPULE     0x3D
+
+/* integer multiply */
+#define OP_MUL          0x13
+#define FUNC_MULL       0x00
+#define FUNC_MULQ       0x20
+
+/* logical operations */
+#define OP_LOGIC        0x11
+#define FUNC_AND        0x00
+#define FUNC_BIS        0x20
+#define FUNC_XOR        0x40
+
+/* shifts */
+#define OP_SHIFT        0x12
+#define FUNC_SLL        0x39
+#define FUNC_SRL        0x34
+#define FUNC_SRA        0x3C
+
+/* sign extension */
+#define OP_SEXT         0x1C
+#define FUNC_SEXTB      0x00
+#define FUNC_SEXTW      0x01
+
+/* address computation, loads and stores */
+#define OP_LDA          0x08
+#define OP_LDAH         0x09
+#define OP_LDBU         0x0A
+#define OP_LDWU         0x0C
+#define OP_LDL          0x28
+#define OP_LDQ          0x29
+#define OP_STB          0x0E
+#define OP_STW          0x0D
+#define OP_STL          0x2C
+#define OP_STQ          0x2D
+
+

+/*
+ * Number of registers used to pass call arguments.  Alpha passes the
+ * first six parameters of a procedure in $16..$21, so the answer is
+ * always 6; 'flags' is ignored.
+ */
+static inline int tcg_target_get_call_iarg_regs_count(int flags)
+{
+    enum { ALPHA_NB_IARG_REGS = 6 };
+
+    (void)flags;
+    return ALPHA_NB_IARG_REGS;
+}

+

+/*
+ * Parse one constraint letter at *pct_str and fill in 'ct' with the
+ * register set it allows.  Called once for each op at qemu's
+ * initialization stage.
+ *
+ *   'r'  any register.
+ *   'L'  register operand of qemu_ld/st: any register except $0, $16,
+ *        $17 and $18, which the qemu_ld/st code uses for its own
+ *        purposes.  Since qemu_ld/st carry TCG_OPF_CALL_CLOBBER, tcg has
+ *        already freed the call-clobbered registers before that code is
+ *        generated, so they can be used there without saving.
+ *
+ * Returns 0 and advances *pct_str past the letter on success, or -1
+ * (leaving *pct_str untouched) for an unknown letter.
+ */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+    const char *cursor = *pct_str;
+    const char letter = cursor[0];
+
+    if (letter != 'r' && letter != 'L') {
+        return -1;
+    }
+
+    /* both letters start out from the full register set */
+    ct->ct |= TCG_CT_REG;
+    tcg_regset_set32(ct->u.regs, 0, 0xffffffffu);
+
+    if (letter == 'L') {
+        /* keep the operand out of the registers qemu_ld/st scribble on */
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_0);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_16);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_17);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_18);
+    }
+
+    *pct_str = cursor + 1;
+    return 0;
+}

+

+/*
+ * Tell whether an op's input argument may be a constant: returns 1 when
+ * the constraint has TCG_CT_CONST set, 0 otherwise.  The value itself
+ * ('val') is not examined.
+ */
+static inline int tcg_target_const_match(tcg_target_long val,
+                                         const TCGArgConstraint *arg_ct)
+{
+    (void)val;
+    return (arg_ct->ct & TCG_CT_CONST) != 0;
+}

+

+/*
+ * Emit one 32-bit instruction laid out as
+ *     opcode[31:26] | Ra[25:21] | disp[20:0]
+ * (used in this file for the branch instructions).  Every field is masked
+ * to its width, so a negative Disp is truncated to its low 21 bits.
+ *
+ * The word is assembled in uint32_t: opcodes >= 0x20 (e.g. OP_BR) shifted
+ * left by 26 would otherwise overflow into the sign bit of an int.
+ */
+static inline void tcg_out_inst2(TCGContext *s, int Opcode, int Ra, int Disp)
+{
+    uint32_t v = ((uint32_t)(Opcode & 0x3f) << 26)
+               | ((uint32_t)(Ra & 0x1f) << 21)
+               | ((uint32_t)Disp & 0x1fffff);
+
+    tcg_out32(s, v);
+}

+

+/*
+ * Emit one 32-bit instruction laid out as
+ *     opcode[31:26] | Ra[25:21] | Rb[20:16] | disp[15:0]
+ * (used in this file for LDA/LDAH and the load/store instructions).
+ * Every field is masked to its width, so a negative Disp is truncated to
+ * its low 16 bits.
+ *
+ * The word is assembled in uint32_t: opcodes >= 0x20 (e.g. OP_LDL)
+ * shifted left by 26 would otherwise overflow into the sign bit of an int.
+ */
+static inline void tcg_out_inst3_disp(TCGContext *s, int Opcode, int Ra,
+                                      int Rb, int Disp)
+{
+    uint32_t v = ((uint32_t)(Opcode & 0x3f) << 26)
+               | ((uint32_t)(Ra & 0x1f) << 21)
+               | ((uint32_t)(Rb & 0x1f) << 16)
+               | ((uint32_t)Disp & 0xffff);
+
+    tcg_out32(s, v);
+}

+

+/*
+ * Emit one 32-bit instruction laid out as
+ *     opcode[31:26] | Ra[25:21] | Rb[20:16] | func[15:14] | disp[13:0]
+ * (used in this file for JMP/CALL through a register).  Every field is
+ * masked to its width.
+ *
+ * The word is assembled in uint32_t so that the shift by 26 can never
+ * overflow into the sign bit of an int.
+ */
+static inline void tcg_out_inst3_func(TCGContext *s, int Opcode, int Ra,
+                                      int Rb, int Func, int Disp)
+{
+    uint32_t v = ((uint32_t)(Opcode & 0x3f) << 26)
+               | ((uint32_t)(Ra & 0x1f) << 21)
+               | ((uint32_t)(Rb & 0x1f) << 16)
+               | ((uint32_t)(Func & 0x3) << 14)
+               | ((uint32_t)Disp & 0x3fff);
+
+    tcg_out32(s, v);
+}

+

+/*
+ * Emit one 32-bit register-register operate instruction laid out as
+ *     opcode[31:26] | Ra[25:21] | Rb[20:16] | func[11:5] | Rc[4:0]
+ * with bits 15:12 left zero.  Ra and Rb are the inputs, Rc the result.
+ * Every field is masked to its width.
+ *
+ * The word is assembled in uint32_t so that the shift by 26 can never
+ * overflow into the sign bit of an int.
+ */
+static inline void tcg_out_inst4(TCGContext *s, int Opcode, int Ra, int Rb,
+                                 int Func, int Rc)
+{
+    uint32_t v = ((uint32_t)(Opcode & 0x3f) << 26)
+               | ((uint32_t)(Ra & 0x1f) << 21)
+               | ((uint32_t)(Rb & 0x1f) << 16)
+               | ((uint32_t)(Func & 0x7f) << 5)
+               | ((uint32_t)(Rc & 0x1f));
+
+    tcg_out32(s, v);
+}

+

+/*
+ * Emit one 32-bit register-literal operate instruction laid out as
+ *     opcode[31:26] | Ra[25:21] | lit[20:13] | 1[12] | func[11:5] | Rc[4:0]
+ * Bit 12 is always set to select the literal form; Lit is truncated to
+ * its low 8 bits.  Ra and the literal are the inputs, Rc the result.
+ *
+ * The word is assembled in uint32_t so that the shift by 26 can never
+ * overflow into the sign bit of an int.
+ */
+static inline void tcg_out_inst4i(TCGContext *s, int Opcode, int Ra, int Lit,
+                                  int Func, int Rc)
+{
+    uint32_t v = ((uint32_t)(Opcode & 0x3f) << 26)
+               | ((uint32_t)(Ra & 0x1f) << 21)
+               | ((uint32_t)(Lit & 0xff) << 13)
+               | ((uint32_t)(Func & 0x7f) << 5)
+               | ((uint32_t)1 << 12)
+               | ((uint32_t)(Rc & 0x1f));
+
+    tcg_out32(s, v);
+}

+

+/*
+ * Copy register Rb into register Rc (emitted as "bis $31, Rb, Rc").
+ * Nothing is emitted when source and destination are the same register.
+ */
+static inline void tcg_out_mov(TCGContext *s, int Rc, int Rb)
+{
+    if (Rc == Rb) {
+        return;
+    }
+    tcg_out_inst4(s, OP_LOGIC, TCG_REG_31, Rb, FUNC_BIS, Rc);
+}

+

+/*
+ * Load the 64-bit immediate 'arg' into register 'Ra' with a fixed-length
+ * sequence: this function always emits exactly 5 instructions (20 bytes),
+ * whatever the value:
+ *
+ *     ldah Ra, l3($31) ; lda Ra, l2(Ra) ; sll Ra, 32, Ra ;
+ *     ldah Ra, l1(Ra)  ; lda Ra, l0(Ra)
+ *
+ * where l0..l3 are the four 16-bit slices of 'arg' (l0 lowest), adjusted
+ * as described in the body.  The 'type' parameter is not used.
+ */
+static void tcg_out_movi_fixl( \
+    TCGContext *s, TCGType type, int Ra, tcg_target_long arg)
+{
+    tcg_target_long l0, l1, l2, l3;
+    tcg_target_long l1_tmp, l2_tmp, l3_tmp;
+
+    /* split arg into four 16-bit slices */
+    l0 = arg & 0xffffu;
+    l1_tmp = l1 = ( arg >> 16) & 0xffffu;
+    l2_tmp = l2 = ( arg >> 32) & 0xffffu;
+    l3_tmp = l3 = ( arg >> 48) & 0xffffu;
+
+    /*
+     * LDA/LDAH sign-extend their 16-bit displacement, so a slice with bit
+     * 15 set effectively subtracts 1 from the slice above it.  Compensate
+     * by adding 1 to the next slice; the "== 0 && != original" tests
+     * propagate the carry when that increment wrapped 0xffff to 0.
+     */
+    if ( l0 & 0x8000u)
+        l1_tmp = (l1 + 1) & 0xffffu;
+    if ( (l1_tmp & 0x8000u) || ((l1_tmp == 0) && (l1_tmp != l1)))
+        l2_tmp = (l2 + 1) & 0xffffu;
+    if ( (l2_tmp & 0x8000u) || ((l2_tmp == 0) && (l2_tmp != l2)))
+        l3_tmp = (l3 + 1) & 0xffffu;
+
+    /* build the upper 32 bits, shift them into place, then add the lower */
+    tcg_out_inst3_disp( s, OP_LDAH, Ra, TCG_REG_31, l3_tmp);
+    tcg_out_inst3_disp( s, OP_LDA, Ra, Ra, l2_tmp);
+    tcg_out_inst4i( s, OP_SHIFT, Ra, 32, FUNC_SLL, Ra);
+    tcg_out_inst3_disp( s, OP_LDAH, Ra, Ra, l1_tmp);
+    tcg_out_inst3_disp( s, OP_LDA, Ra, Ra, l0);
+}
+
+/*
+ * Load the immediate 'arg' into register 'Ra', using the shortest of
+ * four sequences:
+ *   - zero:            xor Ra, Ra, Ra
+ *   - fits in 16 bits: lda  Ra, arg($31)
+ *   - fits in 32 bits: ldah Ra, hi($31) [; ldah Ra, 1(Ra)] ; lda Ra, lo(Ra)
+ *   - anything else:   the fixed-length sequence of tcg_out_movi_fixl()
+ * The length of the generated code therefore depends on the value.
+ * Aborts if 'type' is TCG_TYPE_I32 and 'arg' is not a valid int32_t.
+ */
+static inline void tcg_out_movi(TCGContext *s, TCGType type, int Ra,
+                                tcg_target_long arg)
+{
+    if (type == TCG_TYPE_I32 && arg != (int32_t)arg) {
+        tcg_abort();
+    }
+
+    if (arg == 0) {
+        tcg_out_inst4(s, OP_LOGIC, Ra, Ra, FUNC_XOR, Ra);
+        return;
+    }
+
+    if (arg == (int16_t)arg) {
+        tcg_out_inst3_disp(s, OP_LDA, Ra, TCG_REG_31, arg);
+        return;
+    }
+
+    if (arg != (int32_t)arg) {
+        tcg_out_movi_fixl(s, type, Ra, arg);
+        return;
+    }
+
+    /* 32-bit value: high half first ... */
+    tcg_out_inst3_disp(s, OP_LDAH, Ra, TCG_REG_31, (arg >> 16));
+    if (arg & ((tcg_target_ulong)0x8000)) {
+        /* ... plus 1 in the high half when the low half has bit 15 set */
+        tcg_out_inst3_disp(s, OP_LDAH, Ra, Ra, 1);
+    }
+    /* ... then the low half */
+    tcg_out_inst3_disp(s, OP_LDA, Ra, Ra, arg);
+}
+
+/*
+ * Return 1 if 'r' is one of the scratch registers TMP_REG1..TMP_REG3
+ * that the code emitters use internally, 0 otherwise.
+ */
+static inline int _is_tmp_reg( int r)
+{
+    return r == TMP_REG1 || r == TMP_REG2 || r == TMP_REG3;
+}
+
+/*
+ * Load the value at disp(Rb) into Ra: ldl for TCG_TYPE_I32, ldq
+ * otherwise.  A displacement that does not fit in 16 signed bits is
+ * first added to Rb in TMP_REG1.  Aborts if Ra or Rb is a scratch
+ * register.
+ */
+static inline void tcg_out_ld(TCGContext *s, TCGType type, int Ra, int Rb,
+                              tcg_target_long disp)
+{
+    int insn;
+
+    if (_is_tmp_reg(Ra) || _is_tmp_reg(Rb)) {
+        tcg_abort();
+    }
+
+    insn = (type == TCG_TYPE_I32) ? OP_LDL : OP_LDQ;
+
+    if (disp == (int16_t)disp) {
+        tcg_out_inst3_disp(s, insn, Ra, Rb, disp);
+        return;
+    }
+
+    /* TMP_REG1 = Rb + disp, then load from 0(TMP_REG1) */
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);
+    tcg_out_inst4(s, OP_ADDSUBCMP, Rb, TMP_REG1, FUNC_ADDQ, TMP_REG1);
+    tcg_out_inst3_disp(s, insn, Ra, TMP_REG1, 0);
+}

+
+/*
+ * Store Ra to disp(Rb): stl for TCG_TYPE_I32, stq otherwise.
+ * A displacement that does not fit in 16 signed bits is first added to
+ * Rb in TMP_REG1.  Aborts if Ra or Rb is a scratch register.
+ */
+static inline void tcg_out_st(TCGContext *s, TCGType type, int Ra, int Rb,
+                              tcg_target_long disp)
+{
+    int insn;
+
+    if (_is_tmp_reg(Ra) || _is_tmp_reg(Rb)) {
+        tcg_abort();
+    }
+
+    insn = (type == TCG_TYPE_I32) ? OP_STL : OP_STQ;
+
+    if (disp == (int16_t)disp) {
+        tcg_out_inst3_disp(s, insn, Ra, Rb, disp);
+        return;
+    }
+
+    /* TMP_REG1 = Rb + disp, then store to 0(TMP_REG1) */
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);
+    tcg_out_inst4(s, OP_ADDSUBCMP, Rb, TMP_REG1, FUNC_ADDQ, TMP_REG1);
+    tcg_out_inst3_disp(s, insn, Ra, TMP_REG1, 0);
+}

+

+/*
+ * Emit "Ra = Ra <op> val" for the operate instruction Opcode/Func.
+ * Values in 0..255 use the literal form of the instruction; any other
+ * value is first loaded into TMP_REG1.  Aborts if Ra is a scratch
+ * register.
+ */
+static inline void tgen_arithi(TCGContext *s, int Opcode, int Func, int Ra,
+                               tcg_target_long val)
+{
+    const int fits_literal = (val == (uint8_t)val);
+
+    if (_is_tmp_reg(Ra)) {
+        tcg_abort();
+    }
+
+    if (!fits_literal) {
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, val);
+        tcg_out_inst4(s, Opcode, Ra, TMP_REG1, Func, Ra);
+        return;
+    }
+    tcg_out_inst4i(s, Opcode, Ra, val, Func, Ra);
+}

+

+/*
+ * Emit "reg = reg + val" as a 64-bit add (addq).  Nothing is emitted
+ * when val is zero.
+ */
+static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
+{
+    if (val == 0) {
+        return;
+    }
+    tgen_arithi(s, OP_ADDSUBCMP, FUNC_ADDQ, reg, val);
+}

+

+/*
+ * Push 'reg' onto the stack: lower the stack pointer ($30) by 8 bytes,
+ * then store the register at 0($30).
+ */
+static inline void tcg_out_push(TCGContext *s, int reg)
+{
+    const int sp = TCG_REG_30;
+
+    tcg_out_inst4i(s, OP_ADDSUBCMP, sp, 8, FUNC_SUBQ, sp);
+    tcg_out_inst3_disp(s, OP_STQ, reg, sp, 0);
+}

+

+/*
+ * Pop the top of the stack into 'reg': load the quadword at 0($30),
+ * then raise the stack pointer ($30) by 8 bytes.
+ */
+static inline void tcg_out_pop(TCGContext *s, int reg)
+{
+    const int sp = TCG_REG_30;
+
+    tcg_out_inst3_disp(s, OP_LDQ, reg, sp, 0);
+    tcg_out_inst4i(s, OP_ADDSUBCMP, sp, 8, FUNC_ADDQ, sp);
+}

+
+/*
+ * Compare function code used for each TCG condition.  A condition and
+ * its negation share the same compare instruction; tcg_out_brcond()
+ * tells them apart by the low bit of the condition number when it picks
+ * the branch instruction.
+ */
+static const uint8_t tcg_cond_to_jcc[10] = {
+    [TCG_COND_EQ]  = FUNC_CMPEQ,   [TCG_COND_NE]  = FUNC_CMPEQ,
+    [TCG_COND_LT]  = FUNC_CMPLT,   [TCG_COND_GE]  = FUNC_CMPLT,
+    [TCG_COND_LE]  = FUNC_CMPLE,   [TCG_COND_GT]  = FUNC_CMPLE,
+    [TCG_COND_LTU] = FUNC_CMPULT,  [TCG_COND_GEU] = FUNC_CMPULT,
+    [TCG_COND_LEU] = FUNC_CMPULE,  [TCG_COND_GTU] = FUNC_CMPULE
+};

+

+/*
+ * Resolve a pending branch once its label address is known (called by
+ * tcg_out_reloc() when label->has_value is true).  The 4-byte slot at
+ * 'code_ptr' is overwritten with "br $31, <label>".
+ *
+ * code_ptr - position of the instruction slot to patch
+ * type     - relocation type; must be R_ALPHA_REFQUAD
+ * value    - label address; must be 4-byte aligned
+ * addend   - not used
+ *
+ * Aborts on any other relocation type, on a misaligned label, or when
+ * the label is out of range of the 21-bit branch displacement.
+ */
+static void patch_reloc(uint8_t *code_ptr,
+                        int type, tcg_target_long value, tcg_target_long addend)
+{
+    TCGContext out;     /* only code_ptr is used, as the output cursor */
+    tcg_target_long disp;
+
+    if (type != R_ALPHA_REFQUAD) {
+        tcg_abort();
+    }
+    if ((value & 3) != 0) {
+        tcg_abort();
+    }
+
+    out.code_ptr = code_ptr;
+
+    /* displacement in instructions, relative to the insn after the slot */
+    disp = (value - (tcg_target_long)out.code_ptr - 4) >> 2;
+    if (disp < -0x100000 || disp >= 0x100000) {
+        tcg_abort();
+    }
+
+    tcg_out_inst2(&out, OP_BR, TCG_REG_31, disp);
+}

+
+/*
+ * Emit an unconditional branch to the label 'label_index'.
+ *
+ * If the label address is already known, "br $31, <label>" is emitted
+ * directly; the function aborts if the label is not 4-byte aligned or is
+ * out of range of the 21-bit branch displacement.  Otherwise a relocation
+ * is recorded and a 4-byte slot is left for patch_reloc() to fill in.
+ *
+ * $31 is used as the link register in both cases, so the branch does not
+ * overwrite a scratch register with a return address and the direct and
+ * the patched form encode the same instruction.
+ */
+static void tcg_out_br(TCGContext *s, int label_index)
+{
+    TCGLabel *l = &s->labels[label_index];
+
+    if (l->has_value) {
+        tcg_target_long val;
+
+        if (l->u.value & 0x3) {
+            tcg_abort();
+        }
+        /* displacement in instructions, relative to the next insn */
+        val = ((tcg_target_long)(l->u.value)
+               - (tcg_target_long)s->code_ptr - 4) >> 2;
+        if (val >= -0x100000 && val < 0x100000) {
+            /* distance fits into the 21-bit field */
+            tcg_out_inst2(s, OP_BR, TCG_REG_31, val);
+        } else {
+            tcg_abort();
+        }
+    } else {
+        /* record relocation info and reserve the instruction slot */
+        tcg_out_reloc(s, s->code_ptr, R_ALPHA_REFQUAD, label_index, 0);
+        s->code_ptr += 4;
+    }
+}
+
+/*
+ * Generate code for INDEX_op_brcond: branch to label 'label_index' if
+ * "arg1 <cond> arg2" holds.
+ *
+ * A compare instruction chosen from tcg_cond_to_jcc[] puts its result in
+ * TMP_REG1, and a BLBS/BLBC on TMP_REG1 does the branch.  Only register
+ * operands are supported: the function aborts if const_arg2 is set or if
+ * cond is outside TCG_COND_EQ..TCG_COND_GTU.
+ */

+static void tcg_out_brcond( TCGContext *s, int cond, \

+    TCGArg arg1, TCGArg arg2, int const_arg2, int label_index)

+{
+    int func, opc;
+    TCGLabel *l = &s->labels[label_index];
+
+    if ( cond < TCG_COND_EQ || cond > TCG_COND_GTU || const_arg2)
+        tcg_abort();
+

+    func = tcg_cond_to_jcc[cond];
+    tcg_out_inst4(s, OP_ADDSUBCMP, arg1, arg2, func, TMP_REG1);
+
+    // odd condition numbers are the negated conditions: for them
+    // TMP_REG1 == 0 means the condition holds, so branch on low bit clear
+    opc = (cond & 1) ? OP_BLBC : OP_BLBS;  
+
+    if (l->has_value) {
+        tcg_target_long val;
+        if ( l->u.value & 3)
+            tcg_abort();
+        // displacement in instructions, relative to the next insn
+        val = ((tcg_target_long)l->u.value - (tcg_target_long)s->code_ptr - 4) 
>> 2;
+        if ( val >= -0x100000 && val < 0x100000) {
+            // if distance can be put into 21-bit field
+            tcg_out_inst2(s, opc, TMP_REG1, val);
+       } else {
+            tcg_abort();
+       }
+    } else {
+        // label not resolved yet: opc^4 turns BLBC into BLBS and vice
+        // versa, so this branches with the inverted sense over the one
+        // instruction slot that follows (displacement 1)
+        tcg_out_inst2(s, opc^4, TMP_REG1, 1);
+       /* record relocation info; patch_reloc() later fills the slot */
+       /* reserved below with an unconditional branch to the label    */
+        tcg_out_reloc(s, s->code_ptr, R_ALPHA_REFQUAD, label_index, 0);
+        s->code_ptr += 4;
+    }
+}

+

+

+#if defined(CONFIG_SOFTMMU)

+

+#include "../../softmmu_defs.h"

+

+/*
+ * Load helpers called by the code from tcg_out_qemu_ld() when the TLB
+ * lookup misses, indexed by the low two bits of the access opcode
+ * (0 = byte, 1 = word, 2 = long, 3 = quad).
+ */
+static void *qemu_ld_helpers[4] = {

+    __ldb_mmu,

+    __ldw_mmu,

+    __ldl_mmu,

+    __ldq_mmu,

+};

+

+/*
+ * Store helpers called by the code from tcg_out_qemu_st() when the TLB
+ * lookup misses, indexed the same way as qemu_ld_helpers.
+ */
+static void *qemu_st_helpers[4] = {

+    __stb_mmu,

+    __stw_mmu,

+    __stl_mmu,

+    __stq_mmu,

+};

+

+#endif

+

+/*
+ * Output host insns for op 'qemu_ldXX t0, t1, flags': load the value at
+ * guest address t1 into t0.  args[] holds, in order, the data register,
+ * the address register and the memory index ('flags', which gives the
+ * current CPU mode, kernel or user).
+ *
+ * The opc argument selects the data width and the extension type:
+ *
+ *        2    1  0
+ *      +---+-------+
+ *      | E |   W   |
+ *      +---+-------+
+ *
+ * E = 0 means zero extension, 1 means sign extension.
+ * W = 0 means 8-bit, 1 means 16-bit, 2 means 32-bit, 3 means 64-bit.
+ *
+ * With CONFIG_SOFTMMU the generated code looks the address up in the
+ * TLB; on a miss it calls the matching qemu_ld_helpers[] entry, on a hit
+ * it adds the TLB entry's addend and loads directly.  $16 and $17 are
+ * used as working registers, TMP_REG1 as scratch.  Without
+ * CONFIG_SOFTMMU the address register is used directly.
+ *
+ * The guest address space may be 32-bit or 64-bit; the comments below
+ * take a 32-bit address space as the example.
+ *
+ * XXX (from the original): "qemu_ld and qemu_st could be modified to
+ * clobber only EDX and EAX.  It will be useful once fixed registers
+ * globals are less common."  EDX/EAX are x86 register names, so this
+ * remark presumably predates the Alpha port.
+ */

+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)

+{

+    int addr_reg, data_reg, r0, r1, mem_index, s_bits;

+    tcg_target_long val;
+

+#if defined(CONFIG_SOFTMMU)

+    uint8_t *label1_ptr, *label2_ptr;

+#endif

+

+    data_reg = *args++;

+    addr_reg = *args++;

+    mem_index = *args;

+    s_bits = opc & 3;

+

+    r0 = TCG_REG_16;

+    r1 = TCG_REG_17;

+

+#if defined(CONFIG_SOFTMMU)

+

+    tcg_out_mov(s, r1, addr_reg); 

+    tcg_out_mov(s, r0, addr_reg); 

+ 

+#if TARGET_LONG_BITS == 32

+    /* if VM is of 32-bit arch, clear higher 32-bit of addr */

+    tcg_out_inst4i(s, OP_SHIFT, r0, 32, FUNC_SLL, r0);
+    tcg_out_inst4i(s, OP_SHIFT, r0, 32, FUNC_SRL, r0);

+    tcg_out_inst4i(s, OP_SHIFT, r1, 32, FUNC_SLL, r1);

+    tcg_out_inst4i(s, OP_SHIFT, r1, 32, FUNC_SRL, r1);
+#endif

+

+    /* r0 = page bits of the address plus its low alignment bits */
+    tgen_arithi(s, OP_LOGIC, FUNC_AND, r0, TARGET_PAGE_MASK|((1<<s_bits)-1));
+

+    /* r1 = byte offset of the TLB entry selected by the address */
+    tgen_arithi(s, OP_SHIFT, FUNC_SRL, r1, 
TARGET_PAGE_BITS-CPU_TLB_ENTRY_BITS);
+    tgen_arithi(s, OP_LOGIC, FUNC_AND, r1, 
(CPU_TLB_SIZE-1)<<CPU_TLB_ENTRY_BITS);
+    
+    /* r1 = address of that entry's addr_read field ($15 = cpu_env) */
+    tcg_out_addi(s, r1, offsetof(CPUState, 
tlb_table[mem_index][0].addr_read));        // addq r1, offset, r1
+    tcg_out_inst4(s, OP_ADDSUBCMP, r1, TCG_REG_15, FUNC_ADDQ, r1);             
// addq r1, $15, r1

+#if TARGET_LONG_BITS == 32
+    tcg_out_inst3_disp(s, OP_LDL, TMP_REG1, r1, 0);                            
// ldl TMP_REG1, 0(r1)

+    tcg_out_inst4i(s, OP_SHIFT, TMP_REG1, 32, FUNC_SLL, TMP_REG1);
+    tcg_out_inst4i(s, OP_SHIFT, TMP_REG1, 32, FUNC_SRL, TMP_REG1);
+#else

+    tcg_out_inst3_disp(s, OP_LDQ, TMP_REG1, r1, 0);                            
// ldq TMP_REG1, 0(r1)

+#endif

+               

+    //

+    // now, r0 contains the masked guest address and TMP_REG1 the value loaded from
tlb_entry.addr_read

+    // below we compare them: TMP_REG1 becomes 1 on a TLB hit, 0 on a miss
+    //

+    tcg_out_inst4(s, OP_ADDSUBCMP, TMP_REG1, r0, FUNC_CMPEQ, TMP_REG1);
+
+    /* reload the full (unmasked) guest address into r0 */
+    tcg_out_mov(s, r0, addr_reg);
+#if TARGET_LONG_BITS == 32

+    tcg_out_inst4i(s, OP_SHIFT, r0, 32, FUNC_SLL, r0);
+    tcg_out_inst4i(s, OP_SHIFT, r0, 32, FUNC_SRL, r0);
+#endif
+
+    //

+    // if equal, we jump to label1. since label1 is not resolved yet, 

+    // we reserve a 4-byte slot here and patch the branch in further down.

+    //

+    label1_ptr = s->code_ptr;

+    s->code_ptr += 4;

+

+    //

+    // here, unequal, TLB-miss.

+    //

+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_17, mem_index);              // pass 
argument 2

+    tcg_out_movi(s, TCG_TYPE_I64, \

+        TMP_REG1, (tcg_target_long)qemu_ld_helpers[s_bits]);           // get 
helper func entry
+    // NOTE(review): three 8-byte pushes move the stack pointer by 24
+    // bytes before the call; confirm this meets the stack alignment the
+    // Alpha calling standard expects (qemu_st pushes four registers).
+    tcg_out_push(s, addr_reg);
+    tcg_out_push(s, TCG_REG_26);
+    tcg_out_push(s, TCG_REG_15);
+    tcg_out_mov(s, TCG_REG_27, TMP_REG1);
+    tcg_out_inst3_func(s, OP_CALL, TCG_REG_26, TMP_REG1, FUNC_CALL, 0); // 
call helper func

+    tcg_out_pop(s, TCG_REG_15);
+    tcg_out_pop(s, TCG_REG_26);
+    tcg_out_pop(s, addr_reg);
+       

+    //

+    // after helper function call, the result of ld is saved in $0;
+    // extend it to 64 bits according to opc while copying to data_reg

+    //

+    switch(opc) {

+    case 0 | 4:

+        tcg_out_inst4(s, OP_SEXT, TCG_REG_31, TCG_REG_0, FUNC_SEXTB, data_reg);
+        break;

+    case 1 | 4:

+        tcg_out_inst4(s, OP_SEXT, TCG_REG_31, TCG_REG_0, FUNC_SEXTW, data_reg);
+        break;

+    case 2 | 4:

+        tcg_out_inst4i(s, OP_SHIFT, TCG_REG_0, 32, FUNC_SLL, data_reg);
+        tcg_out_inst4i(s, OP_SHIFT, data_reg, 32, FUNC_SRA, data_reg);
+        break;

+    case 0:

+        tcg_out_inst4i(s, OP_SHIFT, TCG_REG_0, 56, FUNC_SLL, data_reg);
+        tcg_out_inst4i(s, OP_SHIFT, data_reg, 56, FUNC_SRL, data_reg);
+        break;

+    case 1:

+        tcg_out_inst4i(s, OP_SHIFT, TCG_REG_0, 48, FUNC_SLL, data_reg);
+        tcg_out_inst4i(s, OP_SHIFT, data_reg, 48, FUNC_SRL, data_reg);
+        break;

+    case 2:

+        tcg_out_inst4i(s, OP_SHIFT, TCG_REG_0, 32, FUNC_SLL, data_reg);
+       tcg_out_inst4i(s, OP_SHIFT, data_reg, 32, FUNC_SRL, data_reg);
+        break;

+    case 3:

+        tcg_out_mov(s, data_reg, TCG_REG_0);

+        break;
+    default:
+        tcg_abort();
+        break;
+    }

+

+    //

+    // we have done, jmp to label2. label2 is not resolved yet, 

+    // we reserve a 4-byte slot and patch the branch in at the end.

+    //

+    label2_ptr = s->code_ptr;

+    s->code_ptr += 4;

+    

+    // patch jmp to label1: "bne TMP_REG1, label1" (taken on a TLB hit)
+    val = (s->code_ptr - label1_ptr - 4) >> 2;
+    if ( !(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label1_ptr = (uint32_t) \

+        ( ( OP_BNE << 26 ) | ( TMP_REG1 << 21 ) \

+        | ( val & 0x1fffff) );
+

+    //

+    // if we get here, a TLB entry is hit, r0 contains the guest addr and 

+    // r1 contains the ptr that point to tlb_entry.addr_read. what we should

+    // do is to load the tlb_entry.addend (64-bit on alpha) and add it to 

+    // r0 to get the host VA

+    //

+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, \

+       offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_read));
+    tcg_out_inst4(s, OP_ADDSUBCMP, r1, TMP_REG1, FUNC_ADDQ, r1);
+    tcg_out_inst3_disp(s, OP_LDQ, TMP_REG1, r1, 0);
+    tcg_out_inst4(s, OP_ADDSUBCMP, r0, TMP_REG1, FUNC_ADDQ, r0);
+       

+#else

+    r0 = addr_reg;

+#endif // endif defined(CONFIG_SOFTMMU)

+

+#ifdef TARGET_WORDS_BIGENDIAN

+    /* big-endian guests are not supported by this backend */
+    tcg_abort();
+#endif

+

+    //

+    // when we get here, r0 contains the host VA that can be used to access 
guest PA

+    //

+    switch(opc) {
+    case 0:
+        tcg_out_inst3_disp(s, OP_LDBU, data_reg, r0, 0);
+        break;
+    case 0 | 4:
+        tcg_out_inst3_disp(s, OP_LDBU, data_reg, r0, 0);
+        tcg_out_inst4(s, OP_SEXT, TCG_REG_31, data_reg, FUNC_SEXTB, data_reg);
+        break;
+    case 1:
+        tcg_out_inst3_disp(s, OP_LDWU, data_reg, r0, 0);
+        break;
+    case 1 | 4:
+        tcg_out_inst3_disp(s, OP_LDWU, data_reg, r0, 0);
+        tcg_out_inst4(s, OP_SEXT, TCG_REG_31, data_reg, FUNC_SEXTW, data_reg);
+        break;
+    case 2:

+        /* ldl result is zero-extended by the shift pair that follows */
+        tcg_out_inst3_disp(s, OP_LDL, data_reg, r0, 0);
+        tcg_out_inst4i(s, OP_SHIFT, data_reg, 32, FUNC_SLL, data_reg);
+        tcg_out_inst4i(s, OP_SHIFT, data_reg, 32, FUNC_SRL, data_reg);
+        break;

+    case 2 | 4:

+        tcg_out_inst3_disp(s, OP_LDL, data_reg, r0, 0);
+        break;
+    case 3:
+        tcg_out_inst3_disp(s, OP_LDQ, data_reg, r0, 0);
+        break;
+    default:
+        tcg_abort();
+    }
+
+#if defined(CONFIG_SOFTMMU)

+    /* label2: patch the slot after the miss path with "br $31, label2" */
+    val = (s->code_ptr - label2_ptr - 4) >> 2;
+    if ( !(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label2_ptr = (uint32_t)( ( OP_BR << 26 ) \
+        | ( TCG_REG_31  << 21 ) | ( val & 0x1fffff) );
+#endif

+}

+
+/*
+ * Output host insns for op 'qemu_stXX t0, t1, flags': store t0 to guest
+ * address t1.  args[] holds, in order, the data register, the address
+ * register and the memory index.  The low two bits of opc give the
+ * access width (0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit).
+ *
+ * With CONFIG_SOFTMMU the generated code looks the address up in the
+ * TLB (addr_write field); on a miss it calls the matching
+ * qemu_st_helpers[] entry, on a hit it adds the TLB entry's addend and
+ * stores directly.  $16, $17 and $18 are used as working/argument
+ * registers, TMP_REG1 as scratch.  Without CONFIG_SOFTMMU the address
+ * register is used directly.
+ */
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)

+{

+    int addr_reg, data_reg, r0, r1, mem_index, s_bits;

+    tcg_target_long val;
+
+#if defined(CONFIG_SOFTMMU)

+    uint8_t *label1_ptr, *label2_ptr;

+#endif

+

+    data_reg = *args++;

+    addr_reg = *args++;

+    mem_index = *args;

+    s_bits = opc&3;

+

+    r0 = TCG_REG_16;

+    r1 = TCG_REG_17;

+

+#if defined(CONFIG_SOFTMMU)

+

+    tcg_out_mov(s, r1, addr_reg); 

+    tcg_out_mov(s, r0, addr_reg); 

+ 

+#if TARGET_LONG_BITS == 32

+    /* if VM is of 32-bit arch, clear higher 32-bit of addr */

+    tcg_out_inst4i(s, OP_SHIFT, r0, 32, FUNC_SLL, r0);
+    tcg_out_inst4i(s, OP_SHIFT, r0, 32, FUNC_SRL, r0);
+    tcg_out_inst4i(s, OP_SHIFT, r1, 32, FUNC_SLL, r1);
+    tcg_out_inst4i(s, OP_SHIFT, r1, 32, FUNC_SRL, r1);
+#endif

+

+    /* r0 = page bits of the address plus its low alignment bits */
+    tgen_arithi(s, OP_LOGIC, FUNC_AND, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 
1));
+

+    /* r1 = byte offset of the TLB entry selected by the address */
+    tgen_arithi(s, OP_SHIFT, FUNC_SRL, r1, TARGET_PAGE_BITS - 
CPU_TLB_ENTRY_BITS);
+    tgen_arithi(s, OP_LOGIC, FUNC_AND, r1, (CPU_TLB_SIZE-1) << 
CPU_TLB_ENTRY_BITS);
+

+    /* r1 = address of that entry's addr_write field ($15 = cpu_env) */
+    tcg_out_addi(s, r1, offsetof(CPUState, 
tlb_table[mem_index][0].addr_write));
+    tcg_out_inst4(s, OP_ADDSUBCMP, r1, TCG_REG_15, FUNC_ADDQ, r1);
+

+#if TARGET_LONG_BITS == 32

+    tcg_out_inst3_disp(s, OP_LDL, TMP_REG1, r1, 0);
+    tcg_out_inst4i(s, OP_SHIFT, TMP_REG1, 32, FUNC_SLL, TMP_REG1);
+    tcg_out_inst4i(s, OP_SHIFT, TMP_REG1, 32, FUNC_SRL, TMP_REG1);
+#else

+    tcg_out_inst3_disp(s, OP_LDQ, TMP_REG1, r1, 0);
+#endif

+

+    //

+    // NOTE: in this store path the TLB field loaded above is addr_write;
+    // the "addr_read" in the line-wrapped remark below is a leftover.
+    // now, r0 contains the masked guest address and TMP_REG1 the value loaded from
tlb_entry.addr_read

+    // below we compare them: TMP_REG1 becomes 1 on a TLB hit, 0 on a miss

+    //    

+    tcg_out_inst4(s, OP_ADDSUBCMP, TMP_REG1, r0, FUNC_CMPEQ, TMP_REG1);
+
+    /* reload the full (unmasked) guest address into r0 */
+    tcg_out_mov(s, r0, addr_reg);
+#if TARGET_LONG_BITS == 32

+    tcg_out_inst4i(s, OP_SHIFT, r0, 32, FUNC_SLL, r0);
+    tcg_out_inst4i(s, OP_SHIFT, r0, 32, FUNC_SRL, r0);
+#endif
+
+    //

+    // if equal, we jump to label1. since label1 is not resolved yet, 

+    // we reserve a 4-byte slot here and patch the branch in further down.

+    //

+    label1_ptr = s->code_ptr;

+    s->code_ptr += 4;

+

+    // here, unequal, TLB-miss: call the helper with the address in $16,
+    // the data in $17 and mem_index in $18

+    tcg_out_mov(s, TCG_REG_17, data_reg);
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_18, mem_index);
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, 
(tcg_target_long)qemu_st_helpers[s_bits]);
+        
+    tcg_out_push(s, data_reg);
+    tcg_out_push(s, addr_reg);
+    tcg_out_push(s, TCG_REG_26);

+    tcg_out_push(s, TCG_REG_15);
+    tcg_out_mov(s, TCG_REG_27,TMP_REG1);
+    tcg_out_inst3_func(s, OP_CALL, TCG_REG_26, TMP_REG1, FUNC_CALL, 0);
+    tcg_out_pop(s, TCG_REG_15);
+    tcg_out_pop(s, TCG_REG_26);
+    tcg_out_pop(s, addr_reg);
+    tcg_out_pop(s, data_reg);
+
+    //
+    // we have done, jmp to label2. label2 is not resolved yet,
+    // we reserve a 4-byte slot and patch the branch in at the end.
+    //
+    label2_ptr = s->code_ptr;
+    s->code_ptr += 4;
+    

+    // patch jmp to label1: "bne TMP_REG1, label1" (taken on a TLB hit)

+    val = (s->code_ptr - label1_ptr - 4) >> 2;
+    if ( !(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label1_ptr = (uint32_t) \
+        ( ( OP_BNE << 26) | ( TMP_REG1  << 21 )
+        | ( val & 0x1fffff) );
+

+    //

+    // if we get here, a TLB entry is hit, r0 contains the guest addr and 

+    // r1 contains the ptr that point to tlb_entry.addr_write. what we should

+    // do is to load the tlb_entry.addend (64-bit on alpha) and add it to 

+    // r0 to get the host VA

+    //

+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, \

+        offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_write));
+    tcg_out_inst4(s, OP_ADDSUBCMP, r1, TMP_REG1, FUNC_ADDQ, r1);
+    tcg_out_inst3_disp(s, OP_LDQ, TMP_REG1, r1,  0);
+    tcg_out_inst4(s, OP_ADDSUBCMP, r0, TMP_REG1, FUNC_ADDQ, r0);
+

+#else

+    r0 = addr_reg;

+#endif

+

+#ifdef TARGET_WORDS_BIGENDIAN

+    /* big-endian guests are not supported by this backend */
+    tcg_abort();
+#endif

+

+    //

+    // when we get here, r0 contains the host VA that can be used to access 
guest PA

+    //

+    switch(opc) {

+    case 0:

+        tcg_out_inst3_disp(s, OP_STB, data_reg, r0, 0);
+        break;

+    case 1:

+        tcg_out_inst3_disp(s, OP_STW, data_reg, r0, 0);
+        break;
+    case 2:
+        tcg_out_inst3_disp(s, OP_STL, data_reg, r0, 0);
+        break;
+    case 3:
+        tcg_out_inst3_disp(s, OP_STQ, data_reg, r0, 0);
+        break;
+    default:
+        tcg_abort();
+    }
+

+#if defined(CONFIG_SOFTMMU)

+    /* patch jmp to label2: "br $31, label2" placed after the miss path */

+    val = (s->code_ptr - label2_ptr - 4) >> 2;
+    if ( !(val >= -0x100000 && val < 0x100000)) {
+        tcg_abort();
+    }
+    *(uint32_t *)label2_ptr = (uint32_t)( ( OP_BR << 26 ) \

+        | ( TCG_REG_31  << 21 ) | ( val & 0x1fffff));

+#endif
+}

+
+/*
+ * Emit a load of disp(Rb) into Ra for the host-memory load ops.
+ *
+ * flags bits 1:0 select the width (0 = 8-bit, 1 = 16-bit, 2 = 32-bit,
+ * 3 = 64-bit); bit 2 requests sign extension.  The load instruction is
+ * emitted first and then fixed up: ldbu/ldwu are followed by sextb/sextw
+ * for signed loads, and ldl is followed by a shift pair for the unsigned
+ * 32-bit load.  The combination 3|4 aborts (after the load was emitted).
+ *
+ * A displacement that does not fit in 16 signed bits is first added to
+ * Rb in TMP_REG1.  Aborts if Ra or Rb is a scratch register.
+ */
+static inline void tgen_ldxx(TCGContext *s, int Ra, int Rb,
+                             tcg_target_long disp, int flags)
+{
+    static const int load_insn[4] = { OP_LDBU, OP_LDWU, OP_LDL, OP_LDQ };
+    const int insn = load_insn[flags & 3];
+    int base = Rb;
+    tcg_target_long offset = disp;
+
+    if (_is_tmp_reg(Ra) || _is_tmp_reg(Rb)) {
+        tcg_abort();
+    }
+
+    if (disp != (int16_t)disp) {
+        /* disp cannot be stored in the insn: TMP_REG1 = Rb + disp */
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);
+        tcg_out_inst4(s, OP_ADDSUBCMP, Rb, TMP_REG1, FUNC_ADDQ, TMP_REG1);
+        base = TMP_REG1;
+        offset = 0;
+    }
+    tcg_out_inst3_disp(s, insn, Ra, base, offset);
+
+    switch (flags & 7) {
+    case 0 | 4:
+        tcg_out_inst4(s, OP_SEXT, TCG_REG_31, Ra, FUNC_SEXTB, Ra);
+        break;
+    case 1 | 4:
+        tcg_out_inst4(s, OP_SEXT, TCG_REG_31, Ra, FUNC_SEXTW, Ra);
+        break;
+    case 2:
+        /* clear the upper 32 bits */
+        tcg_out_inst4i(s, OP_SHIFT, Ra, 32, FUNC_SLL, Ra);
+        tcg_out_inst4i(s, OP_SHIFT, Ra, 32, FUNC_SRL, Ra);
+        break;
+    case 3 | 4:
+        /* no such thing as a sign-extending 64-bit load */
+        tcg_abort();
+        break;
+    default:
+        /* 0, 1, 2|4 and 3 need no fix-up after the load */
+        break;
+    }
+}
+

+/*
+ * Emit a store of Ra to disp(Rb) for the host-memory store ops.
+ *
+ * flags bits 1:0 select the width (0 = 8-bit, 1 = 16-bit, 2 = 32-bit,
+ * 3 = 64-bit).
+ *
+ * A displacement that does not fit in 16 signed bits is first added to
+ * Rb in TMP_REG1.  That sequence overwrites TMP_REG1 before Ra and Rb
+ * are read, so the function aborts rather than silently emit a wrong
+ * store when either of them is TMP_REG1 in that case.
+ */
+static inline void tgen_stxx(TCGContext *s, int Ra, int Rb,
+                             tcg_target_long disp, int flags)
+{
+    int opc_array[4] = { OP_STB, OP_STW, OP_STL, OP_STQ};
+    int opc = opc_array[flags & 3];
+
+    if (disp == (int16_t)disp) {
+        tcg_out_inst3_disp(s, opc, Ra, Rb, disp);
+        return;
+    }
+
+    /* disp cannot be stored in insn directly */
+    if (Ra == TMP_REG1 || Rb == TMP_REG1) {
+        tcg_abort();
+    }
+    tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, disp);
+    tcg_out_inst4(s, OP_ADDSUBCMP, Rb, TMP_REG1, FUNC_ADDQ, TMP_REG1);
+    tcg_out_inst3_disp(s, opc, Ra, TMP_REG1, 0);
+}

+

+/*
+ * Emit host code for one TCG op.
+ *
+ *   s          - code generation context
+ *   opc        - INDEX_op_* value selecting the operation
+ *   args       - operand array; its layout depends on opc
+ *   const_args - per-operand flags; a set flag on an operand that this
+ *                backend only handles in a register makes the function abort
+ *
+ * Ops without a case below, as well as mov/movi and div2/divu2 which are
+ * listed explicitly in front of the default label, end in tcg_abort().
+ */
+static inline void tcg_out_op(TCGContext *s,
+       int opc, const TCGArg *args, const int *const_args)
+{
+    int oc, c;
+
+    switch (opc) {
+    case INDEX_op_exit_tb:
+        /*
+         * exit_tb t0: t0 is loaded into $0 (TCG_REG_0) so that it is handed
+         * back to the engine, then control jumps to the epilogue recorded in
+         * tb_ret_addr.  TMP_REG1 carries the jump target.
+         */
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_0, args[0]);
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, (tcg_target_long)tb_ret_addr);
+        tcg_out_inst3_func(s, OP_JMP, TCG_REG_31, TMP_REG1, FUNC_JMP, 0);
+        break;
+
+    case INDEX_op_goto_tb:
+        /* goto_tb idx, where idx is constant 0 or 1, indicating the branch # */
+        if (s->tb_jmp_offset) {
+            /* we don't support direct jmp */
+            tcg_abort();
+        } else {
+            /* indirect jump through the slot s->tb_next[idx] */
+            tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1,
+                         (tcg_target_long)(s->tb_next + args[0]));
+            tcg_out_inst3_disp(s, OP_LDQ, TMP_REG1, TMP_REG1, 0);
+            tcg_out_inst3_func(s, OP_JMP, TCG_REG_31, TMP_REG1, FUNC_JMP, 0);
+        }
+        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
+        break;
+
+    case INDEX_op_call:
+        if (const_args[0]) {
+            tcg_abort();
+        } else {
+            /*
+             * $26 and $15 are saved around the call; $27 receives a copy of
+             * the callee address before the call through args[0].
+             */
+            tcg_out_push(s, TCG_REG_26);
+            tcg_out_push(s, TCG_REG_15);
+            tcg_out_mov(s, TCG_REG_27, args[0]);
+            tcg_out_inst3_func(s, OP_CALL, TCG_REG_26, args[0], FUNC_CALL, 0);
+            tcg_out_pop(s, TCG_REG_15);
+            tcg_out_pop(s, TCG_REG_26);
+        }
+        break;
+
+    case INDEX_op_jmp:
+        if (const_args[0]) {
+            tcg_abort();
+        } else {
+            tcg_out_inst3_func(s, OP_JMP, TCG_REG_31, args[0], FUNC_JMP, 0);
+        }
+        break;
+
+    case INDEX_op_br:
+        tcg_out_br(s, args[0]);
+        break;
+
+    /*
+     * Host loads.  Last argument of tgen_ldxx: low two bits select the size
+     * (0..3 for 8/16/32/64 bits), bit 2 is set for the sign-extending forms.
+     */
+    case INDEX_op_ld8u_i32:
+    case INDEX_op_ld8u_i64:
+        tgen_ldxx(s, args[0], args[1], args[2], 0);
+        break;
+    case INDEX_op_ld8s_i32:
+    case INDEX_op_ld8s_i64:
+        tgen_ldxx(s, args[0], args[1], args[2], 0 | 4);
+        break;
+    case INDEX_op_ld16u_i32:
+    case INDEX_op_ld16u_i64:
+        tgen_ldxx(s, args[0], args[1], args[2], 1);
+        break;
+    case INDEX_op_ld16s_i32:
+    case INDEX_op_ld16s_i64:
+        tgen_ldxx(s, args[0], args[1], args[2], 1 | 4);
+        break;
+    case INDEX_op_ld32u_i64:
+        tgen_ldxx(s, args[0], args[1], args[2], 2);
+        break;
+    case INDEX_op_ld_i32:
+    case INDEX_op_ld32s_i64:
+        tgen_ldxx(s, args[0], args[1], args[2], 2 | 4);
+        break;
+    case INDEX_op_ld_i64:
+        tgen_ldxx(s, args[0], args[1], args[2], 3);
+        break;
+
+    /* Host stores; last argument of tgen_stxx is the size selector 0..3. */
+    case INDEX_op_st8_i32:
+    case INDEX_op_st8_i64:
+        tgen_stxx(s, args[0], args[1], args[2], 0);
+        break;
+    case INDEX_op_st16_i32:
+    case INDEX_op_st16_i64:
+        tgen_stxx(s, args[0], args[1], args[2], 1);
+        break;
+    case INDEX_op_st_i32:
+    case INDEX_op_st32_i64:
+        tgen_stxx(s, args[0], args[1], args[2], 2);
+        break;
+    case INDEX_op_st_i64:
+        tgen_stxx(s, args[0], args[1], args[2], 3);
+        break;
+
+    /*
+     * Three-operand arithmetic: each case picks opcode (oc) and function
+     * code (c), the shared tail at gen_arith emits the instruction.
+     * The 32-bit add/sub/logic/shl ops reuse the 64-bit instruction.
+     */
+    case INDEX_op_add_i32:
+    case INDEX_op_add_i64:
+        oc = OP_ADDSUBCMP;
+        c = FUNC_ADDQ;
+        goto gen_arith;
+    case INDEX_op_sub_i32:
+    case INDEX_op_sub_i64:
+        oc = OP_ADDSUBCMP;
+        c = FUNC_SUBQ;
+        goto gen_arith;
+    case INDEX_op_mul_i32:
+        oc = OP_MUL;
+        c = FUNC_MULL;
+        goto gen_arith;
+    case INDEX_op_mul_i64:
+        oc = OP_MUL;
+        c = FUNC_MULQ;
+        goto gen_arith;
+    case INDEX_op_and_i32:
+    case INDEX_op_and_i64:
+        oc = OP_LOGIC;
+        c = FUNC_AND;
+        goto gen_arith;
+    case INDEX_op_or_i32:
+    case INDEX_op_or_i64:
+        oc = OP_LOGIC;
+        c = FUNC_BIS;
+        goto gen_arith;
+    case INDEX_op_xor_i32:
+    case INDEX_op_xor_i64:
+        oc = OP_LOGIC;
+        c = FUNC_XOR;
+        goto gen_arith;
+    case INDEX_op_shl_i32:
+    case INDEX_op_shl_i64:
+        oc = OP_SHIFT;
+        c = FUNC_SLL;
+        goto gen_arith;
+    case INDEX_op_shr_i32:
+        /* zero-extend the 32-bit source in place before the 64-bit shift */
+        tcg_out_inst4i(s, OP_SHIFT, args[1], 32, FUNC_SLL, args[1]);
+        tcg_out_inst4i(s, OP_SHIFT, args[1], 32, FUNC_SRL, args[1]);
+        /* fall through */
+    case INDEX_op_shr_i64:
+        oc = OP_SHIFT;
+        c = FUNC_SRL;
+        goto gen_arith;
+    case INDEX_op_sar_i32:
+        /* sign-extend the 32-bit source in place before the 64-bit shift */
+        tcg_out_inst4i(s, OP_SHIFT, args[1], 32, FUNC_SLL, args[1]);
+        tcg_out_inst4i(s, OP_SHIFT, args[1], 32, FUNC_SRA, args[1]);
+        /* fall through */
+    case INDEX_op_sar_i64:
+        oc = OP_SHIFT;
+        c = FUNC_SRA;
+    gen_arith:
+        if (const_args[2]) {
+            /* constant second source operand is not supported */
+            tcg_abort();
+        } else {
+            tcg_out_inst4(s, oc, args[1], args[2], c, args[0]);
+        }
+        break;
+
+    case INDEX_op_brcond_i32:
+        /*
+         * Compare copies of the operands widened to 64 bits: zero-extended
+         * (SRL) for the unsigned conditions LTU..GTU, sign-extended (SRA)
+         * for all others.
+         */
+        c = (args[2] >= TCG_COND_LTU && args[2] <= TCG_COND_GTU)
+            ? FUNC_SRL : FUNC_SRA;
+        tcg_out_mov(s, TMP_REG2, args[0]);
+        tcg_out_mov(s, TMP_REG3, args[1]);
+        tcg_out_inst4i(s, OP_SHIFT, TMP_REG2, 32, FUNC_SLL, TMP_REG2);
+        tcg_out_inst4i(s, OP_SHIFT, TMP_REG2, 32, c, TMP_REG2);
+        tcg_out_inst4i(s, OP_SHIFT, TMP_REG3, 32, FUNC_SLL, TMP_REG3);
+        tcg_out_inst4i(s, OP_SHIFT, TMP_REG3, 32, c, TMP_REG3);
+        tcg_out_brcond(s, args[2], TMP_REG2, TMP_REG3, const_args[1], args[3]);
+        break;
+    case INDEX_op_brcond_i64:
+        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+        break;
+
+    /* Sign extensions; no debug output is printed while emitting them. */
+    case INDEX_op_ext8s_i32:
+    case INDEX_op_ext8s_i64:
+        tcg_out_inst4(s, OP_SEXT, TCG_REG_31, args[1], FUNC_SEXTB, args[0]);
+        break;
+    case INDEX_op_ext16s_i32:
+    case INDEX_op_ext16s_i64:
+        tcg_out_inst4(s, OP_SEXT, TCG_REG_31, args[1], FUNC_SEXTW, args[0]);
+        break;
+
+    /* Guest memory loads; second argument encoded as for tgen_ldxx above. */
+    case INDEX_op_qemu_ld8u:
+        tcg_out_qemu_ld(s, args, 0);
+        break;
+    case INDEX_op_qemu_ld8s:
+        tcg_out_qemu_ld(s, args, 0 | 4);
+        break;
+    case INDEX_op_qemu_ld16u:
+        tcg_out_qemu_ld(s, args, 1);
+        break;
+    case INDEX_op_qemu_ld16s:
+        tcg_out_qemu_ld(s, args, 1 | 4);
+        break;
+    case INDEX_op_qemu_ld32u:
+        tcg_out_qemu_ld(s, args, 2);
+        break;
+    case INDEX_op_qemu_ld32s:
+        tcg_out_qemu_ld(s, args, 2 | 4);
+        break;
+    case INDEX_op_qemu_ld64:
+        tcg_out_qemu_ld(s, args, 3);
+        break;
+
+    /* Guest memory stores. */
+    case INDEX_op_qemu_st8:
+        tcg_out_qemu_st(s, args, 0);
+        break;
+    case INDEX_op_qemu_st16:
+        tcg_out_qemu_st(s, args, 1);
+        break;
+    case INDEX_op_qemu_st32:
+        tcg_out_qemu_st(s, args, 2);
+        break;
+    case INDEX_op_qemu_st64:
+        tcg_out_qemu_st(s, args, 3);
+        break;
+
+    /* Not expected to reach this function; treated like any unknown op. */
+    case INDEX_op_movi_i32:
+    case INDEX_op_movi_i64:
+    case INDEX_op_mov_i32:
+    case INDEX_op_mov_i64:
+    case INDEX_op_div2_i32:
+    case INDEX_op_divu2_i32:
+    default:
+        tcg_abort();
+    }
+}

+

+/*
+ * Operand constraint table for the ops this backend implements; it is
+ * registered from tcg_target_init() and terminated by the { -1 } entry.
+ * Each entry pairs an op with one constraint string per operand.
+ * NOTE(review): the meaning of the letters ("r", "0", "L") is defined by the
+ * constraint parser, which is outside this table -- "0" presumably ties the
+ * operand to output operand 0; confirm there.
+ */
+static const TCGTargetOpDef alpha_op_defs[] = {
+    /* control flow */
+    { INDEX_op_exit_tb, { } },
+    { INDEX_op_goto_tb, { } },
+    { INDEX_op_call, { "r" } },
+    { INDEX_op_jmp, { "r" } },
+    { INDEX_op_br, { } },
+
+    /* 32-bit moves, host loads and stores */
+    { INDEX_op_mov_i32, { "r", "r" } },
+    { INDEX_op_movi_i32, { "r" } },
+    { INDEX_op_ld8u_i32, { "r", "r" } },
+    { INDEX_op_ld8s_i32, { "r", "r" } },
+    { INDEX_op_ld16u_i32, { "r", "r" } },
+    { INDEX_op_ld16s_i32, { "r", "r" } },
+    { INDEX_op_ld_i32, { "r", "r" } },
+    { INDEX_op_st8_i32, { "r", "r" } },
+    { INDEX_op_st16_i32, { "r", "r" } },
+    { INDEX_op_st_i32, { "r", "r" } },
+
+    /* 32-bit arithmetic and logic; div2_i32/divu2_i32 are not provided */
+    { INDEX_op_add_i32, { "r", "0", "r" } },
+    { INDEX_op_mul_i32, { "r", "0", "r" } },
+    { INDEX_op_sub_i32, { "r", "0", "r" } },
+    { INDEX_op_and_i32, { "r", "0", "r" } },
+    { INDEX_op_or_i32, { "r", "0", "r" } },
+    { INDEX_op_xor_i32, { "r", "0", "r" } },
+
+    /* 32-bit shifts */
+    { INDEX_op_shl_i32, { "r", "0", "r" } },
+    { INDEX_op_shr_i32, { "r", "0", "r" } },
+    { INDEX_op_sar_i32, { "r", "0", "r" } },
+
+    { INDEX_op_brcond_i32, { "r", "r" } },
+
+    /* 64-bit moves, host loads and stores */
+    { INDEX_op_mov_i64, { "r", "r" } },
+    { INDEX_op_movi_i64, { "r" } },
+    { INDEX_op_ld8u_i64, { "r", "r" } },
+    { INDEX_op_ld8s_i64, { "r", "r" } },
+    { INDEX_op_ld16u_i64, { "r", "r" } },
+    { INDEX_op_ld16s_i64, { "r", "r" } },
+    { INDEX_op_ld32u_i64, { "r", "r" } },
+    { INDEX_op_ld32s_i64, { "r", "r" } },
+    { INDEX_op_ld_i64, { "r", "r" } },
+    { INDEX_op_st8_i64, { "r", "r" } },
+    { INDEX_op_st16_i64, { "r", "r" } },
+    { INDEX_op_st32_i64, { "r", "r" } },
+    { INDEX_op_st_i64, { "r", "r" } },
+
+    /* 64-bit arithmetic and logic; div2_i64/divu2_i64 are not provided */
+    { INDEX_op_add_i64, { "r", "0", "r" } },
+    { INDEX_op_mul_i64, { "r", "0", "r" } },
+    { INDEX_op_sub_i64, { "r", "0", "r" } },
+    { INDEX_op_and_i64, { "r", "0", "r" } },
+    { INDEX_op_or_i64, { "r", "0", "r" } },
+    { INDEX_op_xor_i64, { "r", "0", "r" } },
+
+    /* 64-bit shifts */
+    { INDEX_op_shl_i64, { "r", "0", "r" } },
+    { INDEX_op_shr_i64, { "r", "0", "r" } },
+    { INDEX_op_sar_i64, { "r", "0", "r" } },
+
+    { INDEX_op_brcond_i64, { "r", "r" } },
+
+    /* sign extensions */
+    { INDEX_op_ext8s_i32, { "r", "r"} },
+    { INDEX_op_ext16s_i32, { "r", "r"} },
+    { INDEX_op_ext8s_i64, { "r", "r"} },
+    { INDEX_op_ext16s_i64, { "r", "r"} },
+
+    /* guest memory loads */
+    { INDEX_op_qemu_ld8u, { "r", "L" } },
+    { INDEX_op_qemu_ld8s, { "r", "L" } },
+    { INDEX_op_qemu_ld16u, { "r", "L" } },
+    { INDEX_op_qemu_ld16s, { "r", "L" } },
+    { INDEX_op_qemu_ld32u, { "r", "L" } },
+    { INDEX_op_qemu_ld32s, { "r", "L" } },
+    { INDEX_op_qemu_ld64, { "r", "L" } },
+
+    /* guest memory stores; qemu_st64 takes two operands like the others */
+    { INDEX_op_qemu_st8, { "L", "L" } },
+    { INDEX_op_qemu_st16, { "L", "L" } },
+    { INDEX_op_qemu_st32, { "L", "L" } },
+    { INDEX_op_qemu_st64, { "L", "L"} },
+    { -1 },
+};

+

+

+/*
+ * Registers that tcg_target_qemu_prologue() pushes on entry and pops, in
+ * reverse order, in the epilogue.
+ */
+static int tcg_target_callee_save_regs[] = {
+    TCG_REG_15,                /* holds the global env; being listed here, it is saved and restored too */
+    TCG_REG_9,
+    TCG_REG_10,
+    TCG_REG_11,
+    TCG_REG_12,
+    TCG_REG_13,
+    TCG_REG_14
+};

+

+/*
+ * Generate the global QEMU prologue and epilogue code.
+ *
+ * Prologue: pushes $26..$29 and every register listed in
+ * tcg_target_callee_save_regs, lowers $30 by stack_addend to reserve
+ * scratch space, then jumps to the address held in $16 (presumably the
+ * translated-code pointer passed as first argument -- confirm with the
+ * caller).
+ *
+ * Epilogue: starts at tb_ret_addr, which exit_tb jumps to; it undoes the
+ * prologue in reverse order and returns through $26.
+ */
+void tcg_target_qemu_prologue(TCGContext *s)
+{
+    int i, frame_size, push_size, stack_addend;
+
+    /* TB prologue */
+
+    /* save $26..$29 */
+    tcg_out_push(s, TCG_REG_26);
+    tcg_out_push(s, TCG_REG_27);
+    tcg_out_push(s, TCG_REG_28);
+    tcg_out_push(s, TCG_REG_29);
+
+    /* save all callee saved registers */
+    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+        tcg_out_push(s, tcg_target_callee_save_regs[i]);
+    }
+
+    /*
+     * Reserve some stack space.
+     *
+     * push_size counts exactly the registers pushed above (assuming
+     * tcg_out_push moves $30 by 8 bytes per register -- TODO confirm).
+     * No extra return-address slot is added: nothing beyond these registers
+     * is pushed here, and counting one would leave the total adjustment
+     * (pushes + stack_addend) at 8 modulo TCG_TARGET_STACK_ALIGN.
+     */
+    push_size = (4 + ARRAY_SIZE(tcg_target_callee_save_regs)) * 8;
+    frame_size = push_size + 4 * TCG_STATIC_CALL_ARGS_SIZE;
+    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
+                 ~(TCG_TARGET_STACK_ALIGN - 1);
+    stack_addend = frame_size - push_size;
+    tcg_out_addi(s, TCG_REG_30, -stack_addend);
+
+    /* jmp $16 */
+    tcg_out_inst3_func(s, OP_JMP, TCG_REG_31, TCG_REG_16, FUNC_JMP, 0);
+
+    /* TB epilogue */
+    tb_ret_addr = s->code_ptr;
+    tcg_out_addi(s, TCG_REG_30, stack_addend);
+    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
+        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
+    }
+
+    tcg_out_pop(s, TCG_REG_29);
+    tcg_out_pop(s, TCG_REG_28);
+    tcg_out_pop(s, TCG_REG_27);
+    tcg_out_pop(s, TCG_REG_26);
+
+    /* ret */
+    tcg_out_inst3_func(s, OP_RET, TCG_REG_31, TCG_REG_26, FUNC_RET, 0);
+}

+

+

+/*
+ * Set up the Alpha backend: check the TLB entry size, publish the available
+ * and call-clobbered register sets, reserve the registers the allocator must
+ * not hand out, and register the operand constraint table.
+ */
+void tcg_target_init(TCGContext *s)
+{
+    /* $26..$31 plus the three emitter scratch registers */
+    static const int never_allocated[] = {
+        TCG_REG_26, TCG_REG_27, TCG_REG_28,
+        TCG_REG_29, TCG_REG_30, TCG_REG_31,
+        TMP_REG1, TMP_REG2, TMP_REG3
+    };
+    unsigned int clobber_mask = 0;
+    unsigned int idx;
+    int reg;
+
+    /* fail safe */
+    if (sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS)) {
+        tcg_abort();
+    }
+
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
+
+    /* registers treated as clobbered by a call: $0..$8 and $16..$25 */
+    for (reg = TCG_REG_0; reg <= TCG_REG_8; reg++) {
+        clobber_mask |= 1u << reg;
+    }
+    for (reg = TCG_REG_16; reg <= TCG_REG_25; reg++) {
+        clobber_mask |= 1u << reg;
+    }
+    tcg_regset_set32(tcg_target_call_clobber_regs, 0, clobber_mask);
+
+    tcg_regset_clear(s->reserved_regs);
+    for (idx = 0; idx < ARRAY_SIZE(never_allocated); idx++) {
+        tcg_regset_set_reg(s->reserved_regs, never_allocated[idx]);
+    }
+
+    tcg_add_target_add_op_defs(alpha_op_defs);
+}

+

diff --git a/tcg/alpha/tcg-target.h b/tcg/alpha/tcg-target.h
new file mode 100644
index 0000000..3c15a15
--- /dev/null
+++ b/tcg/alpha/tcg-target.h
@@ -0,0 +1,70 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/* Marks this build as using the Alpha TCG backend. */
+#define TCG_TARGET_ALPHA 1
+
+/* Width of a host register in bits. */
+#define TCG_TARGET_REG_BITS 64
+
+/* Number of host registers known to the backend (TCG_REG_0..TCG_REG_31). */
+#define TCG_TARGET_NB_REGS 32
+
+/* Host register numbers: TCG_REG_n has the value n. */
+enum {
+    TCG_REG_0 = 0, TCG_REG_1, TCG_REG_2, TCG_REG_3,
+    TCG_REG_4, TCG_REG_5, TCG_REG_6, TCG_REG_7,
+    TCG_REG_8, TCG_REG_9, TCG_REG_10, TCG_REG_11,
+    TCG_REG_12, TCG_REG_13, TCG_REG_14, TCG_REG_15,
+    TCG_REG_16, TCG_REG_17, TCG_REG_18, TCG_REG_19,
+    TCG_REG_20, TCG_REG_21, TCG_REG_22, TCG_REG_23,
+    TCG_REG_24, TCG_REG_25, TCG_REG_26, TCG_REG_27,
+    TCG_REG_28, TCG_REG_29, TCG_REG_30, TCG_REG_31
+};
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK TCG_REG_30
+#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_CALL_STACK_OFFSET 0
+
+/* we have signed extension instructions */
+#define TCG_TARGET_HAS_ext8s_i32
+#define TCG_TARGET_HAS_ext16s_i32
+#define TCG_TARGET_HAS_ext8s_i64
+#define TCG_TARGET_HAS_ext16s_i64
+/* TCG_TARGET_HAS_ext32s_i64 is left undefined: tcg_out_op() has no case for ext32s_i64. */
+
+/* Note: must be synced with dyngen-exec.h */
+#define TCG_AREG0 TCG_REG_15
+#define TCG_AREG1 TCG_REG_9
+#define TCG_AREG2 TCG_REG_10
+#define TCG_AREG3 TCG_REG_11
+#define TCG_AREG4 TCG_REG_12
+#define TCG_AREG5 TCG_REG_13
+#define TCG_AREG6 TCG_REG_14
+
+/* Scratch registers used by the code emitter; tcg_target_init() reserves them. */
+#define TMP_REG1 TCG_REG_23
+#define TMP_REG2 TCG_REG_24
+#define TMP_REG3 TCG_REG_25
+
+/*
+ * Make freshly written code visible to instruction fetch.
+ *
+ * start and stop are ignored: the PALcode call is issued unconditionally
+ * and is not limited to the given range.  0x86 is the IMB (instruction
+ * memory barrier) entry of the OSF/1 PALcode.
+ */
+static inline void flush_icache_range(unsigned long start, unsigned long stop)
+{
+    (void)start;
+    (void)stop;
+    /*
+     * The "memory" clobber keeps the compiler from moving stores into the
+     * code buffer past the barrier.
+     */
+    __asm__ __volatile__ ("call_pal 0x86" : : : "memory");
+}
+
-- 
1.6.3.3



      ___________________________________________________________ 
  好玩贺卡等你发,邮箱贺卡全新上线! 
http://card.mail.cn.yahoo.com/




reply via email to

[Prev in Thread] Current Thread [Next in Thread]