diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 3ddf434..cff56ab 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -189,7 +189,8 @@ static inline int tcg_target_get_call_iarg_regs_count(int flags) /* bundle templates: stops (double bar in the IA64 manual) are marked with an uppercase letter. */ -enum { +enum ia64_bundle_type { + nobundle = -1, mii = 0x00, miI = 0x01, mIi = 0x02, @@ -216,7 +217,18 @@ enum { mfB = 0x1d, }; -enum { +enum ia64_insn_type { + INSN_TYPE_N, /* NONE */ + INSN_TYPE_A, + INSN_TYPE_M, + INSN_TYPE_I, + INSN_TYPE_X, + INSN_TYPE_L, + INSN_TYPE_F, + INSN_TYPE_B +}; + +enum ia64_opc { OPC_ADD_A1 = 0x10000000000ull, OPC_AND_A1 = 0x10060000000ull, OPC_AND_A3 = 0x10160000000ull, @@ -231,13 +243,17 @@ enum { OPC_BR_CALL_SPTK_MANY_B5 = 0x02100001000ull, OPC_BR_RET_SPTK_MANY_B4 = 0x00108001100ull, OPC_BRL_SPTK_MANY_X3 = 0x18000001000ull, + OPC_BRL_CALL_SPTK_MANY_X4 = 0x1a000001000ull, OPC_CMP_LT_A6 = 0x18000000000ull, OPC_CMP_LTU_A6 = 0x1a000000000ull, OPC_CMP_EQ_A6 = 0x1c000000000ull, OPC_CMP4_LT_A6 = 0x18400000000ull, OPC_CMP4_LTU_A6 = 0x1a400000000ull, OPC_CMP4_EQ_A6 = 0x1c400000000ull, + OPC_DEP_I14 = 0x0ae00000000ull, + OPC_DEP_I15 = 0x08000000000ull, OPC_DEP_Z_I12 = 0x0a600000000ull, + OPC_DEP_Z_I13 = 0x0a604000000ull, OPC_EXTR_I11 = 0x0a400002000ull, OPC_EXTR_U_I11 = 0x0a400000000ull, OPC_FCVT_FX_TRUNC_S1_F10 = 0x004d0000000ull, @@ -266,9 +282,11 @@ enum { OPC_MOV_I_I26 = 0x00150000000ull, OPC_MOVL_X2 = 0x0c000000000ull, OPC_OR_A1 = 0x10070000000ull, + OPC_OR_A3 = 0x10170000000ull, OPC_SETF_EXP_M18 = 0x0c748000000ull, OPC_SETF_SIG_M18 = 0x0c708000000ull, OPC_SHL_I7 = 0x0f240000000ull, + OPC_SHLADD_A2 = 0x10080000000ull, OPC_SHR_I5 = 0x0f220000000ull, OPC_SHR_U_I5 = 0x0f200000000ull, OPC_SHRP_I10 = 0x0ac00000000ull, @@ -284,372 +302,793 @@ enum { OPC_UNPACK4_L_I2 = 0x0f860000000ull, OPC_XMA_L_F2 = 0x1d000000000ull, OPC_XOR_A1 = 0x10078000000ull, + OPC_XOR_A3 = 0x10178000000ull, OPC_ZXT1_I29 = 0x00080000000ull, 
OPC_ZXT2_I29 = 0x00088000000ull, OPC_ZXT4_I29 = 0x00090000000ull, }; -static inline uint64_t tcg_opc_a1(int qp, uint64_t opc, int r1, - int r2, int r3) +/* The "extra" information we keep is a set of up to 4 insns that have + been "issued" by other parts of the backend, but have yet to be + written to the real instruction stream. + + For each insn that has been queued, we keep track of its type and + whether or not it needs to be followed by a "stop" bit. + + Finally, we track (some of) the resources that have been written + that would require generation of a stop bit by a subsequent insn. + The bits in each WRITTEN word are set for each integer register + that has been written since the previous stop bit. The WRITTEN[0] + word corresponds to insns using an even numbered predicate register; + the WRITTEN[1] corresponds to odd numbered predicate register. If + the insn is not predicated (i.e. using p0), then both WRITTEN words + are updated and checked. + + Certain types of resource conflicts are rare and ignored here: + + (1) FP registers are not tracked. Given that these are only used + for integer multiply, this is easy to do by hand. + + (2) Memory stores are not tracked. In softmmu mode, user memory + references are obviously quite complex and have internal stop bits + generated. In user mode, or for references into the ENV block, we + always use R2 as an intermediate holding the base+ofs address. Thus + we get a stop bit generated for the set of R2, which takes care of + avoiding conflict with a previous store. If we ever implement some + form of temp cse, this may well have to change. + + (3) Predicate registers are not tracked. These only show up in + the setcond and softmmu sequences, where it is easy enough to add + the stop bit by hand. + + (4) Special registers are not tracked. These only show up in the + prologue and epilogue sequences, where they are in fact separated + by the body of the translated code. 
+*/ + +struct ia64_insn_info +{ + enum ia64_insn_type type; + bool stop; + uint64_t insn; +}; + +struct TCGContextExtra +{ + int n_queue; + struct ia64_insn_info queue[4]; + uint64_t written[2]; +}; + +/* + * Code generation + */ + +static uint8_t *tb_ret_addr; + +static void tcg_out_bundle(TCGContext *s, enum ia64_bundle_type template, + uint64_t slot0, uint64_t slot1, uint64_t slot2) { - return opc - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + template &= 0x1f; /* 5 bits */ + slot0 &= 0x1ffffffffffull; /* 41 bits */ + slot1 &= 0x1ffffffffffull; /* 41 bits */ + slot2 &= 0x1ffffffffffull; /* 41 bits */ + + *(uint64_t *)(s->code_ptr + 0) = (slot1 << 46) | (slot0 << 5) | template; + *(uint64_t *)(s->code_ptr + 8) = (slot2 << 23) | (slot1 >> 18); + s->code_ptr += 16; } -static inline uint64_t tcg_opc_a3(int qp, uint64_t opc, int r1, - uint64_t imm, int r3) +/* Explicitly add a stop bit after the previously output insn. */ + +static void tcg_add_stop(TCGContext *s) { - return opc - | ((imm & 0x80) << 29) /* s */ - | ((imm & 0x7f) << 13) /* imm7b */ - | ((r3 & 0x7f) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + TCGContextExtra *e = s->extra; + assert (e->n_queue > 0); + e->queue[e->n_queue - 1].stop = true; + e->written[0] = e->written[1] = 0; } -static inline uint64_t tcg_opc_a4(int qp, uint64_t opc, int r1, - uint64_t imm, int r3) +/* Issue one bundle of insns from the queue beginning at START, and + not exceeding MAX insns. Return the number of insns we bundled. 
*/ + +static int tcg_emit_one_bundle(TCGContext *s, int start, int max) { - return opc - | ((imm & 0x2000) << 23) /* s */ - | ((imm & 0x1f80) << 20) /* imm6d */ - | ((imm & 0x007f) << 13) /* imm7b */ - | ((r3 & 0x7f) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + const uint64_t nop_m = OPC_NOP_M48; + const uint64_t nop_i = OPC_NOP_I18; + + TCGContextExtra *e = s->extra; + enum ia64_insn_type t0, t1, t2; + enum ia64_bundle_type bt; + uint64_t insn0, insn1, insn2; + int stop, ret; + + t0 = t1 = t2 = INSN_TYPE_N; + insn0 = insn1 = insn2 = 0; + stop = 0; + switch (max) { + case 0: + return 0; + default: + t2 = e->queue[start + 2].type; + insn2 = e->queue[start + 2].insn; + stop |= e->queue[start + 2].stop; + /* FALLTHRU */ + case 2: + t1 = e->queue[start + 1].type; + insn1 = e->queue[start + 1].insn; + stop |= e->queue[start + 1].stop << 1; + /* FALLTHRU */ + case 1: + t0 = e->queue[start].type; + insn0 = e->queue[start].insn; + stop |= e->queue[start].stop << 2; + break; + } + +#define TMPL(A,B,C,S) \ + (S << 9 | INSN_TYPE_##A << 6 | INSN_TYPE_##B << 3 | INSN_TYPE_##C) + + /* We prefer to issue A insns into M slots. The itanium2 can + issue 4 M insns per cycle, and keeping (potential) address + arithmetic in the M unit saves a cycle when it comes to + cross-unit stalls. */ + + bt = nobundle; + ret = 3; + if (max >= 3) { + /* Match a full bundle of three insns. 
*/ + switch ((stop & 6) << 8 | t0 << 6 | t1 << 3 | t2) { + case TMPL(A,I,A,0): + case TMPL(A,I,I,0): + case TMPL(M,I,A,0): + case TMPL(M,I,I,0): bt = mii; break; + + case TMPL(A,A,A,1): + case TMPL(A,A,I,1): + case TMPL(A,I,A,1): + case TMPL(M,A,A,1): + case TMPL(A,I,I,1): + case TMPL(M,A,I,1): + case TMPL(M,I,A,1): + case TMPL(M,I,I,1): bt = mIi; break; + + case TMPL(A,L,X,0): + case TMPL(M,L,X,0): bt = mlx; break; + + case TMPL(A,A,A,0): + case TMPL(A,A,I,0): + case TMPL(A,M,A,0): + case TMPL(M,A,A,0): + case TMPL(A,M,I,0): + case TMPL(M,A,I,0): + case TMPL(M,M,A,0): + case TMPL(M,M,I,0): bt = mmi; break; + + case TMPL(A,A,A,2): + case TMPL(A,A,I,2): + case TMPL(A,M,A,2): + case TMPL(M,A,A,2): + case TMPL(A,M,I,2): + case TMPL(M,A,I,2): + case TMPL(M,M,A,2): + case TMPL(M,M,I,2): bt = Mmi; break; + + case TMPL(A,F,A,0): + case TMPL(A,F,I,0): + case TMPL(M,F,A,0): + case TMPL(M,F,I,0): bt = mfi; break; + + case TMPL(A,A,F,0): + case TMPL(A,M,F,0): + case TMPL(M,A,F,0): + case TMPL(M,M,F,0): bt = mmf; break; + + case TMPL(A,I,B,0): + case TMPL(M,A,B,0): + case TMPL(M,I,B,0): bt = mib; break; + + case TMPL(M,B,B,0): bt = mbb; break; + case TMPL(B,B,B,0): bt = bbb; break; + + case TMPL(A,A,B,0): + case TMPL(M,M,B,0): bt = mmb; break; + + case TMPL(A,F,B,0): + case TMPL(M,F,B,0): bt = mfb; break; + } + } + + if (bt == nobundle) { + /* Match a bundle of two insns. 
*/ + ret = 2; + stop >>= 1; + insn2 = nop_i; + + if (max >= 2) { + switch ((stop & 2) << 8 | t0 << 3 | t1) { + case TMPL(N,A,A,1): + case TMPL(N,A,M,1): + case TMPL(N,M,A,1): + case TMPL(N,M,M,1): + bt = Mmi; + break; + case TMPL(N,A,A,0): + case TMPL(N,A,M,0): + case TMPL(N,M,A,0): + case TMPL(N,M,M,0): + bt = mmi; + break; + case TMPL(N,A,F,0): + case TMPL(N,M,F,0): + bt = mfi; + break; + + case TMPL(N,A,I,1): + case TMPL(N,M,I,1): + bt = Mmi; + insn2 = insn1; + insn1 = nop_m; + break; + + case TMPL(N,I,A,0): + case TMPL(N,I,I,0): + bt = mii; + goto nop_m_0; + case TMPL(N,I,A,1): + case TMPL(N,I,I,1): + bt = mIi; + goto nop_m_0; + case TMPL(N,L,X,0): + bt = mlx; + goto nop_m_0; + case TMPL(N,A,I,0): + case TMPL(N,M,I,0): + bt = mmi; + goto nop_m_0; + case TMPL(N,F,A,0): + case TMPL(N,F,I,0): + bt = mfi; + goto nop_m_0; + case TMPL(N,A,B,0): + case TMPL(N,M,B,0): + bt = mmb; + goto nop_m_0; + case TMPL(N,I,B,0): + bt = mib; + goto nop_m_0; + case TMPL(N,F,B,0): + bt = mfb; + nop_m_0: + insn2 = insn1; + insn1 = insn0; + insn0 = nop_m; + break; + } + } + + if (bt == nobundle) { + /* Bundle a single insn by itself. */ + ret = 1; + stop >>= 1; + + switch (t0) { + case INSN_TYPE_A: + case INSN_TYPE_M: + bt = mmi; + insn2 = nop_i; + insn1 = nop_m; + break; + case INSN_TYPE_I: + bt = mmi; + goto nop_m_01; + case INSN_TYPE_F: + bt = mmf; + goto nop_m_01; + case INSN_TYPE_B: + bt = mmb; + nop_m_01: + insn2 = insn0; + insn1 = nop_m; + insn0 = nop_m; + break; + default: + tcg_abort(); + } + } + } + +#undef TMPL + + /* All bundles can get a final stop bit by adding one. */ + bt += (stop & 1); + + tcg_out_bundle(s, bt, insn0, insn1, insn2); + + return ret; +} + +/* Empty the instruction queue. */ + +static void tcg_flush_queue(TCGContext *s) +{ + TCGContextExtra *e = s->extra; + int i, j, n = e->n_queue; + + if (n > 0) { + /* If there are any outstanding writes, we must add a stop bit, + lest we not be able to add a stop bit when the next insn is + added to the queue. 
*/ + if (e->written[0] | e->written[1]) { + tcg_add_stop(s); + } + + for (i = 0; i < n; i += j) { + j = tcg_emit_one_bundle(s, i, n - i); + } + + e->n_queue = 0; + } +} + +/* Add a single insn to the instruction queue. */ + +static void tcg_out_insn(TCGContext *s, enum ia64_insn_type type, + uint64_t insn, uint64_t out, uint64_t in) +{ + TCGContextExtra *e = s->extra; + struct ia64_insn_info *i; + uint64_t written; + int n = e->n_queue; + int qp; + + /* Don't consider reads and writes to R0. */ + out &= ~1; + in &= ~1; + + /* Extract the predicate for the insn. */ + qp = insn & 0x3f; + + /* We only really use p6 and p7, but for simplicity, make all even + predicates conflict, and all odd predicates conflict. If no + predicate is used, conflict with either. */ + if (qp & 1) { + written = e->written[1]; + } else if (qp) { + written = e->written[0]; + } else { + written = e->written[0] | e->written[1]; + } + + /* If we read from or write to something that has already been + written since the last stop bit, add a new stop bit. */ + if (written & (in | out)) { + e->queue[n - 1].stop = 1; + written = e->written[0] = e->written[1] = 0; + } + + /* If the queue is totally full, emit one bundle to make room. */ + if (n == ARRAY_SIZE(e->queue)) { + int j, i = tcg_emit_one_bundle(s, 0, n); + for (j = i; j < n; ++j) + e->queue[j - i] = e->queue[j]; + n -= i; + } + + /* Add the new instruction. */ + i = &e->queue[n]; + i->type = type; + i->stop = 0; + i->insn = insn; + e->n_queue = n + 1; + + /* Update the cumulative written state. 
*/ + if (qp & 1) { + e->written[1] = written | out; + } else if (qp) { + e->written[0] = written | out; + } else { + e->written[0] |= out; + e->written[1] |= out; + } } -static inline uint64_t tcg_opc_a5(int qp, uint64_t opc, int r1, - uint64_t imm, int r3) +static void tcg_opc_a1(TCGContext *s, int qp, uint64_t opc, + int r1, int r2, int r3) { - return opc - | ((imm & 0x200000) << 15) /* s */ - | ((imm & 0x1f0000) << 6) /* imm5c */ - | ((imm & 0x00ff80) << 20) /* imm9d */ - | ((imm & 0x00007f) << 13) /* imm7b */ - | ((r3 & 0x03) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_A, opc, 1ull << r1, 1ull << r2 | 1ull << r3); } -static inline uint64_t tcg_opc_a6(int qp, uint64_t opc, int p1, - int p2, int r2, int r3) +static void tcg_opc_a2(TCGContext *s, int qp, uint64_t opc, + int r1, int r2, int count, int r3) { - return opc - | ((p2 & 0x3f) << 27) - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((p1 & 0x3f) << 6) - | (qp & 0x3f); + opc |= ((count & 3) << 27) + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_A, opc, 1ull << r1, 1ull << r2 | 1ull << r3); } -static inline uint64_t tcg_opc_b1(int qp, uint64_t opc, uint64_t imm) +static void tcg_opc_a3(TCGContext *s, int qp, uint64_t opc, + int r1, uint64_t imm, int r3) { - return opc - | ((imm & 0x100000) << 16) /* s */ - | ((imm & 0x0fffff) << 13) /* imm20b */ - | (qp & 0x3f); + opc |= ((imm & 0x80) << 29) /* s */ + | ((imm & 0x7f) << 13) /* imm7b */ + | ((r3 & 0x7f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_A, opc, 1ull << r1, 1ull << r3); } -static inline uint64_t tcg_opc_b4(int qp, uint64_t opc, int b2) +static void tcg_opc_a4(TCGContext *s, int qp, uint64_t opc, + int r1, uint64_t imm, int r3) { - return opc - | ((b2 & 0x7) << 13) - | (qp & 0x3f); + opc |= ((imm & 0x2000) << 23) /* s */ + | 
((imm & 0x1f80) << 20) /* imm6d */ + | ((imm & 0x007f) << 13) /* imm7b */ + | ((r3 & 0x7f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_A, opc, 1ull << r1, 1ull << r3); } -static inline uint64_t tcg_opc_b5(int qp, uint64_t opc, int b1, int b2) +static void tcg_opc_a5(TCGContext *s, int qp, uint64_t opc, + int r1, uint64_t imm, int r3) { - return opc - | ((b2 & 0x7) << 13) - | ((b1 & 0x7) << 6) - | (qp & 0x3f); + opc |= ((imm & 0x200000) << 15) /* s */ + | ((imm & 0x1f0000) << 6) /* imm5c */ + | ((imm & 0x00ff80) << 20) /* imm9d */ + | ((imm & 0x00007f) << 13) /* imm7b */ + | ((r3 & 0x03) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_A, opc, 1ull << r1, 1ull << r3); } +static void tcg_opc_a6(TCGContext *s, int qp, uint64_t opc, + int p1, int p2, int r2, int r3) +{ + opc |= ((p2 & 0x3f) << 27) + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((p1 & 0x3f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_A, opc, 0, 1ull << r2 | 1ull << r3); +} -static inline uint64_t tcg_opc_b9(int qp, uint64_t opc, uint64_t imm) +static void tcg_opc_b1(TCGContext *s, int qp, uint64_t opc, uint64_t imm) { - return opc - | ((imm & 0x100000) << 16) /* i */ - | ((imm & 0x0fffff) << 6) /* imm20a */ - | (qp & 0x3f); + opc |= ((imm & 0x100000) << 16) /* s */ + | ((imm & 0x0fffff) << 13) /* imm20b */ + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_B, opc, 0, 0); } -static inline uint64_t tcg_opc_f1(int qp, uint64_t opc, int f1, - int f3, int f4, int f2) +static void tcg_opc_b4(TCGContext *s, int qp, uint64_t opc, int b2) { - return opc - | ((f4 & 0x7f) << 27) - | ((f3 & 0x7f) << 20) - | ((f2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((b2 & 0x7) << 13) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_B, opc, 0, 0); } -static inline uint64_t tcg_opc_f2(int qp, uint64_t opc, int f1, - int f3, int f4, int f2) +static void tcg_opc_b5(TCGContext *s, int qp, uint64_t opc, int b1, int b2) { - return opc - | ((f4 & 0x7f) << 
27) - | ((f3 & 0x7f) << 20) - | ((f2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((b2 & 0x7) << 13) + | ((b1 & 0x7) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_B, opc, 0, 0); } -static inline uint64_t tcg_opc_f6(int qp, uint64_t opc, int f1, - int p2, int f2, int f3) +static inline void +tcg_opc_b9(TCGContext *s, int qp, uint64_t opc, uint64_t imm) { - return opc - | ((p2 & 0x3f) << 27) - | ((f3 & 0x7f) << 20) - | ((f2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((imm & 0x100000) << 16) /* i */ + | ((imm & 0x0fffff) << 6) /* imm20a */ + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_B, opc, 0, 0); } -static inline uint64_t tcg_opc_f10(int qp, uint64_t opc, int f1, int f2) +static inline void +tcg_opc_f1(TCGContext *s, int qp, uint64_t opc, + int f1, int f3, int f4, int f2) { - return opc - | ((f2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((f4 & 0x7f) << 27) + | ((f3 & 0x7f) << 20) + | ((f2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_F, opc, 0, 0); } -static inline uint64_t tcg_opc_f11(int qp, uint64_t opc, int f1, int f2) +static void tcg_opc_f2(TCGContext *s, int qp, uint64_t opc, + int f1, int f3, int f4, int f2) { - return opc - | ((f2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((f4 & 0x7f) << 27) + | ((f3 & 0x7f) << 20) + | ((f2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_F, opc, 0, 0); } -static inline uint64_t tcg_opc_f16(int qp, uint64_t opc, uint64_t imm) +static inline void +tcg_opc_f6(TCGContext *s, int qp, uint64_t opc, + int f1, int p2, int f2, int f3) { - return opc - | ((imm & 0x100000) << 16) /* i */ - | ((imm & 0x0fffff) << 6) /* imm20a */ - | (qp & 0x3f); + opc |= ((p2 & 0x3f) << 27) + | ((f3 & 0x7f) << 20) + | ((f2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_F, opc, 0, 0); } -static inline uint64_t tcg_opc_i2(int qp, uint64_t opc, int r1, - int r2, 
int r3) +static inline void +tcg_opc_f10(TCGContext *s, int qp, uint64_t opc, int f1, int f2) { - return opc - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((f2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_F, opc, 0, 0); } -static inline uint64_t tcg_opc_i3(int qp, uint64_t opc, int r1, - int r2, int mbtype) +static inline void +tcg_opc_f11(TCGContext *s, int qp, uint64_t opc, int f1, int f2) { - return opc - | ((mbtype & 0x0f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((f2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_F, opc, 0, 0); } -static inline uint64_t tcg_opc_i5(int qp, uint64_t opc, int r1, - int r3, int r2) +static inline void +tcg_opc_f16(TCGContext *s, int qp, uint64_t opc, uint64_t imm) { - return opc - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((imm & 0x100000) << 16) /* i */ + | ((imm & 0x0fffff) << 6) /* imm20a */ + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_F, opc, 0, 0); } -static inline uint64_t tcg_opc_i7(int qp, uint64_t opc, int r1, - int r2, int r3) +static void tcg_opc_i2(TCGContext *s, int qp, uint64_t opc, + int r1, int r2, int r3) { - return opc - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_I, opc, 1ull << r1, 1ull << r2 | 1ull << r3); } -static inline uint64_t tcg_opc_i10(int qp, uint64_t opc, int r1, - int r2, int r3, uint64_t count) +static void tcg_opc_i3(TCGContext *s, int qp, uint64_t opc, + int r1, int r2, int mbtype) { - return opc - | ((count & 0x3f) << 27) - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((mbtype & 0x0f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, 
INSN_TYPE_I, opc, 1ull << r1, 1ull << r2); } -static inline uint64_t tcg_opc_i11(int qp, uint64_t opc, int r1, - int r3, uint64_t pos, uint64_t len) +static void tcg_opc_i5(TCGContext *s, int qp, uint64_t opc, + int r1, int r3, int r2) { - return opc - | ((len & 0x3f) << 27) - | ((r3 & 0x7f) << 20) - | ((pos & 0x3f) << 14) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_I, opc, 1ull << r1, 1ull << r2 | 1ull << r3); } -static inline uint64_t tcg_opc_i12(int qp, uint64_t opc, int r1, - int r2, uint64_t pos, uint64_t len) +static void tcg_opc_i7(TCGContext *s, int qp, uint64_t opc, + int r1, int r2, int r3) { - return opc - | ((len & 0x3f) << 27) - | ((pos & 0x3f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_I, opc, 1ull << r1, 1ull << r2 | 1ull << r3); } -static inline uint64_t tcg_opc_i18(int qp, uint64_t opc, uint64_t imm) +static void tcg_opc_i10(TCGContext *s, int qp, uint64_t opc, + int r1, int r2, int r3, uint64_t count) { - return opc - | ((imm & 0x100000) << 16) /* i */ - | ((imm & 0x0fffff) << 6) /* imm20a */ - | (qp & 0x3f); + opc |= ((count & 0x3f) << 27) + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_I, opc, 1ull << r1, 1ull << r2 | 1ull << r3); } -static inline uint64_t tcg_opc_i21(int qp, uint64_t opc, int b1, - int r2, uint64_t imm) +static void tcg_opc_i11(TCGContext *s, int qp, uint64_t opc, + int r1, int r3, uint64_t pos, uint64_t len) { - return opc - | ((imm & 0x1ff) << 24) - | ((r2 & 0x7f) << 13) - | ((b1 & 0x7) << 6) - | (qp & 0x3f); + opc |= ((len & 0x3f) << 27) + | ((r3 & 0x7f) << 20) + | ((pos & 0x3f) << 14) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_I, opc, 1ull << r1, 1ull << r3); } -static 
inline uint64_t tcg_opc_i22(int qp, uint64_t opc, int r1, int b2) +static void tcg_opc_i12(TCGContext *s, int qp, uint64_t opc, + int r1, int r2, uint64_t pos, uint64_t len) { - return opc - | ((b2 & 0x7) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((len & 0x3f) << 27) + | ((pos & 0x3f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_I, opc, 1ull << r1, 1ull << r2); } -static inline uint64_t tcg_opc_i26(int qp, uint64_t opc, int ar3, int r2) +static void tcg_opc_i13(TCGContext *s, int qp, uint64_t opc, + int r1, uint64_t imm8, uint64_t pos, uint64_t len) { - return opc - | ((ar3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | (qp & 0x3f); + opc |= ((imm8 & 0x80) << 29) /* i */ + | ((imm8 & 0x7f) << 13) /* imm7b */ + | ((len & 0x3f) << 27) + | ((pos & 0x3f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_I, opc, 1ull << r1, 0); } -static inline uint64_t tcg_opc_i29(int qp, uint64_t opc, int r1, int r3) +static void tcg_opc_i14(TCGContext *s, int qp, uint64_t opc, + int r1, int imm1, int r3, uint64_t pos, uint64_t len) { - return opc - | ((r3 & 0x7f) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((imm1 & 1ull) << 36) + | ((len & 0x3f) << 27) + | ((pos & 0x3f) << 14) + | ((r3 & 0x7f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_I, opc, 1ull << r1, 1ull << r3); } -static inline uint64_t tcg_opc_l2(uint64_t imm) +static void tcg_opc_i21(TCGContext *s, int qp, uint64_t opc, + int b1, int r2, uint64_t imm) { - return (imm & 0x7fffffffffc00000ull) >> 22; + opc |= ((imm & 0x1ff) << 24) + | ((r2 & 0x7f) << 13) + | ((b1 & 0x7) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_I, opc, 0, 1ull << r2); } -static inline uint64_t tcg_opc_l3(uint64_t imm) +static void tcg_opc_i22(TCGContext *s, int qp, uint64_t opc, int r1, int b2) { - return (imm & 0x07fffffffff00000ull) >> 18; + opc |= ((b2 & 0x7) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + 
tcg_out_insn(s, INSN_TYPE_I, opc, 1ull << r1, 0); } -static inline uint64_t tcg_opc_m1(int qp, uint64_t opc, int r1, int r3) +static void tcg_opc_i26(TCGContext *s, int qp, uint64_t opc, int ar3, int r2) { - return opc - | ((r3 & 0x7f) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((ar3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_I, opc, 0, 1ull << r2); } -static inline uint64_t tcg_opc_m3(int qp, uint64_t opc, int r1, - int r3, uint64_t imm) +static void tcg_opc_i29(TCGContext *s, int qp, uint64_t opc, int r1, int r3) { - return opc - | ((imm & 0x100) << 28) /* s */ - | ((imm & 0x080) << 20) /* i */ - | ((imm & 0x07f) << 13) /* imm7b */ - | ((r3 & 0x7f) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((r3 & 0x7f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_I, opc, 1ull << r1, 1ull << r3); } -static inline uint64_t tcg_opc_m4(int qp, uint64_t opc, int r2, int r3) +static void tcg_opc_l2(TCGContext *s, uint64_t imm) { - return opc - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | (qp & 0x3f); + uint64_t opc = (imm & 0x7fffffffffc00000ull) >> 22; + tcg_out_insn(s, INSN_TYPE_L, opc, 0, 0); } -static inline uint64_t tcg_opc_m18(int qp, uint64_t opc, int f1, int r2) +static void tcg_opc_l3(TCGContext *s, uint64_t imm) { - return opc - | ((r2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); + uint64_t opc = (imm & 0x07fffffffff00000ull) >> 18; + tcg_out_insn(s, INSN_TYPE_L, opc, 0, 0); } -static inline uint64_t tcg_opc_m19(int qp, uint64_t opc, int r1, int f2) +static void tcg_opc_m1(TCGContext *s, int qp, uint64_t opc, int r1, int r3) { - return opc - | ((f2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((r3 & 0x7f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_M, opc, 1ull << r1, 1ull << r3); } -static inline uint64_t tcg_opc_m34(int qp, uint64_t opc, int r1, - int sof, int sol, int sor) +static inline void 
+tcg_opc_m3(TCGContext *s, int qp, uint64_t opc, int r1, int r3, uint64_t imm) { - return opc - | ((sor & 0x0f) << 27) - | ((sol & 0x7f) << 20) - | ((sof & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((imm & 0x100) << 28) /* s */ + | ((imm & 0x080) << 20) /* i */ + | ((imm & 0x07f) << 13) /* imm7b */ + | ((r3 & 0x7f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_M, opc, 1ull << r1 | 1ull << r3, 1ull << r3); } -static inline uint64_t tcg_opc_m48(int qp, uint64_t opc, uint64_t imm) +static void tcg_opc_m4(TCGContext *s, int qp, uint64_t opc, int r2, int r3) { - return opc - | ((imm & 0x100000) << 16) /* i */ - | ((imm & 0x0fffff) << 6) /* imm20a */ - | (qp & 0x3f); + opc |= ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_M, opc, 0, 1ull << r2 | 1ull << r3); } -static inline uint64_t tcg_opc_x2(int qp, uint64_t opc, - int r1, uint64_t imm) +static void tcg_opc_m18(TCGContext *s, int qp, uint64_t opc, int f1, int r2) { - return opc - | ((imm & 0x8000000000000000ull) >> 27) /* i */ - | (imm & 0x0000000000200000ull) /* ic */ - | ((imm & 0x00000000001f0000ull) << 6) /* imm5c */ - | ((imm & 0x000000000000ff80ull) << 20) /* imm9d */ - | ((imm & 0x000000000000007full) << 13) /* imm7b */ - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); + opc |= ((r2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_M, opc, 0, 1ull << r2); } -static inline uint64_t tcg_opc_x3(int qp, uint64_t opc, uint64_t imm) +static void tcg_opc_m19(TCGContext *s, int qp, uint64_t opc, int r1, int f2) { - return opc - | ((imm & 0x0800000000000000ull) >> 23) /* i */ - | ((imm & 0x00000000000fffffull) << 13) /* imm20b */ - | (qp & 0x3f); + opc |= ((f2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_M, opc, 1ull << r1, 0); } +static void tcg_opc_m34(TCGContext *s, int qp, uint64_t opc, int r1, + int sof, int sol, int sor) +{ + opc |= ((sor & 0x0f) << 27) + | ((sol & 
0x7f) << 20) + | ((sof & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_M, opc, 1ull << r1, 0); +} + +static void tcg_opc_x2(TCGContext *s, int qp, uint64_t opc, + int r1, uint64_t imm) +{ + opc |= ((imm & 0x8000000000000000ull) >> 27) /* i */ + | (imm & 0x0000000000200000ull) /* ic */ + | ((imm & 0x00000000001f0000ull) << 6) /* imm5c */ + | ((imm & 0x000000000000ff80ull) << 20) /* imm9d */ + | ((imm & 0x000000000000007full) << 13) /* imm7b */ + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_X, opc, 1ull << r1, 0); +} + +static void tcg_opc_x3(TCGContext *s, int qp, uint64_t opc, uint64_t imm) +{ + opc |= ((imm & 0x0800000000000000ull) >> 23) /* i */ + | ((imm & 0x00000000000fffffull) << 13) /* imm20b */ + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_X, opc, 0, 0); +} + +static void tcg_opc_x4(TCGContext *s, int qp, uint64_t opc, + int b1, uint64_t imm) +{ + opc |= ((imm & 0x0800000000000000ull) >> 23) /* i */ + | ((imm & 0x00000000000fffffull) << 13) /* imm20b */ + | ((b1 & 7) << 6) + | (qp & 0x3f); + tcg_out_insn(s, INSN_TYPE_X, opc, 0, 0); +} /* * Relocations */ -static inline void reloc_pcrel21b (void *pc, tcg_target_long target) +static inline void reloc_pcrel21b(void *pc, tcg_target_long target) { uint64_t imm; int64_t disp; @@ -682,7 +1121,7 @@ static inline void reloc_pcrel21b (void *pc, tcg_target_long target) } } -static inline uint64_t get_reloc_pcrel21b (void *pc) +static inline uint64_t get_reloc_pcrel21b(void *pc) { int64_t low, high; int slot; @@ -709,7 +1148,7 @@ static inline uint64_t get_reloc_pcrel21b (void *pc) } } -static inline void reloc_pcrel60b (void *pc, tcg_target_long target) +static inline void reloc_pcrel60b(void *pc, tcg_target_long target) { int64_t disp; uint64_t imm; @@ -725,7 +1164,7 @@ static inline void reloc_pcrel60b (void *pc, tcg_target_long target) | ((imm & 0x0000000ffff00000ull) << 28); /* imm39 */ } -static inline uint64_t get_reloc_pcrel60b (void *pc) +static inline 
uint64_t get_reloc_pcrel60b(void *pc) { int64_t low, high; @@ -772,6 +1211,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) case 'I': ct->ct |= TCG_CT_CONST_S22; break; + case 'J': + ct->ct |= TCG_CT_CONST_S8; + break; case 'S': ct->ct |= TCG_CT_REG; tcg_regset_set(ct->u.regs, 0xffffffffffffffffull); @@ -806,113 +1248,112 @@ static inline int tcg_target_const_match(tcg_target_long val, return 1; else if ((ct & TCG_CT_CONST_S22) && val == ((int32_t)val << 10) >> 10) return 1; + else if ((ct & TCG_CT_CONST_S8) && val == (int8_t)val) + return 1; else return 0; } -/* - * Code generation - */ - -static uint8_t *tb_ret_addr; - -static inline void tcg_out_bundle(TCGContext *s, int template, - uint64_t slot0, uint64_t slot1, - uint64_t slot2) -{ - template &= 0x1f; /* 5 bits */ - slot0 &= 0x1ffffffffffull; /* 41 bits */ - slot1 &= 0x1ffffffffffull; /* 41 bits */ - slot2 &= 0x1ffffffffffull; /* 41 bits */ - - *(uint64_t *)(s->code_ptr + 0) = (slot1 << 46) | (slot0 << 5) | template; - *(uint64_t *)(s->code_ptr + 8) = (slot2 << 23) | (slot1 >> 18); - s->code_ptr += 16; -} - static inline void tcg_out_mov(TCGContext *s, TCGType type, TCGArg ret, TCGArg arg) { - tcg_out_bundle(s, mmI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, 0, arg)); + tcg_opc_a4(s, TCG_REG_P0, OPC_ADDS_A4, ret, 0, arg); } -static inline void tcg_out_movi(TCGContext *s, TCGType type, - TCGArg reg, tcg_target_long arg) +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGArg reg, tcg_target_long val) { - tcg_out_bundle(s, mLX, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_l2 (arg), - tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, reg, arg)); + int64_t dep; + int lz, tz; + + if (type == TCG_TYPE_I32) { + val = (int32_t)val; + } + + /* Most constants can be loaded with ADDL. 
*/ + if (val == ((int32_t)val << 10) >> 10) { + tcg_opc_a5(s, TCG_REG_P0, OPC_ADDL_A5, reg, val, TCG_REG_R0); + return; + } + + /* Look for possibilities for dep.z immediate. This is an + 8 bit signed constant deposited into a field inside zero. + This I format insn packs into bundles much nicer than LX. */ + tz = __builtin_ctzll(val); + lz = __builtin_clzll(val); + dep = ((int64_t)val << lz) >> (lz + tz); + if (dep == (int8_t)dep) { + tcg_opc_i13(s, TCG_REG_P0, OPC_DEP_Z_I13, reg, dep, + 63 - tz, 63 - (lz + tz)); + return; + } + + /* Failing that, any 64-bit constant can be loaded with MOVL. */ + tcg_opc_l2(s, val); + tcg_opc_x2(s, TCG_REG_P0, OPC_MOVL_X2, reg, val); } -static inline void tcg_out_addi(TCGContext *s, TCGArg reg, tcg_target_long val) +static void tcg_out_addi2(TCGContext *s, TCGArg r1, TCGArg r2, + tcg_target_long val) { - if (val == ((int32_t)val << 10) >> 10) { - tcg_out_bundle(s, MmI, - tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, - TCG_REG_R2, val, TCG_REG_R0), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, reg, - reg, TCG_REG_R2)); + if (val == ((int32_t)val << 18) >> 18) { + tcg_opc_a4(s, TCG_REG_P0, OPC_ADDS_A4, r1, val, r2); + } else if (r2 < 4 && val == ((int32_t)val << 10) >> 10) { + tcg_opc_a5(s, TCG_REG_P0, OPC_ADDL_A5, r1, val, r2); } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, val); - tcg_out_bundle(s, mmI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, reg, - reg, TCG_REG_R2)); + int r3 = r1; + if (r2 == r1) { + assert (r2 != TCG_REG_R2); + r3 = TCG_REG_R2; + } + + tcg_out_movi(s, TCG_TYPE_I64, r3, val); + tcg_opc_a1(s, TCG_REG_P0, OPC_ADD_A1, r1, r2, r3); } } +static inline void tcg_out_addi(TCGContext *s, TCGArg reg, tcg_target_long val) +{ + tcg_out_addi2(s, reg, reg, val); +} + static void tcg_out_br(TCGContext *s, int label_index) { - TCGLabel *l = &s->labels[label_index]; + tcg_opc_b1(s, TCG_REG_P0, OPC_BR_SPTK_MANY_B1, 
0); + tcg_flush_queue(s); + tcg_out_reloc(s, (s->code_ptr - 16) + 2, R_IA64_PCREL21B, label_index, 0); +} - tcg_out_bundle(s, mmB, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_b1 (TCG_REG_P0, OPC_BR_SPTK_MANY_B1, - get_reloc_pcrel21b(s->code_ptr + 2))); +static void tcg_out_call(TCGContext *s, TCGArg addr, int const_addr) +{ + if (const_addr) { + tcg_target_long pc = ((tcg_target_long *)addr)[0]; + tcg_target_long gp = ((tcg_target_long *)addr)[1]; - if (l->has_value) { - reloc_pcrel21b((s->code_ptr - 16) + 2, l->u.value); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R1, gp); + tcg_opc_l3(s, 0); + tcg_opc_x4(s, TCG_REG_P0, OPC_BRL_CALL_SPTK_MANY_X4, TCG_REG_B0, 0); + tcg_flush_queue(s); + + reloc_pcrel60b (s->code_ptr - 16, pc); } else { - tcg_out_reloc(s, (s->code_ptr - 16) + 2, - R_IA64_PCREL21B, label_index, 0); + tcg_opc_m3(s, TCG_REG_P0, OPC_LD8_M3, TCG_REG_R2, addr, 8); + tcg_opc_i21(s, TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, TCG_REG_R2, 0); + tcg_opc_m3(s, TCG_REG_P0, OPC_LD8_M3, TCG_REG_R1, addr, -8); + tcg_opc_b5(s, TCG_REG_P0, OPC_BR_CALL_SPTK_MANY_B5, + TCG_REG_B0, TCG_REG_B6); + tcg_flush_queue(s); } } -static inline void tcg_out_call(TCGContext *s, TCGArg addr) -{ - tcg_out_bundle(s, MmI, - tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, addr), - tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R3, 8, addr), - tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, - TCG_REG_B6, TCG_REG_R2, 0)); - tcg_out_bundle(s, mmB, - tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R3), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_b5 (TCG_REG_P0, OPC_BR_CALL_SPTK_MANY_B5, - TCG_REG_B0, TCG_REG_B6)); -} - static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg) { - int64_t disp; - uint64_t imm; - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R8, arg); + tcg_opc_l3 (s, 0); + tcg_opc_x3 (s, TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, 0); + tcg_flush_queue(s); - disp = tb_ret_addr - s->code_ptr; - imm = (uint64_t)disp >> 4; - - 
tcg_out_bundle(s, mLX, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_l3 (imm), - tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, imm)); + reloc_pcrel60b (s->code_ptr - 16, (tcg_target_long)tb_ret_addr); } static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg) @@ -924,73 +1365,43 @@ static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg) /* indirect jump method */ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, (tcg_target_long)(s->tb_next + arg)); - tcg_out_bundle(s, MmI, - tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, - TCG_REG_R2, TCG_REG_R2), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, - TCG_REG_R2, 0)); - tcg_out_bundle(s, mmB, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, - TCG_REG_B6)); + tcg_opc_m1(s, TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, TCG_REG_R2); + tcg_opc_i21(s, TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, TCG_REG_R2, 0); + tcg_opc_b4(s, TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6); + tcg_flush_queue(s); } s->tb_next_offset[arg] = s->code_ptr - s->code_buf; } static inline void tcg_out_jmp(TCGContext *s, TCGArg addr) { - tcg_out_bundle(s, mmI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, addr, 0)); - tcg_out_bundle(s, mmB, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_b4(TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); -} - -static inline void tcg_out_ld_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg, - TCGArg arg1, tcg_target_long arg2) -{ - if (arg2 == ((int16_t)arg2 >> 2) << 2) { - tcg_out_bundle(s, MmI, - tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, - TCG_REG_R2, arg2, arg1), - tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2); - tcg_out_bundle(s, MmI, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, 
- TCG_REG_R2, TCG_REG_R2, arg1), - tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + tcg_opc_i21(s, TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, addr, 0); + tcg_opc_b4(s, TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6); + tcg_flush_queue(s); +} + +static void tcg_out_ld_rel(TCGContext *s, uint64_t opc_m1, TCGArg arg, + TCGArg arg1, tcg_target_long arg2) +{ + if (arg2 != 0) { + tcg_out_addi2(s, TCG_REG_R2, arg1, arg2); + arg1 = TCG_REG_R2; } + tcg_opc_m1(s, TCG_REG_P0, opc_m1, arg, arg1); } -static inline void tcg_out_st_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg, - TCGArg arg1, tcg_target_long arg2) +static void tcg_out_st_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg, + TCGArg arg1, tcg_target_long arg2) { - if (arg2 == ((int16_t)arg2 >> 2) << 2) { - tcg_out_bundle(s, MmI, - tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, - TCG_REG_R2, arg2, arg1), - tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2); - tcg_out_bundle(s, MmI, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, - TCG_REG_R2, TCG_REG_R2, arg1), - tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + if (arg2 != 0) { + tcg_out_addi2(s, TCG_REG_R2, arg1, arg2); + arg1 = TCG_REG_R2; } + tcg_opc_m4(s, TCG_REG_P0, opc_m4, arg, arg1); } -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGArg arg, - TCGArg arg1, tcg_target_long arg2) +static void tcg_out_ld(TCGContext *s, TCGType type, TCGArg arg, + TCGArg arg1, tcg_target_long arg2) { if (type == TCG_TYPE_I32) { tcg_out_ld_rel(s, OPC_LD4_M1, arg, arg1, arg2); @@ -999,8 +1410,8 @@ static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGArg arg, } } -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGArg arg, - TCGArg arg1, tcg_target_long arg2) +static void tcg_out_st(TCGContext *s, TCGType type, TCGArg arg, + TCGArg arg1, tcg_target_long arg2) { if (type == TCG_TYPE_I32) 
{ tcg_out_st_rel(s, OPC_ST4_M4, arg, arg1, arg2); @@ -1009,199 +1420,159 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGArg arg, } } -static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGArg ret, - TCGArg arg1, int const_arg1, - TCGArg arg2, int const_arg2) +static void tcg_out_alu(TCGContext *s, uint64_t opc_a1, uint64_t opc_a3, + TCGArg ret, TCGArg arg1, int const_arg1, TCGArg arg2) { - uint64_t opc1, opc2; - - if (const_arg1 && arg1 != 0) { - opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, - TCG_REG_R2, arg1, TCG_REG_R0); - arg1 = TCG_REG_R2; + if (const_arg1) { + assert (arg1 == (int8_t)arg1); + tcg_opc_a3(s, TCG_REG_P0, opc_a3, ret, arg1, arg2); } else { - opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0); + tcg_opc_a1(s, TCG_REG_P0, opc_a1, ret, arg1, arg2); } +} - if (const_arg2 && arg2 != 0) { - opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, - TCG_REG_R3, arg2, TCG_REG_R0); +static void tcg_out_and(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2 && arg2 != (int8_t)arg2) { + int64_t test = arg2; + int pos, len; + + /* Look for transitions between sequences of 0 and 1. + We want exactly 1 or 2 such transitions. 
*/ + pos = __builtin_ctzll(~test); + test >>= pos; + + if (test == 0) { + len = 64 - pos; + tcg_opc_i14(s, TCG_REG_P0, OPC_DEP_I14, + ret, 0, arg1, 63 - pos, len - 1); + return; + } + + len = __builtin_ctzll(test); + test >>= len; + + if (test == -1) { + tcg_opc_i14(s, TCG_REG_P0, OPC_DEP_I14, + ret, 0, arg1, 63 - pos, len - 1); + return; + } + + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R3, arg2); arg2 = TCG_REG_R3; - } else { - opc2 = tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0); + const_arg2 = 0; } - tcg_out_bundle(s, mII, - opc1, - opc2, - tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2)); + tcg_out_alu(s, OPC_AND_A1, OPC_AND_A3, ret, arg2, const_arg2, arg1); } -static inline void tcg_out_eqv(TCGContext *s, TCGArg ret, - TCGArg arg1, int const_arg1, - TCGArg arg2, int const_arg2) +static inline void tcg_out_or(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_a1 (TCG_REG_P0, OPC_XOR_A1, ret, arg1, arg2), - tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); + /* ??? Do deposit -1. 
*/ + tcg_out_alu(s, OPC_OR_A1, OPC_OR_A3, ret, arg2, const_arg2, arg1); } -static inline void tcg_out_nand(TCGContext *s, TCGArg ret, - TCGArg arg1, int const_arg1, - TCGArg arg2, int const_arg2) +static inline void tcg_out_xor(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, ret, arg1, arg2), - tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); + tcg_out_alu(s, OPC_XOR_A1, OPC_XOR_A3, ret, arg2, const_arg2, arg1); } -static inline void tcg_out_nor(TCGContext *s, TCGArg ret, - TCGArg arg1, int const_arg1, - TCGArg arg2, int const_arg2) +static inline void tcg_out_andc(TCGContext *s, TCGArg ret, + TCGArg arg1, int const_arg1, TCGArg arg2) { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, arg2), - tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); + tcg_out_alu(s, OPC_ANDCM_A1, OPC_ANDCM_A3, ret, arg1, const_arg1, arg2); } -static inline void tcg_out_orc(TCGContext *s, TCGArg ret, - TCGArg arg1, int const_arg1, - TCGArg arg2, int const_arg2) +static void tcg_out_mul(TCGContext *s, TCGArg ret, TCGArg arg1, TCGArg arg2) { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, TCG_REG_R2, -1, arg2), - tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, TCG_REG_R2)); + tcg_opc_m18(s, TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F6, arg1); + tcg_opc_m18(s, TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F7, arg2); + tcg_add_stop(s); + tcg_opc_f2(s, TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F6, + TCG_REG_F7, TCG_REG_F0); + tcg_add_stop(s); + tcg_opc_m19(s, TCG_REG_P0, OPC_GETF_SIG_M19, ret, TCG_REG_F6); } -static inline void tcg_out_mul(TCGContext *s, TCGArg ret, - TCGArg arg1, TCGArg arg2) +static inline void tcg_out_shladd(TCGContext *s, int qp, int ret, + int arg1, int shift, int arg2) { - tcg_out_bundle(s, mmI, - 
tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F6, arg1), - tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F7, arg2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); - tcg_out_bundle(s, mmF, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F6, - TCG_REG_F7, TCG_REG_F0)); - tcg_out_bundle(s, miI, - tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, ret, TCG_REG_F6), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + tcg_opc_a2(s, qp, OPC_SHLADD_A2, ret, arg1, shift - 1, arg2); } -static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) +static inline void tcg_out_sar_i32(TCGContext *s, int qp, TCGArg ret, + TCGArg arg1, TCGArg arg2, int const_arg2) { if (const_arg2) { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11, - ret, arg1, arg2, 31 - arg2)); + tcg_opc_i11(s, qp, OPC_EXTR_I11, ret, arg1, arg2, 31 - arg2); } else { - tcg_out_bundle(s, mII, - tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, - TCG_REG_R3, 0x1f, arg2), - tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R2, arg1), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, - TCG_REG_R2, TCG_REG_R3)); + tcg_opc_a3(s, qp, OPC_AND_A3, TCG_REG_R3, 0x1f, arg2); + tcg_opc_i29(s, qp, OPC_SXT4_I29, TCG_REG_R2, arg1); + tcg_opc_i5(s, qp, OPC_SHR_I5, ret, TCG_REG_R2, TCG_REG_R3); } } -static inline void tcg_out_sar_i64(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) +static inline void tcg_out_sar_i64(TCGContext *s, int qp, TCGArg ret, + TCGArg arg1, TCGArg arg2, int const_arg2) { if (const_arg2) { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11, - ret, arg1, arg2, 63 - arg2)); + tcg_opc_i11(s, qp, OPC_EXTR_I11, ret, arg1, arg2, 63 - arg2); } 
else { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, arg1, arg2)); + tcg_opc_i5(s, qp, OPC_SHR_I5, ret, arg1, arg2); } } -static inline void tcg_out_shl_i32(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) +static inline void tcg_out_shl_i32(TCGContext *s, int qp, TCGArg ret, + TCGArg arg1, TCGArg arg2, int const_arg2) { if (const_arg2) { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, - arg1, 63 - arg2, 31 - arg2)); + if (arg2 >= 1 && arg2 <= 4) { + tcg_out_shladd(s, qp, ret, arg1, arg2, TCG_REG_R0); + } else { + tcg_opc_i12(s, qp, OPC_DEP_Z_I12, ret, arg1, 63 - arg2, 31 - arg2); + } } else { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R2, - 0x1f, arg2), - tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret, - arg1, TCG_REG_R2)); + tcg_opc_a3(s, qp, OPC_AND_A3, TCG_REG_R2, 0x1f, arg2); + tcg_opc_i7(s, qp, OPC_SHL_I7, ret, arg1, TCG_REG_R2); } } -static inline void tcg_out_shl_i64(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) +static inline void tcg_out_shl_i64(TCGContext *s, int qp, TCGArg ret, + TCGArg arg1, TCGArg arg2, int const_arg2) { if (const_arg2) { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, - arg1, 63 - arg2, 63 - arg2)); + if (arg2 >= 1 && arg2 <= 4) { + tcg_out_shladd(s, qp, ret, arg1, arg2, TCG_REG_R0); + } else { + tcg_opc_i12(s, qp, OPC_DEP_Z_I12, ret, arg1, 63 - arg2, 63 - arg2); + } } else { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret, - arg1, arg2)); + tcg_opc_i7(s, qp, OPC_SHL_I7, ret, arg1, arg2); } } -static 
inline void tcg_out_shr_i32(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) +static inline void tcg_out_shr_i32(TCGContext *s, int qp, TCGArg ret, + TCGArg arg1, TCGArg arg2, int const_arg2) { if (const_arg2) { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, - arg1, arg2, 31 - arg2)); + tcg_opc_i11(s, qp, OPC_EXTR_U_I11, ret, arg1, arg2, 31 - arg2); } else { - tcg_out_bundle(s, mII, - tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, - 0x1f, arg2), - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, arg1), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, - TCG_REG_R2, TCG_REG_R3)); + tcg_opc_a3(s, qp, OPC_AND_A3, TCG_REG_R3, 0x1f, arg2); + tcg_opc_i29(s, qp, OPC_ZXT4_I29, TCG_REG_R2, arg1); + tcg_opc_i5(s, qp, OPC_SHR_U_I5, ret, TCG_REG_R2, TCG_REG_R3); } } -static inline void tcg_out_shr_i64(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) +static inline void tcg_out_shr_i64(TCGContext *s, int qp, TCGArg ret, + TCGArg arg1, TCGArg arg2, int const_arg2) { if (const_arg2) { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, - arg1, arg2, 63 - arg2)); + tcg_opc_i11(s, qp, OPC_EXTR_U_I11, ret, arg1, arg2, 63 - arg2); } else { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, - arg1, arg2)); + tcg_opc_i5(s, qp, OPC_SHR_U_I5, ret, arg1, arg2); } } @@ -1209,25 +1580,14 @@ static inline void tcg_out_rotl_i32(TCGContext *s, TCGArg ret, TCGArg arg1, TCGArg arg2, int const_arg2) { if (const_arg2) { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, - TCG_REG_R2, arg1, arg1), - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, - TCG_REG_R2, 32 - arg2, 31)); + 
tcg_opc_i2(s, TCG_REG_P0, OPC_UNPACK4_L_I2, TCG_REG_R2, arg1, arg1); + tcg_opc_i11(s, TCG_REG_P0, OPC_EXTR_U_I11, + ret, TCG_REG_R2, 32 - arg2, 31); } else { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, - TCG_REG_R2, arg1, arg1), - tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, - 0x1f, arg2)); - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R3, - 0x20, TCG_REG_R3), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, - TCG_REG_R2, TCG_REG_R3)); + tcg_opc_a3(s, TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, 0x1f, arg2); + tcg_opc_i2(s, TCG_REG_P0, OPC_UNPACK4_L_I2, TCG_REG_R2, arg1, arg1); + tcg_opc_a3(s, TCG_REG_P0, OPC_SUB_A3, TCG_REG_R3, 0x20, TCG_REG_R3); + tcg_opc_i5(s, TCG_REG_P0, OPC_SHR_U_I5, ret, TCG_REG_R2, TCG_REG_R3); } } @@ -1235,24 +1595,12 @@ static inline void tcg_out_rotl_i64(TCGContext *s, TCGArg ret, TCGArg arg1, TCGArg arg2, int const_arg2) { if (const_arg2) { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1, - arg1, 0x40 - arg2)); + tcg_opc_i10(s, TCG_REG_P0, OPC_SHRP_I10, ret, arg1, arg1, 0x40 - arg2); } else { - tcg_out_bundle(s, mII, - tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R2, - 0x40, arg2), - tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R3, - arg1, arg2), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R2, - arg1, TCG_REG_R2)); - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, - TCG_REG_R2, TCG_REG_R3)); + tcg_opc_a3(s, TCG_REG_P0, OPC_SUB_A3, TCG_REG_R2, 0x40, arg2); + tcg_opc_i7(s, TCG_REG_P0, OPC_SHL_I7, TCG_REG_R3, arg1, arg2); + tcg_opc_i5(s, TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R2, arg1, TCG_REG_R2); + tcg_opc_a1(s, TCG_REG_P0, OPC_OR_A1, ret, TCG_REG_R2, TCG_REG_R3); } } @@ -1260,20 +1608,12 @@ static inline 
void tcg_out_rotr_i32(TCGContext *s, TCGArg ret, TCGArg arg1, TCGArg arg2, int const_arg2) { if (const_arg2) { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, - TCG_REG_R2, arg1, arg1), - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, - TCG_REG_R2, arg2, 31)); + tcg_opc_i2(s, TCG_REG_P0, OPC_UNPACK4_L_I2, TCG_REG_R2, arg1, arg1); + tcg_opc_i11(s, TCG_REG_P0, OPC_EXTR_U_I11, ret, TCG_REG_R2, arg2, 31); } else { - tcg_out_bundle(s, mII, - tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, - 0x1f, arg2), - tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, - TCG_REG_R2, arg1, arg1), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, - TCG_REG_R2, TCG_REG_R3)); + tcg_opc_a3(s, TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, 0x1f, arg2); + tcg_opc_i2(s, TCG_REG_P0, OPC_UNPACK4_L_I2, TCG_REG_R2, arg1, arg1); + tcg_opc_i5(s, TCG_REG_P0, OPC_SHR_U_I5, ret, TCG_REG_R2, TCG_REG_R3); } } @@ -1281,62 +1621,51 @@ static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1, TCGArg arg2, int const_arg2) { if (const_arg2) { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1, - arg1, arg2)); + tcg_opc_i10(s, TCG_REG_P0, OPC_SHRP_I10, ret, arg1, arg1, arg2); } else { - tcg_out_bundle(s, mII, - tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R2, - 0x40, arg2), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R3, - arg1, arg2), - tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R2, - arg1, TCG_REG_R2)); - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, - TCG_REG_R2, TCG_REG_R3)); + tcg_opc_a3(s, TCG_REG_P0, OPC_SUB_A3, TCG_REG_R2, 0x40, arg2); + tcg_opc_i5(s, TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R3, arg1, arg2); + tcg_opc_i7(s, TCG_REG_P0, OPC_SHL_I7, TCG_REG_R2, arg1, TCG_REG_R2); + tcg_opc_a1(s, TCG_REG_P0, OPC_OR_A1, ret, TCG_REG_R2, 
TCG_REG_R3); } } static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29, TCGArg ret, TCGArg arg) { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i29(TCG_REG_P0, opc_i29, ret, arg)); + tcg_opc_i29(s, TCG_REG_P0, opc_i29, ret, arg); } -static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg) +static inline void tcg_out_bswap64(TCGContext *s, int qp, + TCGArg ret, TCGArg arg) { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 15, 15), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, ret, 0xb)); + tcg_opc_i3(s, qp, OPC_MUX1_I3, ret, arg, 0xb); } -static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg) +static inline void tcg_out_bswap16(TCGContext *s, int qp, + TCGArg ret, TCGArg arg, bool sign) { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 31, 31), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, ret, 0xb)); + tcg_out_bswap64(s, qp, ret, arg); + if (sign) { + tcg_out_sar_i64(s, qp, ret, ret, 48, 1); + } else { + tcg_out_shr_i64(s, qp, ret, ret, 48, 1); + } } -static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg) +static inline void tcg_out_bswap32(TCGContext *s, int qp, + TCGArg ret, TCGArg arg, bool sign) { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, arg, 0xb)); + tcg_out_bswap64(s, qp, ret, arg); + if (sign) { + tcg_out_sar_i64(s, qp, ret, ret, 32, 1); + } else { + tcg_out_shr_i64(s, qp, ret, ret, 32, 1); + } } -static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1, - TCGArg arg2, int cmp4) +static void tcg_opc_cmp_a(TCGContext *s, int qp, TCGCond cond, + TCGArg arg1, TCGArg arg2, int cmp4) { uint64_t opc_eq_a6, opc_lt_a6, opc_ltu_a6; @@ -1352,79 +1681,69 @@ static inline 
uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1, switch (cond) { case TCG_COND_EQ: - return tcg_opc_a6 (qp, opc_eq_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2); + tcg_opc_a6(s, qp, opc_eq_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2); + break; case TCG_COND_NE: - return tcg_opc_a6 (qp, opc_eq_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2); + tcg_opc_a6(s, qp, opc_eq_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2); + break; case TCG_COND_LT: - return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2); + tcg_opc_a6(s, qp, opc_lt_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2); + break; case TCG_COND_LTU: - return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2); + tcg_opc_a6(s, qp, opc_ltu_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2); + break; case TCG_COND_GE: - return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2); + tcg_opc_a6(s, qp, opc_lt_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2); + break; case TCG_COND_GEU: - return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2); + tcg_opc_a6(s, qp, opc_ltu_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2); + break; case TCG_COND_LE: - return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P7, TCG_REG_P6, arg2, arg1); + tcg_opc_a6(s, qp, opc_lt_a6, TCG_REG_P7, TCG_REG_P6, arg2, arg1); + break; case TCG_COND_LEU: - return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P7, TCG_REG_P6, arg2, arg1); + tcg_opc_a6(s, qp, opc_ltu_a6, TCG_REG_P7, TCG_REG_P6, arg2, arg1); + break; case TCG_COND_GT: - return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P6, TCG_REG_P7, arg2, arg1); + tcg_opc_a6(s, qp, opc_lt_a6, TCG_REG_P6, TCG_REG_P7, arg2, arg1); + break; case TCG_COND_GTU: - return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P6, TCG_REG_P7, arg2, arg1); + tcg_opc_a6(s, qp, opc_ltu_a6, TCG_REG_P6, TCG_REG_P7, arg2, arg1); + break; default: tcg_abort(); break; } } -static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1, - int const_arg1, TCGArg arg2, int const_arg2, - int label_index, int cmp4) +static void tcg_out_brcond(TCGContext *s, 
TCGCond cond, TCGArg arg1, + int const_arg1, TCGArg arg2, int const_arg2, + int label_index, int cmp4) { - TCGLabel *l = &s->labels[label_index]; - uint64_t opc1, opc2; - if (const_arg1 && arg1 != 0) { - opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2, - arg1, TCG_REG_R0); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, arg1); arg1 = TCG_REG_R2; - } else { - opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0); } - if (const_arg2 && arg2 != 0) { - opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R3, - arg2, TCG_REG_R0); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R3, arg2); arg2 = TCG_REG_R3; - } else { - opc2 = tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0); } - tcg_out_bundle(s, mII, - opc1, - opc2, - tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4)); - tcg_out_bundle(s, mmB, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_b1 (TCG_REG_P6, OPC_BR_DPTK_FEW_B1, - get_reloc_pcrel21b(s->code_ptr + 2))); - - if (l->has_value) { - reloc_pcrel21b((s->code_ptr - 16) + 2, l->u.value); - } else { - tcg_out_reloc(s, (s->code_ptr - 16) + 2, - R_IA64_PCREL21B, label_index, 0); - } + tcg_opc_cmp_a(s, TCG_REG_P0, cond, arg1, arg2, cmp4); + /* No stop bit required between compare and branch. 
*/ + + tcg_opc_b1(s, TCG_REG_P6, OPC_BR_DPTK_FEW_B1, 0); + tcg_flush_queue(s); + tcg_out_reloc(s, (s->code_ptr - 16) + 2, R_IA64_PCREL21B, label_index, 0); } -static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret, - TCGArg arg1, TCGArg arg2, int cmp4) +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret, + TCGArg arg1, TCGArg arg2, int cmp4) { - tcg_out_bundle(s, MmI, - tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), - tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, 1, TCG_REG_R0), - tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, 0, TCG_REG_R0)); + tcg_opc_cmp_a(s, TCG_REG_P0, cond, arg1, arg2, cmp4); + tcg_add_stop(s); + tcg_opc_a5(s, TCG_REG_P6, OPC_ADDL_A5, ret, 1, TCG_REG_R0); + tcg_opc_a5(s, TCG_REG_P7, OPC_ADDL_A5, ret, 0, TCG_REG_R0); } #if defined(CONFIG_SOFTMMU) @@ -1432,62 +1751,75 @@ static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret, #include "../../softmmu_defs.h" /* Load and compare a TLB entry, and return the result in (p6, p7). - R2 is loaded with the address of the addend TLB entry. - R56 is loaded with the address, zero extented on 32-bit targets. 
*/ -static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg, - int s_bits, uint64_t offset_rw, - uint64_t offset_addend) -{ - tcg_out_bundle(s, mII, - tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R3, - TARGET_PAGE_MASK | ((1 << s_bits) - 1), - TCG_REG_R0), - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2, - addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1), - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R2, - TCG_REG_R2, 63 - CPU_TLB_ENTRY_BITS, - 63 - CPU_TLB_ENTRY_BITS)); - tcg_out_bundle(s, mII, - tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2, - offset_rw, TCG_REG_R2), -#if TARGET_LONG_BITS == 32 - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R56, addr_reg), -#else - tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R56, - 0, addr_reg), -#endif - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_REG_R2, TCG_AREG0)); - tcg_out_bundle(s, mII, - tcg_opc_m3 (TCG_REG_P0, - (TARGET_LONG_BITS == 32 - ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R57, - TCG_REG_R2, offset_addend - offset_rw), - tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, TCG_REG_R3, - TCG_REG_R3, TCG_REG_R56), - tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6, - TCG_REG_P7, TCG_REG_R3, TCG_REG_R57)); -} - -static void *qemu_ld_helpers[4] = { + R56 is loaded with the address, zero extended on 32-bit targets. + R58 is loaded with the addend TLB entry. */ +static void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg, + int s_bits, uint64_t offset_rw, + uint64_t offset_addend) +{ + /* We're going to split the shift into two parts so that we can + combine it with two additions via shladd. This will reduce + the critical path by 1 cycle. 
*/ + assert (CPU_TLB_ENTRY_BITS <= 6); + assert ((offset_rw & 3) == 0); + + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R3, offset_rw / 4); + tcg_opc_i11(s, TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2, + addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1); + tcg_out_shladd(s, TCG_REG_P0, TCG_REG_R2, TCG_REG_R2, + CPU_TLB_ENTRY_BITS - 2, TCG_REG_R3); + + tcg_out_shladd(s, TCG_REG_P0, TCG_REG_R2, TCG_REG_R2, 2, TCG_AREG0); + + tcg_opc_i14(s, TCG_REG_P0, OPC_DEP_I14, TCG_REG_R3, 0, addr_reg, + 63 - s_bits, TARGET_PAGE_BITS - s_bits - 1); + + tcg_opc_m3(s, TCG_REG_P0, + (TARGET_LONG_BITS == 32 ? OPC_LD4_M3 : OPC_LD8_M3), + TCG_REG_R57, TCG_REG_R2, offset_addend - offset_rw); + + if (TARGET_LONG_BITS == 32) { + tcg_opc_i29(s, TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R56, addr_reg); + } else { + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R56, addr_reg); + } + + tcg_opc_m1(s, TCG_REG_P0, OPC_LD8_M1, TCG_REG_R58, TCG_REG_R2); + + tcg_opc_a6(s, TCG_REG_P0, + (TARGET_LONG_BITS == 32 ? OPC_CMP4_EQ_A6 : OPC_CMP_EQ_A6), + TCG_REG_P6, TCG_REG_P7, TCG_REG_R3, TCG_REG_R57); + tcg_add_stop(s); +} + +static void * const qemu_ld_helpers[4] = { __ldb_mmu, __ldw_mmu, __ldl_mmu, __ldq_mmu, }; -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) { + static uint64_t const opc_ld_m1[4] = { + OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 + }; + static uint64_t const opc_ext_i29[8] = { + OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0, + OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 + }; int addr_reg, data_reg, mem_index, s_bits, bswap; - uint64_t opc_ld_m1[4] = { OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 }; - uint64_t opc_ext_i29[8] = { OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0, - OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 }; + tcg_target_long pc, gp; data_reg = *args++; addr_reg = *args++; mem_index = *args; s_bits = opc & 3; + /* Extract the absolute addresses from the descriptor. 
*/ + pc = ((tcg_target_long *)qemu_ld_helpers[s_bits])[0]; + gp = ((tcg_target_long *)qemu_ld_helpers[s_bits])[1]; + #ifdef TARGET_WORDS_BIGENDIAN bswap = 1; #else @@ -1500,86 +1832,62 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) offsetof(CPUState, tlb_table[mem_index][0].addend)); /* P6 is the fast path, and P7 the slow path */ - tcg_out_bundle(s, mLX, - tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R57, - mem_index, TCG_REG_R0), - tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]), - tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2, - (tcg_target_long) qemu_ld_helpers[s_bits])); - tcg_out_bundle(s, MmI, - tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3, - TCG_REG_R2, 8), - tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3, - TCG_REG_R3, TCG_REG_R56), - tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6, - TCG_REG_R3, 0)); - if (bswap && s_bits == 1) { - tcg_out_bundle(s, MmI, - tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R3), - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R8, TCG_REG_R8, 15, 15)); - } else if (bswap && s_bits == 2) { - tcg_out_bundle(s, MmI, - tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R3), - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R8, TCG_REG_R8, 31, 31)); - } else { - tcg_out_bundle(s, mmI, - tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R3), - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); - } - if (!bswap || s_bits == 0) { - tcg_out_bundle(s, miB, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, - TCG_REG_B0, TCG_REG_B6)); - } else { - tcg_out_bundle(s, miB, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3, - TCG_REG_R8, TCG_REG_R8, 0xb), 
- tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, - TCG_REG_B0, TCG_REG_B6)); + tcg_opc_a1(s, TCG_REG_P6, OPC_ADD_A1, + TCG_REG_R3, TCG_REG_R58, TCG_REG_R56); + tcg_opc_l2(s, gp); + tcg_opc_x2(s, TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp); + + tcg_opc_m1(s, TCG_REG_P6, opc_ld_m1[s_bits], TCG_REG_R8, TCG_REG_R3); + tcg_opc_a5(s, TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R57, mem_index, TCG_REG_R0); + + if (bswap) { + if (s_bits == 1) { + tcg_out_bswap16(s, TCG_REG_P6, TCG_REG_R8, TCG_REG_R8, 0); + } else if (s_bits == 2) { + tcg_out_bswap32(s, TCG_REG_P6, TCG_REG_R8, TCG_REG_R8, 0); + } else if (s_bits == 3) { + tcg_out_bswap64(s, TCG_REG_P6, TCG_REG_R8, TCG_REG_R8); + } } + tcg_opc_l3(s, 0); + tcg_opc_x4(s, TCG_REG_P7, OPC_BRL_CALL_SPTK_MANY_X4, TCG_REG_B0, 0); + tcg_flush_queue(s); + reloc_pcrel60b (s->code_ptr - 16, pc); + + /* Note that this cleanup is required for the slow path, and since + we've clobbered p7 by now, we must execute it unconditionally. + Which means the fast path above must place its results in R8. 
*/ if (opc == 3) { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, - data_reg, 0, TCG_REG_R8)); + tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R8); } else { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i29(TCG_REG_P0, opc_ext_i29[opc], - data_reg, TCG_REG_R8)); + tcg_out_ext(s, opc_ext_i29[opc], data_reg, TCG_REG_R8); } } -static void *qemu_st_helpers[4] = { +static void * const qemu_st_helpers[4] = { __stb_mmu, __stw_mmu, __stl_mmu, __stq_mmu, }; -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) { + static uint64_t const opc_st_m4[4] = { + OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 + }; int addr_reg, data_reg, mem_index, bswap; - uint64_t opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 }; + tcg_target_long pc, gp; data_reg = *args++; addr_reg = *args++; mem_index = *args; + /* Extract the absolute addresses from the descriptor. 
*/ + pc = ((tcg_target_long *)qemu_st_helpers[opc])[0]; + gp = ((tcg_target_long *)qemu_st_helpers[opc])[1]; + #ifdef TARGET_WORDS_BIGENDIAN bswap = 1; #else @@ -1591,66 +1899,37 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) offsetof(CPUState, tlb_table[mem_index][0].addend)); /* P6 is the fast path, and P7 the slow path */ - tcg_out_bundle(s, mLX, - tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R57, - 0, data_reg), - tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[opc]), - tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2, - (tcg_target_long) qemu_st_helpers[opc])); - tcg_out_bundle(s, MmI, - tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3, - TCG_REG_R2, 8), - tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3, - TCG_REG_R3, TCG_REG_R56), - tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6, - TCG_REG_R3, 0)); - - if (!bswap || opc == 0) { - tcg_out_bundle(s, mII, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); - } else if (opc == 1) { - tcg_out_bundle(s, mII, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R2, data_reg, 15, 15), - tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3, - TCG_REG_R2, TCG_REG_R2, 0xb)); - data_reg = TCG_REG_R2; - } else if (opc == 2) { - tcg_out_bundle(s, mII, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R2, data_reg, 31, 31), - tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3, - TCG_REG_R2, TCG_REG_R2, 0xb)); - data_reg = TCG_REG_R2; - } else if (opc == 3) { - tcg_out_bundle(s, miI, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3, - TCG_REG_R2, data_reg, 0xb)); - data_reg = TCG_REG_R2; + tcg_opc_a1(s, TCG_REG_P6, OPC_ADD_A1, + TCG_REG_R3, TCG_REG_R58, TCG_REG_R56); + tcg_opc_l2(s, gp); + tcg_opc_x2(s, 
TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp); + + tcg_opc_a4(s, TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R57, 0, data_reg); + tcg_opc_a5(s, TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58, mem_index, TCG_REG_R0); + + if (bswap) { + if (opc == 1) { + tcg_out_bswap16(s, TCG_REG_P6, TCG_REG_R2, data_reg, 0); + data_reg = TCG_REG_R2; + } else if (opc == 2) { + tcg_out_bswap32(s, TCG_REG_P6, TCG_REG_R2, data_reg, 0); + data_reg = TCG_REG_R2; + } else if (opc == 3) { + tcg_out_bswap64(s, TCG_REG_P6, TCG_REG_R2, data_reg); + data_reg = TCG_REG_R2; + } } - tcg_out_bundle(s, miB, - tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc], - data_reg, TCG_REG_R3), - tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58, - mem_index, TCG_REG_R0), - tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, - TCG_REG_B0, TCG_REG_B6)); + tcg_opc_m4(s, TCG_REG_P6, opc_st_m4[opc], data_reg, TCG_REG_R3); + tcg_opc_l3(s, 0); + tcg_opc_x4(s, TCG_REG_P7, OPC_BRL_CALL_SPTK_MANY_X4, TCG_REG_B0, 0); + tcg_flush_queue(s); + reloc_pcrel60b (s->code_ptr - 16, pc); } #else /* !CONFIG_SOFTMMU */ -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) { static uint64_t const opc_ld_m1[4] = { OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 @@ -1670,131 +1949,37 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) bswap = 0; #endif -#if TARGET_LONG_BITS == 32 + if (TARGET_LONG_BITS == 32) { + tcg_opc_i29(s, TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R3, addr_reg); + addr_reg = TCG_REG_R3; + } if (GUEST_BASE != 0) { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, - TCG_REG_R3, addr_reg), - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_GUEST_BASE_REG, TCG_REG_R3)); - } else { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, - TCG_REG_R2, addr_reg), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + tcg_opc_a1(s, 
TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, + TCG_GUEST_BASE_REG, addr_reg); + addr_reg = TCG_REG_R2; } - if (!bswap || s_bits == 0) { - if (s_bits == opc) { - tcg_out_bundle(s, miI, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); - } else { - tcg_out_bundle(s, mII, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits], - data_reg, data_reg)); - } - } else if (s_bits == 3) { - tcg_out_bundle(s, mII, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - data_reg, data_reg, 0xb)); - } else { + tcg_opc_m1(s, TCG_REG_P0, opc_ld_m1[s_bits], data_reg, addr_reg); + + if (bswap) { if (s_bits == 1) { - tcg_out_bundle(s, mII, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - data_reg, data_reg, 15, 15)); - } else { - tcg_out_bundle(s, mII, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - data_reg, data_reg, 31, 31)); - } - if (opc == s_bits) { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - data_reg, data_reg, 0xb)); - } else { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - data_reg, data_reg, 0xb), - tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits], - data_reg, data_reg)); + tcg_out_bswap16(s, TCG_REG_P0, data_reg, data_reg, s_bits != opc); + } else if (s_bits == 2) { + tcg_out_bswap32(s, TCG_REG_P0, data_reg, data_reg, s_bits != opc); + } else if (s_bits == 3) { + tcg_out_bswap64(s, TCG_REG_P0, data_reg, 
data_reg); } + } else if (s_bits != opc) { + tcg_opc_i29(s, TCG_REG_P0, opc_sxt_i29[s_bits], data_reg, data_reg); } -#else - if (GUEST_BASE != 0) { - tcg_out_bundle(s, MmI, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_GUEST_BASE_REG, addr_reg), - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); - } else { - tcg_out_bundle(s, mmI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, addr_reg), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); - } - - if (bswap && s_bits == 1) { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - data_reg, data_reg, 15, 15), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - data_reg, data_reg, 0xb)); - } else if (bswap && s_bits == 2) { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - data_reg, data_reg, 31, 31), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - data_reg, data_reg, 0xb)); - } else if (bswap && s_bits == 3) { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - data_reg, data_reg, 0xb)); - } - if (s_bits != opc) { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits], - data_reg, data_reg)); - } -#endif } -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) { static uint64_t const opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 }; int addr_reg, data_reg, bswap; -#if TARGET_LONG_BITS == 64 - uint64_t add_guest_base; -#endif data_reg = *args++; addr_reg = *args++; @@ -1805,312 +1990,243 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) bswap = 0; #endif -#if 
TARGET_LONG_BITS == 32 - if (GUEST_BASE != 0) { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, - TCG_REG_R3, addr_reg), - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_GUEST_BASE_REG, TCG_REG_R3)); - } else { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, - TCG_REG_R2, addr_reg), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); - } - - if (bswap) { - if (opc == 1) { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - TCG_REG_R3, data_reg, 15, 15), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - TCG_REG_R3, TCG_REG_R3, 0xb)); - data_reg = TCG_REG_R3; - } else if (opc == 2) { - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - TCG_REG_R3, data_reg, 31, 31), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - TCG_REG_R3, TCG_REG_R3, 0xb)); - data_reg = TCG_REG_R3; - } else if (opc == 3) { - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - TCG_REG_R3, data_reg, 0xb)); - data_reg = TCG_REG_R3; - } + if (TARGET_LONG_BITS == 32) { + tcg_opc_i29(s, TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, addr_reg); + addr_reg = TCG_REG_R2; } - tcg_out_bundle(s, mmI, - tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc], - data_reg, TCG_REG_R2), - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); -#else if (GUEST_BASE != 0) { - add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_GUEST_BASE_REG, addr_reg); + tcg_opc_a1(s, TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, + TCG_GUEST_BASE_REG, addr_reg); addr_reg = TCG_REG_R2; - } else { - add_guest_base = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0); } - if (!bswap || opc == 0) { - tcg_out_bundle(s, (GUEST_BASE ? 
MmI : mmI), - add_guest_base, - tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc], - data_reg, addr_reg), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); - } else { + if (bswap) { if (opc == 1) { - tcg_out_bundle(s, mII, - add_guest_base, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - TCG_REG_R3, data_reg, 15, 15), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - TCG_REG_R3, TCG_REG_R3, 0xb)); + tcg_out_bswap16(s, TCG_REG_P0, TCG_REG_R3, data_reg, 0); data_reg = TCG_REG_R3; } else if (opc == 2) { - tcg_out_bundle(s, mII, - add_guest_base, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - TCG_REG_R3, data_reg, 31, 31), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - TCG_REG_R3, TCG_REG_R3, 0xb)); + tcg_out_bswap32(s, TCG_REG_P0, TCG_REG_R3, data_reg, 0); data_reg = TCG_REG_R3; } else if (opc == 3) { - tcg_out_bundle(s, miI, - add_guest_base, - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, - TCG_REG_R3, data_reg, 0xb)); + tcg_out_bswap64(s, TCG_REG_P0, TCG_REG_R3, data_reg); data_reg = TCG_REG_R3; } - tcg_out_bundle(s, miI, - tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc], - data_reg, addr_reg), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); } -#endif + tcg_opc_m4(s, TCG_REG_P0, opc_st_m4[opc], data_reg, addr_reg); } #endif -static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg *args, const int *const_args) +static void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg *args, const int *const_args) { + TCGArg arg0, arg1, arg2; + + arg0 = args[0]; + arg1 = args[1]; + arg2 = args[2]; + switch(opc) { case INDEX_op_exit_tb: - tcg_out_exit_tb(s, args[0]); + tcg_out_exit_tb(s, arg0); break; case INDEX_op_br: - tcg_out_br(s, args[0]); + tcg_out_br(s, arg0); break; case INDEX_op_call: - tcg_out_call(s, args[0]); + tcg_out_call(s, arg0, const_args[0]); break; case INDEX_op_goto_tb: - tcg_out_goto_tb(s, args[0]); + tcg_out_goto_tb(s, arg0); break; case INDEX_op_jmp: - tcg_out_jmp(s, args[0]); + tcg_out_jmp(s, arg0); break; 
case INDEX_op_movi_i32: - tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); + tcg_out_movi(s, TCG_TYPE_I32, arg0, arg1); break; case INDEX_op_movi_i64: - tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); + tcg_out_movi(s, TCG_TYPE_I64, arg0, arg1); break; case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: - tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]); + tcg_out_ld_rel(s, OPC_LD1_M1, arg0, arg1, arg2); break; case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: - tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]); - tcg_out_ext(s, OPC_SXT1_I29, args[0], args[0]); + tcg_out_ld_rel(s, OPC_LD1_M1, arg0, arg1, arg2); + tcg_out_ext(s, OPC_SXT1_I29, arg0, arg0); break; case INDEX_op_ld16u_i32: case INDEX_op_ld16u_i64: - tcg_out_ld_rel(s, OPC_LD2_M1, args[0], args[1], args[2]); + tcg_out_ld_rel(s, OPC_LD2_M1, arg0, arg1, arg2); break; case INDEX_op_ld16s_i32: case INDEX_op_ld16s_i64: - tcg_out_ld_rel(s, OPC_LD2_M1, args[0], args[1], args[2]); - tcg_out_ext(s, OPC_SXT2_I29, args[0], args[0]); + tcg_out_ld_rel(s, OPC_LD2_M1, arg0, arg1, arg2); + tcg_out_ext(s, OPC_SXT2_I29, arg0, arg0); break; case INDEX_op_ld_i32: case INDEX_op_ld32u_i64: - tcg_out_ld_rel(s, OPC_LD4_M1, args[0], args[1], args[2]); + tcg_out_ld_rel(s, OPC_LD4_M1, arg0, arg1, arg2); break; case INDEX_op_ld32s_i64: - tcg_out_ld_rel(s, OPC_LD4_M1, args[0], args[1], args[2]); - tcg_out_ext(s, OPC_SXT4_I29, args[0], args[0]); + tcg_out_ld_rel(s, OPC_LD4_M1, arg0, arg1, arg2); + tcg_out_ext(s, OPC_SXT4_I29, arg0, arg0); break; case INDEX_op_ld_i64: - tcg_out_ld_rel(s, OPC_LD8_M1, args[0], args[1], args[2]); + tcg_out_ld_rel(s, OPC_LD8_M1, arg0, arg1, arg2); break; case INDEX_op_st8_i32: case INDEX_op_st8_i64: - tcg_out_st_rel(s, OPC_ST1_M4, args[0], args[1], args[2]); + tcg_out_st_rel(s, OPC_ST1_M4, arg0, arg1, arg2); break; case INDEX_op_st16_i32: case INDEX_op_st16_i64: - tcg_out_st_rel(s, OPC_ST2_M4, args[0], args[1], args[2]); + tcg_out_st_rel(s, OPC_ST2_M4, arg0, arg1, arg2); break; case 
INDEX_op_st_i32: case INDEX_op_st32_i64: - tcg_out_st_rel(s, OPC_ST4_M4, args[0], args[1], args[2]); + tcg_out_st_rel(s, OPC_ST4_M4, arg0, arg1, arg2); break; case INDEX_op_st_i64: - tcg_out_st_rel(s, OPC_ST8_M4, args[0], args[1], args[2]); + tcg_out_st_rel(s, OPC_ST8_M4, arg0, arg1, arg2); break; case INDEX_op_add_i32: case INDEX_op_add_i64: - tcg_out_alu(s, OPC_ADD_A1, args[0], args[1], const_args[1], - args[2], const_args[2]); + if (const_args[2]) { + tcg_out_addi2(s, arg0, arg1, arg2); + } else { + tcg_opc_a1(s, TCG_REG_P0, OPC_ADD_A1, arg0, arg1, arg2); + } break; case INDEX_op_sub_i32: case INDEX_op_sub_i64: - tcg_out_alu(s, OPC_SUB_A1, args[0], args[1], const_args[1], - args[2], const_args[2]); + if (const_args[1]) { + if (const_args[2]) { + tcg_out_movi(s, TCG_TYPE_I64, arg0, arg1 - arg2); + } else { + tcg_opc_a3(s, TCG_REG_P0, OPC_SUB_A3, arg0, arg1, arg2); + } + } else if (const_args[2]) { + tcg_out_addi2(s, arg0, arg1, -arg2); + } else { + tcg_opc_a1(s, TCG_REG_P0, OPC_SUB_A1, arg0, arg1, arg2); + } break; case INDEX_op_and_i32: case INDEX_op_and_i64: - tcg_out_alu(s, OPC_AND_A1, args[0], args[1], const_args[1], - args[2], const_args[2]); + tcg_out_and(s, arg0, arg1, arg2, const_args[2]); break; case INDEX_op_andc_i32: case INDEX_op_andc_i64: - tcg_out_alu(s, OPC_ANDCM_A1, args[0], args[1], const_args[1], - args[2], const_args[2]); - break; - case INDEX_op_eqv_i32: - case INDEX_op_eqv_i64: - tcg_out_eqv(s, args[0], args[1], const_args[1], - args[2], const_args[2]); - break; - case INDEX_op_nand_i32: - case INDEX_op_nand_i64: - tcg_out_nand(s, args[0], args[1], const_args[1], - args[2], const_args[2]); - break; - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - tcg_out_nor(s, args[0], args[1], const_args[1], - args[2], const_args[2]); + if (const_args[2]) { + if (const_args[1]) { + tcg_out_movi(s, TCG_TYPE_I64, arg0, arg1 & ~arg2); + } else { + tcg_out_and(s, arg0, arg1, ~arg2, 1); + } + } else { + tcg_out_andc(s, arg0, arg1, const_args[1], arg2); + } 
break; case INDEX_op_or_i32: case INDEX_op_or_i64: - tcg_out_alu(s, OPC_OR_A1, args[0], args[1], const_args[1], - args[2], const_args[2]); - break; - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: - tcg_out_orc(s, args[0], args[1], const_args[1], - args[2], const_args[2]); + tcg_out_or(s, arg0, arg1, arg2, const_args[2]); break; case INDEX_op_xor_i32: case INDEX_op_xor_i64: - tcg_out_alu(s, OPC_XOR_A1, args[0], args[1], const_args[1], - args[2], const_args[2]); + tcg_out_xor(s, arg0, arg1, arg2, const_args[2]); break; case INDEX_op_mul_i32: case INDEX_op_mul_i64: - tcg_out_mul(s, args[0], args[1], args[2]); + tcg_out_mul(s, arg0, arg1, arg2); break; case INDEX_op_sar_i32: - tcg_out_sar_i32(s, args[0], args[1], args[2], const_args[2]); + tcg_out_sar_i32(s, TCG_REG_P0, arg0, arg1, arg2, const_args[2]); break; case INDEX_op_sar_i64: - tcg_out_sar_i64(s, args[0], args[1], args[2], const_args[2]); + tcg_out_sar_i64(s, TCG_REG_P0, arg0, arg1, arg2, const_args[2]); break; case INDEX_op_shl_i32: - tcg_out_shl_i32(s, args[0], args[1], args[2], const_args[2]); + tcg_out_shl_i32(s, TCG_REG_P0, arg0, arg1, arg2, const_args[2]); break; case INDEX_op_shl_i64: - tcg_out_shl_i64(s, args[0], args[1], args[2], const_args[2]); + tcg_out_shl_i64(s, TCG_REG_P0, arg0, arg1, arg2, const_args[2]); break; case INDEX_op_shr_i32: - tcg_out_shr_i32(s, args[0], args[1], args[2], const_args[2]); + tcg_out_shr_i32(s, TCG_REG_P0, arg0, arg1, arg2, const_args[2]); break; case INDEX_op_shr_i64: - tcg_out_shr_i64(s, args[0], args[1], args[2], const_args[2]); + tcg_out_shr_i64(s, TCG_REG_P0, arg0, arg1, arg2, const_args[2]); break; case INDEX_op_rotl_i32: - tcg_out_rotl_i32(s, args[0], args[1], args[2], const_args[2]); + tcg_out_rotl_i32(s, arg0, arg1, arg2, const_args[2]); break; case INDEX_op_rotl_i64: - tcg_out_rotl_i64(s, args[0], args[1], args[2], const_args[2]); + tcg_out_rotl_i64(s, arg0, arg1, arg2, const_args[2]); break; case INDEX_op_rotr_i32: - tcg_out_rotr_i32(s, args[0], args[1], 
args[2], const_args[2]); + tcg_out_rotr_i32(s, arg0, arg1, arg2, const_args[2]); break; case INDEX_op_rotr_i64: - tcg_out_rotr_i64(s, args[0], args[1], args[2], const_args[2]); + tcg_out_rotr_i64(s, arg0, arg1, arg2, const_args[2]); break; case INDEX_op_ext8s_i32: case INDEX_op_ext8s_i64: - tcg_out_ext(s, OPC_SXT1_I29, args[0], args[1]); + tcg_out_ext(s, OPC_SXT1_I29, arg0, arg1); break; case INDEX_op_ext8u_i32: case INDEX_op_ext8u_i64: - tcg_out_ext(s, OPC_ZXT1_I29, args[0], args[1]); + tcg_out_ext(s, OPC_ZXT1_I29, arg0, arg1); break; case INDEX_op_ext16s_i32: case INDEX_op_ext16s_i64: - tcg_out_ext(s, OPC_SXT2_I29, args[0], args[1]); + tcg_out_ext(s, OPC_SXT2_I29, arg0, arg1); break; case INDEX_op_ext16u_i32: case INDEX_op_ext16u_i64: - tcg_out_ext(s, OPC_ZXT2_I29, args[0], args[1]); + tcg_out_ext(s, OPC_ZXT2_I29, arg0, arg1); break; case INDEX_op_ext32s_i64: - tcg_out_ext(s, OPC_SXT4_I29, args[0], args[1]); + tcg_out_ext(s, OPC_SXT4_I29, arg0, arg1); break; case INDEX_op_ext32u_i64: - tcg_out_ext(s, OPC_ZXT4_I29, args[0], args[1]); + tcg_out_ext(s, OPC_ZXT4_I29, arg0, arg1); break; case INDEX_op_bswap16_i32: case INDEX_op_bswap16_i64: - tcg_out_bswap16(s, args[0], args[1]); + tcg_out_bswap16(s, TCG_REG_P0, arg0, arg1, 0); break; case INDEX_op_bswap32_i32: case INDEX_op_bswap32_i64: - tcg_out_bswap32(s, args[0], args[1]); + tcg_out_bswap32(s, TCG_REG_P0, arg0, arg1, 0); break; case INDEX_op_bswap64_i64: - tcg_out_bswap64(s, args[0], args[1]); + tcg_out_bswap64(s, TCG_REG_P0, arg0, arg1); break; case INDEX_op_brcond_i32: - tcg_out_brcond(s, args[2], args[0], const_args[0], - args[1], const_args[1], args[3], 1); + tcg_out_brcond(s, arg2, arg0, const_args[0], + arg1, const_args[1], args[3], 1); break; case INDEX_op_brcond_i64: - tcg_out_brcond(s, args[2], args[0], const_args[0], - args[1], const_args[1], args[3], 0); + tcg_out_brcond(s, arg2, arg0, const_args[0], + arg1, const_args[1], args[3], 0); break; case INDEX_op_setcond_i32: - tcg_out_setcond(s, args[3], 
args[0], args[1], args[2], 1); + tcg_out_setcond(s, args[3], arg0, arg1, arg2, 1); break; case INDEX_op_setcond_i64: - tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0); + tcg_out_setcond(s, args[3], arg0, arg1, arg2, 0); break; case INDEX_op_qemu_ld8u: @@ -2156,13 +2272,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_br, { } }, - { INDEX_op_call, { "r" } }, + { INDEX_op_call, { "ri" } }, { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_jmp, { "r" } }, { INDEX_op_mov_i32, { "r", "r" } }, + { INDEX_op_mov_i64, { "r", "r" } }, { INDEX_op_movi_i32, { "r" } }, + { INDEX_op_movi_i64, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, @@ -2173,40 +2291,6 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_st16_i32, { "rZ", "r" } }, { INDEX_op_st_i32, { "rZ", "r" } }, - { INDEX_op_add_i32, { "r", "rI", "rI" } }, - { INDEX_op_sub_i32, { "r", "rI", "rI" } }, - - { INDEX_op_and_i32, { "r", "rI", "rI" } }, - { INDEX_op_andc_i32, { "r", "rI", "rI" } }, - { INDEX_op_eqv_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_nand_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_or_i32, { "r", "rI", "rI" } }, - { INDEX_op_orc_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_xor_i32, { "r", "rI", "rI" } }, - - { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, - - { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, - { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, - { INDEX_op_shr_i32, { "r", "rZ", "ri" } }, - { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, - { INDEX_op_rotr_i32, { "r", "rZ", "ri" } }, - - { INDEX_op_ext8s_i32, { "r", "rZ"} }, - { INDEX_op_ext8u_i32, { "r", "rZ"} }, - { INDEX_op_ext16s_i32, { "r", "rZ"} }, - { INDEX_op_ext16u_i32, { "r", "rZ"} }, - - { INDEX_op_bswap16_i32, { "r", "rZ" } }, - { INDEX_op_bswap32_i32, { "r", "rZ" } }, - - { INDEX_op_brcond_i32, { "rI", "rI" } }, - { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, - - { 
INDEX_op_mov_i64, { "r", "r" } }, - { INDEX_op_movi_i64, { "r" } }, - { INDEX_op_ld8u_i64, { "r", "r" } }, { INDEX_op_ld8s_i64, { "r", "r" } }, { INDEX_op_ld16u_i64, { "r", "r" } }, @@ -2219,26 +2303,39 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_st32_i64, { "rZ", "r" } }, { INDEX_op_st_i64, { "rZ", "r" } }, - { INDEX_op_add_i64, { "r", "rI", "rI" } }, - { INDEX_op_sub_i64, { "r", "rI", "rI" } }, + { INDEX_op_add_i32, { "r", "rZ", "ri" } }, + { INDEX_op_add_i64, { "r", "rZ", "ri" } }, + { INDEX_op_sub_i32, { "r", "rJ", "ri" } }, + { INDEX_op_sub_i64, { "r", "rJ", "ri" } }, - { INDEX_op_and_i64, { "r", "rI", "rI" } }, - { INDEX_op_andc_i64, { "r", "rI", "rI" } }, - { INDEX_op_eqv_i64, { "r", "rZ", "rZ" } }, - { INDEX_op_nand_i64, { "r", "rZ", "rZ" } }, - { INDEX_op_nor_i64, { "r", "rZ", "rZ" } }, - { INDEX_op_or_i64, { "r", "rI", "rI" } }, - { INDEX_op_orc_i64, { "r", "rZ", "rZ" } }, - { INDEX_op_xor_i64, { "r", "rI", "rI" } }, + { INDEX_op_and_i32, { "r", "rZ", "ri" } }, + { INDEX_op_and_i64, { "r", "rZ", "ri" } }, + { INDEX_op_andc_i32, { "r", "rJ", "ri" } }, + { INDEX_op_andc_i64, { "r", "rJ", "ri" } }, + { INDEX_op_or_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_or_i64, { "r", "rZ", "rJ" } }, + { INDEX_op_xor_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_xor_i64, { "r", "rZ", "rJ" } }, + { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, { INDEX_op_mul_i64, { "r", "rZ", "rZ" } }, + { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, { INDEX_op_sar_i64, { "r", "rZ", "ri" } }, + { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, { INDEX_op_shl_i64, { "r", "rZ", "ri" } }, + { INDEX_op_shr_i32, { "r", "rZ", "ri" } }, { INDEX_op_shr_i64, { "r", "rZ", "ri" } }, + { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, { INDEX_op_rotl_i64, { "r", "rZ", "ri" } }, + { INDEX_op_rotr_i32, { "r", "rZ", "ri" } }, { INDEX_op_rotr_i64, { "r", "rZ", "ri" } }, + { INDEX_op_ext8s_i32, { "r", "rZ"} }, + { INDEX_op_ext8u_i32, { "r", "rZ"} }, + { INDEX_op_ext16s_i32, { "r", "rZ"} }, + { INDEX_op_ext16u_i32, { 
"r", "rZ"} }, + { INDEX_op_ext8s_i64, { "r", "rZ"} }, { INDEX_op_ext8u_i64, { "r", "rZ"} }, { INDEX_op_ext16s_i64, { "r", "rZ"} }, @@ -2246,11 +2343,16 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_ext32s_i64, { "r", "rZ"} }, { INDEX_op_ext32u_i64, { "r", "rZ"} }, + { INDEX_op_bswap16_i32, { "r", "rZ" } }, + { INDEX_op_bswap32_i32, { "r", "rZ" } }, + { INDEX_op_bswap16_i64, { "r", "rZ" } }, { INDEX_op_bswap32_i64, { "r", "rZ" } }, { INDEX_op_bswap64_i64, { "r", "rZ" } }, + { INDEX_op_brcond_i32, { "rI", "rI" } }, { INDEX_op_brcond_i64, { "rI", "rI" } }, + { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, { INDEX_op_setcond_i64, { "r", "rZ", "rZ" } }, { INDEX_op_qemu_ld8u, { "r", "r" } }, @@ -2284,46 +2386,31 @@ static void tcg_target_qemu_prologue(TCGContext *s) *(uint64_t *)(s->code_ptr) = (uint64_t)s->code_ptr + 16; /* entry point */ s->code_ptr += 16; /* skip GP */ + s->extra = qemu_mallocz(sizeof(TCGContextExtra)); + /* prologue */ - tcg_out_bundle(s, mII, - tcg_opc_m34(TCG_REG_P0, OPC_ALLOC_M34, - TCG_REG_R33, 32, 24, 0), - tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, - TCG_REG_B6, TCG_REG_R32, 0), - tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, - TCG_REG_R32, TCG_REG_B0)); - - /* ??? If GUEST_BASE < 0x200000, we could load the register via - an ADDL in the M slot of the next bundle. 
*/ + tcg_opc_m34(s, TCG_REG_P0, OPC_ALLOC_M34, TCG_REG_R33, 32, 24, 0); + tcg_opc_i21(s, TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, TCG_REG_R32, 0); + tcg_opc_i22(s, TCG_REG_P0, OPC_MOV_I22, TCG_REG_R32, TCG_REG_B0); + if (GUEST_BASE != 0) { - tcg_out_bundle(s, mlx, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_l2 (GUEST_BASE), - tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, - TCG_GUEST_BASE_REG, GUEST_BASE)); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } - tcg_out_bundle(s, miB, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, - TCG_REG_R12, -frame_size, TCG_REG_R12), - tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); + tcg_out_addi(s, TCG_REG_R12, -frame_size); + tcg_opc_b4(s, TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6); + tcg_flush_queue(s); /* epilogue */ tb_ret_addr = s->code_ptr; - tcg_out_bundle(s, miI, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, - TCG_REG_B0, TCG_REG_R32, 0), - tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, - TCG_REG_R12, frame_size, TCG_REG_R12)); - tcg_out_bundle(s, miB, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i26(TCG_REG_P0, OPC_MOV_I_I26, - TCG_REG_PFS, TCG_REG_R33), - tcg_opc_b4 (TCG_REG_P0, OPC_BR_RET_SPTK_MANY_B4, - TCG_REG_B0)); + + tcg_out_addi(s, TCG_REG_R12, frame_size); + tcg_opc_i21(s, TCG_REG_P0, OPC_MOV_I21, TCG_REG_B0, TCG_REG_R32, 0); + tcg_opc_i26(s, TCG_REG_P0, OPC_MOV_I_I26, TCG_REG_PFS, TCG_REG_R33); + tcg_opc_b4(s, TCG_REG_P0, OPC_BR_RET_SPTK_MANY_B4, TCG_REG_B0); + + tcg_flush_queue(s); } static void tcg_target_init(TCGContext *s) diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index e56e88f..ab5b944 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -96,7 +96,8 @@ enum { }; #define TCG_CT_CONST_ZERO 0x100 -#define TCG_CT_CONST_S22 0x200 +#define TCG_CT_CONST_S22 0x200 +#define TCG_CT_CONST_S8 0x400 /* used for function call generation */ 
#define TCG_REG_CALL_STACK TCG_REG_R12 @@ -111,8 +112,6 @@ enum { #define TCG_TARGET_HAS_bswap32_i32 #define TCG_TARGET_HAS_bswap32_i64 #define TCG_TARGET_HAS_bswap64_i64 -#define TCG_TARGET_HAS_eqv_i32 -#define TCG_TARGET_HAS_eqv_i64 #define TCG_TARGET_HAS_ext8s_i32 #define TCG_TARGET_HAS_ext16s_i32 #define TCG_TARGET_HAS_ext8s_i64 @@ -123,12 +122,6 @@ enum { #define TCG_TARGET_HAS_ext8u_i64 #define TCG_TARGET_HAS_ext16u_i64 #define TCG_TARGET_HAS_ext32u_i64 -#define TCG_TARGET_HAS_nand_i32 -#define TCG_TARGET_HAS_nand_i64 -#define TCG_TARGET_HAS_nor_i32 -#define TCG_TARGET_HAS_nor_i64 -#define TCG_TARGET_HAS_orc_i32 -#define TCG_TARGET_HAS_orc_i64 #define TCG_TARGET_HAS_rot_i32 #define TCG_TARGET_HAS_rot_i64 @@ -138,12 +131,26 @@ enum { #undef TCG_TARGET_HAS_not_i32 /* xor r1, -1, r3 */ #undef TCG_TARGET_HAS_not_i64 /* xor r1, -1, r3 */ +/* optional instructions not implemented */ +#undef TCG_TARGET_HAS_eqv_i32 +#undef TCG_TARGET_HAS_eqv_i64 +#undef TCG_TARGET_HAS_nand_i32 +#undef TCG_TARGET_HAS_nand_i64 +#undef TCG_TARGET_HAS_nor_i32 +#undef TCG_TARGET_HAS_nor_i64 +#undef TCG_TARGET_HAS_orc_i32 +#undef TCG_TARGET_HAS_orc_i64 + /* Note: must be synced with dyngen-exec.h */ #define TCG_AREG0 TCG_REG_R7 /* Guest base is supported */ #define TCG_TARGET_HAS_GUEST_BASE +/* We need to store extra info in the tcg struct, and need to have the + instruction queue flushed. 
*/ +#define TCG_TARGET_HAS_EXTRAINFO + static inline void flush_icache_range(unsigned long start, unsigned long stop) { start = start & ~(32UL - 1UL); diff --git a/tcg/tcg.c b/tcg/tcg.c index 5dd6a2c..2a8a9a1 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -2060,6 +2060,9 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, break; case INDEX_op_set_label: tcg_reg_alloc_bb_end(s, s->reserved_regs); +#ifdef TCG_TARGET_HAS_EXTRAINFO + tcg_flush_queue(s); +#endif tcg_out_label(s, args[0], (long)s->code_ptr); break; case INDEX_op_call: diff --git a/tcg/tcg.h b/tcg/tcg.h index e1afde2..ac372f0 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -266,6 +266,10 @@ typedef struct TCGHelperInfo { const char *name; } TCGHelperInfo; +#ifdef TCG_TARGET_HAS_EXTRAINFO +typedef struct TCGContextExtra TCGContextExtra; +#endif + typedef struct TCGContext TCGContext; struct TCGContext { @@ -306,6 +310,11 @@ struct TCGContext { int allocated_helpers; int helpers_sorted; +#ifdef TCG_TARGET_HAS_EXTRAINFO + /* For tcg-target.c to store extra information. */ + TCGContextExtra *extra; +#endif + #ifdef CONFIG_PROFILER /* profiling info */ int64_t tb_count1;