/* [<][>][^][v][top][bottom][index][help] */
/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is [Open Source Virtual Machine].
*
* The Initial Developer of the Original Code is
* Adobe System Incorporated.
* Portions created by the Initial Developer are Copyright (C) 2008
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Adobe AS3 Team
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef __nanojit_NativeX64__
#define __nanojit_NativeX64__
#ifndef NANOJIT_64BIT
#error "NANOJIT_64BIT must be defined for X64 backend"
#endif
#ifdef PERFM
#define DOPROF
#include "../vprof/vprof.h"
#define count_instr() _nvprof("x64",1)
#define count_prolog() _nvprof("x64-prolog",1); count_instr();
#define count_imt() _nvprof("x64-imt",1) count_instr()
#else
#define count_instr()
#define count_prolog()
#define count_imt()
#endif
namespace nanojit
{
#define NJ_MAX_STACK_ENTRY 256
#define NJ_ALIGN_STACK 16
#define NJ_JTBL_SUPPORTED 1
#define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
enum Register {
RAX = 0, // 1st int return, # of sse varargs
RCX = 1, // 4th int arg
RDX = 2, // 3rd int arg 2nd return
RBX = 3, // saved
RSP = 4, // stack ptr
RBP = 5, // frame ptr, saved, sib reqd
RSI = 6, // 2nd int arg
RDI = 7, // 1st int arg
R8 = 8, // 5th int arg
R9 = 9, // 6th int arg
R10 = 10, // scratch
R11 = 11, // scratch
R12 = 12, // saved
R13 = 13, // saved, sib reqd like rbp
R14 = 14, // saved
R15 = 15, // saved
XMM0 = 16, // 1st double arg, return
XMM1 = 17, // 2nd double arg, return
XMM2 = 18, // 3rd double arg
XMM3 = 19, // 4th double arg
XMM4 = 20, // 5th double arg
XMM5 = 21, // 6th double arg
XMM6 = 22, // 7th double arg
XMM7 = 23, // 8th double arg
XMM8 = 24, // scratch
XMM9 = 25, // scratch
XMM10 = 26, // scratch
XMM11 = 27, // scratch
XMM12 = 28, // scratch
XMM13 = 29, // scratch
XMM14 = 30, // scratch
XMM15 = 31, // scratch
FP = RBP,
UnknownReg = 32,
FirstReg = RAX,
LastReg = XMM15
};
/*
* Micro-templating variable-length opcodes, idea first
* describe by Mike Pall of Luajit.
*
* X86-64 opcode encodings: LSB encodes the length of the
* opcode in bytes, remaining bytes are encoded as 1-7 bytes
* in a single uint64_t value. The value is written as a single
* store into the code stream, and the code pointer is decremented
* by the length. each successive instruction partially overlaps
* the previous one.
*
* emit methods below are able to encode mod/rm, sib, rex, and
* register and small immediate values into these opcode values
* without much branchy code.
*
* these opcodes encapsulate all the const parts of the instruction.
* for example, the alu-immediate opcodes (add, sub, etc) encode
* part of their opcode in the R field of the mod/rm byte; this
* hardcoded value is in the constant below, and the R argument
* to emitrr() is 0. In a few cases, a whole instruction is encoded
* this way (eg callrax).
*
* when a disp32, imm32, or imm64 suffix can't fit in an 8-byte
* opcode, then it is written into the code separately and not counted
* in the opcode length.
*/
enum X64Opcode
#if defined(_MSC_VER) && _MSC_VER >= 1400
#pragma warning(disable:4480) // nonstandard extension used: specifying underlying type for enum
: uint64_t
#endif
{
// 64bit opcode constants
// msb lsb len
X64_addqrr = 0xC003480000000003LL, // 64bit add r += b
X64_addqri = 0xC081480000000003LL, // 64bit add r += int64(imm32)
X64_addqr8 = 0x00C0834800000004LL, // 64bit add r += int64(imm8)
X64_andqri = 0xE081480000000003LL, // 64bit and r &= int64(imm32)
X64_andqr8 = 0x00E0834800000004LL, // 64bit and r &= int64(imm8)
X64_orqri = 0xC881480000000003LL, // 64bit or r |= int64(imm32)
X64_orqr8 = 0x00C8834800000004LL, // 64bit or r |= int64(imm8)
X64_xorqri = 0xF081480000000003LL, // 64bit xor r ^= int64(imm32)
X64_xorqr8 = 0x00F0834800000004LL, // 64bit xor r ^= int64(imm8)
X64_addlri = 0xC081400000000003LL, // 32bit add r += imm32
X64_addlr8 = 0x00C0834000000004LL, // 32bit add r += imm8
X64_andlri = 0xE081400000000003LL, // 32bit and r &= imm32
X64_andlr8 = 0x00E0834000000004LL, // 32bit and r &= imm8
X64_orlri = 0xC881400000000003LL, // 32bit or r |= imm32
X64_orlr8 = 0x00C8834000000004LL, // 32bit or r |= imm8
X64_sublri = 0xE881400000000003LL, // 32bit sub r -= imm32
X64_sublr8 = 0x00E8834000000004LL, // 32bit sub r -= imm8
X64_xorlri = 0xF081400000000003LL, // 32bit xor r ^= imm32
X64_xorlr8 = 0x00F0834000000004LL, // 32bit xor r ^= imm8
X64_addrr = 0xC003400000000003LL, // 32bit add r += b
X64_andqrr = 0xC023480000000003LL, // 64bit and r &= b
X64_andrr = 0xC023400000000003LL, // 32bit and r &= b
X64_call = 0x00000000E8000005LL, // near call
X64_callrax = 0xD0FF000000000002LL, // indirect call to addr in rax (no REX)
X64_cmovqno = 0xC0410F4800000004LL, // 64bit conditional mov if (no overflow) r = b
X64_cmovqnae= 0xC0420F4800000004LL, // 64bit conditional mov if (uint <) r = b
X64_cmovqnb = 0xC0430F4800000004LL, // 64bit conditional mov if (uint >=) r = b
X64_cmovqne = 0xC0450F4800000004LL, // 64bit conditional mov if (c) r = b
X64_cmovqna = 0xC0460F4800000004LL, // 64bit conditional mov if (uint <=) r = b
X64_cmovqnbe= 0xC0470F4800000004LL, // 64bit conditional mov if (uint >) r = b
X64_cmovqnge= 0xC04C0F4800000004LL, // 64bit conditional mov if (int <) r = b
X64_cmovqnl = 0xC04D0F4800000004LL, // 64bit conditional mov if (int >=) r = b
X64_cmovqng = 0xC04E0F4800000004LL, // 64bit conditional mov if (int <=) r = b
X64_cmovqnle= 0xC04F0F4800000004LL, // 64bit conditional mov if (int >) r = b
X64_cmovno = 0xC0410F4000000004LL, // 32bit conditional mov if (no overflow) r = b
X64_cmovnae = 0xC0420F4000000004LL, // 32bit conditional mov if (uint <) r = b
X64_cmovnb = 0xC0430F4000000004LL, // 32bit conditional mov if (uint >=) r = b
X64_cmovne = 0xC0450F4000000004LL, // 32bit conditional mov if (c) r = b
X64_cmovna = 0xC0460F4000000004LL, // 32bit conditional mov if (uint <=) r = b
X64_cmovnbe = 0xC0470F4000000004LL, // 32bit conditional mov if (uint >) r = b
X64_cmovnge = 0xC04C0F4000000004LL, // 32bit conditional mov if (int <) r = b
X64_cmovnl = 0xC04D0F4000000004LL, // 32bit conditional mov if (int >=) r = b
X64_cmovng = 0xC04E0F4000000004LL, // 32bit conditional mov if (int <=) r = b
X64_cmovnle = 0xC04F0F4000000004LL, // 32bit conditional mov if (int >) r = b
X64_cmplr = 0xC03B400000000003LL, // 32bit compare r,b
X64_cmpqr = 0xC03B480000000003LL, // 64bit compare r,b
X64_cmplri = 0xF881400000000003LL, // 32bit compare r,imm32
X64_cmpqri = 0xF881480000000003LL, // 64bit compare r,int64(imm32)
X64_cmplr8 = 0x00F8834000000004LL, // 32bit compare r,imm8
X64_cmpqr8 = 0x00F8834800000004LL, // 64bit compare r,int64(imm8)
X64_cvtsi2sd= 0xC02A0F40F2000005LL, // convert int32 to double r = (double) b
X64_cvtsq2sd= 0xC02A0F48F2000005LL, // convert int64 to double r = (double) b
X64_cvtss2sd= 0xC05A0F40F3000005LL, // convert float to double r = (double) b
X64_cvtsd2ss= 0xC05A0F40F2000005LL, // convert double to float r = (float) b
X64_divsd = 0xC05E0F40F2000005LL, // divide scalar double r /= b
X64_mulsd = 0xC0590F40F2000005LL, // multiply scalar double r *= b
X64_addsd = 0xC0580F40F2000005LL, // add scalar double r += b
X64_idiv = 0xF8F7400000000003LL, // 32bit signed div (rax = rdx:rax/r, rdx=rdx:rax%r)
X64_imul = 0xC0AF0F4000000004LL, // 32bit signed mul r *= b
X64_imuli = 0xC069400000000003LL, // 32bit signed mul r = b * imm32
X64_imul8 = 0x00C06B4000000004LL, // 32bit signed mul r = b * imm8
X64_jmp = 0x00000000E9000005LL, // jump near rel32
X64_jmp8 = 0x00EB000000000002LL, // jump near rel8
X64_jo = 0x00000000800F0006LL, // jump near if overflow
X64_jb = 0x00000000820F0006LL, // jump near if below (uint <)
X64_jae = 0x00000000830F0006LL, // jump near if above or equal (uint >=)
X64_ja = 0x00000000870F0006LL, // jump near if above (uint >)
X64_jbe = 0x00000000860F0006LL, // jump near if below or equal (uint <=)
X64_je = 0x00000000840F0006LL, // near jump if equal
X64_jl = 0x000000008C0F0006LL, // jump near if less (int <)
X64_jge = 0x000000008D0F0006LL, // jump near if greater or equal (int >=)
X64_jg = 0x000000008F0F0006LL, // jump near if greater (int >)
X64_jle = 0x000000008E0F0006LL, // jump near if less or equal (int <=)
X64_jp = 0x000000008A0F0006LL, // jump near if parity (PF == 1)
X64_jneg = 0x0000000001000000LL, // xor with this mask to negate the condition
X64_jo8 = 0x0070000000000002LL, // jump near if overflow
X64_jb8 = 0x0072000000000002LL, // jump near if below (uint <)
X64_jae8 = 0x0073000000000002LL, // jump near if above or equal (uint >=)
X64_ja8 = 0x0077000000000002LL, // jump near if above (uint >)
X64_jbe8 = 0x0076000000000002LL, // jump near if below or equal (uint <=)
X64_je8 = 0x0074000000000002LL, // near jump if equal
X64_jne8 = 0x0075000000000002LL, // jump near if not equal
X64_jl8 = 0x007C000000000002LL, // jump near if less (int <)
X64_jge8 = 0x007D000000000002LL, // jump near if greater or equal (int >=)
X64_jg8 = 0x007F000000000002LL, // jump near if greater (int >)
X64_jle8 = 0x007E000000000002LL, // jump near if less or equal (int <=)
X64_jp8 = 0x007A000000000002LL, // jump near if parity (PF == 1)
X64_jnp8 = 0x007B000000000002LL, // jump near if not parity (PF == 0)
X64_jneg8 = 0x0001000000000000LL, // xor with this mask to negate the condition
X64_leaqrm = 0x00000000808D4807LL, // 64bit load effective addr reg <- disp32+base
X64_learm = 0x00000000808D4007LL, // 32bit load effective addr reg <- disp32+base
X64_learip = 0x00000000058D4807LL, // 64bit RIP-relative lea. reg <- disp32+rip (modrm = 00rrr101 = 05)
X64_movlr = 0xC08B400000000003LL, // 32bit mov r <- b
X64_movbmr = 0x0000000080884007LL, // 8bit store r -> [b+d32]
X64_movsmr = 0x8089406600000004LL, // 16bit store r -> [b+d32]
X64_movlmr = 0x0000000080894007LL, // 32bit store r -> [b+d32]
X64_movlrm = 0x00000000808B4007LL, // 32bit load r <- [b+d32]
X64_movqmr = 0x0000000080894807LL, // 64bit store gpr -> [b+d32]
X64_movqspr = 0x0024448948000005LL, // 64bit store gpr -> [rsp+d32] (sib required)
X64_movqr = 0xC08B480000000003LL, // 64bit mov r <- b
X64_movqi = 0xB848000000000002LL, // 64bit mov r <- imm64
X64_movi = 0xB840000000000002LL, // 32bit mov r <- imm32
X64_movqi32 = 0xC0C7480000000003LL, // 64bit mov r <- int64(imm32)
X64_movapsr = 0xC0280F4000000004LL, // 128bit mov xmm <- xmm
X64_movqrx = 0xC07E0F4866000005LL, // 64bit mov b <- xmm-r (reverses the usual r/b order)
X64_movqxr = 0xC06E0F4866000005LL, // 64bit mov b -> xmm-r
X64_movqrm = 0x00000000808B4807LL, // 64bit load r <- [b+d32]
X64_movsdrr = 0xC0100F40F2000005LL, // 64bit mov xmm-r <- xmm-b (upper 64bits unchanged)
X64_movsdrm = 0x80100F40F2000005LL, // 64bit load xmm-r <- [b+d32] (upper 64 cleared)
X64_movsdmr = 0x80110F40F2000005LL, // 64bit store xmm-r -> [b+d32]
X64_movssrm = 0x80100F40F3000005LL, // 32bit load xmm-r <- [b+d32] (upper 96 cleared)
X64_movssmr = 0x80110F40F3000005LL, // 32bit store xmm-r -> [b+d32]
X64_movsxdr = 0xC063480000000003LL, // sign extend i32 to i64 r = (int64)(int32) b
X64_movzx8 = 0xC0B60F4000000004LL, // zero extend i8 to i64 r = (uint64)(uint8) b
X64_movzx8m = 0x80B60F4000000004LL, // zero extend i8 load to i32 r <- [b+d32]
X64_movzx16m= 0x80B70F4000000004LL, // zero extend i16 load to i32 r <- [b+d32]
X64_movsx8m = 0x80BE0F4000000004LL, // sign extend i8 load to i32 r <- [b+d32]
X64_movsx16m= 0x80BF0F4000000004LL, // sign extend i16 load to i32 r <- [b+d32]
X64_neg = 0xD8F7400000000003LL, // 32bit two's compliment b = -b
X64_nop1 = 0x9000000000000001LL, // one byte NOP
X64_nop2 = 0x9066000000000002LL, // two byte NOP
X64_nop3 = 0x001F0F0000000003LL, // three byte NOP
X64_nop4 = 0x00401F0F00000004LL, // four byte NOP
X64_nop5 = 0x0000441F0F000005LL, // five byte NOP
X64_nop6 = 0x0000441F0F660006LL, // six byte NOP
X64_nop7 = 0x00000000801F0F07LL, // seven byte NOP
X64_not = 0xD0F7400000000003LL, // 32bit ones compliment b = ~b
X64_orlrr = 0xC00B400000000003LL, // 32bit or r |= b
X64_orqrr = 0xC00B480000000003LL, // 64bit or r |= b
X64_popr = 0x5840000000000002LL, // 64bit pop r <- [rsp++]
X64_pushr = 0x5040000000000002LL, // 64bit push r -> [--rsp]
X64_pxor = 0xC0EF0F4066000005LL, // 128bit xor xmm-r ^= xmm-b
X64_ret = 0xC300000000000001LL, // near return from called procedure
X64_sete = 0xC0940F4000000004LL, // set byte if equal (ZF == 1)
X64_seto = 0xC0900F4000000004LL, // set byte if overflow (OF == 1)
X64_setc = 0xC0920F4000000004LL, // set byte if carry (CF == 1)
X64_setl = 0xC09C0F4000000004LL, // set byte if less (int <) (SF != OF)
X64_setle = 0xC09E0F4000000004LL, // set byte if less or equal (int <=) (ZF == 1 || SF != OF)
X64_setg = 0xC09F0F4000000004LL, // set byte if greater (int >) (ZF == 0 && SF == OF)
X64_setge = 0xC09D0F4000000004LL, // set byte if greater or equal (int >=) (SF == OF)
X64_seta = 0xC0970F4000000004LL, // set byte if above (uint >) (CF == 0 && ZF == 0)
X64_setae = 0xC0930F4000000004LL, // set byte if above or equal (uint >=) (CF == 0)
X64_setb = 0xC0920F4000000004LL, // set byte if below (uint <) (CF == 1)
X64_setbe = 0xC0960F4000000004LL, // set byte if below or equal (uint <=) (ZF == 1 || CF == 1)
X64_subsd = 0xC05C0F40F2000005LL, // subtract scalar double r -= b
X64_shl = 0xE0D3400000000003LL, // 32bit left shift r <<= rcx
X64_shlq = 0xE0D3480000000003LL, // 64bit left shift r <<= rcx
X64_shr = 0xE8D3400000000003LL, // 32bit uint right shift r >>= rcx
X64_shrq = 0xE8D3480000000003LL, // 64bit uint right shift r >>= rcx
X64_sar = 0xF8D3400000000003LL, // 32bit int right shift r >>= rcx
X64_sarq = 0xF8D3480000000003LL, // 64bit int right shift r >>= rcx
X64_shli = 0x00E0C14000000004LL, // 32bit left shift r <<= imm8
X64_shlqi = 0x00E0C14800000004LL, // 64bit left shift r <<= imm8
X64_sari = 0x00F8C14000000004LL, // 32bit int right shift r >>= imm8
X64_sarqi = 0x00F8C14800000004LL, // 64bit int right shift r >>= imm8
X64_shri = 0x00E8C14000000004LL, // 32bit uint right shift r >>= imm8
X64_shrqi = 0x00E8C14800000004LL, // 64bit uint right shift r >>= imm8
X64_subqrr = 0xC02B480000000003LL, // 64bit sub r -= b
X64_subrr = 0xC02B400000000003LL, // 32bit sub r -= b
X64_subqri = 0xE881480000000003LL, // 64bit sub r -= int64(imm32)
X64_subqr8 = 0x00E8834800000004LL, // 64bit sub r -= int64(imm8)
X64_ucomisd = 0xC02E0F4066000005LL, // unordered compare scalar double
X64_xorqrr = 0xC033480000000003LL, // 64bit xor r &= b
X64_xorrr = 0xC033400000000003LL, // 32bit xor r &= b
X64_xorpd = 0xC0570F4066000005LL, // 128bit xor xmm (two packed doubles)
X64_xorps = 0xC0570F4000000004LL, // 128bit xor xmm (four packed singles), one byte shorter
X64_xorpsm = 0x05570F4000000004LL, // 128bit xor xmm, [rip+disp32]
X64_xorpsa = 0x2504570F40000005LL, // 128bit xor xmm, [disp32]
X64_jmpx = 0xC524ff4000000004LL, // jmp [d32+x*8]
X64_jmpxb = 0xC024ff4000000004LL, // jmp [b+x*8]
X86_and8r = 0xC022000000000002LL, // and rl,rh
X86_sete = 0xC0940F0000000003LL, // no-rex version of X64_sete
X86_setnp = 0xC09B0F0000000003LL // no-rex set byte if odd parity (ordered fcmp result) (PF == 0)
};
typedef uint32_t RegisterMask;
static const RegisterMask GpRegs = 0xffff;
static const RegisterMask FpRegs = 0xffff0000;
#ifdef _MSC_VER
static const RegisterMask SavedRegs = 1<<RBX | 1<<RSI | 1<<RDI | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15;
static const int NumSavedRegs = 7; // rbx, rsi, rdi, r12-15
static const int NumArgRegs = 4;
#else
static const RegisterMask SavedRegs = 1<<RBX | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15;
static const int NumSavedRegs = 5; // rbx, r12-15
static const int NumArgRegs = 6;
#endif
static inline bool isValidDisplacement(LOpcode, int32_t) {
return true;
}
static inline bool IsFpReg(Register r) {
return ((1<<r) & FpRegs) != 0;
}
static inline bool IsGpReg(Register r) {
return ((1<<r) & GpRegs) != 0;
}
verbose_only( extern const char* regNames[]; )
verbose_only( extern const char* gpRegNames32[]; )
verbose_only( extern const char* gpRegNames8[]; )
verbose_only( extern const char* gpRegNames8hi[]; )
#define DECLARE_PLATFORM_STATS()
#define DECLARE_PLATFORM_REGALLOC()
#define DECLARE_PLATFORM_ASSEMBLER() \
const static Register argRegs[NumArgRegs], retRegs[1]; \
void underrunProtect(ptrdiff_t bytes); \
void nativePageReset(); \
void nativePageSetup(); \
void asm_qbinop(LIns*); \
void MR(Register, Register);\
void JMP(NIns*);\
void JMPl(NIns*);\
void emit(uint64_t op);\
void emit8(uint64_t op, int64_t val);\
void emit_target8(size_t underrun, uint64_t op, NIns* target);\
void emit_target32(size_t underrun, uint64_t op, NIns* target);\
void emitrr(uint64_t op, Register r, Register b);\
void emitrxb(uint64_t op, Register r, Register x, Register b);\
void emitxb(uint64_t op, Register x, Register b) { emitrxb(op, (Register)0, x, b); }\
void emitrr8(uint64_t op, Register r, Register b);\
void emitr(uint64_t op, Register b) { emitrr(op, (Register)0, b); }\
void emitr8(uint64_t op, Register b) { emitrr8(op, (Register)0, b); }\
void emitprr(uint64_t op, Register r, Register b);\
void emitrm8(uint64_t op, Register r, int32_t d, Register b);\
void emitrm(uint64_t op, Register r, int32_t d, Register b);\
void emitrm_wide(uint64_t op, Register r, int32_t d, Register b);\
uint64_t emit_disp32(uint64_t op, int32_t d);\
void emitprm(uint64_t op, Register r, int32_t d, Register b);\
void emitrr_imm(uint64_t op, Register r, Register b, int32_t imm);\
void emitr_imm64(uint64_t op, Register r, uint64_t imm);\
void emitrxb_imm(uint64_t op, Register r, Register x, Register b, int32_t imm);\
void emitr_imm(uint64_t op, Register r, int32_t imm) { emitrr_imm(op, (Register)0, r, imm); }\
void emitr_imm8(uint64_t op, Register b, int32_t imm8);\
void emitxm_abs(uint64_t op, Register r, int32_t addr32);\
void emitxm_rel(uint64_t op, Register r, NIns* addr64);\
bool isTargetWithinS8(NIns* target);\
bool isTargetWithinS32(NIns* target);\
void asm_quad(Register r, uint64_t v);\
void asm_regarg(ArgSize, LIns*, Register);\
void asm_stkarg(ArgSize, LIns*, int);\
void asm_shift(LIns*);\
void asm_shift_imm(LIns*);\
void asm_arith_imm(LIns*);\
void regalloc_unary(LIns *ins, RegisterMask allow, Register &rr, Register &ra);\
void regalloc_binary(LIns *ins, RegisterMask allow, Register &rr, Register &ra, Register &rb);\
void regalloc_load(LIns *ins, RegisterMask allow, Register &rr, int32_t &d, Register &rb);\
void dis(NIns *p, int bytes);\
void asm_cmp(LIns*);\
void asm_cmp_imm(LIns*);\
void fcmp(LIns*, LIns*);\
NIns* asm_fbranch(bool, LIns*, NIns*);\
void asm_div_mod(LIns *i);\
int max_stk_used;\
void PUSHR(Register r);\
void POPR(Register r);\
void NOT(Register r);\
void NEG(Register r);\
void IDIV(Register r);\
void SHR(Register r);\
void SAR(Register r);\
void SHL(Register r);\
void SHRQ(Register r);\
void SARQ(Register r);\
void SHLQ(Register r);\
void SHRI(Register r, int i);\
void SARI(Register r, int i);\
void SHLI(Register r, int i);\
void SHRQI(Register r, int i);\
void SARQI(Register r, int i);\
void SHLQI(Register r, int i);\
void SETE(Register r);\
void SETL(Register r);\
void SETLE(Register r);\
void SETG(Register r);\
void SETGE(Register r);\
void SETB(Register r);\
void SETBE(Register r);\
void SETA(Register r);\
void SETAE(Register r);\
void SETO(Register r);\
void ADDRR(Register l, Register r);\
void SUBRR(Register l, Register r);\
void ANDRR(Register l, Register r);\
void ORLRR(Register l, Register r);\
void XORRR(Register l, Register r);\
void IMUL(Register l, Register r);\
void CMPLR(Register l, Register r);\
void MOVLR(Register l, Register r);\
void ADDQRR(Register l, Register r);\
void SUBQRR(Register l, Register r);\
void ANDQRR(Register l, Register r);\
void ORQRR(Register l, Register r);\
void XORQRR(Register l, Register r);\
void CMPQR(Register l, Register r);\
void MOVQR(Register l, Register r);\
void MOVAPSR(Register l, Register r);\
void CMOVNO(Register l, Register r);\
void CMOVNE(Register l, Register r);\
void CMOVNL(Register l, Register r);\
void CMOVNLE(Register l, Register r);\
void CMOVNG(Register l, Register r);\
void CMOVNGE(Register l, Register r);\
void CMOVNB(Register l, Register r);\
void CMOVNBE(Register l, Register r);\
void CMOVNA(Register l, Register r);\
void CMOVNAE(Register l, Register r);\
void CMOVQNO(Register l, Register r);\
void CMOVQNE(Register l, Register r);\
void CMOVQNL(Register l, Register r);\
void CMOVQNLE(Register l, Register r);\
void CMOVQNG(Register l, Register r);\
void CMOVQNGE(Register l, Register r);\
void CMOVQNB(Register l, Register r);\
void CMOVQNBE(Register l, Register r);\
void CMOVQNA(Register l, Register r);\
void CMOVQNAE(Register l, Register r);\
void MOVSXDR(Register l, Register r);\
void MOVZX8(Register l, Register r);\
void XORPS(Register r);\
void DIVSD(Register l, Register r);\
void MULSD(Register l, Register r);\
void ADDSD(Register l, Register r);\
void SUBSD(Register l, Register r);\
void CVTSQ2SD(Register l, Register r);\
void CVTSI2SD(Register l, Register r);\
void CVTSS2SD(Register l, Register r);\
void CVTSD2SS(Register l, Register r);\
void UCOMISD(Register l, Register r);\
void MOVQRX(Register l, Register r);\
void MOVQXR(Register l, Register r);\
void MOVI(Register r, int32_t i32);\
void ADDLRI(Register r, int32_t i32);\
void SUBLRI(Register r, int32_t i32);\
void ANDLRI(Register r, int32_t i32);\
void ORLRI(Register r, int32_t i32);\
void XORLRI(Register r, int32_t i32);\
void CMPLRI(Register r, int32_t i32);\
void ADDQRI(Register r, int32_t i32);\
void SUBQRI(Register r, int32_t i32);\
void ANDQRI(Register r, int32_t i32);\
void ORQRI(Register r, int32_t i32);\
void XORQRI(Register r, int32_t i32);\
void CMPQRI(Register r, int32_t i32);\
void MOVQI32(Register r, int32_t i32);\
void ADDLR8(Register r, int32_t i8);\
void SUBLR8(Register r, int32_t i8);\
void ANDLR8(Register r, int32_t i8);\
void ORLR8(Register r, int32_t i8);\
void XORLR8(Register r, int32_t i8);\
void CMPLR8(Register r, int32_t i8);\
void ADDQR8(Register r, int32_t i8);\
void SUBQR8(Register r, int32_t i8);\
void ANDQR8(Register r, int32_t i8);\
void ORQR8(Register r, int32_t i8);\
void XORQR8(Register r, int32_t i8);\
void CMPQR8(Register r, int32_t i8);\
void IMULI(Register l, Register r, int32_t i32);\
void MOVQI(Register r, uint64_t u64);\
void LEARIP(Register r, int32_t d);\
void LEAQRM(Register r1, int d, Register r2);\
void MOVLRM(Register r1, int d, Register r2);\
void MOVQRM(Register r1, int d, Register r2);\
void MOVBMR(Register r1, int d, Register r2);\
void MOVSMR(Register r1, int d, Register r2);\
void MOVLMR(Register r1, int d, Register r2);\
void MOVQMR(Register r1, int d, Register r2);\
void MOVZX8M(Register r1, int d, Register r2);\
void MOVZX16M(Register r1, int d, Register r2);\
void MOVSX8M(Register r1, int d, Register r2);\
void MOVSX16M(Register r1, int d, Register r2);\
void MOVSDRM(Register r1, int d, Register r2);\
void MOVSDMR(Register r1, int d, Register r2);\
void MOVSSMR(Register r1, int d, Register r2);\
void MOVSSRM(Register r1, int d, Register r2);\
void JMP8(size_t n, NIns* t);\
void JMP32(size_t n, NIns* t);\
void JMPX(Register indexreg, NIns** table);\
void JMPXB(Register indexreg, Register tablereg);\
void JO(size_t n, NIns* t);\
void JE(size_t n, NIns* t);\
void JL(size_t n, NIns* t);\
void JLE(size_t n, NIns* t);\
void JG(size_t n, NIns* t);\
void JGE(size_t n, NIns* t);\
void JB(size_t n, NIns* t);\
void JBE(size_t n, NIns* t);\
void JA(size_t n, NIns* t);\
void JAE(size_t n, NIns* t);\
void JP(size_t n, NIns* t);\
void JNO(size_t n, NIns* t);\
void JNE(size_t n, NIns* t);\
void JNL(size_t n, NIns* t);\
void JNLE(size_t n, NIns* t);\
void JNG(size_t n, NIns* t);\
void JNGE(size_t n, NIns* t);\
void JNB(size_t n, NIns* t);\
void JNBE(size_t n, NIns* t);\
void JNA(size_t n, NIns* t);\
void JNAE(size_t n, NIns* t);\
void JO8(size_t n, NIns* t);\
void JE8(size_t n, NIns* t);\
void JL8(size_t n, NIns* t);\
void JLE8(size_t n, NIns* t);\
void JG8(size_t n, NIns* t);\
void JGE8(size_t n, NIns* t);\
void JB8(size_t n, NIns* t);\
void JBE8(size_t n, NIns* t);\
void JA8(size_t n, NIns* t);\
void JAE8(size_t n, NIns* t);\
void JP8(size_t n, NIns* t);\
void JNO8(size_t n, NIns* t);\
void JNE8(size_t n, NIns* t);\
void JNL8(size_t n, NIns* t);\
void JNLE8(size_t n, NIns* t);\
void JNG8(size_t n, NIns* t);\
void JNGE8(size_t n, NIns* t);\
void JNB8(size_t n, NIns* t);\
void JNBE8(size_t n, NIns* t);\
void JNA8(size_t n, NIns* t);\
void JNAE8(size_t n, NIns* t);\
void CALL(size_t n, NIns* t);\
void CALLRAX();\
void RET();\
void MOVQSPR(int d, Register r);\
void XORPSA(Register r, int32_t i32);\
void XORPSM(Register r, NIns* a64);\
void X86_AND8R(Register r);\
void X86_SETNP(Register r);\
void X86_SETE(Register r);\
const int LARGEST_UNDERRUN_PROT = 32; // largest value passed to underrunProtect
typedef uint8_t NIns;
// Bytes of icache to flush after Assembler::patch
const size_t LARGEST_BRANCH_PATCH = 16 * sizeof(NIns);
} // namespace nanojit
#endif // __nanojit_NativeX64__