/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is [Open Source Virtual Machine].
*
* The Initial Developer of the Original Code is
* Adobe System Incorporated.
* Portions created by the Initial Developer are Copyright (C) 2008
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Adobe AS3 Team
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nanojit.h"
#if defined FEATURE_NANOJIT && defined NANOJIT_PPC
namespace nanojit
{
const Register Assembler::retRegs[] = { R3, R4 }; // high=R3, low=R4
const Register Assembler::argRegs[] = { R3, R4, R5, R6, R7, R8, R9, R10 };
const Register Assembler::savedRegs[] = {
#if !defined NANOJIT_64BIT
R13,
#endif
R14, R15, R16, R17, R18, R19, R20, R21, R22,
R23, R24, R25, R26, R27, R28, R29, R30
};
const char *regNames[] = {
"r0", "sp", "r2", "r3", "r4", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
"f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
"f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
"f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
};
const char *bitNames[] = { "lt", "gt", "eq", "so" };
#define TODO(x) do{ avmplus::AvmLog(#x); NanoAssertMsgf(false, "%s", #x); } while(0)
/*
* see http://developer.apple.com/documentation/developertools/Conceptual/LowLevelABI/index.html
* stack layout (higher address going down)
* sp -> out linkage area
* out parameter area
* local variables
* saved registers
* sp' -> in linkage area
* in parameter area
*
* linkage area layout:
* PPC32 PPC64
* sp+0 sp+0 saved sp
* sp+4 sp+8 saved cr
* sp+8 sp+16 saved lr
* sp+12 sp+24 reserved
*/
const int linkage_size = 6*sizeof(void*);
const int lr_offset = 2*sizeof(void*); // linkage.lr
const int cr_offset = 1*sizeof(void*); // linkage.cr
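// Concrete values of the constants above, as a sanity check (assuming
// sizeof(void*) is 4 on PPC32 and 8 on PPC64):
//   PPC32: linkage_size = 24, cr_offset = 4, lr_offset = 8
//   PPC64: linkage_size = 48, cr_offset = 8, lr_offset = 16
// which matches the linkage-area layout sketched in the comment above.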
NIns* Assembler::genPrologue() {
// mflr r0
// stw r0, lr_offset(sp)
// stwu sp, -framesize(sp)
// activation frame is 4 bytes per entry even on 64bit machines
uint32_t stackNeeded = max_param_size + linkage_size + _activation.tos * 4;
uint32_t aligned = alignUp(stackNeeded, NJ_ALIGN_STACK);
UNLESS_PEDANTIC( if (isS16(aligned)) {
STPU(SP, -aligned, SP); // *(sp-aligned) = sp; sp -= aligned
} else ) {
STPUX(SP, SP, R0);
asm_li(R0, -aligned);
}
NIns *patchEntry = _nIns;
MR(FP,SP); // save SP to use as a FP
STP(FP, cr_offset, SP); // cheat and save our FP in linkage.cr
STP(R0, lr_offset, SP); // save LR in linkage.lr
MFLR(R0);
return patchEntry;
}
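// Note: nanojit emits instructions backwards (_nIns moves toward lower
// addresses), so the statements in genPrologue() appear in reverse of
// execution order. Reading the generated code forward, the small-frame
// (isS16) case is roughly:
//     mflr  r0
//     st    r0, lr_offset(sp)    ; save LR in linkage.lr
//     st    fp, cr_offset(sp)    ; stash the old FP in linkage.cr
//     mr    fp, sp               ; establish the new frame pointer
//     stu   sp, -aligned(sp)     ; push the frame (stwu/stdu via STPU)
// This is only an illustrative sketch; the exact opcodes come from the
// pointer-sized STP/STPU macros.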
NIns* Assembler::genEpilogue() {
BLR();
MTLR(R0);
LP(R0, lr_offset, SP);
LP(FP, cr_offset, SP); // restore FP from linkage.cr
MR(SP,FP);
return _nIns;
}
void Assembler::asm_qjoin(LIns *ins) {
int d = findMemFor(ins);
NanoAssert(d && isS16(d));
LIns* lo = ins->oprnd1();
LIns* hi = ins->oprnd2();
Register r = findRegFor(hi, GpRegs);
STW(r, d+4, FP);
// okay if r gets recycled.
r = findRegFor(lo, GpRegs);
STW(r, d, FP);
freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
}
void Assembler::asm_load32(LIns *ins) {
LIns* base = ins->oprnd1();
int d = ins->disp();
Register rr = prepResultReg(ins, GpRegs);
Register ra = getBaseReg(ins->opcode(), base, d, GpRegs);
switch(ins->opcode()) {
case LIR_ldzb:
case LIR_ldcb:
if (isS16(d)) {
LBZ(rr, d, ra);
} else {
LBZX(rr, ra, R0); // rr = [ra+R0]
asm_li(R0,d);
}
return;
case LIR_ldzs:
case LIR_ldcs:
// these are expected to be 2 or 4-byte aligned
if (isS16(d)) {
LHZ(rr, d, ra);
} else {
LHZX(rr, ra, R0); // rr = [ra+R0]
asm_li(R0,d);
}
return;
case LIR_ld:
case LIR_ldc:
// these are expected to be 4-byte aligned
if (isS16(d)) {
LWZ(rr, d, ra);
} else {
LWZX(rr, ra, R0); // rr = [ra+R0]
asm_li(R0,d);
}
return;
case LIR_ldsb:
case LIR_ldss:
case LIR_ldcsb:
case LIR_ldcss:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
return;
}
}
void Assembler::asm_store32(LOpcode op, LIns *value, int32_t dr, LIns *base) {
switch (op) {
case LIR_sti:
// handled by mainline code below for now
break;
case LIR_stb:
case LIR_sts:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
return;
}
Register rs = findRegFor(value, GpRegs);
Register ra = value == base ? rs : getBaseReg(LIR_sti, base, dr, GpRegs & ~rmask(rs));
#if !PEDANTIC
if (isS16(dr)) {
STW(rs, dr, ra);
return;
}
#endif
// general case store, any offset size
STWX(rs, ra, R0);
asm_li(R0, dr);
}
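// The same large-displacement pattern recurs throughout this backend: when
// a displacement doesn't fit a signed 16-bit field, the pair emitted
// (shown in execution order) is
//     asm_li r0, d          ; one or two instructions
//     stwx   rs, ra, r0     ; or lwzx/lhzx/lbzx/... for the loads above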
void Assembler::asm_load64(LIns *ins) {
switch (ins->opcode()) {
case LIR_ldq:
case LIR_ldqc:
// handled by mainline code below for now
break;
case LIR_ld32f:
case LIR_ldc32f:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
return;
}
LIns* base = ins->oprnd1();
#ifdef NANOJIT_64BIT
Register rr = ins->getReg();
if (isKnownReg(rr) && (rmask(rr) & FpRegs)) {
// FPR already assigned, fine, use it
freeRsrcOf(ins, false);
} else {
// use a GPR; it's okay to copy doubles with GPRs
// but *not* okay to copy non-doubles with FPRs
rr = prepResultReg(ins, GpRegs);
}
#else
Register rr = prepResultReg(ins, FpRegs);
#endif
int dr = ins->disp();
Register ra = getBaseReg(ins->opcode(), base, dr, GpRegs);
#ifdef NANOJIT_64BIT
if (rmask(rr) & GpRegs) {
#if !PEDANTIC
if (isS16(dr)) {
LD(rr, dr, ra);
return;
}
#endif
// general case 64bit GPR load
LDX(rr, ra, R0);
asm_li(R0, dr);
return;
}
#endif
// FPR
#if !PEDANTIC
if (isS16(dr)) {
LFD(rr, dr, ra);
return;
}
#endif
// general case FPR load
LFDX(rr, ra, R0);
asm_li(R0, dr);
}
void Assembler::asm_li(Register r, int32_t imm) {
#if !PEDANTIC
if (isS16(imm)) {
LI(r, imm);
return;
}
if ((imm & 0xffff) == 0) {
imm = uint32_t(imm) >> 16;
LIS(r, imm);
return;
}
#endif
asm_li32(r, imm);
}
void Assembler::asm_li32(Register r, int32_t imm) {
// general case
// TODO use ADDI instead of ORI if r != r0, impl might have 3way adder
ORI(r, r, imm);
LIS(r, imm>>16); // on ppc64, this sign extends
}
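// Sketch, in execution order (the reverse of the emission order above):
// materializing e.g. 0x12345678 produces
//     lis  r, 0x1234        ; r = 0x12340000 (sign-extended on ppc64)
//     ori  r, r, 0x5678     ; r |= 0x00005678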
void Assembler::asm_li64(Register r, uint64_t imm) {
underrunProtect(5*sizeof(NIns)); // must be contiguous to be patchable
ORI(r,r,uint16_t(imm)); // r[0:15] = imm[0:15]
ORIS(r,r,uint16_t(imm>>16)); // r[16:31] = imm[16:31]
SLDI(r,r,32); // r[32:63] = r[0:31], r[0:31] = 0
asm_li32(r, int32_t(imm>>32)); // r[0:31] = imm[32:63]
}
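// Sketch of the patchable 5-instruction sequence in execution order, for
// an example value 0x1122334455667788:
//     lis   r, 0x1122       ; build imm[32:63] in the low word...
//     ori   r, r, 0x3344
//     sldi  r, r, 32        ; ...then shift it into the upper word
//     oris  r, r, 0x5566    ; or-in imm[16:31]
//     ori   r, r, 0x7788    ; or-in imm[0:15]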
void Assembler::asm_store64(LOpcode op, LIns *value, int32_t dr, LIns *base) {
NanoAssert(value->isQuad());
switch (op) {
case LIR_stqi:
// handled by mainline code below for now
break;
case LIR_st32f:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
return;
}
Register ra = getBaseReg(LIR_stqi, base, dr, GpRegs);
#if !PEDANTIC && !defined NANOJIT_64BIT
if (value->isop(LIR_quad) && isS16(dr) && isS16(dr+4)) {
// quad constant and short offset
uint64_t q = value->imm64();
STW(R0, dr, ra); // hi
asm_li(R0, int32_t(q>>32)); // hi
STW(R0, dr+4, ra); // lo
asm_li(R0, int32_t(q)); // lo
return;
}
if (value->isop(LIR_qjoin) && isS16(dr) && isS16(dr+4)) {
// short offset and qjoin(lo,hi) - store lo & hi separately
RegisterMask allow = GpRegs & ~rmask(ra);
LIns *lo = value->oprnd1();
Register rlo = findRegFor(lo, allow);
LIns *hi = value->oprnd2();
Register rhi = hi == lo ? rlo : findRegFor(hi, allow & ~rmask(rlo));
STW(rhi, dr, ra); // hi
STW(rlo, dr+4, ra); // lo
return;
}
#endif // !PEDANTIC
// general case for any value
#if !defined NANOJIT_64BIT
// on 32bit CPUs, we only use store64 for doubles
Register rs = findRegFor(value, FpRegs);
#else
// if we have to choose a register, use a GPR
Register rs = ( value->isUnusedOrHasUnknownReg()
? findRegFor(value, GpRegs & ~rmask(ra))
: value->getReg() );
if (rmask(rs) & GpRegs) {
#if !PEDANTIC
if (isS16(dr)) {
// short offset
STD(rs, dr, ra);
return;
}
#endif
// general case store 64bit GPR
STDX(rs, ra, R0);
asm_li(R0, dr);
return;
}
#endif // NANOJIT_64BIT
#if !PEDANTIC
if (isS16(dr)) {
// short offset
STFD(rs, dr, ra);
return;
}
#endif
// general case for any offset
STFDX(rs, ra, R0);
asm_li(R0, dr);
}
void Assembler::asm_cond(LIns *ins) {
LOpcode op = ins->opcode();
LIns *a = ins->oprnd1();
LIns *b = ins->oprnd2();
ConditionRegister cr = CR7;
Register r = prepResultReg(ins, GpRegs);
switch (op) {
case LIR_eq: case LIR_feq:
case LIR_qeq:
EXTRWI(r, r, 1, 4*cr+COND_eq); // extract CR7.eq
MFCR(r);
break;
case LIR_lt: case LIR_ult:
case LIR_flt: case LIR_fle:
case LIR_qlt: case LIR_qult:
EXTRWI(r, r, 1, 4*cr+COND_lt); // extract CR7.lt
MFCR(r);
break;
case LIR_gt: case LIR_ugt:
case LIR_fgt: case LIR_fge:
case LIR_qgt: case LIR_qugt:
EXTRWI(r, r, 1, 4*cr+COND_gt); // extract CR7.gt
MFCR(r);
break;
case LIR_le: case LIR_ule:
case LIR_qle: case LIR_qule:
EXTRWI(r, r, 1, 4*cr+COND_eq); // extract CR7.eq
MFCR(r);
CROR(CR7, eq, lt, eq);
break;
case LIR_ge: case LIR_uge:
case LIR_qge: case LIR_quge:
EXTRWI(r, r, 1, 4*cr+COND_eq); // extract CR7.eq
MFCR(r);
CROR(CR7, eq, gt, eq);
break;
default:
debug_only(outputf("%s",lirNames[ins->opcode()]);)
TODO(asm_cond);
break;
}
asm_cmp(op, a, b, cr);
}
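// Sketch of what asm_cond + asm_cmp produce for LIR_le, in execution order
// (cr = CR7; lt/gt/eq are the usual 0/1/2 bit positions within a CR field,
// matching bitNames[] above):
//     cmpw   cr7, ra, rb
//     cror   4*7+eq, 4*7+lt, 4*7+eq   ; eq |= lt, so eq now means "<="
//     mfcr   r                        ; copy all eight CR fields into r
//     extrwi r, r, 1, 4*7+eq          ; isolate CR7.eq as 0 or 1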
void Assembler::asm_fcond(LIns *ins) {
asm_cond(ins);
}
// force 32bit sign extension to test whether the value fits in 14 signed bits
#define isS14(i) ((int32_t((i)<<18)>>18) == (i))
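// i.e. a (word) branch displacement fits the 14-bit BD field iff it lies
// in [-8192, 8191]: isS14(8191) holds, isS14(8192) does not.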
NIns* Assembler::asm_branch(bool onfalse, LIns *cond, NIns * const targ) {
LOpcode condop = cond->opcode();
NanoAssert(cond->isCond());
// powerpc offsets are based on the address of the branch instruction
NIns *patch;
#if !PEDANTIC
ptrdiff_t bd = targ - (_nIns-1);
if (targ && isS24(bd))
patch = asm_branch_near(onfalse, cond, targ);
else
#endif
patch = asm_branch_far(onfalse, cond, targ);
asm_cmp(condop, cond->oprnd1(), cond->oprnd2(), CR7);
return patch;
}
NIns* Assembler::asm_branch_near(bool onfalse, LIns *cond, NIns * const targ) {
NanoAssert(targ != 0);
underrunProtect(4);
ptrdiff_t bd = targ - (_nIns-1);
NIns *patch = 0;
if (!isS14(bd)) {
underrunProtect(8);
bd = targ - (_nIns-1);
if (isS24(bd)) {
// can't fit conditional branch offset into 14 bits, but
// we can fit in 24, so invert the condition and branch
// around an unconditional jump
verbose_only(verbose_outputf("%p:", _nIns);)
NIns *skip = _nIns;
B(bd);
patch = _nIns; // this is the patchable branch to the given target
onfalse = !onfalse;
bd = skip - (_nIns-1);
NanoAssert(isS14(bd));
verbose_only(verbose_outputf("branch24");)
}
else {
// known far target
return asm_branch_far(onfalse, cond, targ);
}
}
ConditionRegister cr = CR7;
switch (cond->opcode()) {
case LIR_eq:
case LIR_feq:
case LIR_qeq:
if (onfalse) BNE(cr,bd); else BEQ(cr,bd);
break;
case LIR_lt: case LIR_ult:
case LIR_flt: case LIR_fle:
case LIR_qlt: case LIR_qult:
if (onfalse) BNL(cr,bd); else BLT(cr,bd);
break;
case LIR_le: case LIR_ule:
case LIR_qle: case LIR_qule:
if (onfalse) BGT(cr,bd); else BLE(cr,bd);
break;
case LIR_gt: case LIR_ugt:
case LIR_fgt: case LIR_fge:
case LIR_qgt: case LIR_qugt:
if (onfalse) BNG(cr,bd); else BGT(cr,bd);
break;
case LIR_ge: case LIR_uge:
case LIR_qge: case LIR_quge:
if (onfalse) BLT(cr,bd); else BGE(cr,bd);
break;
default:
debug_only(outputf("%s",lirNames[cond->opcode()]);)
TODO(unknown_cond);
}
if (!patch)
patch = _nIns;
return patch;
}
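// Shape of the two near-branch cases, in execution order:
//   displacement fits in 14 bits:   bcc  cr7, targ
//   fits in 24 but not 14 bits:     b!cc cr7, skip   ; inverted condition
//                                   b    targ        ; patchable 24-bit branch
//                             skip: ...
// In both cases the returned patch pointer refers to the instruction whose
// displacement field encodes the target.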
// general case branch to any address (using CTR)
NIns *Assembler::asm_branch_far(bool onfalse, LIns *cond, NIns * const targ) {
LOpcode condop = cond->opcode();
ConditionRegister cr = CR7;
underrunProtect(16);
switch (condop) {
case LIR_eq:
case LIR_feq:
case LIR_qeq:
if (onfalse) BNECTR(cr); else BEQCTR(cr);
break;
case LIR_lt: case LIR_ult:
case LIR_qlt: case LIR_qult:
case LIR_flt: case LIR_fle:
if (onfalse) BNLCTR(cr); else BLTCTR(cr);
break;
case LIR_le: case LIR_ule:
case LIR_qle: case LIR_qule:
if (onfalse) BGTCTR(cr); else BLECTR(cr);
break;
case LIR_gt: case LIR_ugt:
case LIR_qgt: case LIR_qugt:
case LIR_fgt: case LIR_fge:
if (onfalse) BNGCTR(cr); else BGTCTR(cr);
break;
case LIR_ge: case LIR_uge:
case LIR_qge: case LIR_quge:
if (onfalse) BLTCTR(cr); else BGECTR(cr);
break;
default:
debug_only(outputf("%s",lirNames[condop]);)
TODO(unknown_cond);
}
#if !defined NANOJIT_64BIT
MTCTR(R0);
asm_li32(R0, (int)targ);
#else
MTCTR(R0);
if (!targ || !isU32(uintptr_t(targ))) {
asm_li64(R0, uint64_t(targ));
} else {
asm_li32(R0, uint32_t(uintptr_t(targ)));
}
#endif
return _nIns;
}
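// Sketch of the far branch in execution order (32-bit case shown):
//     lis      r0, hi16(targ)
//     ori      r0, r0, lo16(targ)
//     mtctr    r0
//     b<cond>ctr cr7              ; e.g. beqctr/bltctr
// The 64-bit case uses the 5-instruction asm_li64 sequence when the target
// doesn't fit in 32 bits. Patching such a branch means rewriting the
// load-immediate instructions in place (see nPatchBranch).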
void Assembler::asm_cmp(LOpcode condop, LIns *a, LIns *b, ConditionRegister cr) {
RegisterMask allow = condop >= LIR_feq && condop <= LIR_fge ? FpRegs : GpRegs;
Register ra = findRegFor(a, allow);
#if !PEDANTIC
if (b->isconst()) {
int32_t d = b->imm32();
if (isS16(d)) {
if (condop >= LIR_eq && condop <= LIR_ge) {
CMPWI(cr, ra, d);
return;
}
if (condop >= LIR_qeq && condop <= LIR_qge) {
CMPDI(cr, ra, d);
TODO(cmpdi);
return;
}
}
if (isU16(d)) {
if ((condop == LIR_eq || condop >= LIR_ult && condop <= LIR_uge)) {
CMPLWI(cr, ra, d);
return;
}
if ((condop == LIR_qeq || condop >= LIR_qult && condop <= LIR_quge)) {
CMPLDI(cr, ra, d);
TODO(cmpldi);
return;
}
}
}
#endif
// general case
Register rb = b==a ? ra : findRegFor(b, allow & ~rmask(ra));
if (condop >= LIR_eq && condop <= LIR_ge) {
CMPW(cr, ra, rb);
} else if (condop >= LIR_ult && condop <= LIR_uge) {
CMPLW(cr, ra, rb);
} else if (condop >= LIR_qeq && condop <= LIR_qge) {
CMPD(cr, ra, rb);
}
else if (condop >= LIR_qult && condop <= LIR_quge) {
CMPLD(cr, ra, rb);
}
else if (condop >= LIR_feq && condop <= LIR_fge) {
// Set the lt/gt bit for fle/fge. We don't do this for int/uint because
// in those cases we can invert the branch condition; for float we can't,
// because of unordered comparisons.
if (condop == LIR_fle)
CROR(cr, lt, lt, eq); // lt = lt|eq
else if (condop == LIR_fge)
CROR(cr, gt, gt, eq); // gt = gt|eq
FCMPU(cr, ra, rb);
}
else {
TODO(asm_cmp);
}
}
void Assembler::asm_ret(LIns *ins) {
genEpilogue();
assignSavedRegs();
LIns *value = ins->oprnd1();
Register r = ins->isop(LIR_ret) ? R3 : F1;
findSpecificRegFor(value, r);
}
void Assembler::asm_nongp_copy(Register r, Register s) {
// PPC doesn't support any GPR<->FPR moves
NanoAssert((rmask(r) & FpRegs) && (rmask(s) & FpRegs));
FMR(r, s);
}
void Assembler::asm_restore(LIns *i, Register r) {
int d;
if (i->isop(LIR_alloc)) {
d = disp(i);
ADDI(r, FP, d);
}
else if (i->isconst()) {
if (!i->getArIndex()) {
i->markAsClear();
}
asm_li(r, i->imm32());
}
else {
d = findMemFor(i);
if (IsFpReg(r)) {
NanoAssert(i->isQuad());
LFD(r, d, FP);
} else if (i->isQuad()) {
LD(r, d, FP);
} else {
LWZ(r, d, FP);
}
}
}
void Assembler::asm_int(LIns *ins) {
Register rr = prepResultReg(ins, GpRegs);
asm_li(rr, ins->imm32());
}
void Assembler::asm_fneg(LIns *ins) {
Register rr = prepResultReg(ins, FpRegs);
Register ra = findRegFor(ins->oprnd1(), FpRegs);
FNEG(rr,ra);
}
void Assembler::asm_param(LIns *ins) {
uint32_t a = ins->paramArg();
uint32_t kind = ins->paramKind();
if (kind == 0) {
// ordinary param
// first eight args always in R3..R10 for PPC
if (a < 8) {
// incoming arg in register
prepResultReg(ins, rmask(argRegs[a]));
} else {
// todo: support stack based args, arg 0 is at [FP+off] where off
// is the # of regs to be pushed in genProlog()
TODO(asm_param_stk);
}
}
else {
// saved param
prepResultReg(ins, rmask(savedRegs[a]));
}
}
void Assembler::asm_call(LIns *ins) {
Register retReg = ( ins->isop(LIR_fcall) ? F1 : retRegs[0] );
prepResultReg(ins, rmask(retReg));
// Do this after we've handled the call result, so we don't
// force the call result to be spilled unnecessarily.
evictScratchRegs();
const CallInfo* call = ins->callInfo();
ArgSize sizes[MAXARGS];
uint32_t argc = call->get_sizes(sizes);
bool indirect;
if (!(indirect = call->isIndirect())) {
verbose_only(if (_logc->lcbits & LC_Assembly)
outputf(" %p:", _nIns);
)
br((NIns*)call->_address, 1);
} else {
// Indirect call: we assign the address arg to R11 since it's not
// used for regular arguments, and is otherwise scratch since it's
// clobbered by the call.
underrunProtect(8); // underrunProtect might clobber CTR
BCTRL();
MTCTR(R11);
asm_regarg(ARGSIZE_P, ins->arg(--argc), R11);
}
int param_size = 0;
Register r = R3;
Register fr = F1;
for(uint32_t i = 0; i < argc; i++) {
uint32_t j = argc - i - 1;
ArgSize sz = sizes[j];
LInsp arg = ins->arg(j);
if (sz & ARGSIZE_MASK_INT) {
// GP arg
if (r <= R10) {
asm_regarg(sz, arg, r);
r = nextreg(r);
param_size += sizeof(void*);
} else {
// put arg on stack
TODO(stack_int32);
}
} else if (sz == ARGSIZE_F) {
// double
if (fr <= F13) {
asm_regarg(sz, arg, fr);
fr = nextreg(fr);
#ifdef NANOJIT_64BIT
r = nextreg(r);
#else
r = nextreg(nextreg(r)); // skip 2 gpr's
#endif
param_size += sizeof(double);
} else {
// put arg on stack
TODO(stack_double);
}
} else {
TODO(ARGSIZE_UNK);
}
}
if (param_size > max_param_size)
max_param_size = param_size;
}
void Assembler::asm_regarg(ArgSize sz, LInsp p, Register r)
{
NanoAssert(r != UnknownReg);
if (sz & ARGSIZE_MASK_INT)
{
#ifdef NANOJIT_64BIT
if (sz == ARGSIZE_I) {
// sign extend 32->64
EXTSW(r, r);
} else if (sz == ARGSIZE_U) {
// zero extend 32->64
CLRLDI(r, r, 32);
}
#endif
// arg goes in specific register
if (p->isconst()) {
asm_li(r, p->imm32());
} else {
if (p->isUsed()) {
if (!p->hasKnownReg()) {
// load it into the arg reg
int d = findMemFor(p);
if (p->isop(LIR_alloc)) {
NanoAssert(isS16(d));
ADDI(r, FP, d);
} else if (p->isQuad()) {
LD(r, d, FP);
} else {
LWZ(r, d, FP);
}
} else {
// it must be in a saved reg
MR(r, p->getReg());
}
}
else {
// this is the last use, so fine to assign it
// to the scratch reg, it's dead after this point.
findSpecificRegFor(p, r);
}
}
}
else if (sz == ARGSIZE_F) {
if (p->isUsed()) {
Register rp = p->getReg();
if (!isKnownReg(rp) || !IsFpReg(rp)) {
// load it into the arg reg
int d = findMemFor(p);
LFD(r, d, FP);
} else {
// it must be in a saved reg
NanoAssert(IsFpReg(r) && IsFpReg(rp));
FMR(r, rp);
}
}
else {
// this is the last use, so fine to assign it
// to the scratch reg, it's dead after this point.
findSpecificRegFor(p, r);
}
}
else {
TODO(ARGSIZE_UNK);
}
}
void Assembler::asm_spill(Register rr, int d, bool /* pop */, bool quad) {
(void)quad;
if (d) {
if (IsFpReg(rr)) {
NanoAssert(quad);
STFD(rr, d, FP);
}
#ifdef NANOJIT_64BIT
else if (quad) {
STD(rr, d, FP);
}
#endif
else {
NanoAssert(!quad);
STW(rr, d, FP);
}
}
}
void Assembler::asm_arith(LIns *ins) {
LOpcode op = ins->opcode();
LInsp lhs = ins->oprnd1();
LInsp rhs = ins->oprnd2();
RegisterMask allow = GpRegs;
Register rr = prepResultReg(ins, allow);
Register ra = findRegFor(lhs, GpRegs);
if (rhs->isconst()) {
int32_t rhsc = rhs->imm32();
if (isS16(rhsc)) {
// ppc arith immediate ops sign-extend the imm16 value
switch (op) {
case LIR_add:
case LIR_iaddp:
IF_64BIT(case LIR_qiadd:)
IF_64BIT(case LIR_qaddp:)
ADDI(rr, ra, rhsc);
return;
case LIR_sub:
SUBI(rr, ra, rhsc);
return;
case LIR_mul:
MULLI(rr, ra, rhsc);
return;
}
}
if (isU16(rhsc)) {
// ppc logical immediate ops zero-extend the imm16 value
switch (op) {
IF_64BIT(case LIR_qior:)
case LIR_or:
ORI(rr, ra, rhsc);
return;
IF_64BIT(case LIR_qiand:)
case LIR_and:
ANDI(rr, ra, rhsc);
return;
IF_64BIT(case LIR_qxor:)
case LIR_xor:
XORI(rr, ra, rhsc);
return;
}
}
// LIR shift ops only use the low 5 bits of the shift constant
switch (op) {
case LIR_lsh:
SLWI(rr, ra, rhsc&31);
return;
case LIR_ush:
SRWI(rr, ra, rhsc&31);
return;
case LIR_rsh:
SRAWI(rr, ra, rhsc&31);
return;
}
}
// general case, put rhs in register
Register rb = rhs==lhs ? ra : findRegFor(rhs, GpRegs&~rmask(ra));
switch (op) {
IF_64BIT(case LIR_qiadd:)
IF_64BIT(case LIR_qaddp:)
case LIR_add:
case LIR_iaddp:
ADD(rr, ra, rb);
break;
IF_64BIT(case LIR_qiand:)
case LIR_and:
AND(rr, ra, rb);
break;
IF_64BIT(case LIR_qior:)
case LIR_or:
OR(rr, ra, rb);
break;
IF_64BIT(case LIR_qxor:)
case LIR_xor:
XOR(rr, ra, rb);
break;
case LIR_sub: SUBF(rr, rb, ra); break;
case LIR_lsh: SLW(rr, ra, R0); ANDI(R0, rb, 31); break;
case LIR_rsh: SRAW(rr, ra, R0); ANDI(R0, rb, 31); break;
case LIR_ush: SRW(rr, ra, R0); ANDI(R0, rb, 31); break;
case LIR_mul: MULLW(rr, ra, rb); break;
#ifdef NANOJIT_64BIT
case LIR_qilsh:
SLD(rr, ra, R0);
ANDI(R0, rb, 63);
break;
case LIR_qursh:
SRD(rr, ra, R0);
ANDI(R0, rb, 63);
break;
case LIR_qirsh:
SRAD(rr, ra, R0);
ANDI(R0, rb, 63);
TODO(qirsh);
break;
#endif
default:
debug_only(outputf("%s",lirNames[op]);)
TODO(asm_arith);
}
}
void Assembler::asm_fop(LIns *ins) {
LOpcode op = ins->opcode();
LInsp lhs = ins->oprnd1();
LInsp rhs = ins->oprnd2();
RegisterMask allow = FpRegs;
Register rr = prepResultReg(ins, allow);
Register ra, rb;
findRegFor2(allow, lhs, ra, rhs, rb);
switch (op) {
case LIR_fadd: FADD(rr, ra, rb); break;
case LIR_fsub: FSUB(rr, ra, rb); break;
case LIR_fmul: FMUL(rr, ra, rb); break;
case LIR_fdiv: FDIV(rr, ra, rb); break;
default:
debug_only(outputf("%s",lirNames[op]);)
TODO(asm_fop);
}
}
void Assembler::asm_i2f(LIns *ins) {
Register r = prepResultReg(ins, FpRegs);
Register v = findRegFor(ins->oprnd1(), GpRegs);
const int d = 16; // naturally aligned scratch slot in the outgoing linkage area
#if defined NANOJIT_64BIT && !PEDANTIC
FCFID(r, r); // convert to double
LFD(r, d, SP); // load into fpu register
STD(v, d, SP); // save int64
EXTSW(v, v); // sign-extend destructively; ok since oprnd1 is only 32bit
#else
FSUB(r, r, F0);
LFD(r, d, SP); // scratch area in outgoing linkage area
STW(R0, d+4, SP);
XORIS(R0, v, 0x8000);
LFD(F0, d, SP);
STW(R0, d+4, SP);
LIS(R0, 0x8000);
STW(R0, d, SP);
LIS(R0, 0x4330);
#endif
}
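// The 32-bit path above uses the classic 2^52 bias trick, since fcfid does
// not exist on 32-bit PowerPC. In execution order it assembles two bit
// patterns in the 8-byte scratch slot at SP+16 (high word at SP+16, low
// word at SP+20, PPC being big-endian):
//     F0 = bits(0x43300000, 0x80000000)     = 2^52 + 2^31
//     r  = bits(0x43300000, v ^ 0x80000000) = 2^52 + 2^31 + v
// so the final fsub leaves exactly double(v); the xoris with 0x8000 biases
// the signed int into an unsigned low word.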
void Assembler::asm_u2f(LIns *ins) {
Register r = prepResultReg(ins, FpRegs);
Register v = findRegFor(ins->oprnd1(), GpRegs);
const int d = 16;
#if defined NANOJIT_64BIT && !PEDANTIC
FCFID(r, r); // convert to double
LFD(r, d, SP); // load into fpu register
STD(v, d, SP); // save int64
CLRLDI(v, v, 32); // zero-extend destructively
#else
FSUB(r, r, F0);
LFD(F0, d, SP);
STW(R0, d+4, SP);
LI(R0, 0);
LFD(r, d, SP);
STW(v, d+4, SP);
STW(R0, d, SP);
LIS(R0, 0x4330);
#endif
}
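// Same 2^52 trick as asm_i2f, minus the sign bias: here r = 2^52 + v and
// F0 = 2^52, so the fsub yields double(v) for any unsigned 32-bit v.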
void Assembler::asm_promote(LIns *ins) {
LOpcode op = ins->opcode();
Register r = prepResultReg(ins, GpRegs);
Register v = findRegFor(ins->oprnd1(), GpRegs);
switch (op) {
default:
debug_only(outputf("%s",lirNames[op]));
TODO(asm_promote);
case LIR_u2q:
CLRLDI(r, v, 32); // clears the top 32 bits
break;
case LIR_i2q:
EXTSW(r, v);
break;
}
}
void Assembler::asm_quad(LIns *ins) {
#ifdef NANOJIT_64BIT
Register r = ins->getReg();
if (isKnownReg(r) && (rmask(r) & FpRegs)) {
// FPR already assigned, fine, use it
freeRsrcOf(ins, false);
} else {
// use a GPR; it's okay to copy doubles with GPRs
// but *not* okay to copy non-doubles with FPRs
r = prepResultReg(ins, GpRegs);
}
#else
Register r = prepResultReg(ins, FpRegs);
#endif
if (rmask(r) & FpRegs) {
union {
double d;
struct {
int32_t hi, lo;
} w;
};
d = ins->imm64f();
LFD(r, 12, SP);
STW(R0, 12, SP);
asm_li(R0, w.hi);
STW(R0, 16, SP);
asm_li(R0, w.lo);
}
else {
int64_t q = ins->imm64();
if (isS32(q)) {
asm_li(r, int32_t(q));
return;
}
RLDIMI(r,R0,32,0); // or 32,32?
asm_li(R0, int32_t(q>>32)); // hi bits into R0
asm_li(r, int32_t(q)); // lo bits into dest reg
}
}
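// Note the union above relies on PowerPC being big-endian: w.hi overlays
// the most-significant word of the double and is stored at the lower
// address (SP+12), so the subsequent lfd reads back the intended bit
// pattern.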
void Assembler::br(NIns* addr, int link) {
// if the destination is unknown, use the longest branch form possible
if (!addr) {
br_far(addr,link);
return;
}
// powerpc offsets are based on the address of the branch instruction
underrunProtect(4); // ensure _nIns is addr of Bx
ptrdiff_t offset = addr - (_nIns-1); // we want ptr diff's implicit >>2 here
#if !PEDANTIC
if (isS24(offset)) {
Bx(offset, 0, link); // b addr or bl addr
return;
}
ptrdiff_t absaddr = addr - (NIns*)0; // ptr diff implies >>2
if (isS24(absaddr)) {
Bx(absaddr, 1, link); // ba addr or bla addr
return;
}
#endif // !PEDANTIC
br_far(addr,link);
}
void Assembler::br_far(NIns* addr, int link) {
// far jump.
// can't have a page break in this sequence, because the break
// would also clobber ctr and r2. We use R2 here because it's not available
// to the register allocator, and we use R0 everywhere else as scratch, so using
// R2 here avoids clobbering anything else besides CTR.
#ifdef NANOJIT_64BIT
if (addr==0 || !isU32(uintptr_t(addr))) {
// really far jump to 64bit abs addr
underrunProtect(28); // 7 instructions
BCTR(link);
MTCTR(R2);
asm_li64(R2, uintptr_t(addr)); // 5 instructions
return;
}
#endif
underrunProtect(16);
BCTR(link);
MTCTR(R2);
asm_li32(R2, uint32_t(uintptr_t(addr))); // 2 instructions
}
void Assembler::underrunProtect(int bytes) {
NanoAssertMsg(bytes<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small");
int instr = (bytes + sizeof(NIns) - 1) / sizeof(NIns);
NIns *pc = _nIns;
NIns *top = codeStart; // this may be in a normal code chunk or an exit code chunk
#if PEDANTIC
// pedanticTop is based on the last call to underrunProtect; any time we call
// underrunProtect and would use more than what's already protected, then insert
// a page break jump. Sometimes, this will be to a new page, usually it's just
// the next instruction and the only effect is to clobber R2 & CTR
NanoAssert(pedanticTop >= top);
if (pc - instr < pedanticTop) {
// no page break required, but insert a far branch anyway just to be difficult
#ifdef NANOJIT_64BIT
const int br_size = 7;
#else
const int br_size = 4;
#endif
if (pc - instr - br_size < top) {
// really do need a page break
verbose_only(if (_logc->lcbits & LC_Assembly) outputf("newpage %p:", pc);)
codeAlloc();
}
// now emit the jump, but make sure we won't need another page break.
// we're pedantic, but not *that* pedantic.
pedanticTop = _nIns - br_size;
br(pc, 0);
pedanticTop = _nIns - instr;
}
#else
if (pc - instr < top) {
verbose_only(if (_logc->lcbits & LC_Assembly) outputf("newpage %p:", pc);)
// This may be in a normal code chunk or an exit code chunk.
codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
// This jump will call underrunProtect again, but since we're on a new
// page, nothing will happen.
br(pc, 0);
}
#endif
}
void Assembler::asm_cmov(LIns *ins) {
NanoAssert(ins->isop(LIR_cmov) || ins->isop(LIR_qcmov));
LIns* cond = ins->oprnd1();
LIns* iftrue = ins->oprnd2();
LIns* iffalse = ins->oprnd3();
NanoAssert(cond->isCmp());
NanoAssert(iftrue->isQuad() == iffalse->isQuad());
// fixme: we could handle fpu registers here, too, since we're just branching
Register rr = prepResultReg(ins, GpRegs);
findSpecificRegFor(iftrue, rr);
Register rf = findRegFor(iffalse, GpRegs & ~rmask(rr));
NIns *after = _nIns;
verbose_only(if (_logc->lcbits & LC_Assembly) outputf("%p:",after);)
MR(rr, rf);
asm_branch(false, cond, after);
}
RegisterMask Assembler::hint(LIns *i, RegisterMask allow) {
LOpcode op = i->opcode();
RegisterMask prefer = ~0LL;
if (op == LIR_icall || op == LIR_qcall)
prefer = rmask(R3);
else if (op == LIR_fcall)
prefer = rmask(F1);
else if (op == LIR_param) {
if (i->paramArg() < 8) {
prefer = rmask(argRegs[i->paramArg()]);
}
}
// narrow the allow set to whatever is preferred and also free
if (_allocator.free & allow & prefer)
allow &= prefer;
return allow;
}
void Assembler::asm_neg_not(LIns *ins) {
Register rr = prepResultReg(ins, GpRegs);
Register ra = findRegFor(ins->oprnd1(), GpRegs);
if (ins->isop(LIR_neg)) {
NEG(rr, ra);
} else {
NOT(rr, ra);
}
}
void Assembler::asm_qlo(LIns *ins) {
Register rr = prepResultReg(ins, GpRegs);
int d = findMemFor(ins->oprnd1());
LWZ(rr, d+4, FP);
}
void Assembler::asm_qhi(LIns *ins) {
Register rr = prepResultReg(ins, GpRegs);
int d = findMemFor(ins->oprnd1());
LWZ(rr, d, FP);
TODO(asm_qhi);
}
void Assembler::nInit(AvmCore*) {
}
void Assembler::nBeginAssembly() {
max_param_size = 0;
}
void Assembler::nativePageSetup() {
NanoAssert(!_inExit);
if (!_nIns) {
codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
IF_PEDANTIC( pedanticTop = _nIns; )
}
if (!_nExitIns) {
codeAlloc(exitStart, exitEnd, _nExitIns verbose_only(, exitBytes));
}
}
void Assembler::nativePageReset()
{}
// Increment the 32-bit profiling counter at pCtr, without
// changing any registers.
verbose_only(
void Assembler::asm_inc_m32(uint32_t* /*pCtr*/)
{
}
)
void Assembler::nPatchBranch(NIns *branch, NIns *target) {
// ppc relative offsets are based on the addr of the branch instruction
ptrdiff_t bd = target - branch;
if (branch[0] == PPC_b) {
// unconditional, 24bit offset. Whoever generated the unpatched jump
// must have known the final size would fit in 24bits! otherwise the
// jump would be (lis,ori,mtctr,bctr) and we'd be patching the lis,ori.
NanoAssert(isS24(bd));
branch[0] |= (bd & 0xffffff) << 2;
}
else if ((branch[0] & PPC_bc) == PPC_bc) {
// conditional, 14bit offset. Whoever generated the unpatched jump
// must have known the final size would fit in 14bits! otherwise the
// jump would be (lis,ori,mtctr,bcctr) and we'd be patching the lis,ori below.
NanoAssert(isS14(bd));
NanoAssert(((branch[0] & 0x3fff)<<2) == 0);
branch[0] |= (bd & 0x3fff) << 2;
TODO(patch_bc);
}
#ifdef NANOJIT_64BIT
// patch 64bit branch
else if ((branch[0] & ~(31<<21)) == PPC_addis) {
// general branch, using lis,ori,sldi,oris,ori to load the const 64bit addr.
Register rd = Register((branch[0] >> 21) & 31);
// (mask off the 16-bit immediates; they still hold halves of the old target)
NanoAssert((branch[1] & 0xffff0000) == (PPC_ori | GPR(rd)<<21 | GPR(rd)<<16));
NanoAssert((branch[3] & 0xffff0000) == (PPC_oris | GPR(rd)<<21 | GPR(rd)<<16));
NanoAssert((branch[4] & 0xffff0000) == (PPC_ori | GPR(rd)<<21 | GPR(rd)<<16));
uint64_t imm = uintptr_t(target);
uint32_t lo = uint32_t(imm);
uint32_t hi = uint32_t(imm>>32);
branch[0] = PPC_addis | GPR(rd)<<21 | uint16_t(hi>>16);
branch[1] = PPC_ori | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(hi);
branch[3] = PPC_oris | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(lo>>16);
branch[4] = PPC_ori | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(lo);
}
#else // NANOJIT_64BIT
// patch 32bit branch
else if ((branch[0] & ~(31<<21)) == PPC_addis) {
// general branch, using lis,ori to load the const addr.
// patch a lis,ori sequence with a 32bit value
Register rd = Register((branch[0] >> 21) & 31);
// (mask off the 16-bit immediate; it still holds half of the old target)
NanoAssert((branch[1] & 0xffff0000) == (PPC_ori | GPR(rd)<<21 | GPR(rd)<<16));
uint32_t imm = uint32_t(target);
branch[0] = PPC_addis | GPR(rd)<<21 | uint16_t(imm >> 16); // lis rd, imm >> 16
branch[1] = PPC_ori | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(imm); // ori rd, rd, imm & 0xffff
}
#endif // !NANOJIT_64BIT
else {
TODO(unknown_patch);
}
}
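// For the load-immediate forms above, patching rewrites only the immediate
// halves of the lis/ori (and, on 64-bit, oris/ori) instructions in place;
// the mtctr and branch-to-CTR instructions that follow are left untouched.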
static int cntzlw(int set) {
// On PowerPC, prefer higher registers, to minimize
// size of nonvolatile area that must be saved.
register Register i;
#ifdef __GNUC__
asm ("cntlzw %0,%1" : "=r" (i) : "r" (set));
#else // __GNUC__
# error("unsupported compiler")
#endif // __GNUC__
return 31-i;
}
Register Assembler::nRegisterAllocFromSet(RegisterMask set) {
Register i;
// note, deliberate truncation of 64->32 bits
if (set & 0xffffffff) {
i = Register(cntzlw(int(set))); // gp reg
} else {
i = Register(32+cntzlw(int(set>>32))); // fp reg
}
_allocator.free &= ~rmask(i);
return i;
}
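// Worked example: with set = 0x1ff8 (R3..R12 free), cntlzw(0x1ff8) is 19,
// so cntzlw() returns 31 - 19 = 12, and R12, the highest register in the
// set, is handed out first.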
void Assembler::nRegisterResetAll(RegAlloc ®s) {
regs.clear();
regs.free = SavedRegs | 0x1ff8 /* R3-12 */ | 0x3ffe00000000LL /* F1-13 */;
debug_only(regs.managed = regs.free);
}
#ifdef NANOJIT_64BIT
void Assembler::asm_qbinop(LIns *ins) {
LOpcode op = ins->opcode();
switch (op) {
case LIR_qaddp:
case LIR_qior:
case LIR_qiand:
case LIR_qursh:
case LIR_qirsh:
case LIR_qilsh:
case LIR_qxor:
case LIR_qiadd:
asm_arith(ins);
break;
default:
debug_only(outputf("%s",lirNames[op]));
TODO(asm_qbinop);
}
}
#endif // NANOJIT_64BIT
void Assembler::nFragExit(LIns*) {
TODO(nFragExit);
}
void Assembler::asm_jtbl(LIns* ins, NIns** native_table)
{
// R0 = index*4, R2 = table, CTR = computed address to jump to.
// must ensure no page breaks in here because R2 & CTR can get clobbered.
Register indexreg = findRegFor(ins->oprnd1(), GpRegs);
#ifdef NANOJIT_64BIT
underrunProtect(9*4);
BCTR(0); // jump to address in CTR
MTCTR(R2); // CTR = R2
LDX(R2, R2, R0); // R2 = [table + index*8]
SLDI(R0, indexreg, 3); // R0 = index*8
asm_li64(R2, uint64_t(native_table)); // R2 = table (5 instr)
#else // 64bit
underrunProtect(6*4);
BCTR(0); // jump to address in CTR
MTCTR(R2); // CTR = R2
LWZX(R2, R2, R0); // R2 = [table + index*4]
SLWI(R0, indexreg, 2); // R0 = index*4
asm_li(R2, int32_t(native_table)); // R2 = table (up to 2 instructions)
#endif // 64bit
}
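// In execution order the 32-bit jump-table sequence is roughly:
//     lis/ori  r2, native_table   ; up to 2 instructions (asm_li)
//     slwi     r0, indexreg, 2    ; r0 = index*4
//     lwzx     r2, r2, r0         ; r2 = table[index]
//     mtctr    r2
//     bctr
// (the 64-bit variant uses sldi/ldx and the 5-instruction asm_li64).
// underrunProtect keeps the whole sequence in one chunk so a page-break
// branch can't clobber R2/CTR mid-sequence.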
void Assembler::swapCodeChunks() {
SWAP(NIns*, _nIns, _nExitIns);
SWAP(NIns*, codeStart, exitStart);
SWAP(NIns*, codeEnd, exitEnd);
verbose_only( SWAP(size_t, codeBytes, exitBytes); )
}
} // namespace nanojit
#endif // FEATURE_NANOJIT && NANOJIT_PPC