// [AsmJit]
// Complete x86/x64 JIT and Remote Assembler for C++.
//
// [License]
// Zlib - See LICENSE.md file in the package.

// [Export]
#define ASMJIT_EXPORTS

// [Guard]
#include "../asmjit_build.h"
#if defined(ASMJIT_BUILD_X86) && !defined(ASMJIT_DISABLE_COMPILER)

// [Dependencies]
#include "../base/cpuinfo.h"
#include "../base/utils.h"
#include "../x86/x86assembler.h"
#include "../x86/x86compiler.h"
#include "../x86/x86internal_p.h"
#include "../x86/x86regalloc_p.h"

// [Api-Begin]
#include "../asmjit_apibegin.h"

namespace asmjit {

// ============================================================================
// [Forward Declarations]
// ============================================================================

enum { kCompilerDefaultLookAhead = 64 };

static Error X86RAPass_translateOperands(X86RAPass* self, Operand_* opArray, uint32_t opCount);

// ============================================================================
// [asmjit::X86RAPass - SpecialInst]
// ============================================================================

struct X86SpecialInst {
  uint8_t inReg;
  uint8_t outReg;
  uint16_t flags;
};

static ASMJIT_INLINE const X86SpecialInst* X86SpecialInst_get(uint32_t instId, const Operand* opArray, uint32_t opCount) noexcept {
  enum { kAny = Globals::kInvalidRegId };

#define R(ri) { uint8_t(ri) , uint8_t(kAny), uint16_t(TiedReg::kRReg) }
#define W(ri) { uint8_t(kAny), uint8_t(ri) , uint16_t(TiedReg::kWReg) }
#define X(ri) { uint8_t(ri) , uint8_t(ri) , uint16_t(TiedReg::kXReg) }
#define NONE() { uint8_t(kAny), uint8_t(kAny), 0 }

  static const X86SpecialInst instCpuid[]        = { X(X86Gp::kIdAx), W(X86Gp::kIdBx), X(X86Gp::kIdCx), W(X86Gp::kIdDx) };
  static const X86SpecialInst instCbwCdqeCwde[]  = { X(X86Gp::kIdAx) };
  static const X86SpecialInst instCdqCwdCqo[]    = { W(X86Gp::kIdDx), R(X86Gp::kIdAx) };
  static const X86SpecialInst instCmpxchg[]      = { X(kAny), R(kAny), X(X86Gp::kIdAx) };
  static const X86SpecialInst instCmpxchg8b16b[] = { NONE(), X(X86Gp::kIdDx), X(X86Gp::kIdAx), R(X86Gp::kIdCx), R(X86Gp::kIdBx) };
  static const X86SpecialInst instDaaDas[]       = { X(X86Gp::kIdAx) };
  static const X86SpecialInst instDiv2[]         = { X(X86Gp::kIdAx), R(kAny) };
  static const X86SpecialInst instDiv3[]         = { X(X86Gp::kIdDx), X(X86Gp::kIdAx), R(kAny) };
  static const X86SpecialInst instJecxz[]        = { R(X86Gp::kIdCx) };
  static const X86SpecialInst instMul2[]         = { X(X86Gp::kIdAx), R(kAny) };
  static const X86SpecialInst instMul3[]         = { W(X86Gp::kIdDx), X(X86Gp::kIdAx), R(kAny) };
  static const X86SpecialInst instMulx[]         = { W(kAny), W(kAny), R(kAny), R(X86Gp::kIdDx) };
  static const X86SpecialInst instLahf[]         = { W(X86Gp::kIdAx) };
  static const X86SpecialInst instSahf[]         = { R(X86Gp::kIdAx) };
  static const X86SpecialInst instMaskmovq[]     = { R(kAny), R(kAny), R(X86Gp::kIdDi) };
  static const X86SpecialInst instRdtscRdtscp[]  = { W(X86Gp::kIdDx), W(X86Gp::kIdAx), W(X86Gp::kIdCx) };
  static const X86SpecialInst instRot[]          = { X(kAny), R(X86Gp::kIdCx) };
  static const X86SpecialInst instShldShrd[]     = { X(kAny), R(kAny), R(X86Gp::kIdCx) };
  static const X86SpecialInst instThirdXMM0[]    = { W(kAny), R(kAny), R(0) };
  static const X86SpecialInst instPcmpestri[]    = { R(kAny), R(kAny), NONE(), W(X86Gp::kIdCx) };
  static const X86SpecialInst instPcmpestrm[]    = { R(kAny), R(kAny), NONE(), W(0) };
  static const X86SpecialInst instPcmpistri[]    = { R(kAny), R(kAny), NONE(), W(X86Gp::kIdCx), R(X86Gp::kIdAx), R(X86Gp::kIdDx) };
  static const X86SpecialInst instPcmpistrm[]    = { R(kAny), R(kAny), NONE(), W(0)           , R(X86Gp::kIdAx), R(X86Gp::kIdDx) };
  static
const X86SpecialInst instXsaveXrstor[] = { W(kAny), R(X86Gp::kIdDx), R(X86Gp::kIdAx) }; static const X86SpecialInst instReadMR[] = { W(X86Gp::kIdDx), W(X86Gp::kIdAx), R(X86Gp::kIdCx) }; static const X86SpecialInst instWriteMR[] = { R(X86Gp::kIdDx), R(X86Gp::kIdAx), R(X86Gp::kIdCx) }; static const X86SpecialInst instCmps[] = { X(X86Gp::kIdSi), X(X86Gp::kIdDi) }; static const X86SpecialInst instLods[] = { W(X86Gp::kIdAx), X(X86Gp::kIdSi) }; static const X86SpecialInst instMovs[] = { X(X86Gp::kIdDi), X(X86Gp::kIdSi) }; static const X86SpecialInst instScas[] = { X(X86Gp::kIdDi), R(X86Gp::kIdAx) }; static const X86SpecialInst instStos[] = { X(X86Gp::kIdDi), R(X86Gp::kIdAx) }; #undef NONE #undef X #undef W #undef R switch (instId) { case X86Inst::kIdCpuid : return instCpuid; case X86Inst::kIdCbw : case X86Inst::kIdCdqe : case X86Inst::kIdCwde : return instCbwCdqeCwde; case X86Inst::kIdCdq : case X86Inst::kIdCwd : case X86Inst::kIdCqo : return instCdqCwdCqo; case X86Inst::kIdCmps : return instCmps; case X86Inst::kIdCmpxchg : return instCmpxchg; case X86Inst::kIdCmpxchg8b : case X86Inst::kIdCmpxchg16b : return instCmpxchg8b16b; case X86Inst::kIdDaa : case X86Inst::kIdDas : return instDaaDas; case X86Inst::kIdDiv : return (opCount == 2) ? instDiv2 : instDiv3; case X86Inst::kIdIdiv : return (opCount == 2) ? instDiv2 : instDiv3; case X86Inst::kIdImul : if (opCount == 2) return nullptr; if (opCount == 3 && !(opArray[0].isReg() && opArray[1].isReg() && opArray[2].isRegOrMem())) return nullptr; ASMJIT_FALLTHROUGH; case X86Inst::kIdMul : return (opCount == 2) ? instMul2 : instMul3; case X86Inst::kIdMulx : return instMulx; case X86Inst::kIdJecxz : return instJecxz; case X86Inst::kIdLods : return instLods; case X86Inst::kIdMovs : return instMovs; case X86Inst::kIdLahf : return instLahf; case X86Inst::kIdSahf : return instSahf; case X86Inst::kIdMaskmovq : case X86Inst::kIdMaskmovdqu : case X86Inst::kIdVmaskmovdqu: return instMaskmovq; case X86Inst::kIdEnter : return nullptr; // Not supported. case X86Inst::kIdLeave : return nullptr; // Not supported. case X86Inst::kIdRet : return nullptr; // Not supported. case X86Inst::kIdMonitor : return nullptr; // TODO: [COMPILER] Monitor/MWait. case X86Inst::kIdMwait : return nullptr; // TODO: [COMPILER] Monitor/MWait. case X86Inst::kIdPop : return nullptr; // TODO: [COMPILER] Pop/Push. case X86Inst::kIdPush : return nullptr; // TODO: [COMPILER] Pop/Push. case X86Inst::kIdPopa : return nullptr; // Not supported. case X86Inst::kIdPopf : return nullptr; // Not supported. case X86Inst::kIdPusha : return nullptr; // Not supported. case X86Inst::kIdPushf : return nullptr; // Not supported. case X86Inst::kIdRcl : case X86Inst::kIdRcr : case X86Inst::kIdRol : case X86Inst::kIdRor : case X86Inst::kIdSal : case X86Inst::kIdSar : case X86Inst::kIdShl : // Rot instruction is special only if the last operand is a variable. case X86Inst::kIdShr : if (!opArray[1].isReg()) return nullptr; return instRot; case X86Inst::kIdShld : // Shld/Shrd instruction is special only if the last operand is a variable. 
case X86Inst::kIdShrd : if (!opArray[2].isReg()) return nullptr; return instShldShrd; case X86Inst::kIdRdtsc : case X86Inst::kIdRdtscp : return instRdtscRdtscp; case X86Inst::kIdScas : return instScas; case X86Inst::kIdStos : return instStos; case X86Inst::kIdBlendvpd : case X86Inst::kIdBlendvps : case X86Inst::kIdPblendvb : case X86Inst::kIdSha256rnds2: return instThirdXMM0; case X86Inst::kIdPcmpestri : case X86Inst::kIdVpcmpestri : return instPcmpestri; case X86Inst::kIdPcmpistri : case X86Inst::kIdVpcmpistri : return instPcmpistri; case X86Inst::kIdPcmpestrm : case X86Inst::kIdVpcmpestrm : return instPcmpestrm; case X86Inst::kIdPcmpistrm : case X86Inst::kIdVpcmpistrm : return instPcmpistrm; case X86Inst::kIdXrstor : case X86Inst::kIdXrstor64 : case X86Inst::kIdXsave : case X86Inst::kIdXsave64 : case X86Inst::kIdXsaveopt : case X86Inst::kIdXsaveopt64 : return instXsaveXrstor; case X86Inst::kIdRdmsr : case X86Inst::kIdRdpmc : case X86Inst::kIdXgetbv : return instReadMR; case X86Inst::kIdWrmsr : case X86Inst::kIdXsetbv : return instWriteMR; default : return nullptr; } } // ============================================================================ // [asmjit::X86RAPass - Construction / Destruction] // ============================================================================ X86RAPass::X86RAPass() noexcept : RAPass() { _state = &_x86State; _varMapToVaListOffset = ASMJIT_OFFSET_OF(X86RAData, tiedArray); } X86RAPass::~X86RAPass() noexcept {} // ============================================================================ // [asmjit::X86RAPass - Interface] // ============================================================================ Error X86RAPass::process(Zone* zone) noexcept { return Base::process(zone); } Error X86RAPass::prepare(CCFunc* func) noexcept { ASMJIT_PROPAGATE(Base::prepare(func)); uint32_t archType = cc()->getArchType(); _regCount._gp = archType == ArchInfo::kTypeX86 ? 8 : 16; _regCount._mm = 8; _regCount._k = 8; _regCount._vec = archType == ArchInfo::kTypeX86 ? 8 : 16; _zsp = cc()->zsp(); _zbp = cc()->zbp(); _gaRegs[X86Reg::kKindGp ] = Utils::bits(_regCount.getGp()) & ~Utils::mask(X86Gp::kIdSp); _gaRegs[X86Reg::kKindMm ] = Utils::bits(_regCount.getMm()); _gaRegs[X86Reg::kKindK ] = Utils::bits(_regCount.getK()); _gaRegs[X86Reg::kKindVec] = Utils::bits(_regCount.getVec()); _x86State.reset(0); _clobberedRegs.reset(); _avxEnabled = false; _varBaseRegId = Globals::kInvalidRegId; // Used by patcher. _varBaseOffset = 0; // Used by patcher. 
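  // For example, on a 64-bit target the GP mask computed above is
  // Utils::bits(16) & ~Utils::mask(X86Gp::kIdSp) == 0xFFFF & ~0x0010 == 0xFFEF,
  // i.e. every GP register except RSP is available to the allocator.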
  return kErrorOk;
}

// ============================================================================
// [asmjit::X86RAPass - Emit]
// ============================================================================

Error X86RAPass::emitMove(VirtReg* vReg, uint32_t dstId, uint32_t srcId, const char* reason) {
  const char* comment = nullptr;
  if (_emitComments) {
    _stringBuilder.setFormat("[%s] %s", reason, vReg->getName());
    comment = _stringBuilder.getData();
  }

  X86Reg dst(X86Reg::fromSignature(vReg->getSignature(), dstId));
  X86Reg src(X86Reg::fromSignature(vReg->getSignature(), srcId));
  return X86Internal::emitRegMove(reinterpret_cast<X86Emitter*>(cc()), dst, src, vReg->getTypeId(), _avxEnabled, comment);
}

Error X86RAPass::emitLoad(VirtReg* vReg, uint32_t id, const char* reason) {
  const char* comment = nullptr;
  if (_emitComments) {
    _stringBuilder.setFormat("[%s] %s", reason, vReg->getName());
    comment = _stringBuilder.getData();
  }

  X86Reg dst(X86Reg::fromSignature(vReg->getSignature(), id));
  X86Mem src(getVarMem(vReg));
  return X86Internal::emitRegMove(reinterpret_cast<X86Emitter*>(cc()), dst, src, vReg->getTypeId(), _avxEnabled, comment);
}

Error X86RAPass::emitSave(VirtReg* vReg, uint32_t id, const char* reason) {
  const char* comment = nullptr;
  if (_emitComments) {
    _stringBuilder.setFormat("[%s] %s", reason, vReg->getName());
    comment = _stringBuilder.getData();
  }

  X86Mem dst(getVarMem(vReg));
  X86Reg src(X86Reg::fromSignature(vReg->getSignature(), id));
  return X86Internal::emitRegMove(reinterpret_cast<X86Emitter*>(cc()), dst, src, vReg->getTypeId(), _avxEnabled, comment);
}

Error X86RAPass::emitSwapGp(VirtReg* dstReg, VirtReg* srcReg, uint32_t dstPhysId, uint32_t srcPhysId, const char* reason) noexcept {
  ASMJIT_ASSERT(dstPhysId != Globals::kInvalidRegId);
  ASMJIT_ASSERT(srcPhysId != Globals::kInvalidRegId);

  uint32_t is64 = std::max(dstReg->getTypeId(), srcReg->getTypeId()) >= TypeId::kI64;
  uint32_t sign = is64 ? uint32_t(X86RegTraits<X86Reg::kRegGpq>::kSignature)
                       : uint32_t(X86RegTraits<X86Reg::kRegGpd>::kSignature);

  X86Reg a = X86Reg::fromSignature(sign, dstPhysId);
  X86Reg b = X86Reg::fromSignature(sign, srcPhysId);
  ASMJIT_PROPAGATE(cc()->emit(X86Inst::kIdXchg, a, b));

  if (_emitComments)
    cc()->getCursor()->setInlineComment(cc()->_cbDataZone.sformat("[%s] %s, %s", reason, dstReg->getName(), srcReg->getName()));
  return kErrorOk;
}

Error X86RAPass::emitImmToReg(uint32_t dstTypeId, uint32_t dstPhysId, const Imm* src) noexcept {
  ASMJIT_ASSERT(dstPhysId != Globals::kInvalidRegId);

  X86Reg r0;
  Imm imm(*src);

  switch (dstTypeId) {
    case TypeId::kI8:
    case TypeId::kU8:
      imm.truncateTo8Bits();
      ASMJIT_FALLTHROUGH;

    case TypeId::kI16:
    case TypeId::kU16:
      imm.truncateTo16Bits();
      ASMJIT_FALLTHROUGH;

    case TypeId::kI32:
    case TypeId::kU32:
Mov32Truncate:
      imm.truncateTo32Bits();
      r0.setX86RegT<X86Reg::kRegGpd>(dstPhysId);
      cc()->emit(X86Inst::kIdMov, r0, imm);
      break;

    case TypeId::kI64:
    case TypeId::kU64:
      // Moving to a GPD register also clears the high DWORD of the GPQ
      // register in 64-bit mode.
      if (imm.isUInt32())
        goto Mov32Truncate;

      r0.setX86RegT<X86Reg::kRegGpq>(dstPhysId);
      cc()->emit(X86Inst::kIdMov, r0, imm);
      break;

    case TypeId::kF32:
    case TypeId::kF64:
      // Compiler doesn't manage FPU stack.
      ASMJIT_NOT_REACHED();
      break;

    case TypeId::kMmx32:
    case TypeId::kMmx64:
      // TODO: [COMPILER] EmitMoveImmToReg.
      break;

    default:
      // TODO: [COMPILER] EmitMoveImmToReg.
      break;
  }

  return kErrorOk;
}

Error X86RAPass::emitImmToStack(uint32_t dstTypeId, const X86Mem* dst, const Imm* src) noexcept {
  X86Mem mem(*dst);
  Imm imm(*src);

  // One stack entry has the same size as the native register size.
That means // that if we want to move a 32-bit integer on the stack in 64-bit mode, we // need to extend it to a 64-bit integer first. In 32-bit mode, pushing a // 64-bit on stack is done in two steps by pushing low and high parts // separately. uint32_t gpSize = cc()->getGpSize(); switch (dstTypeId) { case TypeId::kI8: case TypeId::kU8: imm.truncateTo8Bits(); ASMJIT_FALLTHROUGH; case TypeId::kI16: case TypeId::kU16: imm.truncateTo16Bits(); ASMJIT_FALLTHROUGH; case TypeId::kI32: case TypeId::kU32: case TypeId::kF32: mem.setSize(4); imm.truncateTo32Bits(); cc()->emit(X86Inst::kIdMov, mem, imm); break; case TypeId::kI64: case TypeId::kU64: case TypeId::kF64: case TypeId::kMmx32: case TypeId::kMmx64: if (gpSize == 4) { uint32_t hi = imm.getUInt32Hi(); // Lo-Part. mem.setSize(4); imm.truncateTo32Bits(); cc()->emit(X86Inst::kIdMov, mem, imm); mem.addOffsetLo32(gpSize); // Hi-Part. imm.setUInt32(hi); cc()->emit(X86Inst::kIdMov, mem, imm); } else { mem.setSize(8); cc()->emit(X86Inst::kIdMov, mem, imm); } break; default: return DebugUtils::errored(kErrorInvalidState); } return kErrorOk; } Error X86RAPass::emitRegToStack(uint32_t dstTypeId, const X86Mem* dst, uint32_t srcTypeId, uint32_t srcPhysId) noexcept { ASMJIT_ASSERT(srcPhysId != Globals::kInvalidRegId); X86Mem m0(*dst); X86Reg r0, r1; uint32_t gpSize = cc()->getGpSize(); uint32_t instId = 0; switch (dstTypeId) { case TypeId::kI64: case TypeId::kU64: // Extend BYTE->QWORD (GP). if (TypeId::isGpb(srcTypeId)) { r1.setX86RegT(srcPhysId); instId = (dstTypeId == TypeId::kI64 && srcTypeId == TypeId::kI8) ? X86Inst::kIdMovsx : X86Inst::kIdMovzx; goto _ExtendMovGpXQ; } // Extend WORD->QWORD (GP). if (TypeId::isGpw(srcTypeId)) { r1.setX86RegT(srcPhysId); instId = (dstTypeId == TypeId::kI64 && srcTypeId == TypeId::kI16) ? X86Inst::kIdMovsx : X86Inst::kIdMovzx; goto _ExtendMovGpXQ; } // Extend DWORD->QWORD (GP). if (TypeId::isGpd(srcTypeId)) { r1.setX86RegT(srcPhysId); instId = X86Inst::kIdMovsxd; if (dstTypeId == TypeId::kI64 && srcTypeId == TypeId::kI32) goto _ExtendMovGpXQ; else goto _ZeroExtendGpDQ; } // Move QWORD (GP). if (TypeId::isGpq(srcTypeId)) goto MovGpQ; if (TypeId::isMmx(srcTypeId)) goto MovMmQ; if (TypeId::isVec(srcTypeId)) goto MovXmmQ; break; case TypeId::kI32: case TypeId::kU32: case TypeId::kI16: case TypeId::kU16: // DWORD <- WORD (Zero|Sign Extend). if (TypeId::isGpw(srcTypeId)) { bool isDstSigned = dstTypeId == TypeId::kI16 || dstTypeId == TypeId::kI32; bool isSrcSigned = srcTypeId == TypeId::kI8 || srcTypeId == TypeId::kI16; r1.setX86RegT(srcPhysId); instId = isDstSigned && isSrcSigned ? X86Inst::kIdMovsx : X86Inst::kIdMovzx; goto _ExtendMovGpD; } // DWORD <- BYTE (Zero|Sign Extend). if (TypeId::isGpb(srcTypeId)) { bool isDstSigned = dstTypeId == TypeId::kI16 || dstTypeId == TypeId::kI32; bool isSrcSigned = srcTypeId == TypeId::kI8 || srcTypeId == TypeId::kI16; r1.setX86RegT(srcPhysId); instId = isDstSigned && isSrcSigned ? X86Inst::kIdMovsx : X86Inst::kIdMovzx; goto _ExtendMovGpD; } ASMJIT_FALLTHROUGH; case TypeId::kI8: case TypeId::kU8: if (TypeId::isInt(srcTypeId)) goto MovGpD; if (TypeId::isMmx(srcTypeId)) goto MovMmD; if (TypeId::isVec(srcTypeId)) goto MovXmmD; break; case TypeId::kMmx32: case TypeId::kMmx64: // Extend BYTE->QWORD (GP). if (TypeId::isGpb(srcTypeId)) { r1.setX86RegT(srcPhysId); instId = X86Inst::kIdMovzx; goto _ExtendMovGpXQ; } // Extend WORD->QWORD (GP). 
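      // (Presumably only zero-extension matters for an MMX destination, so
      // kIdMovzx is used below regardless of the source signedness.)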
if (TypeId::isGpw(srcTypeId)) { r1.setX86RegT(srcPhysId); instId = X86Inst::kIdMovzx; goto _ExtendMovGpXQ; } if (TypeId::isGpd(srcTypeId)) goto _ExtendMovGpDQ; if (TypeId::isGpq(srcTypeId)) goto MovGpQ; if (TypeId::isMmx(srcTypeId)) goto MovMmQ; if (TypeId::isVec(srcTypeId)) goto MovXmmQ; break; case TypeId::kF32: case TypeId::kF32x1: if (TypeId::isVec(srcTypeId)) goto MovXmmD; break; case TypeId::kF64: case TypeId::kF64x1: if (TypeId::isVec(srcTypeId)) goto MovXmmQ; break; default: // TODO: Vector types by stack. break; } return DebugUtils::errored(kErrorInvalidState); // Extend+Move Gp. _ExtendMovGpD: m0.setSize(4); r0.setX86RegT(srcPhysId); cc()->emit(instId, r0, r1); cc()->emit(X86Inst::kIdMov, m0, r0); return kErrorOk; _ExtendMovGpXQ: if (gpSize == 8) { m0.setSize(8); r0.setX86RegT(srcPhysId); cc()->emit(instId, r0, r1); cc()->emit(X86Inst::kIdMov, m0, r0); } else { m0.setSize(4); r0.setX86RegT(srcPhysId); cc()->emit(instId, r0, r1); _ExtendMovGpDQ: cc()->emit(X86Inst::kIdMov, m0, r0); m0.addOffsetLo32(4); cc()->emit(X86Inst::kIdAnd, m0, 0); } return kErrorOk; _ZeroExtendGpDQ: m0.setSize(4); r0.setX86RegT(srcPhysId); goto _ExtendMovGpDQ; // Move Gp. MovGpD: m0.setSize(4); r0.setX86RegT(srcPhysId); return cc()->emit(X86Inst::kIdMov, m0, r0); MovGpQ: m0.setSize(8); r0.setX86RegT(srcPhysId); return cc()->emit(X86Inst::kIdMov, m0, r0); // Move Mm. MovMmD: m0.setSize(4); r0.setX86RegT(srcPhysId); return cc()->emit(X86Inst::kIdMovd, m0, r0); MovMmQ: m0.setSize(8); r0.setX86RegT(srcPhysId); return cc()->emit(X86Inst::kIdMovq, m0, r0); // Move XMM. MovXmmD: m0.setSize(4); r0.setX86RegT(srcPhysId); return cc()->emit(X86Inst::kIdMovss, m0, r0); MovXmmQ: m0.setSize(8); r0.setX86RegT(srcPhysId); return cc()->emit(X86Inst::kIdMovlps, m0, r0); } // ============================================================================ // [asmjit::X86RAPass - Register Management] // ============================================================================ #if defined(ASMJIT_DEBUG) template static ASMJIT_INLINE void X86RAPass_checkStateVars(X86RAPass* self) { X86RAState* state = self->getState(); VirtReg** sVars = state->getListByKind(C); uint32_t physId; uint32_t regMask; uint32_t regCount = self->_regCount.get(C); uint32_t occupied = state->_occupied.get(C); uint32_t modified = state->_modified.get(C); for (physId = 0, regMask = 1; physId < regCount; physId++, regMask <<= 1) { VirtReg* vreg = sVars[physId]; if (!vreg) { ASMJIT_ASSERT((occupied & regMask) == 0); ASMJIT_ASSERT((modified & regMask) == 0); } else { ASMJIT_ASSERT((occupied & regMask) != 0); ASMJIT_ASSERT((modified & regMask) == (static_cast(vreg->isModified()) << physId)); ASMJIT_ASSERT(vreg->getKind() == C); ASMJIT_ASSERT(vreg->getState() == VirtReg::kStateReg); ASMJIT_ASSERT(vreg->getPhysId() == physId); } } } void X86RAPass::_checkState() { X86RAPass_checkStateVars(this); X86RAPass_checkStateVars(this); X86RAPass_checkStateVars(this); } #else void X86RAPass::_checkState() {} #endif // ASMJIT_DEBUG // ============================================================================ // [asmjit::X86RAPass - State - Load] // ============================================================================ template static ASMJIT_INLINE void X86RAPass_loadStateVars(X86RAPass* self, X86RAState* src) { X86RAState* cur = self->getState(); VirtReg** cVars = cur->getListByKind(C); VirtReg** sVars = src->getListByKind(C); uint32_t physId; uint32_t modified = src->_modified.get(C); uint32_t regCount = self->_regCount.get(C); for (physId = 0; physId < regCount; 
physId++, modified >>= 1) { VirtReg* vreg = sVars[physId]; cVars[physId] = vreg; if (!vreg) continue; vreg->setState(VirtReg::kStateReg); vreg->setPhysId(physId); vreg->setModified(modified & 0x1); } } void X86RAPass::loadState(RAState* src_) { X86RAState* cur = getState(); X86RAState* src = static_cast(src_); VirtReg** vregs = _contextVd.getData(); uint32_t count = static_cast(_contextVd.getLength()); // Load allocated variables. X86RAPass_loadStateVars(this, src); X86RAPass_loadStateVars(this, src); X86RAPass_loadStateVars(this, src); // Load masks. cur->_occupied = src->_occupied; cur->_modified = src->_modified; // Load states of other variables and clear their 'Modified' flags. for (uint32_t i = 0; i < count; i++) { uint32_t vState = src->_cells[i].getState(); if (vState == VirtReg::kStateReg) continue; vregs[i]->setState(vState); vregs[i]->setPhysId(Globals::kInvalidRegId); vregs[i]->setModified(false); } ASMJIT_X86_CHECK_STATE } // ============================================================================ // [asmjit::X86RAPass - State - Save] // ============================================================================ RAState* X86RAPass::saveState() { VirtReg** vregs = _contextVd.getData(); uint32_t count = static_cast(_contextVd.getLength()); size_t size = Utils::alignTo( sizeof(X86RAState) + count * sizeof(X86StateCell), sizeof(void*)); X86RAState* cur = getState(); X86RAState* dst = _zone->allocT(size); if (!dst) return nullptr; // Store links. ::memcpy(dst->_list, cur->_list, X86RAState::kAllCount * sizeof(VirtReg*)); // Store masks. dst->_occupied = cur->_occupied; dst->_modified = cur->_modified; // Store cells. for (uint32_t i = 0; i < count; i++) { VirtReg* vreg = static_cast(vregs[i]); X86StateCell& cell = dst->_cells[i]; cell.reset(); cell.setState(vreg->getState()); } return dst; } // ============================================================================ // [asmjit::X86RAPass - State - Switch] // ============================================================================ template static ASMJIT_INLINE void X86RAPass_switchStateVars(X86RAPass* self, X86RAState* src) { X86RAState* dst = self->getState(); VirtReg** dVars = dst->getListByKind(C); VirtReg** sVars = src->getListByKind(C); X86StateCell* cells = src->_cells; uint32_t regCount = self->_regCount.get(C); bool didWork; do { didWork = false; for (uint32_t physId = 0, regMask = 0x1; physId < regCount; physId++, regMask <<= 1) { VirtReg* dVReg = dVars[physId]; VirtReg* sVd = sVars[physId]; if (dVReg == sVd) continue; if (dVReg) { const X86StateCell& cell = cells[dVReg->_raId]; if (cell.getState() != VirtReg::kStateReg) { if (cell.getState() == VirtReg::kStateMem) self->spill(dVReg); else self->unuse(dVReg); dVReg = nullptr; didWork = true; if (!sVd) continue; } } if (!dVReg && sVd) { _MoveOrLoad: if (sVd->getPhysId() != Globals::kInvalidRegId) self->move(sVd, physId); else self->load(sVd, physId); didWork = true; continue; } if (dVReg) { const X86StateCell& cell = cells[dVReg->_raId]; if (!sVd) { if (cell.getState() == VirtReg::kStateReg) continue; if (cell.getState() == VirtReg::kStateMem) self->spill(dVReg); else self->unuse(dVReg); didWork = true; continue; } else { if (cell.getState() == VirtReg::kStateReg) { if (dVReg->getPhysId() != Globals::kInvalidRegId && sVd->getPhysId() != Globals::kInvalidRegId) { if (C == X86Reg::kKindGp) { self->swapGp(dVReg, sVd); } else { self->spill(dVReg); self->move(sVd, physId); } didWork = true; continue; } else { didWork = true; continue; } } if (cell.getState() == 
VirtReg::kStateMem) self->spill(dVReg); else self->unuse(dVReg); goto _MoveOrLoad; } } } } while (didWork); uint32_t dModified = dst->_modified.get(C); uint32_t sModified = src->_modified.get(C); if (dModified != sModified) { for (uint32_t physId = 0, regMask = 0x1; physId < regCount; physId++, regMask <<= 1) { VirtReg* vreg = dVars[physId]; if (!vreg) continue; if ((dModified & regMask) && !(sModified & regMask)) { self->save(vreg); continue; } if (!(dModified & regMask) && (sModified & regMask)) { self->modify(vreg); continue; } } } } void X86RAPass::switchState(RAState* src_) { ASMJIT_ASSERT(src_ != nullptr); X86RAState* cur = getState(); X86RAState* src = static_cast(src_); // Ignore if both states are equal. if (cur == src) return; // Switch variables. X86RAPass_switchStateVars(this, src); X86RAPass_switchStateVars(this, src); X86RAPass_switchStateVars(this, src); // Calculate changed state. VirtReg** vregs = _contextVd.getData(); uint32_t count = static_cast(_contextVd.getLength()); X86StateCell* cells = src->_cells; for (uint32_t i = 0; i < count; i++) { VirtReg* vreg = static_cast(vregs[i]); const X86StateCell& cell = cells[i]; uint32_t vState = cell.getState(); if (vState != VirtReg::kStateReg) { vreg->setState(vState); vreg->setModified(false); } } ASMJIT_X86_CHECK_STATE } // ============================================================================ // [asmjit::X86RAPass - State - Intersect] // ============================================================================ // The algorithm is actually not so smart, but tries to find an intersection od // `a` and `b` and tries to move/alloc a variable into that location if it's // possible. It also finds out which variables will be spilled/unused by `a` // and `b` and performs that action here. It may improve the switch state code // in certain cases, but doesn't necessarily do the best job possible. template static ASMJIT_INLINE void X86RAPass_intersectStateVars(X86RAPass* self, X86RAState* a, X86RAState* b) { X86RAState* dst = self->getState(); VirtReg** dVars = dst->getListByKind(C); VirtReg** aVars = a->getListByKind(C); X86StateCell* aCells = a->_cells; X86StateCell* bCells = b->_cells; uint32_t regCount = self->_regCount.get(C); bool didWork; // Similar to `switchStateVars()`, we iterate over and over until there is // no work to be done. do { didWork = false; for (uint32_t physId = 0, regMask = 0x1; physId < regCount; physId++, regMask <<= 1) { VirtReg* dVReg = dVars[physId]; // Destination reg. VirtReg* aVReg = aVars[physId]; // State-a reg. 
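      // Nothing to do if the destination register already holds what state
      // `a` expects; otherwise spill/unuse/move below until both states agree.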
if (dVReg == aVReg) continue; if (dVReg) { const X86StateCell& aCell = aCells[dVReg->_raId]; const X86StateCell& bCell = bCells[dVReg->_raId]; if (aCell.getState() != VirtReg::kStateReg && bCell.getState() != VirtReg::kStateReg) { if (aCell.getState() == VirtReg::kStateMem || bCell.getState() == VirtReg::kStateMem) self->spill(dVReg); else self->unuse(dVReg); dVReg = nullptr; didWork = true; if (!aVReg) continue; } } if (!dVReg && aVReg) { if (aVReg->getPhysId() != Globals::kInvalidRegId) self->move(aVReg, physId); else self->load(aVReg, physId); didWork = true; continue; } if (dVReg) { const X86StateCell& aCell = aCells[dVReg->_raId]; const X86StateCell& bCell = bCells[dVReg->_raId]; if (!aVReg) { if (aCell.getState() == VirtReg::kStateReg || bCell.getState() == VirtReg::kStateReg) continue; if (aCell.getState() == VirtReg::kStateMem || bCell.getState() == VirtReg::kStateMem) self->spill(dVReg); else self->unuse(dVReg); didWork = true; continue; } else if (C == X86Reg::kKindGp) { if (aCell.getState() == VirtReg::kStateReg) { if (dVReg->getPhysId() != Globals::kInvalidRegId && aVReg->getPhysId() != Globals::kInvalidRegId) { self->swapGp(dVReg, aVReg); didWork = true; continue; } } } } } } while (didWork); uint32_t dModified = dst->_modified.get(C); uint32_t aModified = a->_modified.get(C); if (dModified != aModified) { for (uint32_t physId = 0, regMask = 0x1; physId < regCount; physId++, regMask <<= 1) { VirtReg* vreg = dVars[physId]; if (!vreg) continue; const X86StateCell& aCell = aCells[vreg->_raId]; if ((dModified & regMask) && !(aModified & regMask) && aCell.getState() == VirtReg::kStateReg) self->save(vreg); } } } void X86RAPass::intersectStates(RAState* a_, RAState* b_) { X86RAState* a = static_cast(a_); X86RAState* b = static_cast(b_); ASMJIT_ASSERT(a != nullptr); ASMJIT_ASSERT(b != nullptr); X86RAPass_intersectStateVars(this, a, b); X86RAPass_intersectStateVars(this, a, b); X86RAPass_intersectStateVars(this, a, b); ASMJIT_X86_CHECK_STATE } // ============================================================================ // [asmjit::X86RAPass - GetJccFlow / GetOppositeJccFlow] // ============================================================================ //! \internal static ASMJIT_INLINE CBNode* X86RAPass_getJccFlow(CBJump* jNode) { if (jNode->isTaken()) return jNode->getTarget(); else return jNode->getNext(); } //! \internal static ASMJIT_INLINE CBNode* X86RAPass_getOppositeJccFlow(CBJump* jNode) { if (jNode->isTaken()) return jNode->getNext(); else return jNode->getTarget(); } // ============================================================================ // [asmjit::X86RAPass - SingleVarInst] // ============================================================================ //! \internal static void X86RAPass_prepareSingleVarInst(uint32_t instId, TiedReg* tr) { switch (instId) { // - andn reg, reg ; Set all bits in reg to 0. // - xor/pxor reg, reg ; Set all bits in reg to 0. // - sub/psub reg, reg ; Set all bits in reg to 0. // - pcmpgt reg, reg ; Set all bits in reg to 0. // - pcmpeq reg, reg ; Set all bits in reg to 1. 
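    // Clearing TiedReg::kRReg below tells the allocator that, for example,
    //
    //   xor eax, eax        ; eax is effectively write-only here
    //   pcmpeqd xmm0, xmm0  ; xmm0 is effectively write-only here
    //
    // does not consume the previous value, so the virtual register does not
    // have to be allocated or loaded before the instruction.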
    case X86Inst::kIdPandn     :
    case X86Inst::kIdXor       : case X86Inst::kIdXorpd     : case X86Inst::kIdXorps     : case X86Inst::kIdPxor      :
    case X86Inst::kIdSub       :
    case X86Inst::kIdPsubb     : case X86Inst::kIdPsubw     : case X86Inst::kIdPsubd     : case X86Inst::kIdPsubq     :
    case X86Inst::kIdPsubsb    : case X86Inst::kIdPsubsw    : case X86Inst::kIdPsubusb   : case X86Inst::kIdPsubusw   :
    case X86Inst::kIdPcmpeqb   : case X86Inst::kIdPcmpeqw   : case X86Inst::kIdPcmpeqd   : case X86Inst::kIdPcmpeqq   :
    case X86Inst::kIdPcmpgtb   : case X86Inst::kIdPcmpgtw   : case X86Inst::kIdPcmpgtd   : case X86Inst::kIdPcmpgtq   :
      tr->flags &= ~TiedReg::kRReg;
      break;

    // - and      reg, reg ; Nop.
    // - or       reg, reg ; Nop.
    // - xchg     reg, reg ; Nop.
    case X86Inst::kIdAnd       : case X86Inst::kIdAndpd     : case X86Inst::kIdAndps     : case X86Inst::kIdPand      :
    case X86Inst::kIdOr        : case X86Inst::kIdOrpd      : case X86Inst::kIdOrps      : case X86Inst::kIdPor       :
    case X86Inst::kIdXchg      :
      tr->flags &= ~TiedReg::kWReg;
      break;
  }
}

// ============================================================================
// [asmjit::X86RAPass - Helpers]
// ============================================================================

static void X86RAPass_assignStackArgsRegId(X86RAPass* self, CCFunc* func) {
  const FuncDetail& fd = func->getDetail();
  FuncFrameInfo& ffi = func->getFrameInfo();

  // Select a register that will contain the base address of function
  // arguments and the return address. The algorithm prefers registers that
  // are either not preserved by the calling convention or already saved; if
  // that fails it picks any other register and adds it to `_savedRegs`.
  uint32_t stackArgsRegId;
  if (ffi.hasPreservedFP()) {
    stackArgsRegId = X86Gp::kIdBp;
  }
  else {
    // Registers used to pass arguments as defined by the calling convention.
    uint32_t passed = fd.getPassedRegs(X86Reg::kKindGp);

    // Registers actually used to pass arguments of this particular function
    // signature, with ESP|RSP included as this register can't be used in
    // general to hold anything but the stack pointer.
    uint32_t used = fd.getUsedRegs(X86Reg::kKindGp) | Utils::mask(X86Gp::kIdSp);

    // First try a register that the calling convention defines for passing a
    // function argument, but that is not used by this function. This will
    // most likely fail in 32-bit mode, but there is a high chance that it
    // will succeed in 64-bit mode if the function doesn't use that many
    // arguments.
    uint32_t regs = passed & ~used;

    // Pick any other register if that didn't work out.
    if (!regs) regs = ~passed & ~used;

    stackArgsRegId = Utils::findFirstBit(regs);
    ASMJIT_ASSERT(stackArgsRegId < self->cc()->getGpCount());
  }

  ffi.setStackArgsRegId(stackArgsRegId);
}

// ============================================================================
// [asmjit::X86RAPass - SArg Insertion]
// ============================================================================

struct SArgData {
  VirtReg* sVd;
  VirtReg* cVd;
  CCPushArg* sArg;
  uint32_t aType;
};

static ASMJIT_INLINE bool X86RAPass_mustConvertSArg(X86RAPass* self, uint32_t dstTypeId, uint32_t srcTypeId) noexcept {
  uint32_t dstFloatSize = dstTypeId == TypeId::kF32 ? 4 :
                          dstTypeId == TypeId::kF64 ? 8 : 0;

  uint32_t srcFloatSize = srcTypeId == TypeId::kF32   ? 4 :
                          srcTypeId == TypeId::kF32x1 ? 4 :
                          srcTypeId == TypeId::kF64   ? 8 :
                          srcTypeId == TypeId::kF64x1 ?
8 : 0; if (dstFloatSize && srcFloatSize) return dstFloatSize != srcFloatSize; else return false; } static ASMJIT_INLINE uint32_t X86RAPass_typeOfConvertedSArg(X86RAPass* self, uint32_t dstTypeId, uint32_t srcTypeId) noexcept { ASMJIT_ASSERT(X86RAPass_mustConvertSArg(self, dstTypeId, srcTypeId)); return dstTypeId == TypeId::kF32 ? TypeId::kF32x1 : TypeId::kF64x1; } static ASMJIT_INLINE Error X86RAPass_insertPushArg( X86RAPass* self, CCFuncCall* call, VirtReg* sReg, const uint32_t* gaRegs, const FuncDetail::Value& arg, uint32_t argIndex, SArgData* sArgList, uint32_t& sArgCount) { X86Compiler* cc = self->cc(); uint32_t i; uint32_t dstTypeId = arg.getTypeId(); uint32_t srcTypeId = sReg->getTypeId(); // First locate or create sArgBase. for (i = 0; i < sArgCount; i++) if (sArgList[i].sVd == sReg && !sArgList[i].cVd) break; SArgData* sArgData = &sArgList[i]; if (i == sArgCount) { sArgData->sVd = sReg; sArgData->cVd = nullptr; sArgData->sArg = nullptr; sArgData->aType = 0xFF; sArgCount++; } uint32_t srcRegKind = sReg->getKind(); // Only handles float<->double conversion. if (X86RAPass_mustConvertSArg(self, dstTypeId, srcTypeId)) { uint32_t cvtTypeId = X86RAPass_typeOfConvertedSArg(self, dstTypeId, srcTypeId); uint32_t cvtRegKind = X86Reg::kKindVec; while (++i < sArgCount) { sArgData = &sArgList[i]; if (sArgData->sVd != sReg) break; if (sArgData->cVd->getTypeId() != cvtTypeId || sArgData->aType != dstTypeId) continue; sArgData->sArg->_args |= Utils::mask(argIndex); return kErrorOk; } VirtReg* cReg = cc->newVirtReg(dstTypeId, x86OpData.archRegs.regInfo[X86Reg::kRegXmm].getSignature(), nullptr); if (!cReg) return DebugUtils::errored(kErrorNoHeapMemory); CCPushArg* sArg = cc->newNodeT(call, sReg, cReg); if (!sArg) return DebugUtils::errored(kErrorNoHeapMemory); X86RAData* raData = self->newRAData(2); if (!raData) return DebugUtils::errored(kErrorNoHeapMemory); ASMJIT_PROPAGATE(self->assignRAId(cReg)); ASMJIT_PROPAGATE(self->assignRAId(sReg)); raData->tiedTotal = 2; raData->tiedCount.reset(); raData->tiedCount.add(srcRegKind); raData->tiedCount.add(cvtRegKind); raData->tiedIndex.reset(); raData->inRegs.reset(); raData->outRegs.reset(); raData->clobberedRegs.reset(); if (srcRegKind <= cvtRegKind) { raData->tiedArray[0].init(sReg, TiedReg::kRReg, 0, gaRegs[srcRegKind]); raData->tiedArray[1].init(cReg, TiedReg::kWReg, 0, gaRegs[cvtRegKind]); raData->tiedIndex.set(cvtRegKind, srcRegKind != cvtRegKind); } else { raData->tiedArray[0].init(cReg, TiedReg::kWReg, 0, gaRegs[cvtRegKind]); raData->tiedArray[1].init(sReg, TiedReg::kRReg, 0, gaRegs[srcRegKind]); raData->tiedIndex.set(srcRegKind, 1); } sArg->setPassData(raData); sArg->_args |= Utils::mask(argIndex); cc->addBefore(sArg, call); ::memmove(sArgData + 1, sArgData, (sArgCount - i) * sizeof(SArgData)); sArgData->sVd = sReg; sArgData->cVd = cReg; sArgData->sArg = sArg; sArgData->aType = dstTypeId; sArgCount++; return kErrorOk; } else { CCPushArg* sArg = sArgData->sArg; ASMJIT_PROPAGATE(self->assignRAId(sReg)); if (!sArg) { sArg = cc->newNodeT(call, sReg, (VirtReg*)nullptr); if (!sArg) return DebugUtils::errored(kErrorNoHeapMemory); X86RAData* raData = self->newRAData(1); if (!raData) return DebugUtils::errored(kErrorNoHeapMemory); raData->tiedTotal = 1; raData->tiedIndex.reset(); raData->tiedCount.reset(); raData->tiedCount.add(srcRegKind); raData->inRegs.reset(); raData->outRegs.reset(); raData->clobberedRegs.reset(); raData->tiedArray[0].init(sReg, TiedReg::kRReg, 0, gaRegs[srcRegKind]); sArg->setPassData(raData); sArgData->sArg = sArg; cc->addBefore(sArg, 
call); } sArg->_args |= Utils::mask(argIndex); return kErrorOk; } } // ============================================================================ // [asmjit::X86RAPass - Fetch] // ============================================================================ //! \internal //! //! Prepare the given function `func`. //! //! For each node: //! - Create and assign groupId and position. //! - Collect all variables and merge them to vaList. Error X86RAPass::fetch() { uint32_t archType = cc()->getArchType(); CCFunc* func = getFunc(); CBNode* node_ = func; CBNode* next = nullptr; CBNode* stop = getStop(); TiedReg agTmp[80]; SArgData sArgList[80]; uint32_t position = 0; ZoneList::Link* jLink = nullptr; // Global allocable registers. uint32_t* gaRegs = _gaRegs; if (func->getFrameInfo().hasPreservedFP()) gaRegs[X86Reg::kKindGp] &= ~Utils::mask(X86Gp::kIdBp); // Allowed index registers (GP/XMM/YMM). const uint32_t indexMask = Utils::bits(_regCount.getGp()) & ~(Utils::mask(4)); // -------------------------------------------------------------------------- // [VI Macros] // -------------------------------------------------------------------------- #define RA_POPULATE(NODE) \ do { \ X86RAData* raData = newRAData(0); \ if (!raData) goto NoMem; \ NODE->setPassData(raData); \ } while (0) #define RA_DECLARE() \ do { \ X86RegCount tiedCount; \ X86RegCount tiedIndex; \ uint32_t tiedTotal = 0; \ \ X86RegMask inRegs; \ X86RegMask outRegs; \ X86RegMask clobberedRegs; \ \ tiedCount.reset(); \ inRegs.reset(); \ outRegs.reset(); \ clobberedRegs.reset() #define RA_FINALIZE(NODE) \ { \ X86RAData* raData = newRAData(tiedTotal); \ if (!raData) goto NoMem; \ \ tiedIndex.indexFromRegCount(tiedCount); \ raData->tiedCount = tiedCount; \ raData->tiedIndex = tiedIndex; \ \ raData->inRegs = inRegs; \ raData->outRegs = outRegs; \ raData->clobberedRegs = clobberedRegs; \ \ TiedReg* tied = agTmp; \ while (tiedTotal) { \ VirtReg* vreg = tied->vreg; \ \ uint32_t _kind = vreg->getKind(); \ uint32_t _index = tiedIndex.get(_kind); \ \ tiedIndex.add(_kind); \ if (tied->inRegs) \ tied->allocableRegs = tied->inRegs; \ else if (tied->outPhysId != Globals::kInvalidRegId) \ tied->allocableRegs = Utils::mask(tied->outPhysId); \ else \ tied->allocableRegs &= ~inRegs.get(_kind); \ \ vreg->_tied = nullptr; \ raData->setTiedAt(_index, *tied); \ \ tied++; \ tiedTotal--; \ } \ NODE->setPassData(raData); \ } \ } while (0) #define RA_INSERT(REG, TIED, FLAGS, NEW_ALLOCABLE) \ do { \ ASMJIT_ASSERT(REG->_tied == nullptr); \ TIED = &agTmp[tiedTotal++]; \ TIED->init(REG, FLAGS, 0, NEW_ALLOCABLE); \ TIED->refCount++; \ REG->_tied = TIED; \ \ if (assignRAId(REG) != kErrorOk) goto NoMem; \ tiedCount.add(REG->getKind()); \ } while (0) #define RA_MERGE(REG, TIED, FLAGS, NEW_ALLOCABLE) \ do { \ TIED = REG->_tied; \ \ if (!TIED) { \ TIED = &agTmp[tiedTotal++]; \ TIED->init(REG, 0, 0, NEW_ALLOCABLE); \ REG->_tied = TIED; \ \ if (assignRAId(REG) != kErrorOk) goto NoMem; \ tiedCount.add(REG->getKind()); \ } \ \ TIED->flags |= FLAGS; \ TIED->refCount++; \ } while (0) // -------------------------------------------------------------------------- // [Loop] // -------------------------------------------------------------------------- do { _Do: while (node_->hasPassData()) { _NextGroup: if (!jLink) jLink = _jccList.getFirst(); else jLink = jLink->getNext(); if (!jLink) goto _Done; node_ = X86RAPass_getOppositeJccFlow(static_cast(jLink->getValue())); } position++; next = node_->getNext(); node_->setPosition(position); switch (node_->getType()) { // 
---------------------------------------------------------------------- // [Align/Embed] // ---------------------------------------------------------------------- case CBNode::kNodeAlign: case CBNode::kNodeData: default: RA_POPULATE(node_); break; // ---------------------------------------------------------------------- // [Hint] // ---------------------------------------------------------------------- case CBNode::kNodeHint: { CCHint* node = static_cast(node_); RA_DECLARE(); if (node->getHint() == CCHint::kHintAlloc) { uint32_t remain[Globals::kMaxVRegKinds]; CCHint* cur = node; remain[X86Reg::kKindGp ] = _regCount.getGp() - 1 - func->getFrameInfo().hasPreservedFP(); remain[X86Reg::kKindMm ] = _regCount.getMm(); remain[X86Reg::kKindK ] = _regCount.getK(); remain[X86Reg::kKindVec] = _regCount.getVec(); // Merge as many alloc-hints as possible. for (;;) { VirtReg* vreg = static_cast(cur->getVReg()); TiedReg* tied = vreg->_tied; uint32_t kind = vreg->getKind(); uint32_t physId = cur->getValue(); uint32_t regMask = 0; // We handle both kInvalidReg and kInvalidValue. if (physId < Globals::kInvalidRegId) regMask = Utils::mask(physId); if (!tied) { if (inRegs.has(kind, regMask) || remain[kind] == 0) break; RA_INSERT(vreg, tied, TiedReg::kRReg, gaRegs[kind]); if (regMask != 0) { inRegs.xor_(kind, regMask); tied->inRegs = regMask; tied->setInPhysId(physId); } remain[kind]--; } else if (regMask != 0) { if (inRegs.has(kind, regMask) && tied->inRegs != regMask) break; inRegs.xor_(kind, tied->inRegs | regMask); tied->inRegs = regMask; tied->setInPhysId(physId); } if (cur != node) cc()->removeNode(cur); cur = static_cast(node->getNext()); if (!cur || cur->getType() != CBNode::kNodeHint || cur->getHint() != CCHint::kHintAlloc) break; } next = node->getNext(); } else { VirtReg* vreg = static_cast(node->getVReg()); TiedReg* tied; uint32_t flags = 0; switch (node->getHint()) { case CCHint::kHintSpill : flags = TiedReg::kRMem | TiedReg::kSpill; break; case CCHint::kHintSave : flags = TiedReg::kRMem ; break; case CCHint::kHintSaveAndUnuse: flags = TiedReg::kRMem | TiedReg::kUnuse; break; case CCHint::kHintUnuse : flags = TiedReg::kUnuse ; break; } RA_INSERT(vreg, tied, flags, 0); } RA_FINALIZE(node_); break; } // ---------------------------------------------------------------------- // [Label] // ---------------------------------------------------------------------- case CBNode::kNodeLabel: { RA_POPULATE(node_); if (node_ == func->getExitNode()) { ASMJIT_PROPAGATE(addReturningNode(node_)); goto _NextGroup; } break; } // ---------------------------------------------------------------------- // [Inst] // ---------------------------------------------------------------------- case CBNode::kNodeInst: { CBInst* node = static_cast(node_); uint32_t instId = node->getInstId(); uint32_t flags = node->getFlags(); uint32_t options = node->getOptions(); uint32_t gpAllowedMask = 0xFFFFFFFF; Operand* opArray = node->getOpArray(); uint32_t opCount = node->getOpCount(); RA_DECLARE(); if (opCount) { const X86Inst& inst = X86Inst::getInst(instId); const X86Inst::CommonData& commonData = inst.getCommonData(); const X86SpecialInst* special = nullptr; // Collect instruction flags and merge all 'TiedReg's. 
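          // Each virtual register referenced by the instruction gets exactly
          // one TiedReg entry; RA_MERGE reuses an existing entry when the
          // same register appears in more than one operand.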
if (commonData.isFpu()) flags |= CBNode::kFlagIsFp; if (commonData.hasFixedRM() && (special = X86SpecialInst_get(instId, opArray, opCount)) != nullptr) flags |= CBNode::kFlagIsSpecial; for (uint32_t i = 0; i < opCount; i++) { Operand* op = &opArray[i]; VirtReg* vreg; TiedReg* tied; if (op->isVirtReg()) { vreg = cc()->getVirtRegById(op->getId()); if (vreg->isFixed()) continue; RA_MERGE(vreg, tied, 0, gaRegs[vreg->getKind()] & gpAllowedMask); if (static_cast(op)->isGpb()) { tied->flags |= static_cast(op)->isGpbLo() ? TiedReg::kX86GpbLo : TiedReg::kX86GpbHi; if (archType == ArchInfo::kTypeX86) { // If a byte register is accessed in 32-bit mode we have to limit // all allocable registers for that variable to eax/ebx/ecx/edx. // Other variables are not affected. tied->allocableRegs &= 0x0F; } else { // It's fine if lo-byte register is accessed in 64-bit mode; // however, hi-byte has to be checked and if it's used all // registers (GP/XMM) could be only allocated in the lower eight // half. To do that, we patch 'allocableRegs' of all variables // we collected until now and change the allocable restriction // for variables that come after. if (static_cast(op)->isGpbHi()) { tied->allocableRegs &= 0x0F; if (gpAllowedMask != 0xFF) { for (uint32_t j = 0; j < i; j++) agTmp[j].allocableRegs &= (agTmp[j].flags & TiedReg::kX86GpbHi) ? 0x0F : 0xFF; gpAllowedMask = 0xFF; } } } } if (special) { uint32_t inReg = special[i].inReg; uint32_t outReg = special[i].outReg; uint32_t c; if (static_cast(op)->isGp()) c = X86Reg::kKindGp; else c = X86Reg::kKindVec; if (inReg != Globals::kInvalidRegId) { uint32_t mask = Utils::mask(inReg); inRegs.or_(c, mask); tied->inRegs |= mask; } if (outReg != Globals::kInvalidRegId) { uint32_t mask = Utils::mask(outReg); outRegs.or_(c, mask); tied->setOutPhysId(outReg); } tied->flags |= special[i].flags; } else { uint32_t inFlags = TiedReg::kRReg; uint32_t outFlags = TiedReg::kWReg; uint32_t combinedFlags; if (i == 0) { // Read/Write is usually the combination of the first operand. combinedFlags = inFlags | outFlags; if (node->getOptions() & CodeEmitter::kOptionOverwrite) { // Manually forcing write-only. combinedFlags = outFlags; } else if (commonData.isUseW()) { // Write-only instruction. uint32_t movSize = commonData.getWriteSize(); uint32_t regSize = vreg->getSize(); // Exception - If the source operand is a memory location // promote move size into 16 bytes. if (opArray[1].isMem() && inst.getOperationData().isMovSsSd()) movSize = 16; if (static_cast(op)->isGp()) { uint32_t opSize = static_cast(op)->getSize(); // Move size is zero in case that it should be determined // from the destination register. if (movSize == 0) movSize = opSize; // Handle the case that a 32-bit operation in 64-bit mode // always clears the rest of the destination register and // the case that move size is actually greater than or // equal to the size of the variable. if (movSize >= 4 || movSize >= regSize) combinedFlags = outFlags; } else if (movSize == 0 || movSize >= regSize) { // If move size is greater than or equal to the size of // the variable there is nothing to do, because the move // will overwrite the variable in all cases. combinedFlags = outFlags; } } else if (commonData.isUseR()) { // Comparison/Test instructions don't modify any operand. combinedFlags = inFlags; } else if (instId == X86Inst::kIdImul && opCount == 3) { // Imul. combinedFlags = outFlags; } } else { // Read-Only is usualy the combination of the second/third/fourth operands. 
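                // (Instructions that also write their source operand, like
                // xchg/xadd and the second operand of 3-operand imul, get the
                // write flag added back right below.)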
combinedFlags = inFlags; // Idiv is a special instruction, never handled here. ASMJIT_ASSERT(instId != X86Inst::kIdIdiv); // Xchg/Xadd/Imul. if (commonData.isUseXX() || (instId == X86Inst::kIdImul && opCount == 3 && i == 1)) combinedFlags = inFlags | outFlags; } tied->flags |= combinedFlags; } } else if (op->isMem()) { X86Mem* m = static_cast(op); node->setMemOpIndex(i); uint32_t specBase = special ? uint32_t(special[i].inReg) : uint32_t(Globals::kInvalidRegId); if (m->hasBaseReg()) { uint32_t id = m->getBaseId(); if (cc()->isVirtRegValid(id)) { vreg = cc()->getVirtRegById(id); if (!vreg->isStack() && !vreg->isFixed()) { RA_MERGE(vreg, tied, 0, gaRegs[vreg->getKind()] & gpAllowedMask); if (m->isRegHome()) { uint32_t inFlags = TiedReg::kRMem; uint32_t outFlags = TiedReg::kWMem; uint32_t combinedFlags; if (i == 0) { // Default for the first operand. combinedFlags = inFlags | outFlags; if (commonData.isUseW()) { // Move to memory - setting the right flags is important // as if it's just move to the register. It's just a bit // simpler as there are no special cases. uint32_t movSize = std::max(commonData.getWriteSize(), m->getSize()); uint32_t regSize = vreg->getSize(); if (movSize >= regSize) combinedFlags = outFlags; } else if (commonData.isUseR()) { // Comparison/Test instructions don't modify any operand. combinedFlags = inFlags; } } else { // Default for the second operand. combinedFlags = inFlags; // Handle Xchg instruction (modifies both operands). if (commonData.isUseXX()) combinedFlags = inFlags | outFlags; } tied->flags |= combinedFlags; } else { if (specBase != Globals::kInvalidRegId) { uint32_t mask = Utils::mask(specBase); inRegs.or_(vreg->getKind(), mask); outRegs.or_(vreg->getKind(), mask); tied->inRegs |= mask; tied->setOutPhysId(specBase); tied->flags |= special[i].flags; } else { tied->flags |= TiedReg::kRReg; } } } } } if (m->hasIndexReg()) { uint32_t id = m->getIndexId(); if (cc()->isVirtRegValid(id)) { // Restrict allocation to all registers except ESP|RSP. vreg = cc()->getVirtRegById(m->getIndexId()); if (!vreg->isFixed()) { // TODO: AVX vector operands support. RA_MERGE(vreg, tied, 0, gaRegs[X86Reg::kKindGp] & gpAllowedMask); tied->allocableRegs &= indexMask; tied->flags |= TiedReg::kRReg; } } } } } node->setFlags(flags); if (tiedTotal) { // Handle instructions which result in zeros/ones or nop if used with the // same destination and source operand. if (tiedTotal == 1 && opCount >= 2 && opArray[0].isVirtReg() && opArray[1].isVirtReg() && !node->hasMemOp()) X86RAPass_prepareSingleVarInst(instId, &agTmp[0]); } // Turn on AVX if the instruction operates on XMM|YMM|ZMM registers and uses VEX|EVEX prefix. if (tiedCount.getVec() && commonData.hasFlag(X86Inst::kFlagVex | X86Inst::kFlagEvex)) _avxEnabled = true; } const RegOnly& extraReg = node->getExtraReg(); if (extraReg.isValid()) { uint32_t id = extraReg.getId(); if (cc()->isVirtRegValid(id)) { VirtReg* vreg = cc()->getVirtRegById(id); TiedReg* tied; RA_MERGE(vreg, tied, 0, gaRegs[vreg->getKind()] & gpAllowedMask); if (options & (X86Inst::kOptionRep | X86Inst::kOptionRepnz)) { tied->allocableRegs = Utils::mask(X86Gp::kIdCx); tied->flags |= TiedReg::kXReg; } else { tied->flags |= TiedReg::kRReg; } } } RA_FINALIZE(node_); // Handle conditional/unconditional jump. if (node->isJmpOrJcc()) { CBJump* jNode = static_cast(node); CBLabel* jTarget = jNode->getTarget(); // If this jump is unconditional we put next node to unreachable node // list so we can eliminate possible dead code. 
We have to do this in // all cases since we are unable to translate without fetch() step. // // We also advance our node pointer to the target node to simulate // natural flow of the function. if (jNode->isJmp()) { if (next && !next->hasPassData()) ASMJIT_PROPAGATE(addUnreachableNode(next)); // Jump not followed. if (!jTarget) { ASMJIT_PROPAGATE(addReturningNode(jNode)); goto _NextGroup; } node_ = jTarget; goto _Do; } else { // Jump not followed. if (!jTarget) break; if (jTarget->hasPassData()) { uint32_t jTargetPosition = jTarget->getPosition(); // Update CBNode::kFlagIsTaken to true if this is a conditional // backward jump. This behavior can be overridden by using // `X86Inst::kOptionTaken` when the instruction is created. if (!jNode->isTaken() && opCount == 1 && jTargetPosition <= position) { jNode->_flags |= CBNode::kFlagIsTaken; } } else if (next->hasPassData()) { node_ = jTarget; goto _Do; } else { ASMJIT_PROPAGATE(addJccNode(jNode)); node_ = X86RAPass_getJccFlow(jNode); goto _Do; } } } break; } // ---------------------------------------------------------------------- // [Func-Entry] // ---------------------------------------------------------------------- case CBNode::kNodeFunc: { ASMJIT_ASSERT(node_ == func); X86RAPass_assignStackArgsRegId(this, func); FuncDetail& fd = func->getDetail(); TiedReg* tied; RA_DECLARE(); cc()->setCursor(node_); X86Gp saReg; uint32_t argCount = fd.getArgCount(); for (uint32_t i = 0; i < argCount; i++) { const FuncDetail::Value& arg = fd.getArg(i); VirtReg* vReg = func->getArg(i); if (!vReg) continue; // Overlapped function arguments. if (vReg->_tied) return DebugUtils::errored(kErrorOverlappedRegs); uint32_t aKind = X86Reg::kindOf(arg.getRegType()); uint32_t vKind = vReg->getKind(); if (arg.byReg()) { if (aKind == vKind) { RA_INSERT(vReg, tied, TiedReg::kWReg, 0); tied->setOutPhysId(arg.getRegId()); } else { X86Reg rTmp = cc()->newReg(arg.getTypeId(), "arg%u", i); VirtReg* vTmp = cc()->getVirtReg(rTmp); RA_INSERT(vTmp, tied, TiedReg::kWReg, 0); tied->setOutPhysId(arg.getRegId()); X86Reg dstReg(X86Reg::fromSignature(vReg->getSignature(), vReg->getId())); X86Reg srcReg(X86Reg::fromSignature(vTmp->getSignature(), vTmp->getId())); // Emit conversion after the prolog. return X86Internal::emitArgMove(reinterpret_cast(cc()), dstReg, vReg->getTypeId(), srcReg, vTmp->getTypeId(), _avxEnabled); } } else { // Instead of complicating the prolog allocation we create a virtual // register that holds the base address to all arguments passed by // stack and then insert nodes that copy these arguments to registers. if (!saReg.isValid()) { saReg = cc()->newGpz("__args"); if (!saReg.isValid()) goto NoMem; VirtReg* saBase = cc()->getVirtReg(saReg); RA_INSERT(saBase, tied, TiedReg::kWReg, 0); if (func->getFrameInfo().hasPreservedFP()) saBase->_isFixed = true; tied->setOutPhysId(func->getFrameInfo().getStackArgsRegId()); } // Argument passed by stack is handled after the prolog. X86Gp aReg = X86Gp::fromSignature(vReg->getSignature(), vReg->getId()); X86Mem aMem = x86::ptr(saReg, arg.getStackOffset()); aMem.setArgHome(); ASMJIT_PROPAGATE( X86Internal::emitArgMove(reinterpret_cast(cc()), aReg, vReg->getTypeId(), aMem, arg.getTypeId(), _avxEnabled)); } } // If saReg is not needed, clear it also from FuncFrameInfo. 
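      // (saReg is created lazily above, so it stays invalid when no argument
      // is passed on the stack.)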
if (!saReg.isValid()) func->getFrameInfo().setStackArgsRegId(Globals::kInvalidRegId); RA_FINALIZE(node_); next = node_->getNext(); break; } // ---------------------------------------------------------------------- // [End] // ---------------------------------------------------------------------- case CBNode::kNodeSentinel: { RA_POPULATE(node_); ASMJIT_PROPAGATE(addReturningNode(node_)); goto _NextGroup; } // ---------------------------------------------------------------------- // [Func-Exit] // ---------------------------------------------------------------------- case CBNode::kNodeFuncExit: { CCFuncRet* node = static_cast(node_); ASMJIT_PROPAGATE(addReturningNode(node)); FuncDetail& fd = func->getDetail(); RA_DECLARE(); if (fd.hasRet()) { const FuncDetail::Value& ret = fd.getRet(0); uint32_t retKind = X86Reg::kindOf(ret.getRegType()); for (uint32_t i = 0; i < 2; i++) { Operand_* op = &node->_ret[i]; if (op->isVirtReg()) { VirtReg* vreg = cc()->getVirtRegById(op->getId()); TiedReg* tied; RA_MERGE(vreg, tied, 0, 0); if (retKind == vreg->getKind()) { tied->flags |= TiedReg::kRReg; tied->inRegs = Utils::mask(ret.getRegId()); inRegs.or_(retKind, tied->inRegs); } else if (retKind == X86Reg::kKindFp) { uint32_t fldFlag = ret.getTypeId() == TypeId::kF32 ? TiedReg::kX86Fld4 : TiedReg::kX86Fld8; tied->flags |= TiedReg::kRMem | fldFlag; } else { // TODO: Fix possible other return type conversions. ASMJIT_NOT_REACHED(); } } } } RA_FINALIZE(node_); if (!next->hasPassData()) ASMJIT_PROPAGATE(addUnreachableNode(next)); goto _NextGroup; } // ---------------------------------------------------------------------- // [Func-Call] // ---------------------------------------------------------------------- case CBNode::kNodeFuncCall: { CCFuncCall* node = static_cast(node_); FuncDetail& fd = node->getDetail(); Operand_* target = node->_opArray; Operand_* args = node->_args; Operand_* rets = node->_ret; func->getFrameInfo().enableCalls(); func->getFrameInfo().mergeCallFrameSize(fd.getArgStackSize()); // TODO: Each function frame should also define its stack arguments' alignment. // func->getFrameInfo().mergeCallFrameAlignment(); uint32_t i; uint32_t argCount = fd.getArgCount(); uint32_t sArgCount = 0; uint32_t gpAllocableMask = gaRegs[X86Reg::kKindGp] & ~node->getDetail().getUsedRegs(X86Reg::kKindGp); VirtReg* vreg; TiedReg* tied; RA_DECLARE(); // Function-call operand. if (target->isVirtReg()) { vreg = cc()->getVirtRegById(target->getId()); RA_MERGE(vreg, tied, 0, 0); tied->flags |= TiedReg::kRReg | TiedReg::kRCall; if (tied->inRegs == 0) tied->allocableRegs |= gpAllocableMask; } else if (target->isMem()) { X86Mem* m = static_cast(target); if (m->hasBaseReg() && Operand::isPackedId(m->getBaseId())) { vreg = cc()->getVirtRegById(m->getBaseId()); if (!vreg->isStack()) { RA_MERGE(vreg, tied, 0, 0); if (m->isRegHome()) { tied->flags |= TiedReg::kRMem | TiedReg::kRCall; } else { tied->flags |= TiedReg::kRReg | TiedReg::kRCall; if (tied->inRegs == 0) tied->allocableRegs |= gpAllocableMask; } } } if (m->hasIndexReg() && Operand::isPackedId(m->getIndexId())) { // Restrict allocation to all registers except ESP/RSP. vreg = cc()->getVirtRegById(m->getIndexId()); RA_MERGE(vreg, tied, 0, 0); tied->flags |= TiedReg::kRReg | TiedReg::kRCall; if ((tied->inRegs & ~indexMask) == 0) tied->allocableRegs &= gpAllocableMask & indexMask; } } // Function-call arguments. 
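        // Arguments passed in registers become TiedReg constraints on the
        // call node itself; arguments passed on the stack are split into
        // separate CCPushArg nodes by X86RAPass_insertPushArg() so the value
        // can be stored as soon as it is ready (see the comment below).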
for (i = 0; i < argCount; i++) { Operand_* op = &args[i]; if (!op->isVirtReg()) continue; vreg = cc()->getVirtRegById(op->getId()); const FuncDetail::Value& arg = fd.getArg(i); if (arg.byReg()) { RA_MERGE(vreg, tied, 0, 0); uint32_t argClass = X86Reg::kindOf(arg.getRegType()); if (vreg->getKind() == argClass) { tied->inRegs |= Utils::mask(arg.getRegId()); tied->flags |= TiedReg::kRReg | TiedReg::kRFunc; } else { // TODO: Function-call argument conversion. } } // If this is a stack-based argument we insert CCPushArg instead of // using TiedReg. It improves the code, because the argument can be // moved onto stack as soon as it is ready and the register used by // the variable can be reused for something else. It is also much // easier to handle argument conversions, because there will be at // most only one node per conversion. else { if (X86RAPass_insertPushArg(this, node, vreg, gaRegs, arg, i, sArgList, sArgCount) != kErrorOk) goto NoMem; } } // Function-call returns. for (i = 0; i < 2; i++) { Operand_* op = &rets[i]; if (!op->isVirtReg()) continue; const FuncDetail::Value& ret = fd.getRet(i); if (ret.byReg()) { uint32_t retKind = X86Reg::kindOf(ret.getRegType()); vreg = cc()->getVirtRegById(op->getId()); RA_MERGE(vreg, tied, 0, 0); if (vreg->getKind() == retKind) { tied->setOutPhysId(ret.getRegId()); tied->flags |= TiedReg::kWReg | TiedReg::kWFunc; } else { // TODO: Function-call return value conversion. } } } // Init clobbered. clobberedRegs.set(X86Reg::kKindGp , Utils::bits(_regCount.getGp()) & (fd.getPassedRegs(X86Reg::kKindGp ) | ~fd.getPreservedRegs(X86Reg::kKindGp ))); clobberedRegs.set(X86Reg::kKindMm , Utils::bits(_regCount.getMm()) & (fd.getPassedRegs(X86Reg::kKindMm ) | ~fd.getPreservedRegs(X86Reg::kKindMm ))); clobberedRegs.set(X86Reg::kKindK , Utils::bits(_regCount.getK()) & (fd.getPassedRegs(X86Reg::kKindK ) | ~fd.getPreservedRegs(X86Reg::kKindK ))); clobberedRegs.set(X86Reg::kKindVec, Utils::bits(_regCount.getVec()) & (fd.getPassedRegs(X86Reg::kKindVec) | ~fd.getPreservedRegs(X86Reg::kKindVec))); RA_FINALIZE(node_); break; } } node_ = next; } while (node_ != stop); _Done: // Mark exit label and end node as fetched, otherwise they can be removed by // `removeUnreachableCode()`, which could lead to a crash in some later step. 
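  // (Both nodes are populated with empty RAData and given positions past
  //  everything fetched above.)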
node_ = func->getEnd(); if (!node_->hasPassData()) { CBLabel* fExit = func->getExitNode(); RA_POPULATE(fExit); fExit->setPosition(++position); RA_POPULATE(node_); node_->setPosition(++position); } return kErrorOk; // -------------------------------------------------------------------------- // [Failure] // -------------------------------------------------------------------------- NoMem: return DebugUtils::errored(kErrorNoHeapMemory); } // ============================================================================ // [asmjit::X86RAPass - Annotate] // ============================================================================ Error X86RAPass::annotate() { #if !defined(ASMJIT_DISABLE_LOGGING) CCFunc* func = getFunc(); CBNode* node_ = func; CBNode* end = func->getEnd(); Zone& dataZone = cc()->_cbDataZone; StringBuilderTmp<256> sb; uint32_t maxLen = 0; while (node_ && node_ != end) { if (!node_->hasInlineComment()) { if (node_->getType() == CBNode::kNodeInst) { CBInst* node = static_cast(node_); Logging::formatInstruction( sb, 0, cc(), cc()->getArchType(), node->getInstDetail(), node->getOpArray(), node->getOpCount()); node_->setInlineComment( static_cast(dataZone.dup(sb.getData(), sb.getLength(), true))); maxLen = std::max(maxLen, static_cast(sb.getLength())); sb.clear(); } } node_ = node_->getNext(); } _annotationLength = maxLen + 1; #endif // !ASMJIT_DISABLE_LOGGING return kErrorOk; } // ============================================================================ // [asmjit::X86BaseAlloc] // ============================================================================ struct X86BaseAlloc { // -------------------------------------------------------------------------- // [Construction / Destruction] // -------------------------------------------------------------------------- ASMJIT_INLINE X86BaseAlloc(X86RAPass* context) { _context = context; _cc = context->cc(); } ASMJIT_INLINE ~X86BaseAlloc() {} // -------------------------------------------------------------------------- // [Accessors] // -------------------------------------------------------------------------- //! Get the context. ASMJIT_INLINE X86RAPass* getContext() const { return _context; } //! Get the current state (always the same instance as X86RAPass::_x86State). ASMJIT_INLINE X86RAState* getState() const { return _context->getState(); } //! Get the node. ASMJIT_INLINE CBNode* getNode() const { return _node; } //! Get TiedReg list (all). ASMJIT_INLINE TiedReg* getTiedArray() const { return _tiedArray[0]; } //! Get TiedReg list (per class). ASMJIT_INLINE TiedReg* getTiedArrayByKind(uint32_t kind) const { return _tiedArray[kind]; } //! Get TiedReg count (all). ASMJIT_INLINE uint32_t getTiedCount() const { return _tiedTotal; } //! Get TiedReg count (per class). ASMJIT_INLINE uint32_t getTiedCountByKind(uint32_t kind) const { return _tiedCount.get(kind); } //! Get if all variables of the given register `kind` are done. ASMJIT_INLINE bool isTiedDone(uint32_t kind) const { return _tiedDone.get(kind) == _tiedCount.get(kind); } //! Get how many variables have been allocated. ASMJIT_INLINE uint32_t getTiedDone(uint32_t kind) const { return _tiedDone.get(kind); } //! Add to the count of variables allocated. ASMJIT_INLINE void addTiedDone(uint32_t kind, uint32_t n = 1) { _tiedDone.add(kind, n); } //! Get number of allocable registers per class. 
ASMJIT_INLINE uint32_t getGaRegs(uint32_t kind) const { return _context->_gaRegs[kind]; } // -------------------------------------------------------------------------- // [Init / Cleanup] // -------------------------------------------------------------------------- protected: // Just to prevent calling these methods by X86RAPass::translate(). ASMJIT_INLINE void init(CBNode* node, X86RAData* map); ASMJIT_INLINE void cleanup(); // -------------------------------------------------------------------------- // [Unuse] // -------------------------------------------------------------------------- template ASMJIT_INLINE void unuseBefore(); template ASMJIT_INLINE void unuseAfter(); // -------------------------------------------------------------------------- // [Members] // -------------------------------------------------------------------------- //! RA context. X86RAPass* _context; //! Compiler. X86Compiler* _cc; //! Node. CBNode* _node; //! Register allocator (RA) data. X86RAData* _raData; //! TiedReg list (per register kind). TiedReg* _tiedArray[Globals::kMaxVRegKinds]; //! Count of all TiedReg's. uint32_t _tiedTotal; //! TiedReg's total counter. X86RegCount _tiedCount; //! TiedReg's done counter. X86RegCount _tiedDone; }; // ============================================================================ // [asmjit::X86BaseAlloc - Init / Cleanup] // ============================================================================ ASMJIT_INLINE void X86BaseAlloc::init(CBNode* node, X86RAData* raData) { _node = node; _raData = raData; // We have to set the correct cursor in case any instruction is emitted // during the allocation phase; it has to be emitted before the current // instruction. _cc->_setCursor(node->getPrev()); // Setup the lists of variables. { TiedReg* tied = raData->getTiedArray(); _tiedArray[X86Reg::kKindGp ] = tied; _tiedArray[X86Reg::kKindMm ] = tied + raData->getTiedStart(X86Reg::kKindMm ); _tiedArray[X86Reg::kKindK ] = tied + raData->getTiedStart(X86Reg::kKindK ); _tiedArray[X86Reg::kKindVec] = tied + raData->getTiedStart(X86Reg::kKindVec); } // Setup counters. _tiedTotal = raData->tiedTotal; _tiedCount = raData->tiedCount; _tiedDone.reset(); // Connect VREG->TIED. for (uint32_t i = 0; i < _tiedTotal; i++) { TiedReg* tied = &_tiedArray[0][i]; VirtReg* vreg = tied->vreg; vreg->_tied = tied; } } ASMJIT_INLINE void X86BaseAlloc::cleanup() { // Disconnect VREG->TIED. 
for (uint32_t i = 0; i < _tiedTotal; i++) { TiedReg* tied = &_tiedArray[0][i]; VirtReg* vreg = tied->vreg; vreg->_tied = nullptr; } } // ============================================================================ // [asmjit::X86BaseAlloc - Unuse] // ============================================================================ template ASMJIT_INLINE void X86BaseAlloc::unuseBefore() { TiedReg* tiedArray = getTiedArrayByKind(C); uint32_t tiedCount = getTiedCountByKind(C); const uint32_t checkFlags = TiedReg::kXReg | TiedReg::kRMem | TiedReg::kRFunc | TiedReg::kRCall ; for (uint32_t i = 0; i < tiedCount; i++) { TiedReg* tied = &tiedArray[i]; if ((tied->flags & checkFlags) == TiedReg::kWReg) _context->unuse(tied->vreg); } } template ASMJIT_INLINE void X86BaseAlloc::unuseAfter() { TiedReg* tiedArray = getTiedArrayByKind(C); uint32_t tiedCount = getTiedCountByKind(C); for (uint32_t i = 0; i < tiedCount; i++) { TiedReg* tied = &tiedArray[i]; if (tied->flags & TiedReg::kUnuse) _context->unuse(tied->vreg); } } // ============================================================================ // [asmjit::X86VarAlloc] // ============================================================================ //! \internal //! //! Register allocator context (asm instructions). struct X86VarAlloc : public X86BaseAlloc { // -------------------------------------------------------------------------- // [Construction / Destruction] // -------------------------------------------------------------------------- ASMJIT_INLINE X86VarAlloc(X86RAPass* context) : X86BaseAlloc(context) {} ASMJIT_INLINE ~X86VarAlloc() {} // -------------------------------------------------------------------------- // [Run] // -------------------------------------------------------------------------- Error run(CBNode* node); // -------------------------------------------------------------------------- // [Init / Cleanup] // -------------------------------------------------------------------------- protected: // Just to prevent calling these methods by X86RAPass::translate(). ASMJIT_INLINE void init(CBNode* node, X86RAData* map); ASMJIT_INLINE void cleanup(); // -------------------------------------------------------------------------- // [Plan / Spill / Alloc] // -------------------------------------------------------------------------- template ASMJIT_INLINE void plan(); template ASMJIT_INLINE void spill(); template ASMJIT_INLINE void alloc(); // -------------------------------------------------------------------------- // [GuessAlloc / GuessSpill] // -------------------------------------------------------------------------- //! Guess which register is the best candidate for `vreg` from `allocableRegs`. //! //! The guess is based on looking ahead and inspecting register allocator //! instructions. The main reason is to prevent allocation to a register //! which is needed by next instruction(s). The guess look tries to go as far //! as possible, after the remaining registers are zero, the mask of previous //! registers (called 'safeRegs') is returned. template ASMJIT_INLINE uint32_t guessAlloc(VirtReg* vreg, uint32_t allocableRegs); //! Guess whether to move the given `vreg` instead of spill. 
  template<int C>
  ASMJIT_INLINE uint32_t guessSpill(VirtReg* vreg, uint32_t allocableRegs);

  // --------------------------------------------------------------------------
  // [Modified]
  // --------------------------------------------------------------------------

  template<int C>
  ASMJIT_INLINE void modified();

  // --------------------------------------------------------------------------
  // [Members]
  // --------------------------------------------------------------------------

  //! Will alloc to these registers.
  X86RegMask _willAlloc;
  //! Will spill these registers.
  X86RegMask _willSpill;
};

// ============================================================================
// [asmjit::X86VarAlloc - Run]
// ============================================================================

Error X86VarAlloc::run(CBNode* node_) {
  // Initialize.
  X86RAData* raData = node_->getPassData<X86RAData>();

  // Initialize the allocator; connect Vd->Va.
  init(node_, raData);

  if (raData->tiedTotal != 0) {
    // Unuse overwritten variables.
    unuseBefore<X86Reg::kKindGp>();
    unuseBefore<X86Reg::kKindMm>();
    unuseBefore<X86Reg::kKindVec>();

    // Plan the allocation. Planner assigns input/output registers for each
    // variable and decides whether to allocate it in register or stack.
    plan<X86Reg::kKindGp>();
    plan<X86Reg::kKindMm>();
    plan<X86Reg::kKindVec>();

    // Spill all variables marked by plan().
    spill<X86Reg::kKindGp>();
    spill<X86Reg::kKindMm>();
    spill<X86Reg::kKindVec>();

    // Alloc all variables marked by plan().
    alloc<X86Reg::kKindGp>();
    alloc<X86Reg::kKindMm>();
    alloc<X86Reg::kKindVec>();

    // Translate node operands.
    if (node_->getType() == CBNode::kNodeInst) {
      CBInst* node = static_cast<CBInst*>(node_);
      if (node->hasExtraReg()) {
        Reg reg = node->getExtraReg().toReg();
        ASMJIT_PROPAGATE(X86RAPass_translateOperands(_context, &reg, 1));
        node->setExtraReg(reg);
      }
      ASMJIT_PROPAGATE(X86RAPass_translateOperands(_context, node->getOpArray(), node->getOpCount()));
    }
    else if (node_->getType() == CBNode::kNodePushArg) {
      CCPushArg* node = static_cast<CCPushArg*>(node_);

      CCFuncCall* call = static_cast<CCFuncCall*>(node->getCall());
      FuncDetail& fd = call->getDetail();

      uint32_t argIndex = 0;
      uint32_t argMask = node->_args;

      VirtReg* cvtReg = node->getCvtReg();
      VirtReg* srcReg = node->getSrcReg();

      // Convert first.
      ASMJIT_ASSERT(srcReg->getPhysId() != Globals::kInvalidRegId);

      if (cvtReg) {
        ASMJIT_ASSERT(cvtReg->getPhysId() != Globals::kInvalidRegId);

        X86Reg dstOp(X86Reg::fromSignature(cvtReg->getSignature(), cvtReg->getId()));
        X86Reg srcOp(X86Reg::fromSignature(srcReg->getSignature(), srcReg->getId()));

        // Emit conversion after the prolog.
        X86Internal::emitArgMove(reinterpret_cast<X86Emitter*>(_context->cc()),
          dstOp, cvtReg->getTypeId(),
          srcOp, srcReg->getTypeId(), _context->_avxEnabled);
        srcReg = cvtReg;
      }

      while (argMask != 0) {
        if (argMask & 0x1) {
          FuncDetail::Value& arg = fd.getArg(argIndex);
          ASMJIT_ASSERT(arg.byStack());

          X86Mem dst = x86::ptr(_context->_zsp, -static_cast<int>(_context->getGpSize()) + arg.getStackOffset());
          _context->emitRegToStack(arg.getTypeId(), &dst, srcReg->getTypeId(), srcReg->getPhysId());
        }

        argIndex++;
        argMask >>= 1;
      }
    }

    // Mark variables as modified.
    modified<X86Reg::kKindGp>();
    modified<X86Reg::kKindMm>();
    modified<X86Reg::kKindVec>();

    // Cleanup; disconnect Vd->Va.
    cleanup();

    // Update clobbered mask.
    _context->_clobberedRegs.or_(_willAlloc);
  }

  // Update clobbered mask.
  _context->_clobberedRegs.or_(raData->clobberedRegs);

  // Unuse.
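  // unuseAfter<kind>() (see X86BaseAlloc above) releases every virtual register
  // whose TiedReg carries TiedReg::kUnuse, i.e. values that liveness analysis
  // proved dead after this node, making their physical registers immediately
  // reusable.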
if (raData->tiedTotal != 0) { unuseAfter(); unuseAfter(); unuseAfter(); } return kErrorOk; } // ============================================================================ // [asmjit::X86VarAlloc - Init / Cleanup] // ============================================================================ ASMJIT_INLINE void X86VarAlloc::init(CBNode* node, X86RAData* raData) { X86BaseAlloc::init(node, raData); // These will block planner from assigning them during planning. Planner will // add more registers when assigning registers to variables that don't need // any specific register. _willAlloc = raData->inRegs; _willAlloc.or_(raData->outRegs); _willSpill.reset(); } ASMJIT_INLINE void X86VarAlloc::cleanup() { X86BaseAlloc::cleanup(); } // ============================================================================ // [asmjit::X86VarAlloc - Plan / Spill / Alloc] // ============================================================================ template ASMJIT_INLINE void X86VarAlloc::plan() { if (isTiedDone(C)) return; uint32_t i; uint32_t willAlloc = _willAlloc.get(C); uint32_t willFree = 0; TiedReg* tiedArray = getTiedArrayByKind(C); uint32_t tiedCount = getTiedCountByKind(C); X86RAState* state = getState(); // Calculate 'willAlloc' and 'willFree' masks based on mandatory masks. for (i = 0; i < tiedCount; i++) { TiedReg* tied = &tiedArray[i]; VirtReg* vreg = tied->vreg; uint32_t vaFlags = tied->flags; uint32_t physId = vreg->getPhysId(); uint32_t regMask = (physId != Globals::kInvalidRegId) ? Utils::mask(physId) : 0; if ((vaFlags & TiedReg::kXReg) != 0) { // Planning register allocation. First check whether the variable is // already allocated in register and if it can stay allocated there. // // The following conditions may happen: // // a) Allocated register is one of the mandatoryRegs. // b) Allocated register is one of the allocableRegs. uint32_t mandatoryRegs = tied->inRegs; uint32_t allocableRegs = tied->allocableRegs; if (regMask != 0) { // Special path for planning output-only registers. if ((vaFlags & TiedReg::kXReg) == TiedReg::kWReg) { uint32_t outPhysId = tied->outPhysId; mandatoryRegs = (outPhysId != Globals::kInvalidRegId) ? Utils::mask(outPhysId) : 0; if ((mandatoryRegs | allocableRegs) & regMask) { tied->setOutPhysId(physId); tied->flags |= TiedReg::kWDone; if (mandatoryRegs & regMask) { // Case 'a' - 'willAlloc' contains initially all inRegs from all TiedReg's. ASMJIT_ASSERT((willAlloc & regMask) != 0); } else { // Case 'b'. tied->setOutPhysId(physId); willAlloc |= regMask; } addTiedDone(C); continue; } } else { if ((mandatoryRegs | allocableRegs) & regMask) { tied->setInPhysId(physId); tied->flags |= TiedReg::kRDone; if (mandatoryRegs & regMask) { // Case 'a' - 'willAlloc' contains initially all inRegs from all TiedReg's. ASMJIT_ASSERT((willAlloc & regMask) != 0); } else { // Case 'b'. tied->inRegs |= regMask; willAlloc |= regMask; } addTiedDone(C); continue; } } } // Variable is not allocated or allocated in register that doesn't // match inRegs or allocableRegs. The next step is to pick the best // register for this variable. If `inRegs` contains any register the // decision is simple - we have to follow, in other case will use // the advantage of `guessAlloc()` to find a register (or registers) // by looking ahead. But the best way to find a good register is not // here since now we have no information about the registers that // will be freed. 
So instead of finding register here, we just mark // the current register (if variable is allocated) as `willFree` so // the planner can use this information in the second step to plan the // allocation as a whole. willFree |= regMask; continue; } else { if (regMask != 0) { willFree |= regMask; continue; } else { tied->flags |= TiedReg::kRDone; addTiedDone(C); continue; } } } // Occupied registers without 'willFree' registers; contains basically // all the registers we can use to allocate variables without inRegs // specified. uint32_t occupied = state->_occupied.get(C) & ~willFree; uint32_t willSpill = 0; // Find the best registers for variables that are not allocated yet. for (i = 0; i < tiedCount; i++) { TiedReg* tied = &tiedArray[i]; VirtReg* vreg = tied->vreg; uint32_t vaFlags = tied->flags; if ((vaFlags & TiedReg::kXReg) != 0) { if ((vaFlags & TiedReg::kXReg) == TiedReg::kWReg) { if (vaFlags & TiedReg::kWDone) continue; // Skip all registers that have assigned outPhysId. Spill if occupied. if (tied->hasOutPhysId()) { uint32_t outRegs = Utils::mask(tied->outPhysId); willSpill |= occupied & outRegs; continue; } } else { if (vaFlags & TiedReg::kRDone) continue; // We skip all registers that have assigned inPhysId, indicates that // the register to allocate in is known. if (tied->hasInPhysId()) { uint32_t inRegs = tied->inRegs; willSpill |= occupied & inRegs; continue; } } uint32_t m = tied->inRegs; if (tied->hasOutPhysId()) m |= Utils::mask(tied->outPhysId); m = tied->allocableRegs & ~(willAlloc ^ m); m = guessAlloc(vreg, m); ASMJIT_ASSERT(m != 0); uint32_t candidateRegs = m & ~occupied; uint32_t homeMask = vreg->getHomeMask(); uint32_t physId; uint32_t regMask; if (candidateRegs == 0) { candidateRegs = m & occupied & ~state->_modified.get(C); if (candidateRegs == 0) candidateRegs = m; } if (candidateRegs & homeMask) candidateRegs &= homeMask; physId = Utils::findFirstBit(candidateRegs); regMask = Utils::mask(physId); if ((vaFlags & TiedReg::kXReg) == TiedReg::kWReg) { tied->setOutPhysId(physId); } else { tied->setInPhysId(physId); tied->inRegs = regMask; } willAlloc |= regMask; willSpill |= regMask & occupied; willFree &=~regMask; occupied |= regMask; continue; } else if ((vaFlags & TiedReg::kXMem) != 0) { uint32_t physId = vreg->getPhysId(); if (physId != Globals::kInvalidRegId && (vaFlags & TiedReg::kXMem) != TiedReg::kWMem) { willSpill |= Utils::mask(physId); } } } // Set calculated masks back to the allocator; needed by spill() and alloc(). _willSpill.set(C, willSpill); _willAlloc.set(C, willAlloc); } template ASMJIT_INLINE void X86VarAlloc::spill() { uint32_t m = _willSpill.get(C); uint32_t i = static_cast(0) - 1; if (m == 0) return; X86RAState* state = getState(); VirtReg** vregs = state->getListByKind(C); // Available registers for decision if move has any benefit over spill. uint32_t availableRegs = getGaRegs(C) & ~(state->_occupied.get(C) | m | _willAlloc.get(C)); do { // We always advance one more to destroy the bit that we have found. uint32_t bitIndex = Utils::findFirstBit(m) + 1; i += bitIndex; m >>= bitIndex; VirtReg* vreg = vregs[i]; ASMJIT_ASSERT(vreg); TiedReg* tied = vreg->_tied; ASMJIT_ASSERT(!tied || (tied->flags & TiedReg::kXReg) == 0); if (vreg->isModified() && availableRegs) { // Don't check for alternatives if the variable has to be spilled. 
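    // Rationale: moving a live, modified variable into a free register costs a
    // single `mov`, whereas spilling costs a store now plus a reload at the
    // next use, so a move is preferred whenever guessSpill() reports a safe
    // alternative register.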
if (!tied || (tied->flags & TiedReg::kSpill) == 0) { uint32_t altRegs = guessSpill(vreg, availableRegs); if (altRegs != 0) { uint32_t physId = Utils::findFirstBit(altRegs); uint32_t regMask = Utils::mask(physId); _context->move(vreg, physId); availableRegs ^= regMask; continue; } } } _context->spill(vreg); } while (m != 0); } template ASMJIT_INLINE void X86VarAlloc::alloc() { if (isTiedDone(C)) return; uint32_t i; bool didWork; TiedReg* tiedArray = getTiedArrayByKind(C); uint32_t tiedCount = getTiedCountByKind(C); // Alloc `in` regs. do { didWork = false; for (i = 0; i < tiedCount; i++) { TiedReg* aTied = &tiedArray[i]; VirtReg* aVReg = aTied->vreg; if ((aTied->flags & (TiedReg::kRReg | TiedReg::kRDone)) != TiedReg::kRReg) continue; uint32_t aPhysId = aVReg->getPhysId(); uint32_t bPhysId = aTied->inPhysId; // Shouldn't be the same. ASMJIT_ASSERT(aPhysId != bPhysId); VirtReg* bVReg = getState()->getListByKind(C)[bPhysId]; if (bVReg) { // Gp registers only - Swap two registers if we can solve two // allocation tasks by a single 'xchg' instruction, swapping // two registers required by the instruction/node or one register // required with another non-required. if (C == X86Reg::kKindGp && aPhysId != Globals::kInvalidRegId) { TiedReg* bTied = bVReg->_tied; _context->swapGp(aVReg, bVReg); aTied->flags |= TiedReg::kRDone; addTiedDone(C); // Double-hit, two registers allocated by a single xchg. if (bTied && bTied->inPhysId == aPhysId) { bTied->flags |= TiedReg::kRDone; addTiedDone(C); } didWork = true; continue; } } else if (aPhysId != Globals::kInvalidRegId) { _context->move(aVReg, bPhysId); aTied->flags |= TiedReg::kRDone; addTiedDone(C); didWork = true; continue; } else { _context->alloc(aVReg, bPhysId); aTied->flags |= TiedReg::kRDone; addTiedDone(C); didWork = true; continue; } } } while (didWork); // Alloc 'out' regs. for (i = 0; i < tiedCount; i++) { TiedReg* tied = &tiedArray[i]; VirtReg* vreg = tied->vreg; if ((tied->flags & (TiedReg::kXReg | TiedReg::kWDone)) != TiedReg::kWReg) continue; uint32_t physId = tied->outPhysId; ASMJIT_ASSERT(physId != Globals::kInvalidRegId); if (vreg->getPhysId() != physId) { ASMJIT_ASSERT(getState()->getListByKind(C)[physId] == nullptr); _context->attach(vreg, physId, false); } tied->flags |= TiedReg::kWDone; addTiedDone(C); } } // ============================================================================ // [asmjit::X86VarAlloc - GuessAlloc / GuessSpill] // ============================================================================ template ASMJIT_INLINE uint32_t X86VarAlloc::guessAlloc(VirtReg* vreg, uint32_t allocableRegs) { ASMJIT_ASSERT(allocableRegs != 0); // Stop now if there is only one bit (register) set in `allocableRegs` mask. if (Utils::isPowerOf2(allocableRegs)) return allocableRegs; uint32_t raId = vreg->_raId; uint32_t safeRegs = allocableRegs; uint32_t i; uint32_t maxLookAhead = kCompilerDefaultLookAhead; // Look ahead and calculate mask of special registers on both - input/output. CBNode* node = _node; for (i = 0; i < maxLookAhead; i++) { X86RAData* raData = node->getPassData(); RABits* liveness = raData ? raData->liveness : static_cast(nullptr); // If the variable becomes dead it doesn't make sense to continue. if (liveness && !liveness->getBit(raId)) break; // Stop on `CBSentinel` and `CCFuncRet`. if (node->hasFlag(CBNode::kFlagIsRet)) break; // Stop on conditional jump, we don't follow them. if (node->hasFlag(CBNode::kFlagIsJcc)) break; // Advance on non-conditional jump. 
if (node->hasFlag(CBNode::kFlagIsJmp)) { node = static_cast(node)->getTarget(); // Stop on jump that is not followed. if (!node) break; } node = node->getNext(); ASMJIT_ASSERT(node != nullptr); raData = node->getPassData(); if (raData) { TiedReg* tied = raData->findTiedByKind(C, vreg); uint32_t mask; if (tied) { // If the variable is overwritten it doesn't make sense to continue. if ((tied->flags & TiedReg::kRAll) == 0) break; mask = tied->allocableRegs; if (mask != 0) { allocableRegs &= mask; if (allocableRegs == 0) break; safeRegs = allocableRegs; } mask = tied->inRegs; if (mask != 0) { allocableRegs &= mask; if (allocableRegs == 0) break; safeRegs = allocableRegs; break; } allocableRegs &= ~(raData->outRegs.get(C) | raData->clobberedRegs.get(C)); if (allocableRegs == 0) break; } else { allocableRegs &= ~(raData->inRegs.get(C) | raData->outRegs.get(C) | raData->clobberedRegs.get(C)); if (allocableRegs == 0) break; } safeRegs = allocableRegs; } } return safeRegs; } template ASMJIT_INLINE uint32_t X86VarAlloc::guessSpill(VirtReg* vreg, uint32_t allocableRegs) { ASMJIT_ASSERT(allocableRegs != 0); return 0; } // ============================================================================ // [asmjit::X86VarAlloc - Modified] // ============================================================================ template ASMJIT_INLINE void X86VarAlloc::modified() { TiedReg* tiedArray = getTiedArrayByKind(C); uint32_t tiedCount = getTiedCountByKind(C); for (uint32_t i = 0; i < tiedCount; i++) { TiedReg* tied = &tiedArray[i]; if (tied->flags & TiedReg::kWReg) { VirtReg* vreg = tied->vreg; uint32_t physId = vreg->getPhysId(); uint32_t regMask = Utils::mask(physId); vreg->setModified(true); _context->_x86State._modified.or_(C, regMask); } } } // ============================================================================ // [asmjit::X86CallAlloc] // ============================================================================ //! \internal //! //! Register allocator context (function call). struct X86CallAlloc : public X86BaseAlloc { // -------------------------------------------------------------------------- // [Construction / Destruction] // -------------------------------------------------------------------------- ASMJIT_INLINE X86CallAlloc(X86RAPass* context) : X86BaseAlloc(context) {} ASMJIT_INLINE ~X86CallAlloc() {} // -------------------------------------------------------------------------- // [Accessors] // -------------------------------------------------------------------------- //! Get the node. ASMJIT_INLINE CCFuncCall* getNode() const { return static_cast(_node); } // -------------------------------------------------------------------------- // [Run] // -------------------------------------------------------------------------- Error run(CCFuncCall* node); // -------------------------------------------------------------------------- // [Init / Cleanup] // -------------------------------------------------------------------------- protected: // Just to prevent calling these methods from X86RAPass::translate(). 
ASMJIT_INLINE void init(CCFuncCall* node, X86RAData* raData); ASMJIT_INLINE void cleanup(); // -------------------------------------------------------------------------- // [Plan / Alloc / Spill / Move] // -------------------------------------------------------------------------- template ASMJIT_INLINE void plan(); template ASMJIT_INLINE void spill(); template ASMJIT_INLINE void alloc(); // -------------------------------------------------------------------------- // [AllocImmsOnStack] // -------------------------------------------------------------------------- ASMJIT_INLINE void allocImmsOnStack(); // -------------------------------------------------------------------------- // [Duplicate] // -------------------------------------------------------------------------- template ASMJIT_INLINE void duplicate(); // -------------------------------------------------------------------------- // [GuessAlloc / GuessSpill] // -------------------------------------------------------------------------- template ASMJIT_INLINE uint32_t guessAlloc(VirtReg* vreg, uint32_t allocableRegs); template ASMJIT_INLINE uint32_t guessSpill(VirtReg* vreg, uint32_t allocableRegs); // -------------------------------------------------------------------------- // [Save] // -------------------------------------------------------------------------- template ASMJIT_INLINE void save(); // -------------------------------------------------------------------------- // [Clobber] // -------------------------------------------------------------------------- template ASMJIT_INLINE void clobber(); // -------------------------------------------------------------------------- // [Ret] // -------------------------------------------------------------------------- ASMJIT_INLINE void ret(); // -------------------------------------------------------------------------- // [Members] // -------------------------------------------------------------------------- //! Will alloc to these registers. X86RegMask _willAlloc; //! Will spill these registers. X86RegMask _willSpill; }; // ============================================================================ // [asmjit::X86CallAlloc - Run] // ============================================================================ Error X86CallAlloc::run(CCFuncCall* node) { // Initialize the allocator; prepare basics and connect Vd->Va. X86RAData* raData = node->getPassData(); init(node, raData); // Plan register allocation. Planner is only able to assign one register per // variable. If any variable is used multiple times it will be handled later. plan(); plan(); plan(); // Spill. spill(); spill(); spill(); // Alloc. alloc(); alloc(); alloc(); // Unuse clobbered registers that are not used to pass function arguments and // save variables used to pass function arguments that will be reused later on. save(); save(); save(); // Allocate immediates in registers and on the stack. allocImmsOnStack(); // Duplicate. duplicate(); duplicate(); duplicate(); // Translate call operand. ASMJIT_PROPAGATE(X86RAPass_translateOperands(_context, node->getOpArray(), node->getOpCount())); // To emit instructions after call. _cc->_setCursor(node); // If the callee pops stack it has to be manually adjusted back. FuncDetail& fd = node->getDetail(); if (fd.hasFlag(CallConv::kFlagCalleePopsStack) && fd.getArgStackSize() != 0) _cc->emit(X86Inst::kIdSub, _context->_zsp, static_cast(fd.getArgStackSize())); // Clobber. clobber(); clobber(); clobber(); // Return. ret(); // Unuse. 
  unuseAfter<X86Reg::kKindGp>();
  unuseAfter<X86Reg::kKindMm>();
  unuseAfter<X86Reg::kKindVec>();

  // Cleanup; disconnect Vd->Va.
  cleanup();

  return kErrorOk;
}

// ============================================================================
// [asmjit::X86CallAlloc - Init / Cleanup]
// ============================================================================

ASMJIT_INLINE void X86CallAlloc::init(CCFuncCall* node, X86RAData* raData) {
  X86BaseAlloc::init(node, raData);

  // Create mask of all registers that will be used to pass function arguments.
  _willAlloc.reset();
  _willAlloc.set(X86Reg::kKindGp , node->getDetail().getUsedRegs(X86Reg::kKindGp ));
  _willAlloc.set(X86Reg::kKindMm , node->getDetail().getUsedRegs(X86Reg::kKindMm ));
  _willAlloc.set(X86Reg::kKindK  , node->getDetail().getUsedRegs(X86Reg::kKindK  ));
  _willAlloc.set(X86Reg::kKindVec, node->getDetail().getUsedRegs(X86Reg::kKindVec));
  _willSpill.reset();
}

ASMJIT_INLINE void X86CallAlloc::cleanup() {
  X86BaseAlloc::cleanup();
}

// ============================================================================
// [asmjit::X86CallAlloc - Plan / Spill / Alloc]
// ============================================================================

template<int C>
ASMJIT_INLINE void X86CallAlloc::plan() {
  uint32_t i;

  uint32_t clobbered = _raData->clobberedRegs.get(C);
  uint32_t willAlloc = _willAlloc.get(C);
  uint32_t willFree = clobbered & ~willAlloc;

  TiedReg* tiedArray = getTiedArrayByKind(C);
  uint32_t tiedCount = getTiedCountByKind(C);
  X86RAState* state = getState();

  // Calculate 'willAlloc' and 'willFree' masks based on mandatory masks.
  for (i = 0; i < tiedCount; i++) {
    TiedReg* tied = &tiedArray[i];
    VirtReg* vreg = tied->vreg;

    uint32_t vaFlags = tied->flags;
    uint32_t physId = vreg->getPhysId();
    uint32_t regMask = (physId != Globals::kInvalidRegId) ? Utils::mask(physId) : 0;

    if ((vaFlags & TiedReg::kRReg) != 0) {
      // Planning register allocation. First check whether the variable is
      // already allocated in a register and whether it can stay there. Function
      // arguments are passed either in a specific register or on the stack, so
      // we mostly care about mandatory registers here.
      uint32_t inRegs = tied->inRegs;

      if (inRegs == 0) {
        inRegs = tied->allocableRegs;
      }

      // Optimize the situation where the variable has to be allocated in a
      // mandatory register, but it's already allocated in a register that is
      // not clobbered (i.e. it will survive the function call).
      if ((regMask & inRegs) != 0 || ((regMask & ~clobbered) != 0 && (vaFlags & TiedReg::kUnuse) == 0)) {
        tied->setInPhysId(physId);
        tied->flags |= TiedReg::kRDone;
        addTiedDone(C);
      }
      else {
        willFree |= regMask;
      }
    }
    else {
      // Memory access - if the variable is allocated it has to be freed.
      if (regMask != 0) {
        willFree |= regMask;
      }
      else {
        tied->flags |= TiedReg::kRDone;
        addTiedDone(C);
      }
    }
  }

  // Occupied registers without 'willFree' registers; contains basically
  // all the registers we can use to allocate variables without inRegs
  // specified.
  uint32_t occupied = state->_occupied.get(C) & ~willFree;
  uint32_t willSpill = 0;

  // Find the best registers for variables that are not allocated yet. Only
  // useful for Gp registers used as a call operand.
  for (i = 0; i < tiedCount; i++) {
    TiedReg* tied = &tiedArray[i];
    VirtReg* vreg = tied->vreg;
    uint32_t vaFlags = tied->flags;

    if ((vaFlags & TiedReg::kRDone) != 0 || (vaFlags & TiedReg::kRReg) == 0)
      continue;

    // All registers except Gp used by the call itself must have inPhysId.
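    // In other words, every argument already carries the physical register
    // required by the calling convention in `inRegs`; only GP registers used by
    // the call operand itself (which have no mandatory mask) may still be
    // unassigned at this point.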
uint32_t m = tied->inRegs; if (C != X86Reg::kKindGp || m) { ASMJIT_ASSERT(m != 0); tied->setInPhysId(Utils::findFirstBit(m)); willSpill |= occupied & m; continue; } m = tied->allocableRegs & ~(willAlloc ^ m); m = guessAlloc(vreg, m); ASMJIT_ASSERT(m != 0); uint32_t candidateRegs = m & ~occupied; if (candidateRegs == 0) { candidateRegs = m & occupied & ~state->_modified.get(C); if (candidateRegs == 0) candidateRegs = m; } if (!(vaFlags & (TiedReg::kWReg | TiedReg::kUnuse)) && (candidateRegs & ~clobbered)) candidateRegs &= ~clobbered; uint32_t physId = Utils::findFirstBit(candidateRegs); uint32_t regMask = Utils::mask(physId); tied->setInPhysId(physId); tied->inRegs = regMask; willAlloc |= regMask; willSpill |= regMask & occupied; willFree &= ~regMask; occupied |= regMask; continue; } // Set calculated masks back to the allocator; needed by spill() and alloc(). _willSpill.set(C, willSpill); _willAlloc.set(C, willAlloc); } template ASMJIT_INLINE void X86CallAlloc::spill() { uint32_t m = _willSpill.get(C); uint32_t i = static_cast(0) - 1; if (m == 0) return; X86RAState* state = getState(); VirtReg** sVars = state->getListByKind(C); // Available registers for decision if move has any benefit over spill. uint32_t availableRegs = getGaRegs(C) & ~(state->_occupied.get(C) | m | _willAlloc.get(C)); do { // We always advance one more to destroy the bit that we have found. uint32_t bitIndex = Utils::findFirstBit(m) + 1; i += bitIndex; m >>= bitIndex; VirtReg* vreg = sVars[i]; ASMJIT_ASSERT(vreg && !vreg->_tied); if (vreg->isModified() && availableRegs) { uint32_t available = guessSpill(vreg, availableRegs); if (available != 0) { uint32_t physId = Utils::findFirstBit(available); uint32_t regMask = Utils::mask(physId); _context->move(vreg, physId); availableRegs ^= regMask; continue; } } _context->spill(vreg); } while (m != 0); } template ASMJIT_INLINE void X86CallAlloc::alloc() { if (isTiedDone(C)) return; TiedReg* tiedArray = getTiedArrayByKind(C); uint32_t tiedCount = getTiedCountByKind(C); uint32_t i; bool didWork; do { didWork = false; for (i = 0; i < tiedCount; i++) { TiedReg* aTied = &tiedArray[i]; VirtReg* aVReg = aTied->vreg; if ((aTied->flags & (TiedReg::kRReg | TiedReg::kRDone)) != TiedReg::kRReg) continue; uint32_t sPhysId = aVReg->getPhysId(); uint32_t bPhysId = aTied->inPhysId; // Shouldn't be the same. ASMJIT_ASSERT(sPhysId != bPhysId); VirtReg* bVReg = getState()->getListByKind(C)[bPhysId]; if (bVReg) { TiedReg* bTied = bVReg->_tied; // GP registers only - Swap two registers if we can solve two // allocation tasks by a single 'xchg' instruction, swapping // two registers required by the instruction/node or one register // required with another non-required. if (C == X86Reg::kKindGp) { _context->swapGp(aVReg, bVReg); aTied->flags |= TiedReg::kRDone; addTiedDone(C); // Double-hit, two registers allocated by a single swap. 
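          // For illustration (hypothetical assignment): if vA currently lives
          // in EAX but must end up in ECX, while vB lives in ECX but wants EAX,
          // the single `xchg` emitted by swapGp() satisfies both TiedRegs, so
          // both can be marked kRDone below.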
if (bTied && bTied->inPhysId == sPhysId) { bTied->flags |= TiedReg::kRDone; addTiedDone(C); } didWork = true; continue; } } else if (sPhysId != Globals::kInvalidRegId) { _context->move(aVReg, bPhysId); _context->_clobberedRegs.or_(C, Utils::mask(bPhysId)); aTied->flags |= TiedReg::kRDone; addTiedDone(C); didWork = true; continue; } else { _context->alloc(aVReg, bPhysId); _context->_clobberedRegs.or_(C, Utils::mask(bPhysId)); aTied->flags |= TiedReg::kRDone; addTiedDone(C); didWork = true; continue; } } } while (didWork); } // ============================================================================ // [asmjit::X86CallAlloc - AllocImmsOnStack] // ============================================================================ ASMJIT_INLINE void X86CallAlloc::allocImmsOnStack() { CCFuncCall* node = getNode(); FuncDetail& fd = node->getDetail(); uint32_t argCount = fd.getArgCount(); Operand_* args = node->_args; for (uint32_t i = 0; i < argCount; i++) { Operand_& op = args[i]; if (!op.isImm()) continue; const Imm& imm = static_cast(op); const FuncDetail::Value& arg = fd.getArg(i); uint32_t varType = arg.getTypeId(); if (arg.byReg()) { _context->emitImmToReg(varType, arg.getRegId(), &imm); } else { X86Mem dst = x86::ptr(_context->_zsp, -static_cast(_context->getGpSize()) + arg.getStackOffset()); _context->emitImmToStack(varType, &dst, &imm); } } } // ============================================================================ // [asmjit::X86CallAlloc - Duplicate] // ============================================================================ template ASMJIT_INLINE void X86CallAlloc::duplicate() { TiedReg* tiedArray = getTiedArrayByKind(C); uint32_t tiedCount = getTiedCountByKind(C); for (uint32_t i = 0; i < tiedCount; i++) { TiedReg* tied = &tiedArray[i]; if ((tied->flags & TiedReg::kRReg) == 0) continue; uint32_t inRegs = tied->inRegs; if (!inRegs) continue; VirtReg* vreg = tied->vreg; uint32_t physId = vreg->getPhysId(); ASMJIT_ASSERT(physId != Globals::kInvalidRegId); inRegs &= ~Utils::mask(physId); if (!inRegs) continue; for (uint32_t dupIndex = 0; inRegs != 0; dupIndex++, inRegs >>= 1) { if (inRegs & 0x1) { _context->emitMove(vreg, dupIndex, physId, "Duplicate"); _context->_clobberedRegs.or_(C, Utils::mask(dupIndex)); } } } } // ============================================================================ // [asmjit::X86CallAlloc - GuessAlloc / GuessSpill] // ============================================================================ template ASMJIT_INLINE uint32_t X86CallAlloc::guessAlloc(VirtReg* vreg, uint32_t allocableRegs) { ASMJIT_ASSERT(allocableRegs != 0); // Stop now if there is only one bit (register) set in 'allocableRegs' mask. if (Utils::isPowerOf2(allocableRegs)) return allocableRegs; uint32_t i; uint32_t safeRegs = allocableRegs; uint32_t maxLookAhead = kCompilerDefaultLookAhead; // Look ahead and calculate mask of special registers on both - input/output. CBNode* node = _node; for (i = 0; i < maxLookAhead; i++) { // Stop on `CCFuncRet` and `CBSentinel`. if (node->hasFlag(CBNode::kFlagIsRet)) break; // Stop on conditional jump, we don't follow them. if (node->hasFlag(CBNode::kFlagIsJcc)) break; // Advance on non-conditional jump. if (node->hasFlag(CBNode::kFlagIsJmp)) { node = static_cast(node)->getTarget(); // Stop on jump that is not followed. 
if (!node) break; } node = node->getNext(); ASMJIT_ASSERT(node != nullptr); X86RAData* raData = node->getPassData(); if (raData) { TiedReg* tied = raData->findTiedByKind(C, vreg); if (tied) { uint32_t inRegs = tied->inRegs; if (inRegs != 0) { safeRegs = allocableRegs; allocableRegs &= inRegs; if (allocableRegs == 0) goto _UseSafeRegs; else return allocableRegs; } } safeRegs = allocableRegs; allocableRegs &= ~(raData->inRegs.get(C) | raData->outRegs.get(C) | raData->clobberedRegs.get(C)); if (allocableRegs == 0) break; } } _UseSafeRegs: return safeRegs; } template ASMJIT_INLINE uint32_t X86CallAlloc::guessSpill(VirtReg* vreg, uint32_t allocableRegs) { ASMJIT_ASSERT(allocableRegs != 0); return 0; } // ============================================================================ // [asmjit::X86CallAlloc - Save] // ============================================================================ template ASMJIT_INLINE void X86CallAlloc::save() { X86RAState* state = getState(); VirtReg** sVars = state->getListByKind(C); uint32_t i; uint32_t affected = _raData->clobberedRegs.get(C) & state->_occupied.get(C) & state->_modified.get(C); for (i = 0; affected != 0; i++, affected >>= 1) { if (affected & 0x1) { VirtReg* vreg = sVars[i]; ASMJIT_ASSERT(vreg != nullptr); ASMJIT_ASSERT(vreg->isModified()); TiedReg* tied = vreg->_tied; if (!tied || (tied->flags & (TiedReg::kWReg | TiedReg::kUnuse)) == 0) _context->save(vreg); } } } // ============================================================================ // [asmjit::X86CallAlloc - Clobber] // ============================================================================ template ASMJIT_INLINE void X86CallAlloc::clobber() { X86RAState* state = getState(); VirtReg** sVars = state->getListByKind(C); uint32_t i; uint32_t affected = _raData->clobberedRegs.get(C) & state->_occupied.get(C); for (i = 0; affected != 0; i++, affected >>= 1) { if (affected & 0x1) { VirtReg* vreg = sVars[i]; ASMJIT_ASSERT(vreg != nullptr); TiedReg* tied = vreg->_tied; uint32_t vdState = VirtReg::kStateNone; if (!vreg->isModified() || (tied && (tied->flags & (TiedReg::kWAll | TiedReg::kUnuse)) != 0)) vdState = VirtReg::kStateMem; _context->unuse(vreg, vdState); } } } // ============================================================================ // [asmjit::X86CallAlloc - Ret] // ============================================================================ ASMJIT_INLINE void X86CallAlloc::ret() { CCFuncCall* node = getNode(); FuncDetail& fd = node->getDetail(); Operand_* rets = node->_ret; for (uint32_t i = 0; i < 2; i++) { const FuncDetail::Value& ret = fd.getRet(i); Operand_* op = &rets[i]; if (!ret.byReg() || !op->isVirtReg()) continue; VirtReg* vreg = _cc->getVirtRegById(op->getId()); uint32_t regId = ret.getRegId(); switch (vreg->getKind()) { case X86Reg::kKindGp: _context->unuse(vreg); _context->attach(vreg, regId, true); break; case X86Reg::kKindMm: _context->unuse(vreg); _context->attach(vreg, regId, true); break; case X86Reg::kKindVec: if (X86Reg::kindOf(ret.getRegType()) == X86Reg::kKindVec) { _context->unuse(vreg); _context->attach(vreg, regId, true); } else { uint32_t elementId = TypeId::elementOf(vreg->getTypeId()); uint32_t size = (elementId == TypeId::kF32) ? 
4 : 8; X86Mem m = _context->getVarMem(vreg); m.setSize(size); _context->unuse(vreg, VirtReg::kStateMem); _cc->fstp(m); } break; } } } // ============================================================================ // [asmjit::X86RAPass - TranslateOperands] // ============================================================================ //! \internal static Error X86RAPass_translateOperands(X86RAPass* self, Operand_* opArray, uint32_t opCount) { X86Compiler* cc = self->cc(); // Translate variables into registers. for (uint32_t i = 0; i < opCount; i++) { Operand_* op = &opArray[i]; if (op->isVirtReg()) { VirtReg* vreg = cc->getVirtRegById(op->getId()); ASMJIT_ASSERT(vreg != nullptr); ASMJIT_ASSERT(vreg->getPhysId() != Globals::kInvalidRegId); op->_reg.id = vreg->getPhysId(); } else if (op->isMem()) { X86Mem* m = static_cast(op); if (m->hasBaseReg() && cc->isVirtRegValid(m->getBaseId())) { VirtReg* vreg = cc->getVirtRegById(m->getBaseId()); if (m->isRegHome()) { self->getVarCell(vreg); } else { ASMJIT_ASSERT(vreg->getPhysId() != Globals::kInvalidRegId); op->_mem.base = vreg->getPhysId(); } } if (m->hasIndexReg() && cc->isVirtRegValid(m->getIndexId())) { VirtReg* vreg = cc->getVirtRegById(m->getIndexId()); op->_mem.index = vreg->getPhysId(); } } } return kErrorOk; } // ============================================================================ // [asmjit::X86RAPass - TranslatePrologEpilog] // ============================================================================ //! \internal static Error X86RAPass_prepareFuncFrame(X86RAPass* self, CCFunc* func) { FuncFrameInfo& ffi = func->getFrameInfo(); X86RegMask& clobberedRegs = self->_clobberedRegs; // Initialize dirty registers. ffi.setDirtyRegs(X86Reg::kKindGp , clobberedRegs.get(X86Reg::kKindGp )); ffi.setDirtyRegs(X86Reg::kKindMm , clobberedRegs.get(X86Reg::kKindMm )); ffi.setDirtyRegs(X86Reg::kKindK , clobberedRegs.get(X86Reg::kKindK )); ffi.setDirtyRegs(X86Reg::kKindVec, clobberedRegs.get(X86Reg::kKindVec)); // Initialize stack size & alignment. ffi.setStackFrameSize(self->_memAllTotal); ffi.setStackFrameAlignment(self->_memMaxAlign); return kErrorOk; } //! \internal static Error X86RAPass_patchFuncMem(X86RAPass* self, CCFunc* func, CBNode* stop, FuncFrameLayout& layout) { X86Compiler* cc = self->cc(); CBNode* node = func; do { if (node->getType() == CBNode::kNodeInst) { CBInst* iNode = static_cast(node); if (iNode->hasMemOp()) { X86Mem* m = iNode->getMemOp(); if (m->isArgHome()) { m->addOffsetLo32(layout.getStackArgsOffset()); m->clearArgHome(); } if (m->isRegHome() && Operand::isPackedId(m->getBaseId())) { VirtReg* vreg = cc->getVirtRegById(m->getBaseId()); ASMJIT_ASSERT(vreg != nullptr); RACell* cell = vreg->getMemCell(); ASMJIT_ASSERT(cell != nullptr); m->_setBase(cc->_nativeGpReg.getType(), self->_varBaseRegId); m->addOffsetLo32(self->_varBaseOffset + cell->offset); m->clearRegHome(); } } } node = node->getNext(); } while (node != stop); return kErrorOk; } // ============================================================================ // [asmjit::X86RAPass - Translate - Jump] // ============================================================================ //! \internal static void X86RAPass_translateJump(X86RAPass* self, CBJump* jNode, CBLabel* jTarget) { X86Compiler* cc = self->cc(); CBNode* injectRef = self->getFunc()->getEnd()->getPrev(); CBNode* prevCursor = cc->setCursor(injectRef); self->switchState(jTarget->getPassData()->state); // Any code necessary to `switchState()` will be added at the end of the function. 
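// If switchState() had to emit any fix-up code, the original branch can no
// longer target `jTarget` directly. The block below therefore creates a
// trampoline: a fresh label is injected at the end of the function, the branch
// is repointed to it, and the injected state-switching code falls through into
// an unconditional jmp to the real target.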
if (cc->getCursor() != injectRef) { // TODO: Can fail. CBLabel* injectLabel = cc->newLabelNode(); // Add the jump to the target. cc->jmp(jTarget->getLabel()); // Inject the label. cc->_setCursor(injectRef); cc->addNode(injectLabel); // Finally, patch `jNode` target. ASMJIT_ASSERT(jNode->getOpCount() > 0); jNode->_opArray[jNode->getOpCount() - 1] = injectLabel->getLabel(); jNode->_target = injectLabel; // If we injected any code it may not satisfy short form anymore. jNode->delOptions(X86Inst::kOptionShortForm); } cc->_setCursor(prevCursor); self->loadState(jNode->getPassData()->state); } // ============================================================================ // [asmjit::X86RAPass - Translate - Ret] // ============================================================================ static Error X86RAPass_translateRet(X86RAPass* self, CCFuncRet* rNode, CBLabel* exitTarget) { X86Compiler* cc = self->cc(); CBNode* node = rNode->getNext(); // 32-bit mode requires to push floating point return value(s), handle it // here as it's a special case. X86RAData* raData = rNode->getPassData(); if (raData) { TiedReg* tiedArray = raData->tiedArray; uint32_t tiedTotal = raData->tiedTotal; for (uint32_t i = 0; i < tiedTotal; i++) { TiedReg* tied = &tiedArray[i]; if (tied->flags & (TiedReg::kX86Fld4 | TiedReg::kX86Fld8)) { VirtReg* vreg = tied->vreg; X86Mem m(self->getVarMem(vreg)); uint32_t elementId = TypeId::elementOf(vreg->getTypeId()); m.setSize(elementId == TypeId::kF32 ? 4 : elementId == TypeId::kF64 ? 8 : (tied->flags & TiedReg::kX86Fld4) ? 4 : 8); cc->fld(m); } } } // Decide whether to `jmp` or not in case we are next to the return label. while (node) { switch (node->getType()) { // If we have found an exit label we just return, there is no need to // emit jump to that. case CBNode::kNodeLabel: if (static_cast(node) == exitTarget) return kErrorOk; goto _EmitRet; case CBNode::kNodeData: case CBNode::kNodeInst: case CBNode::kNodeFuncCall: case CBNode::kNodeFuncExit: goto _EmitRet; // Continue iterating. case CBNode::kNodeComment: case CBNode::kNodeAlign: case CBNode::kNodeHint: break; // Invalid node to be here. case CBNode::kNodeFunc: return DebugUtils::errored(kErrorInvalidState); // We can't go forward from here. case CBNode::kNodeSentinel: return kErrorOk; } node = node->getNext(); } _EmitRet: { cc->_setCursor(rNode); cc->jmp(exitTarget->getLabel()); } return kErrorOk; } // ============================================================================ // [asmjit::X86RAPass - Translate - Func] // ============================================================================ Error X86RAPass::translate() { X86Compiler* cc = this->cc(); CCFunc* func = getFunc(); // Register allocator contexts. X86VarAlloc vAlloc(this); X86CallAlloc cAlloc(this); // Flow. CBNode* node_ = func; CBNode* next = nullptr; CBNode* stop = getStop(); ZoneList::Link* jLink = _jccList.getFirst(); for (;;) { while (node_->isTranslated()) { // Switch state if we went to a node that is already translated. 
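      // The label's saved state is authoritative here: switchState() below
      // reconciles the current register state with it, after which translation
      // continues with the next pending conditional branch taken from
      // `_jccList` (see _NextGroup).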
if (node_->getType() == CBNode::kNodeLabel) { CBLabel* node = static_cast(node_); cc->_setCursor(node->getPrev()); switchState(node->getPassData()->state); } _NextGroup: if (!jLink) { goto _Done; } else { node_ = jLink->getValue(); jLink = jLink->getNext(); CBNode* jFlow = X86RAPass_getOppositeJccFlow(static_cast(node_)); loadState(node_->getPassData()->state); if (jFlow->hasPassData() && jFlow->getPassData()->state) { X86RAPass_translateJump(this, static_cast(node_), static_cast(jFlow)); node_ = jFlow; if (node_->isTranslated()) goto _NextGroup; } else { node_ = jFlow; } break; } } next = node_->getNext(); node_->_flags |= CBNode::kFlagIsTranslated; if (node_->hasPassData()) { switch (node_->getType()) { // -------------------------------------------------------------------- // [Align / Embed] // -------------------------------------------------------------------- case CBNode::kNodeAlign: case CBNode::kNodeData: break; // -------------------------------------------------------------------- // [Label] // -------------------------------------------------------------------- case CBNode::kNodeLabel: { CBLabel* node = static_cast(node_); ASMJIT_ASSERT(node->getPassData()->state == nullptr); node->getPassData()->state = saveState(); if (node == func->getExitNode()) goto _NextGroup; break; } // -------------------------------------------------------------------- // [Inst/Call/SArg/Ret] // -------------------------------------------------------------------- case CBNode::kNodeInst: case CBNode::kNodeFunc: case CBNode::kNodeFuncCall: case CBNode::kNodePushArg: // Update TiedReg's unuse flags based on liveness of the next node. if (!node_->isJcc()) { X86RAData* raData = node_->getPassData(); RABits* liveness; if (raData && next && next->hasPassData() && (liveness = next->getPassData()->liveness)) { TiedReg* tiedArray = raData->tiedArray; uint32_t tiedTotal = raData->tiedTotal; for (uint32_t i = 0; i < tiedTotal; i++) { TiedReg* tied = &tiedArray[i]; VirtReg* vreg = tied->vreg; if (!liveness->getBit(vreg->_raId) && !vreg->isFixed()) tied->flags |= TiedReg::kUnuse; } } } if (node_->getType() == CBNode::kNodeFuncCall) { ASMJIT_PROPAGATE(cAlloc.run(static_cast(node_))); break; } ASMJIT_FALLTHROUGH; case CBNode::kNodeHint: case CBNode::kNodeFuncExit: { ASMJIT_PROPAGATE(vAlloc.run(node_)); // Handle conditional/unconditional jump. if (node_->isJmpOrJcc()) { CBJump* node = static_cast(node_); CBLabel* jTarget = node->getTarget(); // Target not followed. 
if (!jTarget) { if (node->isJmp()) goto _NextGroup; else break; } if (node->isJmp()) { if (jTarget->hasPassData() && jTarget->getPassData()->state) { cc->_setCursor(node->getPrev()); switchState(jTarget->getPassData()->state); goto _NextGroup; } else { next = jTarget; } } else { CBNode* jNext = node->getNext(); if (jTarget->isTranslated()) { if (jNext->isTranslated()) { ASMJIT_ASSERT(jNext->getType() == CBNode::kNodeLabel); cc->_setCursor(node->getPrev()); intersectStates( jTarget->getPassData()->state, jNext->getPassData()->state); } RAState* savedState = saveState(); node->getPassData()->state = savedState; X86RAPass_translateJump(this, node, jTarget); next = jNext; } else if (jNext->isTranslated()) { ASMJIT_ASSERT(jNext->getType() == CBNode::kNodeLabel); RAState* savedState = saveState(); node->getPassData()->state = savedState; cc->_setCursor(node); switchState(jNext->getPassData()->state); next = jTarget; } else { node->getPassData()->state = saveState(); next = X86RAPass_getJccFlow(node); } } } else if (node_->isRet()) { ASMJIT_PROPAGATE( X86RAPass_translateRet(this, static_cast(node_), func->getExitNode())); goto _NextGroup; } break; } // -------------------------------------------------------------------- // [End] // -------------------------------------------------------------------- case CBNode::kNodeSentinel: { goto _NextGroup; } default: break; } } if (next == stop) goto _NextGroup; node_ = next; } _Done: { ASMJIT_PROPAGATE(resolveCellOffsets()); ASMJIT_PROPAGATE(X86RAPass_prepareFuncFrame(this, func)); FuncFrameLayout layout; ASMJIT_PROPAGATE(layout.init(func->getDetail(), func->getFrameInfo())); _varBaseRegId = layout._stackBaseRegId; _varBaseOffset = layout._stackBaseOffset; ASMJIT_PROPAGATE(X86RAPass_patchFuncMem(this, func, stop, layout)); cc->_setCursor(func); ASMJIT_PROPAGATE(FuncUtils::emitProlog(this->cc(), layout)); cc->_setCursor(func->getExitNode()); ASMJIT_PROPAGATE(FuncUtils::emitEpilog(this->cc(), layout)); } return kErrorOk; } } // asmjit namespace // [Api-End] #include "../asmjit_apiend.h" // [Guard] #endif // ASMJIT_BUILD_X86 && !ASMJIT_DISABLE_COMPILER