asmjit: sync with upstream

Patrick Mackinlay 2022-05-17 16:51:29 +07:00
parent 1a6634fda5
commit 02e5f1e468
26 changed files with 2193 additions and 1687 deletions

View File

@ -249,7 +249,8 @@ if (ASMJIT_SANITIZE)
endif()
endif()
if (NOT WIN32)
# Do not link to pthread on Android as it's part of C runtime.
if (NOT WIN32 AND NOT ANDROID)
list(APPEND ASMJIT_DEPS pthread)
endif()

View File

@ -4993,7 +4993,7 @@ EmitDone:
if (Support::test(options, InstOptions::kReserved)) {
#ifndef ASMJIT_NO_LOGGING
if (_logger)
EmitterUtils::logInstructionEmitted(this, instId, options, o0, o1, o2, opExt, 0, 0, writer.cursor());
EmitterUtils::logInstructionEmitted(this, BaseInst::composeARMInstId(instId, instCC), options, o0, o1, o2, opExt, 0, 0, writer.cursor());
#endif
}

View File

@ -169,6 +169,18 @@ public:
//! \}
//! \name Compiler specific
//! \{
//! Special pseudo-instruction that can be used to load a memory address into `o0` GP register.
//!
//! \note At the moment this instruction is only useful to load a stack allocated address into a GP register
//! for further use. It makes very little sense to use it for anything else. The semantics of this instruction
//! is the same as X86 `LEA` (load effective address) instruction.
inline Error loadAddressOf(const Gp& o0, const Mem& o1) { return _emitter()->_emitI(Inst::kIdAdr, o0, o1); }
//! \}
//! \name Function Call & Ret Intrinsics
//! \{
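// Usage sketch (editor's illustration, not part of this commit). It assumes an
// existing `arm::Compiler cc` and the `newGpx()` / `newStack()` helpers of the
// Compiler interface; exact helper names may differ in this revision:
//
//   arm::Gp addr = cc.newGpx("addr");       // 64-bit GP destination.
//   arm::Mem slot = cc.newStack(64, 16);    // Stack-allocated scratch area.
//   cc.loadAddressOf(addr, slot);           // Materializes the slot's address
//                                           // into `addr`, like X86 LEA.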

View File

@ -117,7 +117,7 @@ ASMJIT_FAVOR_SIZE Error EmitHelper::emitRegMove(
case TypeId::kUInt32:
case TypeId::kInt64:
case TypeId::kUInt64:
return emitter->mov(src.as<Gp>().x(), dst.as<Gp>().x());
return emitter->mov(dst.as<Gp>().x(), src.as<Gp>().x());
default: {
if (TypeUtils::isFloat32(typeId) || TypeUtils::isVec32(typeId))

View File

@ -139,7 +139,7 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
if (ASMJIT_UNLIKELY(!Inst::isDefinedId(realId)))
return DebugUtils::errored(kErrorInvalidInstruction);
out->_instFlags = 0;
out->_instFlags = InstRWFlags::kNone;
out->_opCount = uint8_t(opCount);
out->_rmFeature = 0;
out->_extraReg.reset();

View File

@ -102,7 +102,7 @@ public:
// TODO: [ARM] This is just a workaround...
static InstControlFlow getControlFlowType(InstId instId) noexcept {
switch (instId) {
switch (BaseInst::extractRealId(instId)) {
case Inst::kIdB:
case Inst::kIdBr:
if (BaseInst::extractARMCondCode(instId) == CondCode::kAL)
@ -127,8 +127,8 @@ static InstControlFlow getControlFlowType(InstId instId) noexcept {
Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& controlType, RAInstBuilder& ib) noexcept {
InstRWInfo rwInfo;
InstId instId = inst->id();
if (Inst::isDefinedId(instId)) {
if (Inst::isDefinedId(inst->realId())) {
InstId instId = inst->id();
uint32_t opCount = inst->opCount();
const Operand* opArray = inst->operands();
ASMJIT_PROPAGATE(InstInternal::queryRWInfo(_arch, inst->baseInst(), opArray, opCount, &rwInfo));
@ -136,6 +136,8 @@ Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& controlType, RAInstB
const InstDB::InstInfo& instInfo = InstDB::infoById(instId);
uint32_t singleRegOps = 0;
ib.addInstRWFlags(rwInfo.instFlags());
if (opCount) {
uint32_t consecutiveOffset = 0xFFFFFFFFu;
uint32_t consecutiveParent = Globals::kInvalidId;
@ -715,6 +717,50 @@ ASMJIT_FAVOR_SPEED Error ARMRAPass::_rewrite(BaseNode* first, BaseNode* stop) no
}
}
}
// Rewrite `loadAddressOf()` construct.
if (inst->realId() == Inst::kIdAdr && inst->opCount() == 2 && inst->op(1).isMem()) {
BaseMem mem = inst->op(1).as<BaseMem>();
int64_t offset = mem.offset();
if (!mem.hasBaseOrIndex()) {
inst->setId(Inst::kIdMov);
inst->setOp(1, Imm(offset));
}
else {
if (mem.hasIndex())
return DebugUtils::errored(kErrorInvalidAddressIndex);
GpX dst(inst->op(0).as<Gp>().id());
GpX base(mem.baseId());
InstId arithInstId = offset < 0 ? Inst::kIdSub : Inst::kIdAdd;
uint64_t absOffset = offset < 0 ? Support::neg(uint64_t(offset)) : uint64_t(offset);
inst->setId(arithInstId);
inst->setOpCount(3);
inst->setOp(1, base);
inst->setOp(2, Imm(absOffset));
// Use two operations if the offset cannot be encoded with ADD/SUB.
if (absOffset > 0xFFFu && (absOffset & ~uint64_t(0xFFF000u)) != 0) {
if (absOffset <= 0xFFFFFFu) {
cc()->_setCursor(inst->prev());
ASMJIT_PROPAGATE(cc()->emit(arithInstId, dst, base, Imm(absOffset & 0xFFFu)));
inst->setOp(1, dst);
inst->setOp(2, Imm(absOffset & 0xFFF000u));
}
else {
cc()->_setCursor(inst->prev());
ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, inst->op(0), Imm(absOffset)));
inst->setOp(1, base);
inst->setOp(2, dst);
}
}
}
}
}
node = next;
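// Worked example (illustration, not part of this commit): for a stack slot at
// `[base + 0x12345]` the offset neither fits a single 12-bit ADD immediate nor
// is a pure `0xFFF000`-aligned value, so the rewrite above emits two adds:
//
//   add dst, base, #0x345       // absOffset & 0xFFF
//   add dst, dst,  #0x12000     // absOffset & 0xFFF000
//
// Offsets above 0xFFFFFF instead fall back to `mov dst, #absOffset` followed
// by `add dst, base, dst`.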

View File

@ -455,11 +455,12 @@ public:
//! \}
//! \name ARM Specific Features
//! \name Clone
//! \{
//! Clones the memory operand.
inline constexpr Mem clone() const noexcept { return Mem(*this); }
//! Gets new memory operand adjusted by `off`.
inline Mem cloneAdjusted(int64_t off) const noexcept {
Mem result(*this);
@ -467,6 +468,51 @@ public:
return result;
}
//! Clones the memory operand and makes it pre-index.
inline Mem pre() const noexcept {
Mem result(*this);
result.setPredicate(kOffsetPreIndex);
return result;
}
//! Clones the memory operand, applies a given offset `off` and makes it pre-index.
inline Mem pre(int64_t off) const noexcept {
Mem result(*this);
result.setPredicate(kOffsetPreIndex);
result.addOffset(off);
return result;
}
//! Clones the memory operand and makes it post-index.
inline Mem post() const noexcept {
Mem result(*this);
result.setPredicate(kOffsetPostIndex);
return result;
}
//! Clones the memory operand, applies a given offset `off` and makes it post-index.
inline Mem post(int64_t off) const noexcept {
Mem result(*this);
result.setPredicate(kOffsetPostIndex);
result.addOffset(off);
return result;
}
//! \}
//! \name Base & Index
//! \{
//! Converts memory `baseType` and `baseId` to `arm::Reg` instance.
//!
//! The memory must have a valid base register otherwise the result will be wrong.
inline Reg baseReg() const noexcept { return Reg::fromTypeAndId(baseType(), baseId()); }
//! Converts memory `indexType` and `indexId` to `arm::Reg` instance.
//!
//! The memory must have a valid index register otherwise the result will be wrong.
inline Reg indexReg() const noexcept { return Reg::fromTypeAndId(indexType(), indexId()); }
using BaseMem::setIndex;
inline void setIndex(const BaseReg& index, uint32_t shift) noexcept {
@ -474,6 +520,11 @@ public:
setShift(shift);
}
//! \}
//! \name ARM Specific Features
//! \{
//! Gets whether the memory operand has shift (aka scale) constant.
inline constexpr bool hasShift() const noexcept { return _signature.hasField<kSignatureMemShiftValueMask>(); }
//! Gets the memory operand's shift (aka scale) constant.
@ -499,32 +550,6 @@ public:
inline void makePreIndex() noexcept { setPredicate(kOffsetPreIndex); }
inline void makePostIndex() noexcept { setPredicate(kOffsetPostIndex); }
inline Mem pre() const noexcept {
Mem result(*this);
result.setPredicate(kOffsetPreIndex);
return result;
}
inline Mem pre(int64_t off) const noexcept {
Mem result(*this);
result.setPredicate(kOffsetPreIndex);
result.addOffset(off);
return result;
}
inline Mem post() const noexcept {
Mem result(*this);
result.setPredicate(kOffsetPreIndex);
return result;
}
inline Mem post(int64_t off) const noexcept {
Mem result(*this);
result.setPredicate(kOffsetPostIndex);
result.addOffset(off);
return result;
}
//! \}
};
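// Usage sketch (illustration, not part of this commit), assuming an emitter
// `a`, the `x0`/`x1` register aliases, and the `arm::ptr()` helper:
//
//   a.ldr(x0, arm::ptr(x1, 16).pre());   // ldr x0, [x1, #16]!  (pre-index)
//   a.ldr(x0, arm::ptr(x1).post(16));    // ldr x0, [x1], #16   (post-index)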

View File

@ -152,7 +152,7 @@ enum class InstHints : uint8_t {
//! No feature hints.
kNoHints = 0,
//! Architecture supports a register swap by using a single instructio.
//! Architecture supports a register swap by using a single instruction.
kRegSwap = 0x01u,
//! Architecture provides push/pop instructions.
kPushPop = 0x02u

View File

@ -356,7 +356,7 @@ struct OffsetFormat {
//! Returns the size of the region/instruction where the offset is encoded.
inline uint32_t regionSize() const noexcept { return _regionSize; }
//! Returns the the offset of the word relative to the start of the region where the offset is.
//! Returns the offset of the word relative to the start of the region where the offset is.
inline uint32_t valueOffset() const noexcept { return _valueOffset; }
//! Returns the size of the data-type (word) that contains the offset, in bytes.

View File

@ -143,7 +143,7 @@ Error formatLabel(
}
if (le->type() == LabelType::kAnonymous)
ASMJIT_PROPAGATE(sb.append("L%u@", labelId));
ASMJIT_PROPAGATE(sb.appendFormat("L%u@", labelId));
return sb.append(le->name());
}
else {

View File

@ -1127,7 +1127,7 @@ public:
//! Tests whether the callee must adjust SP before returning (X86-STDCALL only)
inline bool hasCalleeStackCleanup() const noexcept { return _calleeStackCleanup != 0; }
//! Returns home many bytes of the stack the the callee must adjust before returning (X86-STDCALL only)
//! Returns home many bytes of the stack the callee must adjust before returning (X86-STDCALL only)
inline uint32_t calleeStackCleanup() const noexcept { return _calleeStackCleanup; }
//! Returns call stack alignment.

View File

@ -312,6 +312,10 @@ public:
return id | (uint32_t(cc) << Support::ConstCTZ<uint32_t(InstIdParts::kARM_Cond)>::value);
}
static inline constexpr InstId extractRealId(uint32_t id) noexcept {
return id & uint32_t(InstIdParts::kRealId);
}
static inline constexpr arm::CondCode extractARMCondCode(uint32_t id) noexcept {
return (arm::CondCode)((uint32_t(id) & uint32_t(InstIdParts::kARM_Cond)) >> Support::ConstCTZ<uint32_t(InstIdParts::kARM_Cond)>::value);
}
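// Illustration (not part of this commit) of how a conditional ARM instruction
// id is packed and unpacked, assuming `arm::Inst::kIdB` and `arm::CondCode`:
//
//   InstId id = BaseInst::composeARMInstId(arm::Inst::kIdB, arm::CondCode::kEQ);
//   BaseInst::extractRealId(id);       // == arm::Inst::kIdB
//   BaseInst::extractARMCondCode(id);  // == arm::CondCode::kEQ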
@ -614,13 +618,25 @@ struct OpRWInfo {
//! \}
};
//! Flags used by \ref InstRWInfo.
enum class InstRWFlags : uint32_t {
//! No flags.
kNone = 0x00000000u,
//! Describes a move operation.
//!
//! This flag is used by RA to eliminate moves that are guaranteed to be moves only.
kMovOp = 0x00000001u
};
ASMJIT_DEFINE_ENUM_FLAGS(InstRWFlags)
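// Query sketch (illustration, not part of this commit): the flag can be read
// back through the RW query API, assuming `InstAPI::queryRWInfo()` and x86
// register operands:
//
//   Operand_ ops[] = { x86::xmm0, x86::xmm1 };
//   InstRWInfo rw;
//   InstAPI::queryRWInfo(Arch::kX64, BaseInst(x86::Inst::kIdMovaps), ops, 2, &rw);
//   bool isMove = rw.isMovOp();        // true for a reg-to-reg movaps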
//! Read/Write information of an instruction.
struct InstRWInfo {
//! \name Members
//! \{
//! Instruction flags (there are no flags at the moment, this field is reserved).
uint32_t _instFlags;
InstRWFlags _instFlags;
//! CPU flags read.
CpuRWFlags _readFlags;
//! CPU flags written.
@ -646,6 +662,20 @@ struct InstRWInfo {
//! \}
//! \name Instruction Flags
//! \{
//! Returns flags associated with the instruction, see \ref InstRWFlags.
inline InstRWFlags instFlags() const noexcept { return _instFlags; }
//! Tests whether the instruction flags contain `flag`.
inline bool hasInstFlag(InstRWFlags flag) const noexcept { return Support::test(_instFlags, flag); }
//! Tests whether the instruction flags contain \ref InstRWFlags::kMovOp.
inline bool isMovOp() const noexcept { return hasInstFlag(InstRWFlags::kMovOp); }
//! \}
//! \name CPU Flags Information
//! \{

View File

@ -82,6 +82,12 @@ public:
size_t size = sizeOf(count);
memcpy(this, other, size);
}
inline void unassign(RegGroup group, uint32_t physId, uint32_t indexInWorkIds) noexcept {
assigned.clear(group, Support::bitMask(physId));
dirty.clear(group, Support::bitMask(physId));
workIds[indexInWorkIds] = kWorkNone;
}
};
struct WorkToPhysMap {
@ -304,6 +310,28 @@ public:
_physToWorkIds.swap(other._physToWorkIds);
}
inline void assignWorkIdsFromPhysIds() noexcept {
memset(_workToPhysMap, uint8_t(BaseReg::kIdBad), WorkToPhysMap::sizeOf(_layout.workCount));
for (RegGroup group : RegGroupVirtValues{}) {
uint32_t physBaseIndex = _layout.physIndex[group];
Support::BitWordIterator<RegMask> it(_physToWorkMap->assigned[group]);
while (it.hasNext()) {
uint32_t physId = it.next();
uint32_t workId = _physToWorkMap->workIds[physBaseIndex + physId];
ASMJIT_ASSERT(workId != kWorkNone);
_workToPhysMap->physIds[workId] = uint8_t(physId);
}
}
}
inline void copyFrom(const PhysToWorkMap* physToWorkMap) noexcept {
memcpy(_physToWorkMap, physToWorkMap, PhysToWorkMap::sizeOf(_layout.physTotal));
assignWorkIdsFromPhysIds();
}
inline void copyFrom(const PhysToWorkMap* physToWorkMap, const WorkToPhysMap* workToPhysMap) noexcept {
memcpy(_physToWorkMap, physToWorkMap, PhysToWorkMap::sizeOf(_layout.physTotal));
memcpy(_workToPhysMap, workToPhysMap, WorkToPhysMap::sizeOf(_layout.workCount));

View File

@ -271,8 +271,12 @@ struct RARegMask {
}
template<class Operator>
inline void op(RegGroup group, uint32_t input) noexcept {
_masks[group] = Operator::op(_masks[group], input);
inline void op(RegGroup group, RegMask mask) noexcept {
_masks[group] = Operator::op(_masks[group], mask);
}
inline void clear(RegGroup group, RegMask mask) noexcept {
_masks[group] = _masks[group] & ~mask;
}
//! \}

View File

@ -38,7 +38,9 @@ Error RALocalAllocator::init() noexcept {
physToWorkMap = _pass->newPhysToWorkMap();
workToPhysMap = _pass->newWorkToPhysMap();
if (!physToWorkMap || !workToPhysMap)
_tmpWorkToPhysMap = _pass->newWorkToPhysMap();
if (!physToWorkMap || !workToPhysMap || !_tmpWorkToPhysMap)
return DebugUtils::errored(kErrorOutOfMemory);
_tmpAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
@ -122,26 +124,18 @@ Error RALocalAllocator::makeInitialAssignment() noexcept {
return kErrorOk;
}
Error RALocalAllocator::replaceAssignment(
const PhysToWorkMap* physToWorkMap,
const WorkToPhysMap* workToPhysMap) noexcept {
_curAssignment.copyFrom(physToWorkMap, workToPhysMap);
Error RALocalAllocator::replaceAssignment(const PhysToWorkMap* physToWorkMap) noexcept {
_curAssignment.copyFrom(physToWorkMap);
return kErrorOk;
}
Error RALocalAllocator::switchToAssignment(
PhysToWorkMap* dstPhysToWorkMap,
WorkToPhysMap* dstWorkToPhysMap,
const ZoneBitVector& liveIn,
bool dstReadOnly,
bool tryMode) noexcept {
Error RALocalAllocator::switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, const ZoneBitVector& liveIn, bool dstReadOnly, bool tryMode) noexcept {
RAAssignment dst;
RAAssignment& cur = _curAssignment;
dst.initLayout(_pass->_physRegCount, _pass->workRegs());
dst.initMaps(dstPhysToWorkMap, dstWorkToPhysMap);
dst.initMaps(dstPhysToWorkMap, _tmpWorkToPhysMap);
dst.assignWorkIdsFromPhysIds();
if (tryMode)
return kErrorOk;
@ -329,24 +323,27 @@ Cleared:
if (!tryMode) {
// Here is a code that dumps the conflicting part if something fails here:
// if (!dst.equals(cur)) {
// uint32_t physTotal = dst._layout.physTotal;
// uint32_t workCount = dst._layout.workCount;
// if (!dst.equals(cur)) {
// uint32_t physTotal = dst._layout.physTotal;
// uint32_t workCount = dst._layout.workCount;
//
// for (uint32_t physId = 0; physId < physTotal; physId++) {
// uint32_t dstWorkId = dst._physToWorkMap->workIds[physId];
// uint32_t curWorkId = cur._physToWorkMap->workIds[physId];
// if (dstWorkId != curWorkId)
// fprintf(stderr, "[PhysIdWork] PhysId=%u WorkId[DST(%u) != CUR(%u)]\n", physId, dstWorkId, curWorkId);
// }
// fprintf(stderr, "Dirty DST=0x%08X CUR=0x%08X\n", dst.dirty(RegGroup::kGp), cur.dirty(RegGroup::kGp));
// fprintf(stderr, "Assigned DST=0x%08X CUR=0x%08X\n", dst.assigned(RegGroup::kGp), cur.assigned(RegGroup::kGp));
//
// for (uint32_t workId = 0; workId < workCount; workId++) {
// uint32_t dstPhysId = dst._workToPhysMap->physIds[workId];
// uint32_t curPhysId = cur._workToPhysMap->physIds[workId];
// if (dstPhysId != curPhysId)
// fprintf(stderr, "[WorkToPhys] WorkId=%u PhysId[DST(%u) != CUR(%u)]\n", workId, dstPhysId, curPhysId);
// }
// for (uint32_t physId = 0; physId < physTotal; physId++) {
// uint32_t dstWorkId = dst._physToWorkMap->workIds[physId];
// uint32_t curWorkId = cur._physToWorkMap->workIds[physId];
// if (dstWorkId != curWorkId)
// fprintf(stderr, "[PhysIdWork] PhysId=%u WorkId[DST(%u) != CUR(%u)]\n", physId, dstWorkId, curWorkId);
// }
//
// for (uint32_t workId = 0; workId < workCount; workId++) {
// uint32_t dstPhysId = dst._workToPhysMap->physIds[workId];
// uint32_t curPhysId = cur._workToPhysMap->physIds[workId];
// if (dstPhysId != curPhysId)
// fprintf(stderr, "[WorkToPhys] WorkId=%u PhysId[DST(%u) != CUR(%u)]\n", workId, dstPhysId, curPhysId);
// }
// }
ASMJIT_ASSERT(dst.equals(cur));
}
@ -839,6 +836,34 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
// STEP 9
// ------
//
// Vector registers can be clobbered partially by invoke - find if that's the case and clobber when necessary.
if (node->isInvoke() && group == RegGroup::kVec) {
const InvokeNode* invokeNode = node->as<InvokeNode>();
RegMask maybeClobberedRegs = invokeNode->detail().callConv().preservedRegs(group) & _curAssignment.assigned(group);
if (maybeClobberedRegs) {
uint32_t saveRestoreVecSize = invokeNode->detail().callConv().saveRestoreRegSize(group);
Support::BitWordIterator<RegMask> it(maybeClobberedRegs);
do {
uint32_t physId = it.next();
uint32_t workId = _curAssignment.physToWorkId(group, physId);
RAWorkReg* workReg = workRegById(workId);
uint32_t virtSize = workReg->virtReg()->virtSize();
if (virtSize > saveRestoreVecSize) {
ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
}
} while (it.hasNext());
}
}
// STEP 10
// -------
//
// Assign OUT registers.
if (outPending) {
@ -981,12 +1006,7 @@ Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* co
// Use TryMode of `switchToAssignment()` if possible.
if (target->hasEntryAssignment()) {
ASMJIT_PROPAGATE(switchToAssignment(
target->entryPhysToWorkMap(),
target->entryWorkToPhysMap(),
target->liveIn(),
target->isAllocated(),
true));
ASMJIT_PROPAGATE(switchToAssignment(target->entryPhysToWorkMap(), target->liveIn(), target->isAllocated(), true));
}
ASMJIT_PROPAGATE(allocInst(node));
@ -997,12 +1017,7 @@ Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* co
BaseNode* prevCursor = _cc->setCursor(injectionPoint);
_tmpAssignment.copyFrom(_curAssignment);
ASMJIT_PROPAGATE(switchToAssignment(
target->entryPhysToWorkMap(),
target->entryWorkToPhysMap(),
target->liveIn(),
target->isAllocated(),
false));
ASMJIT_PROPAGATE(switchToAssignment(target->entryPhysToWorkMap(), target->liveIn(), target->isAllocated(), false));
BaseNode* curCursor = _cc->cursor();
if (curCursor != injectionPoint) {
@ -1060,7 +1075,6 @@ Error RALocalAllocator::allocJumpTable(InstNode* node, const RABlocks& targets,
if (!sharedAssignment.empty()) {
ASMJIT_PROPAGATE(switchToAssignment(
sharedAssignment.physToWorkMap(),
sharedAssignment.workToPhysMap(),
sharedAssignment.liveIn(),
true, // Read-only.
false // Try-mode.

View File

@ -57,6 +57,9 @@ public:
//! TiedReg's total counter.
RARegCount _tiedCount;
//! Temporary workToPhysMap that can be used freely by the allocator.
WorkToPhysMap* _tmpWorkToPhysMap;
//! \name Construction & Destruction
//! \{
@ -113,9 +116,7 @@ public:
Error makeInitialAssignment() noexcept;
Error replaceAssignment(
const PhysToWorkMap* physToWorkMap,
const WorkToPhysMap* workToPhysMap) noexcept;
Error replaceAssignment(const PhysToWorkMap* physToWorkMap) noexcept;
//! Switch to the given assignment by reassigning all register and emitting code that reassigns them.
//! This is always used to switch to a previously stored assignment.
@ -123,12 +124,7 @@ public:
//! If `tryMode` is true then the final assignment doesn't have to be exactly same as specified by `dstPhysToWorkMap`
//! and `dstWorkToPhysMap`. This mode is only used before conditional jumps that already have assignment to generate
//! a code sequence that is always executed regardless of the flow.
Error switchToAssignment(
PhysToWorkMap* dstPhysToWorkMap,
WorkToPhysMap* dstWorkToPhysMap,
const ZoneBitVector& liveIn,
bool dstReadOnly,
bool tryMode) noexcept;
Error switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, const ZoneBitVector& liveIn, bool dstReadOnly, bool tryMode) noexcept;
inline Error spillRegsBeforeEntry(RABlock* block) noexcept {
return spillScratchGpRegsBeforeEntry(block->entryScratchGpRegs());

View File

@ -1483,18 +1483,12 @@ Error BaseRAPass::runLocalAllocator() noexcept {
cc()->_setCursor(unconditionalJump ? prev->prev() : prev);
if (consecutive->hasEntryAssignment()) {
ASMJIT_PROPAGATE(
lra.switchToAssignment(
consecutive->entryPhysToWorkMap(),
consecutive->entryWorkToPhysMap(),
consecutive->liveIn(),
consecutive->isAllocated(),
false));
ASMJIT_PROPAGATE(lra.switchToAssignment(consecutive->entryPhysToWorkMap(), consecutive->liveIn(), consecutive->isAllocated(), false));
}
else {
ASMJIT_PROPAGATE(lra.spillRegsBeforeEntry(consecutive));
ASMJIT_PROPAGATE(setBlockEntryAssignment(consecutive, block, lra._curAssignment));
lra._curAssignment.copyFrom(consecutive->entryPhysToWorkMap(), consecutive->entryWorkToPhysMap());
lra._curAssignment.copyFrom(consecutive->entryPhysToWorkMap());
}
}
@ -1526,7 +1520,7 @@ Error BaseRAPass::runLocalAllocator() noexcept {
}
// If we switched to some block we have to update the local allocator.
lra.replaceAssignment(block->entryPhysToWorkMap(), block->entryWorkToPhysMap());
lra.replaceAssignment(block->entryPhysToWorkMap());
}
_clobberedRegs.op<Support::Or>(lra._clobberedRegs);
@ -1546,12 +1540,10 @@ Error BaseRAPass::setBlockEntryAssignment(RABlock* block, const RABlock* fromBlo
}
PhysToWorkMap* physToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap());
WorkToPhysMap* workToPhysMap = cloneWorkToPhysMap(fromAssignment.workToPhysMap());
if (ASMJIT_UNLIKELY(!physToWorkMap || !workToPhysMap))
if (ASMJIT_UNLIKELY(!physToWorkMap))
return DebugUtils::errored(kErrorOutOfMemory);
block->setEntryAssignment(physToWorkMap, workToPhysMap);
block->setEntryAssignment(physToWorkMap);
// True if this is the first (entry) block, nothing to do in this case.
if (block == fromBlock) {
@ -1562,10 +1554,6 @@ Error BaseRAPass::setBlockEntryAssignment(RABlock* block, const RABlock* fromBlo
return kErrorOk;
}
RAAssignment as;
as.initLayout(_physRegCount, workRegs());
as.initMaps(physToWorkMap, workToPhysMap);
const ZoneBitVector& liveOut = fromBlock->liveOut();
const ZoneBitVector& liveIn = block->liveIn();
@ -1578,94 +1566,85 @@ Error BaseRAPass::setBlockEntryAssignment(RABlock* block, const RABlock* fromBlo
RAWorkReg* workReg = workRegById(workId);
RegGroup group = workReg->group();
uint32_t physId = as.workToPhysId(group, workId);
uint32_t physId = fromAssignment.workToPhysId(group, workId);
if (physId != RAAssignment::kPhysNone)
as.unassign(group, workId, physId);
physToWorkMap->unassign(group, physId, _physRegIndex.get(group) + physId);
}
}
return blockEntryAssigned(as);
return blockEntryAssigned(physToWorkMap);
}
Error BaseRAPass::setSharedAssignment(uint32_t sharedAssignmentId, const RAAssignment& fromAssignment) noexcept {
ASMJIT_ASSERT(_sharedAssignments[sharedAssignmentId].empty());
PhysToWorkMap* physToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap());
WorkToPhysMap* workToPhysMap = cloneWorkToPhysMap(fromAssignment.workToPhysMap());
if (ASMJIT_UNLIKELY(!physToWorkMap || !workToPhysMap))
if (ASMJIT_UNLIKELY(!physToWorkMap))
return DebugUtils::errored(kErrorOutOfMemory);
_sharedAssignments[sharedAssignmentId].assignMaps(physToWorkMap, workToPhysMap);
_sharedAssignments[sharedAssignmentId].assignPhysToWorkMap(physToWorkMap);
ZoneBitVector& sharedLiveIn = _sharedAssignments[sharedAssignmentId]._liveIn;
ASMJIT_PROPAGATE(sharedLiveIn.resize(allocator(), workRegCount()));
RAAssignment as;
as.initLayout(_physRegCount, workRegs());
Support::Array<uint32_t, Globals::kNumVirtGroups> sharedAssigned {};
for (RABlock* block : blocks()) {
if (block->sharedAssignmentId() == sharedAssignmentId) {
ASMJIT_ASSERT(!block->hasEntryAssignment());
PhysToWorkMap* entryPhysToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap());
WorkToPhysMap* entryWorkToPhysMap = cloneWorkToPhysMap(fromAssignment.workToPhysMap());
if (ASMJIT_UNLIKELY(!entryPhysToWorkMap || !entryWorkToPhysMap))
if (ASMJIT_UNLIKELY(!entryPhysToWorkMap))
return DebugUtils::errored(kErrorOutOfMemory);
block->setEntryAssignment(entryPhysToWorkMap, entryWorkToPhysMap);
as.initMaps(entryPhysToWorkMap, entryWorkToPhysMap);
block->setEntryAssignment(entryPhysToWorkMap);
const ZoneBitVector& liveIn = block->liveIn();
sharedLiveIn.or_(liveIn);
for (RegGroup group : RegGroupVirtValues{}) {
sharedAssigned[group] |= entryPhysToWorkMap->assigned[group];
uint32_t physBaseIndex = _physRegIndex.get(group);
Support::BitWordIterator<RegMask> it(entryPhysToWorkMap->assigned[group]);
while (it.hasNext()) {
uint32_t physId = it.next();
uint32_t workId = as.physToWorkId(group, physId);
uint32_t workId = entryPhysToWorkMap->workIds[physBaseIndex + physId];
if (!liveIn.bitAt(workId))
as.unassign(group, workId, physId);
entryPhysToWorkMap->unassign(group, physId, physBaseIndex + physId);
}
}
}
}
{
as.initMaps(physToWorkMap, workToPhysMap);
for (RegGroup group : RegGroupVirtValues{}) {
uint32_t physBaseIndex = _physRegIndex.get(group);
Support::BitWordIterator<RegMask> it(_availableRegs[group] & ~sharedAssigned[group]);
for (RegGroup group : RegGroupVirtValues{}) {
Support::BitWordIterator<RegMask> it(_availableRegs[group] & ~sharedAssigned[group]);
while (it.hasNext()) {
uint32_t physId = it.next();
if (as.isPhysAssigned(group, physId)) {
uint32_t workId = as.physToWorkId(group, physId);
as.unassign(group, workId, physId);
}
}
while (it.hasNext()) {
uint32_t physId = it.next();
if (Support::bitTest(physToWorkMap->assigned[group], physId))
physToWorkMap->unassign(group, physId, physBaseIndex + physId);
}
}
return blockEntryAssigned(as);
return blockEntryAssigned(physToWorkMap);
}
Error BaseRAPass::blockEntryAssigned(const RAAssignment& as) noexcept {
Error BaseRAPass::blockEntryAssigned(const PhysToWorkMap* physToWorkMap) noexcept {
// Complex allocation strategy requires to record register assignments upon block entry (or per shared state).
for (RegGroup group : RegGroupVirtValues{}) {
if (!_strategy[group].isComplex())
continue;
Support::BitWordIterator<RegMask> it(as.assigned(group));
uint32_t physBaseIndex = _physRegIndex[group];
Support::BitWordIterator<RegMask> it(physToWorkMap->assigned[group]);
while (it.hasNext()) {
uint32_t physId = it.next();
uint32_t workId = as.physToWorkId(group, physId);
uint32_t workId = physToWorkMap->workIds[physBaseIndex + physId];
RAWorkReg* workReg = workRegById(workId);
workReg->addAllocatedMask(Support::bitMask(physId));

View File

@ -129,10 +129,8 @@ public:
//! Scratch registers used at exit, by a terminator instruction.
RegMask _exitScratchGpRegs = 0;
//! Register assignment (PhysToWork) on entry.
//! Register assignment on entry.
PhysToWorkMap* _entryPhysToWorkMap = nullptr;
//! Register assignment (WorkToPhys) on entry.
WorkToPhysMap* _entryWorkToPhysMap = nullptr;
//! \}
@ -247,13 +245,8 @@ public:
}
inline bool hasEntryAssignment() const noexcept { return _entryPhysToWorkMap != nullptr; }
inline WorkToPhysMap* entryWorkToPhysMap() const noexcept { return _entryWorkToPhysMap; }
inline PhysToWorkMap* entryPhysToWorkMap() const noexcept { return _entryPhysToWorkMap; }
inline void setEntryAssignment(PhysToWorkMap* physToWorkMap, WorkToPhysMap* workToPhysMap) noexcept {
_entryPhysToWorkMap = physToWorkMap;
_entryWorkToPhysMap = workToPhysMap;
}
inline void setEntryAssignment(PhysToWorkMap* physToWorkMap) noexcept { _entryPhysToWorkMap = physToWorkMap; }
//! \}
@ -283,6 +276,8 @@ public:
//! Parent block.
RABlock* _block;
//! Instruction RW flags.
InstRWFlags _instRWFlags;
//! Aggregated RATiedFlags from all operands & instruction specific flags.
RATiedFlags _flags;
//! Total count of RATiedReg's.
@ -305,9 +300,10 @@ public:
//! \name Construction & Destruction
//! \{
inline RAInst(RABlock* block, RATiedFlags flags, uint32_t tiedTotal, const RARegMask& clobberedRegs) noexcept {
inline RAInst(RABlock* block, InstRWFlags instRWFlags, RATiedFlags tiedFlags, uint32_t tiedTotal, const RARegMask& clobberedRegs) noexcept {
_block = block;
_flags = flags;
_instRWFlags = instRWFlags;
_flags = tiedFlags;
_tiedTotal = tiedTotal;
_tiedIndex.reset();
_tiedCount.reset();
@ -321,6 +317,13 @@ public:
//! \name Accessors
//! \{
//! Returns instruction RW flags.
inline InstRWFlags instRWFlags() const noexcept { return _instRWFlags; };
//! Tests whether the given `flag` is present in instruction RW flags.
inline bool hasInstRWFlag(InstRWFlags flag) const noexcept { return Support::test(_instRWFlags, flag); }
//! Adds `flags` to instruction RW flags.
inline void addInstRWFlags(InstRWFlags flags) noexcept { _instRWFlags |= flags; }
//! Returns the instruction flags.
inline RATiedFlags flags() const noexcept { return _flags; }
//! Tests whether the instruction has flag `flag`.
@ -383,6 +386,9 @@ public:
//! \name Members
//! \{
//! Instruction RW flags.
InstRWFlags _instRWFlags;
//! Flags combined from all RATiedReg's.
RATiedFlags _aggregatedFlags;
//! Flags that will be cleared before storing the aggregated flags to `RAInst`.
@ -407,6 +413,7 @@ public:
inline void init() noexcept { reset(); }
inline void reset() noexcept {
_instRWFlags = InstRWFlags::kNone;
_aggregatedFlags = RATiedFlags::kNone;
_forbiddenFlags = RATiedFlags::kNone;
_count.reset();
@ -421,10 +428,15 @@ public:
//! \name Accessors
//! \{
inline RATiedFlags aggregatedFlags() const noexcept { return _aggregatedFlags; }
inline RATiedFlags forbiddenFlags() const noexcept { return _forbiddenFlags; }
inline InstRWFlags instRWFlags() const noexcept { return _instRWFlags; }
inline bool hasInstRWFlag(InstRWFlags flag) const noexcept { return Support::test(_instRWFlags, flag); }
inline void addInstRWFlags(InstRWFlags flags) noexcept { _instRWFlags |= flags; }
inline void clearInstRWFlags(InstRWFlags flags) noexcept { _instRWFlags &= ~flags; }
inline RATiedFlags aggregatedFlags() const noexcept { return _aggregatedFlags; }
inline void addAggregatedFlags(RATiedFlags flags) noexcept { _aggregatedFlags |= flags; }
inline RATiedFlags forbiddenFlags() const noexcept { return _forbiddenFlags; }
inline void addForbiddenFlags(RATiedFlags flags) noexcept { _forbiddenFlags |= flags; }
//! Returns the number of tied registers added to the builder.
@ -616,8 +628,6 @@ public:
ZoneBitVector _liveIn {};
//! Register assignment (PhysToWork).
PhysToWorkMap* _physToWorkMap = nullptr;
//! Register assignment (WorkToPhys).
WorkToPhysMap* _workToPhysMap = nullptr;
//! \}
@ -632,12 +642,7 @@ public:
inline const ZoneBitVector& liveIn() const noexcept { return _liveIn; }
inline PhysToWorkMap* physToWorkMap() const noexcept { return _physToWorkMap; }
inline WorkToPhysMap* workToPhysMap() const noexcept { return _workToPhysMap; }
inline void assignMaps(PhysToWorkMap* physToWorkMap, WorkToPhysMap* workToPhysMap) noexcept {
_physToWorkMap = physToWorkMap;
_workToPhysMap = workToPhysMap;
}
inline void assignPhysToWorkMap(PhysToWorkMap* physToWorkMap) noexcept { _physToWorkMap = physToWorkMap; }
//! \}
};
@ -873,16 +878,16 @@ public:
return _exits.append(allocator(), block);
}
ASMJIT_FORCE_INLINE RAInst* newRAInst(RABlock* block, RATiedFlags flags, uint32_t tiedRegCount, const RARegMask& clobberedRegs) noexcept {
ASMJIT_FORCE_INLINE RAInst* newRAInst(RABlock* block, InstRWFlags instRWFlags, RATiedFlags flags, uint32_t tiedRegCount, const RARegMask& clobberedRegs) noexcept {
void* p = zone()->alloc(RAInst::sizeOf(tiedRegCount));
if (ASMJIT_UNLIKELY(!p))
return nullptr;
return new(p) RAInst(block, flags, tiedRegCount, clobberedRegs);
return new(p) RAInst(block, instRWFlags, flags, tiedRegCount, clobberedRegs);
}
ASMJIT_FORCE_INLINE Error assignRAInst(BaseNode* node, RABlock* block, RAInstBuilder& ib) noexcept {
uint32_t tiedRegCount = ib.tiedRegCount();
RAInst* raInst = newRAInst(block, ib.aggregatedFlags(), tiedRegCount, ib._clobbered);
RAInst* raInst = newRAInst(block, ib.instRWFlags(), ib.aggregatedFlags(), tiedRegCount, ib._clobbered);
if (ASMJIT_UNLIKELY(!raInst))
return DebugUtils::errored(kErrorOutOfMemory);
@ -1066,13 +1071,6 @@ public:
return static_cast<PhysToWorkMap*>(zone()->dupAligned(map, size, sizeof(uint32_t)));
}
inline WorkToPhysMap* cloneWorkToPhysMap(const WorkToPhysMap* map) noexcept {
size_t size = WorkToPhysMap::sizeOf(_workRegs.size());
if (ASMJIT_UNLIKELY(size == 0))
return const_cast<WorkToPhysMap*>(map);
return static_cast<WorkToPhysMap*>(zone()->dup(map, size));
}
//! \name Liveness Analysis & Statistics
//! \{
@ -1110,7 +1108,7 @@ public:
//! Called after the RA assignment has been assigned to a block.
//!
//! This cannot change the assignment, but can examine it.
Error blockEntryAssigned(const RAAssignment& as) noexcept;
Error blockEntryAssigned(const PhysToWorkMap* physToWorkMap) noexcept;
//! \}

View File

@ -30,7 +30,7 @@ static inline uint32_t getXmmMovInst(const FuncFrame& frame) {
: (avx ? Inst::kIdVmovups : Inst::kIdMovups);
}
//! Converts `size` to a 'kmov?' instructio.
//! Converts `size` to a 'kmov?' instruction.
static inline uint32_t kmovInstFromSize(uint32_t size) noexcept {
switch (size) {
case 1: return Inst::kIdKmovb;

View File

@ -776,6 +776,15 @@ static ASMJIT_FORCE_INLINE Error rwHandleAVX512(const BaseInst& inst, const Inst
return kErrorOk;
}
static ASMJIT_FORCE_INLINE bool hasSameRegType(const BaseReg* regs, size_t opCount) noexcept {
ASMJIT_ASSERT(opCount > 0);
RegType regType = regs[0].type();
for (size_t i = 1; i < opCount; i++)
if (regs[i].type() != regType)
return false;
return true;
}
Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept {
// Only called when `arch` matches X86 family.
ASMJIT_ASSERT(Environment::isFamilyX86(arch));
@ -801,13 +810,14 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
: InstDB::rwInfoB[InstDB::rwInfoIndexB[instId]];
const InstDB::RWInfoRm& instRmInfo = InstDB::rwInfoRm[instRwInfo.rmInfo];
out->_instFlags = 0;
out->_instFlags = InstDB::_instFlagsTable[additionalInfo._instFlagsIndex];
out->_opCount = uint8_t(opCount);
out->_rmFeature = instRmInfo.rmFeature;
out->_extraReg.reset();
out->_readFlags = CpuRWFlags(rwFlags.readFlags);
out->_writeFlags = CpuRWFlags(rwFlags.writeFlags);
uint32_t opTypeMask = 0u;
uint32_t nativeGpSize = Environment::registerSizeFromArch(arch);
constexpr OpRWFlags R = OpRWFlags::kRead;
@ -827,6 +837,8 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
const Operand_& srcOp = operands[i];
const InstDB::RWInfoOp& rwOpData = InstDB::rwInfoOp[instRwInfo.opInfoIndex[i]];
opTypeMask |= Support::bitMask(srcOp.opType());
if (!srcOp.isRegOrMem()) {
op.reset();
continue;
@ -878,6 +890,35 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
}
}
// Only keep kMovOp if the instruction is actually register to register move of the same kind.
if (out->hasInstFlag(InstRWFlags::kMovOp)) {
if (!(opCount >= 2 && opTypeMask == Support::bitMask(OperandType::kReg) && hasSameRegType(reinterpret_cast<const BaseReg*>(operands), opCount)))
out->_instFlags &= ~InstRWFlags::kMovOp;
}
// Special cases require more logic.
if (instRmInfo.flags & (InstDB::RWInfoRm::kFlagMovssMovsd | InstDB::RWInfoRm::kFlagPextrw | InstDB::RWInfoRm::kFlagFeatureIfRMI)) {
if (instRmInfo.flags & InstDB::RWInfoRm::kFlagMovssMovsd) {
if (opCount == 2) {
if (operands[0].isReg() && operands[1].isReg()) {
// Doesn't zero extend the destination.
out->_operands[0]._extendByteMask = 0;
}
}
}
else if (instRmInfo.flags & InstDB::RWInfoRm::kFlagPextrw) {
if (opCount == 3 && Reg::isMm(operands[1])) {
out->_rmFeature = 0;
rmOpsMask = 0;
}
}
else if (instRmInfo.flags & InstDB::RWInfoRm::kFlagFeatureIfRMI) {
if (opCount != 3 || !operands[2].isImm()) {
out->_rmFeature = 0;
}
}
}
rmOpsMask &= instRmInfo.rmOpsMask;
if (rmOpsMask) {
Support::BitWordIterator<uint32_t> it(rmOpsMask);
@ -916,6 +957,9 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
// used to move between GP, segment, control and debug registers. Moving between GP registers also allow to
// use memory operand.
// We will again set the flag if it's actually a move from GP to GP register, otherwise this flag cannot be set.
out->_instFlags &= ~InstRWFlags::kMovOp;
if (opCount == 2) {
if (operands[0].isReg() && operands[1].isReg()) {
const Reg& o0 = operands[0].as<Reg>();
@ -926,6 +970,7 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
out->_operands[1].reset(R | RegM, operands[1].size());
rwZeroExtendGp(out->_operands[0], operands[0].as<Gp>(), nativeGpSize);
out->_instFlags |= InstRWFlags::kMovOp;
return kErrorOk;
}
@ -1618,6 +1663,68 @@ UNIT(x86_inst_api_text) {
"Instructions do not match \"%s\" (#%u) != \"%s\" (#%u)", aName.data(), a, bName.data(), b);
}
}
template<typename... Args>
static Error queryRWInfoSimple(InstRWInfo* out, Arch arch, InstId instId, InstOptions options, Args&&... args) {
BaseInst inst(instId);
inst.addOptions(options);
Operand_ opArray[] = { std::forward<Args>(args)... };
return InstInternal::queryRWInfo(arch, inst, opArray, sizeof...(args), out);
}
UNIT(x86_inst_api_rm_feature) {
INFO("Verifying whether RM/feature is reported correctly for PEXTRW instruction");
{
InstRWInfo rwi;
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdPextrw, InstOptions::kNone, eax, mm1, imm(1));
EXPECT(rwi.rmFeature() == 0);
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdPextrw, InstOptions::kNone, eax, xmm1, imm(1));
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kSSE4_1);
}
INFO("Verifying whether RM/feature is reported correctly for AVX512 shift instructions");
{
InstRWInfo rwi;
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpslld, InstOptions::kNone, xmm1, xmm2, imm(8));
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsllq, InstOptions::kNone, ymm1, ymm2, imm(8));
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrad, InstOptions::kNone, xmm1, xmm2, imm(8));
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrld, InstOptions::kNone, ymm1, ymm2, imm(8));
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrlq, InstOptions::kNone, xmm1, xmm2, imm(8));
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpslldq, InstOptions::kNone, xmm1, xmm2, imm(8));
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsllw, InstOptions::kNone, ymm1, ymm2, imm(8));
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsraw, InstOptions::kNone, xmm1, xmm2, imm(8));
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrldq, InstOptions::kNone, ymm1, ymm2, imm(8));
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrlw, InstOptions::kNone, xmm1, xmm2, imm(8));
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpslld, InstOptions::kNone, xmm1, xmm2, xmm3);
EXPECT(rwi.rmFeature() == 0);
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsllw, InstOptions::kNone, xmm1, xmm2, xmm3);
EXPECT(rwi.rmFeature() == 0);
}
}
#endif
ASMJIT_END_SUB_NAMESPACE

File diff suppressed because it is too large

View File

@ -461,7 +461,7 @@ struct InstInfo {
//! \name Accessors
//! \{
//! Returns common information, see `CommonInfo`.
//! Returns common information, see \ref CommonInfo.
inline const CommonInfo& commonInfo() const noexcept { return _commonInfoTable[_commonInfoIndex]; }
//! Returns instruction flags, see \ref Flags.

View File

@ -189,12 +189,12 @@ enum EncodingId : uint32_t {
//! Additional information table, provides CPU extensions required to execute an instruction and RW flags.
struct AdditionalInfo {
//! Features vector.
uint8_t _features[6];
//! Index to `_instFlagsTable`.
uint8_t _instFlagsIndex;
//! Index to `_rwFlagsTable`.
uint8_t _rwFlagsIndex;
//! Reserved for future use.
uint8_t _reserved;
//! Features vector.
uint8_t _features[6];
inline const uint8_t* featuresBegin() const noexcept { return _features; }
inline const uint8_t* featuresEnd() const noexcept { return _features + ASMJIT_ARRAY_SIZE(_features); }
@ -259,7 +259,13 @@ struct RWInfoRm {
};
enum Flags : uint8_t {
kFlagAmbiguous = 0x01
kFlagAmbiguous = 0x01,
//! Special semantics for PEXTRW - memory operand can only be used with SSE4.1 instruction and it's forbidden in MMX.
kFlagPextrw = 0x02,
//! Special semantics for MOVSS and MOVSD - doesn't zero extend the destination if the operation is a reg to reg move.
kFlagMovssMovsd = 0x04,
//! Special semantics for AVX shift instructions that do not provide reg/mem in AVX/AVX2 mode (AVX-512 is required).
kFlagFeatureIfRMI = 0x08
};
uint8_t category;
@ -283,6 +289,7 @@ extern const RWInfo rwInfoB[];
extern const RWInfoOp rwInfoOp[];
extern const RWInfoRm rwInfoRm[];
extern const RWFlagsInfoTable _rwFlagsInfoTable[];
extern const InstRWFlags _instFlagsTable[];
extern const uint32_t _mainOpcodeTable[];
extern const uint32_t _altOpcodeTable[];

View File

@ -126,6 +126,12 @@ Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& cf, RAInstBuilder& i
bool hasGpbHiConstraint = false;
uint32_t singleRegOps = 0;
// Copy instruction RW flags to instruction builder except kMovOp, which is propagated manually later.
ib.addInstRWFlags(rwInfo.instFlags() & ~InstRWFlags::kMovOp);
// Mask of all operand types used by the instruction - can be used as an optimization later.
uint32_t opTypesMask = 0u;
if (opCount) {
// The mask is for all registers, but we are mostly interested in AVX-512 registers at the moment. The mask
// will be combined with all available registers of the Compiler at the end so it never uses more registers
@ -167,6 +173,8 @@ Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& cf, RAInstBuilder& i
const Operand& op = opArray[i];
const OpRWInfo& opRwInfo = rwInfo.operand(i);
opTypesMask |= 1u << uint32_t(op.opType());
if (op.isReg()) {
// Register Operand
// ----------------
@ -394,6 +402,24 @@ Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& cf, RAInstBuilder& i
}
}
// If this instruction has move semantics then check whether it could be eliminated if all virtual registers
// are allocated into the same register. Take into account the virtual size of the destination register as that's
// more important than a physical register size in this case.
if (rwInfo.hasInstFlag(InstRWFlags::kMovOp) && !inst->hasExtraReg() && Support::bitTest(opTypesMask, uint32_t(OperandType::kReg))) {
// AVX+ move instructions have 3 operand form - the first two operands must be the same to guarantee move semantics.
if (opCount == 2 || (opCount == 3 && opArray[0] == opArray[1])) {
uint32_t vIndex = Operand::virtIdToIndex(opArray[0].as<Reg>().id());
if (vIndex < Operand::kVirtIdCount) {
const VirtReg* vReg = _cc->virtRegByIndex(vIndex);
const OpRWInfo& opRwInfo = rwInfo.operand(0);
uint64_t remainingByteMask = vReg->workReg()->regByteMask() & ~opRwInfo.writeByteMask();
if (remainingByteMask == 0u || (remainingByteMask & opRwInfo.extendByteMask()) == 0)
ib.addInstRWFlags(InstRWFlags::kMovOp);
}
}
}
// Handle X86 constraints.
if (hasGpbHiConstraint) {
for (RATiedReg& tiedReg : ib) {
@ -1251,6 +1277,10 @@ ASMJIT_FAVOR_SPEED Error X86RAPass::_rewrite(BaseNode* first, BaseNode* stop) no
// Rewrite virtual registers into physical registers.
if (raInst) {
// This data is allocated by Zone passed to `runOnFunction()`, which will be reset after the RA pass finishes.
// So reset this data to prevent having a dead pointer after the RA pass is complete.
node->resetPassData();
// If the instruction contains pass data (raInst) then it was a subject for register allocation and must be
// rewritten to use physical regs.
RATiedReg* tiedRegs = raInst->tiedRegs();
@ -1274,16 +1304,25 @@ ASMJIT_FAVOR_SPEED Error X86RAPass::_rewrite(BaseNode* first, BaseNode* stop) no
}
}
// Transform VEX instruction to EVEX when necessary.
if (raInst->isTransformable()) {
if (maxRegId > 15) {
// Transform VEX instruction to EVEX.
inst->setId(transformVexToEvex(inst->id()));
}
}
// This data is allocated by Zone passed to `runOnFunction()`, which will be reset after the RA pass finishes.
// So reset this data to prevent having a dead pointer after the RA pass is complete.
node->resetPassData();
// Remove moves that do not do anything.
//
// Usually these moves are inserted during code generation and originally they used different registers. If RA
// allocated these into the same register such redundant mov would appear.
if (raInst->hasInstRWFlag(InstRWFlags::kMovOp) && !inst->hasExtraReg()) {
if (inst->opCount() == 2) {
if (inst->op(0) == inst->op(1)) {
cc()->removeNode(node);
goto Next;
}
}
}
if (ASMJIT_UNLIKELY(node->type() != NodeType::kInst)) {
// FuncRet terminates the flow, it must either be removed if the exit label is next to it (optimization) or
@ -1327,6 +1366,7 @@ ASMJIT_FAVOR_SPEED Error X86RAPass::_rewrite(BaseNode* first, BaseNode* stop) no
}
}
Next:
node = next;
}
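// Illustration (not part of this commit) of where such no-op moves come from,
// assuming an `x86::Compiler cc` and its `newUInt32()` helper:
//
//   x86::Gp a = cc.newUInt32("a");
//   x86::Gp b = cc.newUInt32("b");
//   cc.mov(b, a);                 // If RA assigns `a` and `b` to the same
//   cc.add(b, 1);                 // physical register, the mov becomes
//                                 // `mov eax, eax` and is removed here.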

View File

@ -3819,6 +3819,124 @@ public:
static uint32_t calledFunc(uint32_t x) { return x + 1; }
};
// x86::Compiler - X86Test_FuncCallAVXClobber
// ==========================================
class X86Test_FuncCallAVXClobber : public X86TestCase {
public:
X86Test_FuncCallAVXClobber() : X86TestCase("FuncCallAVXClobber") {}
static void add(TestApp& app) {
const CpuInfo& cpuInfo = CpuInfo::host();
if (cpuInfo.features().x86().hasAVX2() && sizeof(void*) == 8)
app.add(new X86Test_FuncCallAVXClobber());
}
virtual void compile(x86::Compiler& cc) {
FuncNode* mainFunc = cc.addFunc(FuncSignatureT<void, void*, const void*, const void*>(CallConvId::kHost));
mainFunc->frame().setAvxEnabled();
mainFunc->frame().setAvxCleanup();
// We need a Windows calling convention to test this properly also on a non-Windows machine.
FuncNode* helperFunc = cc.newFunc(FuncSignatureT<void, void*, const void*>(CallConvId::kX64Windows));
helperFunc->frame().setAvxEnabled();
helperFunc->frame().setAvxCleanup();
{
size_t i;
x86::Gp dPtr = cc.newIntPtr("dPtr");
x86::Gp aPtr = cc.newIntPtr("aPtr");
x86::Gp bPtr = cc.newIntPtr("bPtr");
x86::Gp tPtr = cc.newIntPtr("tPtr");
x86::Ymm acc[8];
x86::Mem stack = cc.newStack(32, 1, "stack");
mainFunc->setArg(0, dPtr);
mainFunc->setArg(1, aPtr);
mainFunc->setArg(2, bPtr);
cc.lea(tPtr, stack);
for (i = 0; i < 8; i++) {
acc[i] = cc.newYmm("acc%zu", i);
cc.vmovdqu(acc[i], x86::ptr(aPtr));
}
InvokeNode* invokeNode;
cc.invoke(&invokeNode,
helperFunc->label(),
FuncSignatureT<void, void*, const void*>(CallConvId::kX64Windows));
invokeNode->setArg(0, tPtr);
invokeNode->setArg(1, bPtr);
for (i = 1; i < 8; i++) {
cc.vpaddd(acc[0], acc[0], acc[i]);
}
cc.vpaddd(acc[0], acc[0], x86::ptr(tPtr));
cc.vmovdqu(x86::ptr(dPtr), acc[0]);
cc.endFunc();
}
{
cc.addFunc(helperFunc);
x86::Gp dPtr = cc.newIntPtr("dPtr");
x86::Gp aPtr = cc.newIntPtr("aPtr");
helperFunc->setArg(0, dPtr);
helperFunc->setArg(1, aPtr);
x86::Gp tmp = cc.newIntPtr("tmp");
x86::Ymm acc = cc.newYmm("acc");
cc.mov(tmp, 1);
cc.vmovd(acc.xmm(), tmp);
cc.vpbroadcastd(acc, acc.xmm());
cc.vpaddd(acc, acc, x86::ptr(aPtr));
cc.vmovdqu(x86::ptr(dPtr), acc);
cc.endFunc();
}
}
virtual bool run(void* _func, String& result, String& expect) {
typedef void (*Func)(void*, const void*, const void*);
Func func = ptr_as_func<Func>(_func);
size_t i;
static const uint32_t aData[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
static const uint32_t bData[8] = { 6, 3, 5, 9, 1, 8, 7, 2 };
uint32_t resultData[8];
uint32_t expectData[8];
for (i = 0; i < 8; i++)
expectData[i] = aData[i] * 8 + bData[i] + 1;
func(resultData, aData, bData);
result.assign("{");
expect.assign("{");
for (i = 0; i < 8; i++) {
result.appendFormat("%u", resultData[i]);
expect.appendFormat("%u", expectData[i]);
if (i != 7) result.append(", ");
if (i != 7) expect.append(", ");
}
result.append("}");
expect.append("}");
return result == expect;
}
};
// x86::Compiler - X86Test_MiscLocalConstPool
// ==========================================
@ -4186,6 +4304,7 @@ void compiler_add_x86_tests(TestApp& app) {
app.addT<X86Test_FuncCallMisc4>();
app.addT<X86Test_FuncCallMisc5>();
app.addT<X86Test_FuncCallMisc6>();
app.addT<X86Test_FuncCallAVXClobber>();
// Miscellaneous tests.
app.addT<X86Test_MiscLocalConstPool>();

View File

@ -1786,6 +1786,7 @@ class AdditionalInfoTable extends core.Task {
run() {
const insts = this.ctx.insts;
const rwInfoTable = new IndexedArray();
const instFlagsTable = new IndexedArray();
const additionaInfoTable = new IndexedArray();
// If the instruction doesn't read any flags it should point to the first index.
@ -1800,9 +1801,48 @@ class AdditionalInfoTable extends core.Task {
var [r, w] = this.rwFlagsOf(dbInsts);
const rData = r.map(function(flag) { return `FLAG(${flag})`; }).join(" | ") || "0";
const wData = w.map(function(flag) { return `FLAG(${flag})`; }).join(" | ") || "0";
const rwDataIndex = rwInfoTable.addIndexed(`{ ${rData}, ${wData} }`);
const instFlags = Object.create(null);
inst.additionalInfoIndex = additionaInfoTable.addIndexed(`{ { ${features} }, ${rwDataIndex}, 0 }`);
switch (inst.name) {
case "kmovb":
case "kmovd":
case "kmovq":
case "kmovw":
case "mov":
case "movq":
case "movsd":
case "movss":
case "movapd":
case "movaps":
case "movdqa":
case "movdqu":
case "movupd":
case "movups":
case "vmovapd":
case "vmovaps":
case "vmovdqa":
case "vmovdqa8":
case "vmovdqa16":
case "vmovdqa32":
case "vmovdqa64":
case "vmovdqu":
case "vmovdqu8":
case "vmovdqu16":
case "vmovdqu32":
case "vmovdqu64":
case "vmovq":
case "vmovsd":
case "vmovss":
case "vmovupd":
case "vmovups":
instFlags["MovOp"] = true;
break;
}
const instFlagsIndex = instFlagsTable.addIndexed("InstRWFlags(" + CxxUtils.flags(instFlags, (f) => { return `FLAG(${f})`; }, "FLAG(None)") + ")");
const rwInfoIndex = rwInfoTable.addIndexed(`{ ${rData}, ${wData} }`);
inst.additionalInfoIndex = additionaInfoTable.addIndexed(`{ ${instFlagsIndex}, ${rwInfoIndex}, { ${features} } }`);
});
var s = `#define EXT(VAL) uint32_t(CpuFeatures::X86::k##VAL)\n` +
@ -1811,8 +1851,12 @@ class AdditionalInfoTable extends core.Task {
`\n` +
`#define FLAG(VAL) uint32_t(CpuRWFlags::kX86_##VAL)\n` +
`const InstDB::RWFlagsInfoTable InstDB::_rwFlagsInfoTable[] = {\n${StringUtils.format(rwInfoTable, kIndent, true)}\n};\n` +
`#undef FLAG\n` +
`\n` +
`#define FLAG(VAL) uint32_t(InstRWFlags::k##VAL)\n` +
`const InstRWFlags InstDB::_instFlagsTable[] = {\n${StringUtils.format(instFlagsTable, kIndent, true)}\n};\n` +
`#undef FLAG\n`;
this.inject("AdditionalInfoTable", disclaimer(s), additionaInfoTable.length * 8 + rwInfoTable.length * 8);
this.inject("AdditionalInfoTable", disclaimer(s), additionaInfoTable.length * 8 + rwInfoTable.length * 8 + instFlagsTable.length * 4);
}
rwFlagsOf(dbInsts) {
@ -2030,7 +2074,12 @@ class InstRWInfoTable extends core.Task {
"InstDB::RWInfoRm::kCategory" + rmInfo.category.padEnd(10),
StringUtils.decToHex(rmInfo.rmIndexes, 2),
String(Math.max(rmInfo.memFixed, 0)).padEnd(2),
CxxUtils.flags({ "InstDB::RWInfoRm::kFlagAmbiguous": Boolean(rmInfo.memAmbiguous) }),
CxxUtils.flags({
"InstDB::RWInfoRm::kFlagAmbiguous": Boolean(rmInfo.memAmbiguous),
"InstDB::RWInfoRm::kFlagMovssMovsd": Boolean(inst.name === "movss" || inst.name === "movsd"),
"InstDB::RWInfoRm::kFlagPextrw": Boolean(inst.name === "pextrw"),
"InstDB::RWInfoRm::kFlagFeatureIfRMI": Boolean(rmInfo.memExtensionIfRMI)
}),
rmInfo.memExtension === "None" ? "0" : "uint32_t(CpuFeatures::X86::k" + rmInfo.memExtension + ")"
);
@ -2284,7 +2333,8 @@ class InstRWInfoTable extends core.Task {
memFixed: this.rmFixedSize(dbInsts),
memAmbiguous: this.rmIsAmbiguous(dbInsts),
memConsistent: this.rmIsConsistent(dbInsts),
memExtension: this.rmExtension(dbInsts)
memExtension: this.rmExtension(dbInsts),
memExtensionIfRMI: this.rmExtensionIfRMI(dbInsts)
};
if (info.memFixed !== -1)
@ -2493,14 +2543,32 @@ class InstRWInfoTable extends core.Task {
case "pextrw":
return "SSE4_1";
case "vpslld":
case "vpsllq":
case "vpsrad":
case "vpsrld":
case "vpsrlq":
return "AVX512_F";
case "vpslldq":
case "vpsllw":
case "vpsraw":
case "vpsrldq":
case "vpsrlw":
return "AVX512_BW";
default:
return "None";
}
}
rmExtensionIfRMI(dbInsts) {
if (!dbInsts.length)
return 0;
const name = dbInsts[0].name;
return /^(vpslld|vpsllq|vpsrad|vpsrld|vpsrlq|vpslldq|vpsllw|vpsraw|vpsrldq|vpsrlw)$/.test(name);
}
}
// ============================================================================