Mirror of https://github.com/holub/mame, synced 2025-04-19 15:11:37 +03:00
asmjit: sync with upstream
This commit is contained in:
parent 1a6634fda5
commit 02e5f1e468
3rdparty/asmjit/CMakeLists.txt (3 changed lines, vendored)
@@ -249,7 +249,8 @@ if (ASMJIT_SANITIZE)
     endif()
   endif()

-  if (NOT WIN32)
+  # Do not link to pthread on Android as it's part of C runtime.
+  if (NOT WIN32 AND NOT ANDROID)
     list(APPEND ASMJIT_DEPS pthread)
   endif()
@@ -4993,7 +4993,7 @@ EmitDone:
   if (Support::test(options, InstOptions::kReserved)) {
 #ifndef ASMJIT_NO_LOGGING
     if (_logger)
-      EmitterUtils::logInstructionEmitted(this, instId, options, o0, o1, o2, opExt, 0, 0, writer.cursor());
+      EmitterUtils::logInstructionEmitted(this, BaseInst::composeARMInstId(instId, instCC), options, o0, o1, o2, opExt, 0, 0, writer.cursor());
 #endif
   }
3rdparty/asmjit/src/asmjit/arm/a64compiler.h (12 changed lines, vendored)
@@ -169,6 +169,18 @@ public:

   //! \}

+  //! \name Compiler specific
+  //! \{
+
+  //! Special pseudo-instruction that can be used to load a memory address into `o0` GP register.
+  //!
+  //! \note At the moment this instruction is only useful to load a stack allocated address into a GP register
+  //! for further use. It makes very little sense to use it for anything else. The semantics of this instruction
+  //! is the same as X86 `LEA` (load effective address) instruction.
+  inline Error loadAddressOf(const Gp& o0, const Mem& o1) { return _emitter()->_emitI(Inst::kIdAdr, o0, o1); }
+
+  //! \}
+
   //! \name Function Call & Ret Intrinsics
   //! \{
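A minimal usage sketch (not part of the commit), assuming the usual a64::Compiler workflow and its newGpx()/newStack() helpers:

    #include <asmjit/a64.h>
    using namespace asmjit;

    void emitExample(a64::Compiler& cc) {
      a64::Gp addr = cc.newGpx("addr");            // 64-bit GP destination.
      a64::Mem slot = cc.newStack(64, 16, "slot"); // Stack-allocated memory.

      // Same semantics as x86 LEA: 'addr' receives the effective address of 'slot'.
      cc.loadAddressOf(addr, slot);
    }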
@@ -117,7 +117,7 @@ ASMJIT_FAVOR_SIZE Error EmitHelper::emitRegMove(
     case TypeId::kUInt32:
     case TypeId::kInt64:
     case TypeId::kUInt64:
-      return emitter->mov(src.as<Gp>().x(), dst.as<Gp>().x());
+      return emitter->mov(dst.as<Gp>().x(), src.as<Gp>().x());

     default: {
       if (TypeUtils::isFloat32(typeId) || TypeUtils::isVec32(typeId))
@@ -139,7 +139,7 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
   if (ASMJIT_UNLIKELY(!Inst::isDefinedId(realId)))
     return DebugUtils::errored(kErrorInvalidInstruction);

-  out->_instFlags = 0;
+  out->_instFlags = InstRWFlags::kNone;
   out->_opCount = uint8_t(opCount);
   out->_rmFeature = 0;
   out->_extraReg.reset();
3rdparty/asmjit/src/asmjit/arm/a64rapass.cpp (52 changed lines, vendored)
@@ -102,7 +102,7 @@ public:

 // TODO: [ARM] This is just a workaround...
 static InstControlFlow getControlFlowType(InstId instId) noexcept {
-  switch (instId) {
+  switch (BaseInst::extractRealId(instId)) {
     case Inst::kIdB:
     case Inst::kIdBr:
       if (BaseInst::extractARMCondCode(instId) == CondCode::kAL)
@@ -127,8 +127,8 @@ static InstControlFlow getControlFlowType(InstId instId) noexcept {
 Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& controlType, RAInstBuilder& ib) noexcept {
   InstRWInfo rwInfo;

-  InstId instId = inst->id();
-  if (Inst::isDefinedId(instId)) {
+  if (Inst::isDefinedId(inst->realId())) {
+    InstId instId = inst->id();
     uint32_t opCount = inst->opCount();
     const Operand* opArray = inst->operands();
     ASMJIT_PROPAGATE(InstInternal::queryRWInfo(_arch, inst->baseInst(), opArray, opCount, &rwInfo));
@@ -136,6 +136,8 @@ Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& controlType, RAInstB
     const InstDB::InstInfo& instInfo = InstDB::infoById(instId);
     uint32_t singleRegOps = 0;

+    ib.addInstRWFlags(rwInfo.instFlags());
+
     if (opCount) {
       uint32_t consecutiveOffset = 0xFFFFFFFFu;
       uint32_t consecutiveParent = Globals::kInvalidId;
@@ -715,6 +717,50 @@ ASMJIT_FAVOR_SPEED Error ARMRAPass::_rewrite(BaseNode* first, BaseNode* stop) no
          }
        }
      }
+
+      // Rewrite `loadAddressOf()` construct.
+      if (inst->realId() == Inst::kIdAdr && inst->opCount() == 2 && inst->op(1).isMem()) {
+        BaseMem mem = inst->op(1).as<BaseMem>();
+        int64_t offset = mem.offset();
+
+        if (!mem.hasBaseOrIndex()) {
+          inst->setId(Inst::kIdMov);
+          inst->setOp(1, Imm(offset));
+        }
+        else {
+          if (mem.hasIndex())
+            return DebugUtils::errored(kErrorInvalidAddressIndex);
+
+          GpX dst(inst->op(0).as<Gp>().id());
+          GpX base(mem.baseId());
+
+          InstId arithInstId = offset < 0 ? Inst::kIdSub : Inst::kIdAdd;
+          uint64_t absOffset = offset < 0 ? Support::neg(uint64_t(offset)) : uint64_t(offset);
+
+          inst->setId(arithInstId);
+          inst->setOpCount(3);
+          inst->setOp(1, base);
+          inst->setOp(2, Imm(absOffset));
+
+          // Use two operations if the offset cannot be encoded with ADD/SUB.
+          if (absOffset > 0xFFFu && (absOffset & ~uint64_t(0xFFF000u)) != 0) {
+            if (absOffset <= 0xFFFFFFu) {
+              cc()->_setCursor(inst->prev());
+              ASMJIT_PROPAGATE(cc()->emit(arithInstId, dst, base, Imm(absOffset & 0xFFFu)));
+
+              inst->setOp(1, dst);
+              inst->setOp(2, Imm(absOffset & 0xFFF000u));
+            }
+            else {
+              cc()->_setCursor(inst->prev());
+              ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, inst->op(0), Imm(absOffset)));
+
+              inst->setOp(1, base);
+              inst->setOp(2, dst);
+            }
+          }
+        }
+      }
    }

    node = next;
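A hypothetical worked example (not part of the commit) of the offset splitting above. AArch64 ADD/SUB immediates encode a 12-bit value, optionally shifted left by 12, so an offset such as 0x12345 fails both encodings but still fits into 24 bits:

    ; loadAddressOf(x0, [sp + 0x12345])
    ; 0x12345 > 0xFFF and (0x12345 & ~0xFFF000) != 0  -> not directly encodable
    ; 0x12345 <= 0xFFFFFF                             -> split into two ADDs
    add x0, sp, #0x345      ; absOffset & 0xFFF
    add x0, x0, #0x12000    ; absOffset & 0xFFF000
    ; anything above 0xFFFFFF falls back to MOV (immediate) followed by ADD (register)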
3rdparty/asmjit/src/asmjit/arm/armoperand.h (79 changed lines, vendored)
@@ -455,11 +455,12 @@ public:

   //! \}

-  //! \name ARM Specific Features
+  //! \name Clone
   //! \{

   //! Clones the memory operand.
   inline constexpr Mem clone() const noexcept { return Mem(*this); }

+  //! Gets new memory operand adjusted by `off`.
   inline Mem cloneAdjusted(int64_t off) const noexcept {
     Mem result(*this);
@@ -467,6 +468,51 @@ public:
     return result;
   }

+  //! Clones the memory operand and makes it pre-index.
+  inline Mem pre() const noexcept {
+    Mem result(*this);
+    result.setPredicate(kOffsetPreIndex);
+    return result;
+  }
+
+  //! Clones the memory operand, applies a given offset `off` and makes it pre-index.
+  inline Mem pre(int64_t off) const noexcept {
+    Mem result(*this);
+    result.setPredicate(kOffsetPreIndex);
+    result.addOffset(off);
+    return result;
+  }
+
+  //! Clones the memory operand and makes it post-index.
+  inline Mem post() const noexcept {
+    Mem result(*this);
+    result.setPredicate(kOffsetPreIndex);
+    return result;
+  }
+
+  //! Clones the memory operand, applies a given offset `off` and makes it post-index.
+  inline Mem post(int64_t off) const noexcept {
+    Mem result(*this);
+    result.setPredicate(kOffsetPostIndex);
+    result.addOffset(off);
+    return result;
+  }
+
+  //! \}
+
+  //! \name Base & Index
+  //! \{
+
+  //! Converts memory `baseType` and `baseId` to `arm::Reg` instance.
+  //!
+  //! The memory must have a valid base register otherwise the result will be wrong.
+  inline Reg baseReg() const noexcept { return Reg::fromTypeAndId(baseType(), baseId()); }
+
+  //! Converts memory `indexType` and `indexId` to `arm::Reg` instance.
+  //!
+  //! The memory must have a valid index register otherwise the result will be wrong.
+  inline Reg indexReg() const noexcept { return Reg::fromTypeAndId(indexType(), indexId()); }
+
   using BaseMem::setIndex;

   inline void setIndex(const BaseReg& index, uint32_t shift) noexcept {

@@ -474,6 +520,11 @@ public:
     setShift(shift);
   }

+  //! \}
+
+  //! \name ARM Specific Features
+  //! \{
+
   //! Gets whether the memory operand has shift (aka scale) constant.
   inline constexpr bool hasShift() const noexcept { return _signature.hasField<kSignatureMemShiftValueMask>(); }
   //! Gets the memory operand's shift (aka scale) constant.
@@ -499,32 +550,6 @@ public:
   inline void makePreIndex() noexcept { setPredicate(kOffsetPreIndex); }
   inline void makePostIndex() noexcept { setPredicate(kOffsetPostIndex); }

-  inline Mem pre() const noexcept {
-    Mem result(*this);
-    result.setPredicate(kOffsetPreIndex);
-    return result;
-  }
-
-  inline Mem pre(int64_t off) const noexcept {
-    Mem result(*this);
-    result.setPredicate(kOffsetPreIndex);
-    result.addOffset(off);
-    return result;
-  }
-
-  inline Mem post() const noexcept {
-    Mem result(*this);
-    result.setPredicate(kOffsetPreIndex);
-    return result;
-  }
-
-  inline Mem post(int64_t off) const noexcept {
-    Mem result(*this);
-    result.setPredicate(kOffsetPostIndex);
-    result.addOffset(off);
-    return result;
-  }
-
   //! \}
 };
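A hypothetical sketch (not part of the commit) of the relocated helpers in use, assuming an a64::Assembler named `a` and the arm::ptr() factory:

    arm::Mem m = arm::ptr(a64::x0);

    a.ldr(a64::x1, m.pre(16));   // ldr x1, [x0, #16]!  - base written back before the load.
    a.ldr(a64::x2, m.post(16));  // ldr x2, [x0], #16   - base written back after the load.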
3rdparty/asmjit/src/asmjit/core/archtraits.h (2 changed lines, vendored)
@@ -152,7 +152,7 @@ enum class InstHints : uint8_t {
   //! No feature hints.
   kNoHints = 0,

-  //! Architecture supports a register swap by using a single instructio.
+  //! Architecture supports a register swap by using a single instruction.
   kRegSwap = 0x01u,
   //! Architecture provides push/pop instructions.
   kPushPop = 0x02u
3rdparty/asmjit/src/asmjit/core/codeholder.h (2 changed lines, vendored)
@@ -356,7 +356,7 @@ struct OffsetFormat {
   //! Returns the size of the region/instruction where the offset is encoded.
   inline uint32_t regionSize() const noexcept { return _regionSize; }

-  //! Returns the the offset of the word relative to the start of the region where the offset is.
+  //! Returns the offset of the word relative to the start of the region where the offset is.
   inline uint32_t valueOffset() const noexcept { return _valueOffset; }

   //! Returns the size of the data-type (word) that contains the offset, in bytes.
@@ -143,7 +143,7 @@ Error formatLabel(
   }

   if (le->type() == LabelType::kAnonymous)
-    ASMJIT_PROPAGATE(sb.append("L%u@", labelId));
+    ASMJIT_PROPAGATE(sb.appendFormat("L%u@", labelId));
   return sb.append(le->name());
 }
 else {
3rdparty/asmjit/src/asmjit/core/func.h (2 changed lines, vendored)
@@ -1127,7 +1127,7 @@ public:

   //! Tests whether the callee must adjust SP before returning (X86-STDCALL only)
   inline bool hasCalleeStackCleanup() const noexcept { return _calleeStackCleanup != 0; }
-  //! Returns home many bytes of the stack the the callee must adjust before returning (X86-STDCALL only)
+  //! Returns home many bytes of the stack the callee must adjust before returning (X86-STDCALL only)
   inline uint32_t calleeStackCleanup() const noexcept { return _calleeStackCleanup; }

   //! Returns call stack alignment.
3rdparty/asmjit/src/asmjit/core/inst.h (32 changed lines, vendored)
@@ -312,6 +312,10 @@ public:
     return id | (uint32_t(cc) << Support::ConstCTZ<uint32_t(InstIdParts::kARM_Cond)>::value);
   }

+  static inline constexpr InstId extractRealId(uint32_t id) noexcept {
+    return id & uint32_t(InstIdParts::kRealId);
+  }
+
   static inline constexpr arm::CondCode extractARMCondCode(uint32_t id) noexcept {
     return (arm::CondCode)((uint32_t(id) & uint32_t(InstIdParts::kARM_Cond)) >> Support::ConstCTZ<uint32_t(InstIdParts::kARM_Cond)>::value);
   }
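A hypothetical round-trip (not part of the commit) showing how the new accessor pairs with the existing composeARMInstId():

    // Compose a conditional branch id, then take it apart again.
    InstId id = BaseInst::composeARMInstId(a64::Inst::kIdB, arm::CondCode::kEQ);

    InstId realId      = BaseInst::extractRealId(id);       // == a64::Inst::kIdB
    arm::CondCode cond = BaseInst::extractARMCondCode(id);  // == arm::CondCode::kEQ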
@@ -614,13 +618,25 @@ struct OpRWInfo {
   //! \}
 };

+//! Flags used by \ref InstRWInfo.
+enum class InstRWFlags : uint32_t {
+  //! No flags.
+  kNone = 0x00000000u,
+
+  //! Describes a move operation.
+  //!
+  //! This flag is used by RA to eliminate moves that are guaranteed to be moves only.
+  kMovOp = 0x00000001u
+};
+ASMJIT_DEFINE_ENUM_FLAGS(InstRWFlags)
+
 //! Read/Write information of an instruction.
 struct InstRWInfo {
   //! \name Members
   //! \{

   //! Instruction flags (there are no flags at the moment, this field is reserved).
-  uint32_t _instFlags;
+  InstRWFlags _instFlags;
   //! CPU flags read.
   CpuRWFlags _readFlags;
   //! CPU flags written.
@@ -646,6 +662,20 @@ struct InstRWInfo {

   //! \}

+  //! \name Instruction Flags
+  //! \{
+
+  //! Returns flags associated with the instruction, see \ref InstRWFlags.
+  inline InstRWFlags instFlags() const noexcept { return _instFlags; }
+
+  //! Tests whether the instruction flags contain `flag`.
+  inline bool hasInstFlag(InstRWFlags flag) const noexcept { return Support::test(_instFlags, flag); }
+
+  //! Tests whether the instruction flags contain \ref InstRWFlags::kMovOp.
+  inline bool isMovOp() const noexcept { return hasInstFlag(InstRWFlags::kMovOp); }
+
+  //! \}
+
   //! \name CPU Flags Information
   //! \{
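A hypothetical query (not part of the commit) that exercises the new accessors through the public InstAPI, assuming an x86-64 target:

    InstRWInfo rwi;
    Operand_ ops[] = { x86::eax, x86::ebx };

    if (InstAPI::queryRWInfo(Arch::kX64, BaseInst(x86::Inst::kIdMov), ops, 2, &rwi) == kErrorOk) {
      // 'mov eax, ebx' is a pure register-to-register move, so the register
      // allocator may eliminate it when both operands land in the same register.
      bool eliminable = rwi.isMovOp();
    }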
3rdparty/asmjit/src/asmjit/core/raassignment_p.h (28 changed lines, vendored)
@@ -82,6 +82,12 @@ public:
     size_t size = sizeOf(count);
     memcpy(this, other, size);
   }
+
+  inline void unassign(RegGroup group, uint32_t physId, uint32_t indexInWorkIds) noexcept {
+    assigned.clear(group, Support::bitMask(physId));
+    dirty.clear(group, Support::bitMask(physId));
+    workIds[indexInWorkIds] = kWorkNone;
+  }
 };

 struct WorkToPhysMap {
@@ -304,6 +310,28 @@ public:
     _physToWorkIds.swap(other._physToWorkIds);
   }

+  inline void assignWorkIdsFromPhysIds() noexcept {
+    memset(_workToPhysMap, uint8_t(BaseReg::kIdBad), WorkToPhysMap::sizeOf(_layout.workCount));
+
+    for (RegGroup group : RegGroupVirtValues{}) {
+      uint32_t physBaseIndex = _layout.physIndex[group];
+      Support::BitWordIterator<RegMask> it(_physToWorkMap->assigned[group]);
+
+      while (it.hasNext()) {
+        uint32_t physId = it.next();
+        uint32_t workId = _physToWorkMap->workIds[physBaseIndex + physId];
+
+        ASMJIT_ASSERT(workId != kWorkNone);
+        _workToPhysMap->physIds[workId] = uint8_t(physId);
+      }
+    }
+  }
+
+  inline void copyFrom(const PhysToWorkMap* physToWorkMap) noexcept {
+    memcpy(_physToWorkMap, physToWorkMap, PhysToWorkMap::sizeOf(_layout.physTotal));
+    assignWorkIdsFromPhysIds();
+  }
+
   inline void copyFrom(const PhysToWorkMap* physToWorkMap, const WorkToPhysMap* workToPhysMap) noexcept {
     memcpy(_physToWorkMap, physToWorkMap, PhysToWorkMap::sizeOf(_layout.physTotal));
     memcpy(_workToPhysMap, workToPhysMap, WorkToPhysMap::sizeOf(_layout.workCount));
3rdparty/asmjit/src/asmjit/core/radefs_p.h (8 changed lines, vendored)
@@ -271,8 +271,12 @@ struct RARegMask {
   }

   template<class Operator>
-  inline void op(RegGroup group, uint32_t input) noexcept {
-    _masks[group] = Operator::op(_masks[group], input);
+  inline void op(RegGroup group, RegMask mask) noexcept {
+    _masks[group] = Operator::op(_masks[group], mask);
+  }
+
+  inline void clear(RegGroup group, RegMask mask) noexcept {
+    _masks[group] = _masks[group] & ~mask;
   }

   //! \}
3rdparty/asmjit/src/asmjit/core/ralocal.cpp (98 changed lines, vendored)
@@ -38,7 +38,9 @@ Error RALocalAllocator::init() noexcept {

   physToWorkMap = _pass->newPhysToWorkMap();
   workToPhysMap = _pass->newWorkToPhysMap();
-  if (!physToWorkMap || !workToPhysMap)
+  _tmpWorkToPhysMap = _pass->newWorkToPhysMap();
+
+  if (!physToWorkMap || !workToPhysMap || !_tmpWorkToPhysMap)
     return DebugUtils::errored(kErrorOutOfMemory);

   _tmpAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
@@ -122,26 +124,18 @@ Error RALocalAllocator::makeInitialAssignment() noexcept {
   return kErrorOk;
 }

-Error RALocalAllocator::replaceAssignment(
-  const PhysToWorkMap* physToWorkMap,
-  const WorkToPhysMap* workToPhysMap) noexcept {
-
-  _curAssignment.copyFrom(physToWorkMap, workToPhysMap);
+Error RALocalAllocator::replaceAssignment(const PhysToWorkMap* physToWorkMap) noexcept {
+  _curAssignment.copyFrom(physToWorkMap);
   return kErrorOk;
 }

-Error RALocalAllocator::switchToAssignment(
-  PhysToWorkMap* dstPhysToWorkMap,
-  WorkToPhysMap* dstWorkToPhysMap,
-  const ZoneBitVector& liveIn,
-  bool dstReadOnly,
-  bool tryMode) noexcept {
-
+Error RALocalAllocator::switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, const ZoneBitVector& liveIn, bool dstReadOnly, bool tryMode) noexcept {
   RAAssignment dst;
   RAAssignment& cur = _curAssignment;

   dst.initLayout(_pass->_physRegCount, _pass->workRegs());
-  dst.initMaps(dstPhysToWorkMap, dstWorkToPhysMap);
+  dst.initMaps(dstPhysToWorkMap, _tmpWorkToPhysMap);
+  dst.assignWorkIdsFromPhysIds();

   if (tryMode)
     return kErrorOk;
@@ -329,24 +323,27 @@ Cleared:

   if (!tryMode) {
     // Here is a code that dumps the conflicting part if something fails here:
-    // if (!dst.equals(cur)) {
-    //   uint32_t physTotal = dst._layout.physTotal;
-    //   uint32_t workCount = dst._layout.workCount;
-    //
-    //   for (uint32_t physId = 0; physId < physTotal; physId++) {
-    //     uint32_t dstWorkId = dst._physToWorkMap->workIds[physId];
-    //     uint32_t curWorkId = cur._physToWorkMap->workIds[physId];
-    //     if (dstWorkId != curWorkId)
-    //       fprintf(stderr, "[PhysIdWork] PhysId=%u WorkId[DST(%u) != CUR(%u)]\n", physId, dstWorkId, curWorkId);
-    //   }
-    //
-    //   for (uint32_t workId = 0; workId < workCount; workId++) {
-    //     uint32_t dstPhysId = dst._workToPhysMap->physIds[workId];
-    //     uint32_t curPhysId = cur._workToPhysMap->physIds[workId];
-    //     if (dstPhysId != curPhysId)
-    //       fprintf(stderr, "[WorkToPhys] WorkId=%u PhysId[DST(%u) != CUR(%u)]\n", workId, dstPhysId, curPhysId);
-    //   }
-    // }
+    // if (!dst.equals(cur)) {
+    //   uint32_t physTotal = dst._layout.physTotal;
+    //   uint32_t workCount = dst._layout.workCount;
+    //
+    //   fprintf(stderr, "Dirty DST=0x%08X CUR=0x%08X\n", dst.dirty(RegGroup::kGp), cur.dirty(RegGroup::kGp));
+    //   fprintf(stderr, "Assigned DST=0x%08X CUR=0x%08X\n", dst.assigned(RegGroup::kGp), cur.assigned(RegGroup::kGp));
+    //
+    //   for (uint32_t physId = 0; physId < physTotal; physId++) {
+    //     uint32_t dstWorkId = dst._physToWorkMap->workIds[physId];
+    //     uint32_t curWorkId = cur._physToWorkMap->workIds[physId];
+    //     if (dstWorkId != curWorkId)
+    //       fprintf(stderr, "[PhysIdWork] PhysId=%u WorkId[DST(%u) != CUR(%u)]\n", physId, dstWorkId, curWorkId);
+    //   }
+    //
+    //   for (uint32_t workId = 0; workId < workCount; workId++) {
+    //     uint32_t dstPhysId = dst._workToPhysMap->physIds[workId];
+    //     uint32_t curPhysId = cur._workToPhysMap->physIds[workId];
+    //     if (dstPhysId != curPhysId)
+    //       fprintf(stderr, "[WorkToPhys] WorkId=%u PhysId[DST(%u) != CUR(%u)]\n", workId, dstPhysId, curPhysId);
+    //   }
+    // }
     ASMJIT_ASSERT(dst.equals(cur));
   }
@@ -839,6 +836,34 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
   // STEP 9
   // ------
   //
+  // Vector registers can be cloberred partially by invoke - find if that's the case and clobber when necessary.
+
+  if (node->isInvoke() && group == RegGroup::kVec) {
+    const InvokeNode* invokeNode = node->as<InvokeNode>();
+
+    RegMask maybeClobberedRegs = invokeNode->detail().callConv().preservedRegs(group) & _curAssignment.assigned(group);
+    if (maybeClobberedRegs) {
+      uint32_t saveRestoreVecSize = invokeNode->detail().callConv().saveRestoreRegSize(group);
+      Support::BitWordIterator<RegMask> it(maybeClobberedRegs);
+
+      do {
+        uint32_t physId = it.next();
+        uint32_t workId = _curAssignment.physToWorkId(group, physId);
+
+        RAWorkReg* workReg = workRegById(workId);
+        uint32_t virtSize = workReg->virtReg()->virtSize();
+
+        if (virtSize > saveRestoreVecSize) {
+          ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
+        }
+
+      } while (it.hasNext());
+    }
+  }
+
+  // STEP 10
+  // -------
+  //
   // Assign OUT registers.

   if (outPending) {
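A hypothetical worked example (not part of the commit) of the size test above. Under the Win64 calling convention only the low 128 bits of a vector register survive a call:

    constexpr uint32_t saveRestoreVecSize = 16; // saveRestoreRegSize(kVec) on Win64: the XMM half.
    constexpr uint32_t virtSize = 32;           // A live YMM virtual register.

    // 32 > 16: the upper half would be clobbered by the call, so STEP 9 spills
    // the register even though the calling convention lists it as preserved.
    static_assert(virtSize > saveRestoreVecSize, "spill required");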
@@ -981,12 +1006,7 @@ Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* co

   // Use TryMode of `switchToAssignment()` if possible.
   if (target->hasEntryAssignment()) {
-    ASMJIT_PROPAGATE(switchToAssignment(
-      target->entryPhysToWorkMap(),
-      target->entryWorkToPhysMap(),
-      target->liveIn(),
-      target->isAllocated(),
-      true));
+    ASMJIT_PROPAGATE(switchToAssignment(target->entryPhysToWorkMap(), target->liveIn(), target->isAllocated(), true));
   }

   ASMJIT_PROPAGATE(allocInst(node));
@@ -997,12 +1017,7 @@ Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* co
   BaseNode* prevCursor = _cc->setCursor(injectionPoint);

   _tmpAssignment.copyFrom(_curAssignment);
-  ASMJIT_PROPAGATE(switchToAssignment(
-    target->entryPhysToWorkMap(),
-    target->entryWorkToPhysMap(),
-    target->liveIn(),
-    target->isAllocated(),
-    false));
+  ASMJIT_PROPAGATE(switchToAssignment(target->entryPhysToWorkMap(), target->liveIn(), target->isAllocated(), false));

   BaseNode* curCursor = _cc->cursor();
   if (curCursor != injectionPoint) {
@@ -1060,7 +1075,6 @@ Error RALocalAllocator::allocJumpTable(InstNode* node, const RABlocks& targets,
   if (!sharedAssignment.empty()) {
     ASMJIT_PROPAGATE(switchToAssignment(
       sharedAssignment.physToWorkMap(),
-      sharedAssignment.workToPhysMap(),
       sharedAssignment.liveIn(),
       true,  // Read-only.
       false  // Try-mode.
3rdparty/asmjit/src/asmjit/core/ralocal_p.h (14 changed lines, vendored)
@@ -57,6 +57,9 @@ public:
   //! TiedReg's total counter.
   RARegCount _tiedCount;

+  //! Temporary workToPhysMap that can be used freely by the allocator.
+  WorkToPhysMap* _tmpWorkToPhysMap;
+
   //! \name Construction & Destruction
   //! \{
@@ -113,9 +116,7 @@ public:

   Error makeInitialAssignment() noexcept;

-  Error replaceAssignment(
-    const PhysToWorkMap* physToWorkMap,
-    const WorkToPhysMap* workToPhysMap) noexcept;
+  Error replaceAssignment(const PhysToWorkMap* physToWorkMap) noexcept;

   //! Switch to the given assignment by reassigning all register and emitting code that reassigns them.
   //! This is always used to switch to a previously stored assignment.
@@ -123,12 +124,7 @@ public:
   //! If `tryMode` is true then the final assignment doesn't have to be exactly same as specified by `dstPhysToWorkMap`
   //! and `dstWorkToPhysMap`. This mode is only used before conditional jumps that already have assignment to generate
   //! a code sequence that is always executed regardless of the flow.
-  Error switchToAssignment(
-    PhysToWorkMap* dstPhysToWorkMap,
-    WorkToPhysMap* dstWorkToPhysMap,
-    const ZoneBitVector& liveIn,
-    bool dstReadOnly,
-    bool tryMode) noexcept;
+  Error switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, const ZoneBitVector& liveIn, bool dstReadOnly, bool tryMode) noexcept;

   inline Error spillRegsBeforeEntry(RABlock* block) noexcept {
     return spillScratchGpRegsBeforeEntry(block->entryScratchGpRegs());
3rdparty/asmjit/src/asmjit/core/rapass.cpp (81 changed lines, vendored)
@@ -1483,18 +1483,12 @@ Error BaseRAPass::runLocalAllocator() noexcept {
       cc()->_setCursor(unconditionalJump ? prev->prev() : prev);

       if (consecutive->hasEntryAssignment()) {
-        ASMJIT_PROPAGATE(
-          lra.switchToAssignment(
-            consecutive->entryPhysToWorkMap(),
-            consecutive->entryWorkToPhysMap(),
-            consecutive->liveIn(),
-            consecutive->isAllocated(),
-            false));
+        ASMJIT_PROPAGATE(lra.switchToAssignment(consecutive->entryPhysToWorkMap(), consecutive->liveIn(), consecutive->isAllocated(), false));
       }
       else {
         ASMJIT_PROPAGATE(lra.spillRegsBeforeEntry(consecutive));
         ASMJIT_PROPAGATE(setBlockEntryAssignment(consecutive, block, lra._curAssignment));
-        lra._curAssignment.copyFrom(consecutive->entryPhysToWorkMap(), consecutive->entryWorkToPhysMap());
+        lra._curAssignment.copyFrom(consecutive->entryPhysToWorkMap());
       }
     }
@@ -1526,7 +1520,7 @@ Error BaseRAPass::runLocalAllocator() noexcept {
     }

     // If we switched to some block we have to update the local allocator.
-    lra.replaceAssignment(block->entryPhysToWorkMap(), block->entryWorkToPhysMap());
+    lra.replaceAssignment(block->entryPhysToWorkMap());
   }

   _clobberedRegs.op<Support::Or>(lra._clobberedRegs);
@@ -1546,12 +1540,10 @@ Error BaseRAPass::setBlockEntryAssignment(RABlock* block, const RABlock* fromBlo
   }

   PhysToWorkMap* physToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap());
-  WorkToPhysMap* workToPhysMap = cloneWorkToPhysMap(fromAssignment.workToPhysMap());
-
-  if (ASMJIT_UNLIKELY(!physToWorkMap || !workToPhysMap))
+  if (ASMJIT_UNLIKELY(!physToWorkMap))
     return DebugUtils::errored(kErrorOutOfMemory);

-  block->setEntryAssignment(physToWorkMap, workToPhysMap);
+  block->setEntryAssignment(physToWorkMap);

   // True if this is the first (entry) block, nothing to do in this case.
   if (block == fromBlock) {
@@ -1562,10 +1554,6 @@ Error BaseRAPass::setBlockEntryAssignment(RABlock* block, const RABlock* fromBlo
     return kErrorOk;
   }

-  RAAssignment as;
-  as.initLayout(_physRegCount, workRegs());
-  as.initMaps(physToWorkMap, workToPhysMap);
-
   const ZoneBitVector& liveOut = fromBlock->liveOut();
   const ZoneBitVector& liveIn = block->liveIn();
@@ -1578,94 +1566,85 @@ Error BaseRAPass::setBlockEntryAssignment(RABlock* block, const RABlock* fromBlo
       RAWorkReg* workReg = workRegById(workId);

       RegGroup group = workReg->group();
-      uint32_t physId = as.workToPhysId(group, workId);
+      uint32_t physId = fromAssignment.workToPhysId(group, workId);

       if (physId != RAAssignment::kPhysNone)
-        as.unassign(group, workId, physId);
+        physToWorkMap->unassign(group, physId, _physRegIndex.get(group) + physId);
     }
   }

-  return blockEntryAssigned(as);
+  return blockEntryAssigned(physToWorkMap);
 }

 Error BaseRAPass::setSharedAssignment(uint32_t sharedAssignmentId, const RAAssignment& fromAssignment) noexcept {
   ASMJIT_ASSERT(_sharedAssignments[sharedAssignmentId].empty());

   PhysToWorkMap* physToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap());
-  WorkToPhysMap* workToPhysMap = cloneWorkToPhysMap(fromAssignment.workToPhysMap());
-
-  if (ASMJIT_UNLIKELY(!physToWorkMap || !workToPhysMap))
+  if (ASMJIT_UNLIKELY(!physToWorkMap))
     return DebugUtils::errored(kErrorOutOfMemory);

-  _sharedAssignments[sharedAssignmentId].assignMaps(physToWorkMap, workToPhysMap);
+  _sharedAssignments[sharedAssignmentId].assignPhysToWorkMap(physToWorkMap);

   ZoneBitVector& sharedLiveIn = _sharedAssignments[sharedAssignmentId]._liveIn;
   ASMJIT_PROPAGATE(sharedLiveIn.resize(allocator(), workRegCount()));

-  RAAssignment as;
-  as.initLayout(_physRegCount, workRegs());
-
   Support::Array<uint32_t, Globals::kNumVirtGroups> sharedAssigned {};

   for (RABlock* block : blocks()) {
     if (block->sharedAssignmentId() == sharedAssignmentId) {
       ASMJIT_ASSERT(!block->hasEntryAssignment());

       PhysToWorkMap* entryPhysToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap());
-      WorkToPhysMap* entryWorkToPhysMap = cloneWorkToPhysMap(fromAssignment.workToPhysMap());
-
-      if (ASMJIT_UNLIKELY(!entryPhysToWorkMap || !entryWorkToPhysMap))
+      if (ASMJIT_UNLIKELY(!entryPhysToWorkMap))
         return DebugUtils::errored(kErrorOutOfMemory);

-      block->setEntryAssignment(entryPhysToWorkMap, entryWorkToPhysMap);
-      as.initMaps(entryPhysToWorkMap, entryWorkToPhysMap);
+      block->setEntryAssignment(entryPhysToWorkMap);

       const ZoneBitVector& liveIn = block->liveIn();
       sharedLiveIn.or_(liveIn);

       for (RegGroup group : RegGroupVirtValues{}) {
         sharedAssigned[group] |= entryPhysToWorkMap->assigned[group];
+
+        uint32_t physBaseIndex = _physRegIndex.get(group);
         Support::BitWordIterator<RegMask> it(entryPhysToWorkMap->assigned[group]);

         while (it.hasNext()) {
           uint32_t physId = it.next();
-          uint32_t workId = as.physToWorkId(group, physId);
+          uint32_t workId = entryPhysToWorkMap->workIds[physBaseIndex + physId];

           if (!liveIn.bitAt(workId))
-            as.unassign(group, workId, physId);
+            entryPhysToWorkMap->unassign(group, physId, physBaseIndex + physId);
         }
       }
     }
   }

-  {
-    as.initMaps(physToWorkMap, workToPhysMap);
-
-    for (RegGroup group : RegGroupVirtValues{}) {
-      Support::BitWordIterator<RegMask> it(_availableRegs[group] & ~sharedAssigned[group]);
-
-      while (it.hasNext()) {
-        uint32_t physId = it.next();
-        if (as.isPhysAssigned(group, physId)) {
-          uint32_t workId = as.physToWorkId(group, physId);
-          as.unassign(group, workId, physId);
-        }
-      }
-    }
+  for (RegGroup group : RegGroupVirtValues{}) {
+    uint32_t physBaseIndex = _physRegIndex.get(group);
+    Support::BitWordIterator<RegMask> it(_availableRegs[group] & ~sharedAssigned[group]);
+
+    while (it.hasNext()) {
+      uint32_t physId = it.next();
+      if (Support::bitTest(physToWorkMap->assigned[group], physId))
+        physToWorkMap->unassign(group, physId, physBaseIndex + physId);
+    }
   }

-  return blockEntryAssigned(as);
+  return blockEntryAssigned(physToWorkMap);
 }

-Error BaseRAPass::blockEntryAssigned(const RAAssignment& as) noexcept {
+Error BaseRAPass::blockEntryAssigned(const PhysToWorkMap* physToWorkMap) noexcept {
   // Complex allocation strategy requires to record register assignments upon block entry (or per shared state).
   for (RegGroup group : RegGroupVirtValues{}) {
     if (!_strategy[group].isComplex())
       continue;

-    Support::BitWordIterator<RegMask> it(as.assigned(group));
+    uint32_t physBaseIndex = _physRegIndex[group];
+    Support::BitWordIterator<RegMask> it(physToWorkMap->assigned[group]);

     while (it.hasNext()) {
       uint32_t physId = it.next();
-      uint32_t workId = as.physToWorkId(group, physId);
+      uint32_t workId = physToWorkMap->workIds[physBaseIndex + physId];

       RAWorkReg* workReg = workRegById(workId);
       workReg->addAllocatedMask(Support::bitMask(physId));
3rdparty/asmjit/src/asmjit/core/rapass_p.h (62 changed lines, vendored)
@@ -129,10 +129,8 @@ public:
   //! Scratch registers used at exit, by a terminator instruction.
   RegMask _exitScratchGpRegs = 0;

-  //! Register assignment (PhysToWork) on entry.
+  //! Register assignment on entry.
   PhysToWorkMap* _entryPhysToWorkMap = nullptr;
-  //! Register assignment (WorkToPhys) on entry.
-  WorkToPhysMap* _entryWorkToPhysMap = nullptr;

   //! \}
@@ -247,13 +245,8 @@ public:
   }

   inline bool hasEntryAssignment() const noexcept { return _entryPhysToWorkMap != nullptr; }
-  inline WorkToPhysMap* entryWorkToPhysMap() const noexcept { return _entryWorkToPhysMap; }
   inline PhysToWorkMap* entryPhysToWorkMap() const noexcept { return _entryPhysToWorkMap; }

-  inline void setEntryAssignment(PhysToWorkMap* physToWorkMap, WorkToPhysMap* workToPhysMap) noexcept {
-    _entryPhysToWorkMap = physToWorkMap;
-    _entryWorkToPhysMap = workToPhysMap;
-  }
+  inline void setEntryAssignment(PhysToWorkMap* physToWorkMap) noexcept { _entryPhysToWorkMap = physToWorkMap; }

   //! \}
@@ -283,6 +276,8 @@ public:

   //! Parent block.
   RABlock* _block;
+  //! Instruction RW flags.
+  InstRWFlags _instRWFlags;
   //! Aggregated RATiedFlags from all operands & instruction specific flags.
   RATiedFlags _flags;
   //! Total count of RATiedReg's.
@@ -305,9 +300,10 @@ public:
   //! \name Construction & Destruction
   //! \{

-  inline RAInst(RABlock* block, RATiedFlags flags, uint32_t tiedTotal, const RARegMask& clobberedRegs) noexcept {
+  inline RAInst(RABlock* block, InstRWFlags instRWFlags, RATiedFlags tiedFlags, uint32_t tiedTotal, const RARegMask& clobberedRegs) noexcept {
     _block = block;
-    _flags = flags;
+    _instRWFlags = instRWFlags;
+    _flags = tiedFlags;
     _tiedTotal = tiedTotal;
     _tiedIndex.reset();
     _tiedCount.reset();
@@ -321,6 +317,13 @@ public:
   //! \name Accessors
   //! \{

+  //! Returns instruction RW flags.
+  inline InstRWFlags instRWFlags() const noexcept { return _instRWFlags; };
+  //! Tests whether the given `flag` is present in instruction RW flags.
+  inline bool hasInstRWFlag(InstRWFlags flag) const noexcept { return Support::test(_instRWFlags, flag); }
+  //! Adds `flags` to instruction RW flags.
+  inline void addInstRWFlags(InstRWFlags flags) noexcept { _instRWFlags |= flags; }
+
   //! Returns the instruction flags.
   inline RATiedFlags flags() const noexcept { return _flags; }
   //! Tests whether the instruction has flag `flag`.
@@ -383,6 +386,9 @@ public:
   //! \name Members
   //! \{

+  //! Instruction RW flags.
+  InstRWFlags _instRWFlags;
+
   //! Flags combined from all RATiedReg's.
   RATiedFlags _aggregatedFlags;
   //! Flags that will be cleared before storing the aggregated flags to `RAInst`.
@@ -407,6 +413,7 @@ public:

   inline void init() noexcept { reset(); }
   inline void reset() noexcept {
+    _instRWFlags = InstRWFlags::kNone;
     _aggregatedFlags = RATiedFlags::kNone;
     _forbiddenFlags = RATiedFlags::kNone;
     _count.reset();
@@ -421,10 +428,15 @@ public:
   //! \name Accessors
   //! \{

-  inline RATiedFlags aggregatedFlags() const noexcept { return _aggregatedFlags; }
-  inline RATiedFlags forbiddenFlags() const noexcept { return _forbiddenFlags; }
+  inline InstRWFlags instRWFlags() const noexcept { return _instRWFlags; }
+  inline bool hasInstRWFlag(InstRWFlags flag) const noexcept { return Support::test(_instRWFlags, flag); }
+  inline void addInstRWFlags(InstRWFlags flags) noexcept { _instRWFlags |= flags; }
+  inline void clearInstRWFlags(InstRWFlags flags) noexcept { _instRWFlags &= ~flags; }
+
+  inline RATiedFlags aggregatedFlags() const noexcept { return _aggregatedFlags; }
   inline void addAggregatedFlags(RATiedFlags flags) noexcept { _aggregatedFlags |= flags; }

+  inline RATiedFlags forbiddenFlags() const noexcept { return _forbiddenFlags; }
   inline void addForbiddenFlags(RATiedFlags flags) noexcept { _forbiddenFlags |= flags; }

   //! Returns the number of tied registers added to the builder.
@@ -616,8 +628,6 @@ public:
   ZoneBitVector _liveIn {};
   //! Register assignment (PhysToWork).
   PhysToWorkMap* _physToWorkMap = nullptr;
-  //! Register assignment (WorkToPhys).
-  WorkToPhysMap* _workToPhysMap = nullptr;

   //! \}
@@ -632,12 +642,7 @@ public:
   inline const ZoneBitVector& liveIn() const noexcept { return _liveIn; }

   inline PhysToWorkMap* physToWorkMap() const noexcept { return _physToWorkMap; }
-  inline WorkToPhysMap* workToPhysMap() const noexcept { return _workToPhysMap; }
-
-  inline void assignMaps(PhysToWorkMap* physToWorkMap, WorkToPhysMap* workToPhysMap) noexcept {
-    _physToWorkMap = physToWorkMap;
-    _workToPhysMap = workToPhysMap;
-  }
+  inline void assignPhysToWorkMap(PhysToWorkMap* physToWorkMap) noexcept { _physToWorkMap = physToWorkMap; }

   //! \}
 };
@@ -873,16 +878,16 @@ public:
     return _exits.append(allocator(), block);
   }

-  ASMJIT_FORCE_INLINE RAInst* newRAInst(RABlock* block, RATiedFlags flags, uint32_t tiedRegCount, const RARegMask& clobberedRegs) noexcept {
+  ASMJIT_FORCE_INLINE RAInst* newRAInst(RABlock* block, InstRWFlags instRWFlags, RATiedFlags flags, uint32_t tiedRegCount, const RARegMask& clobberedRegs) noexcept {
     void* p = zone()->alloc(RAInst::sizeOf(tiedRegCount));
     if (ASMJIT_UNLIKELY(!p))
       return nullptr;
-    return new(p) RAInst(block, flags, tiedRegCount, clobberedRegs);
+    return new(p) RAInst(block, instRWFlags, flags, tiedRegCount, clobberedRegs);
   }

   ASMJIT_FORCE_INLINE Error assignRAInst(BaseNode* node, RABlock* block, RAInstBuilder& ib) noexcept {
     uint32_t tiedRegCount = ib.tiedRegCount();
-    RAInst* raInst = newRAInst(block, ib.aggregatedFlags(), tiedRegCount, ib._clobbered);
+    RAInst* raInst = newRAInst(block, ib.instRWFlags(), ib.aggregatedFlags(), tiedRegCount, ib._clobbered);

     if (ASMJIT_UNLIKELY(!raInst))
       return DebugUtils::errored(kErrorOutOfMemory);
||||
@ -1066,13 +1071,6 @@ public:
|
||||
return static_cast<PhysToWorkMap*>(zone()->dupAligned(map, size, sizeof(uint32_t)));
|
||||
}
|
||||
|
||||
inline WorkToPhysMap* cloneWorkToPhysMap(const WorkToPhysMap* map) noexcept {
|
||||
size_t size = WorkToPhysMap::sizeOf(_workRegs.size());
|
||||
if (ASMJIT_UNLIKELY(size == 0))
|
||||
return const_cast<WorkToPhysMap*>(map);
|
||||
return static_cast<WorkToPhysMap*>(zone()->dup(map, size));
|
||||
}
|
||||
|
||||
//! \name Liveness Analysis & Statistics
|
||||
//! \{
|
||||
|
||||
@@ -1110,7 +1108,7 @@ public:
   //! Called after the RA assignment has been assigned to a block.
   //!
   //! This cannot change the assignment, but can examine it.
-  Error blockEntryAssigned(const RAAssignment& as) noexcept;
+  Error blockEntryAssigned(const PhysToWorkMap* physToWorkMap) noexcept;

   //! \}
@@ -30,7 +30,7 @@ static inline uint32_t getXmmMovInst(const FuncFrame& frame) {
     : (avx ? Inst::kIdVmovups : Inst::kIdMovups);
 }

-//! Converts `size` to a 'kmov?' instructio.
+//! Converts `size` to a 'kmov?' instruction.
 static inline uint32_t kmovInstFromSize(uint32_t size) noexcept {
   switch (size) {
     case 1: return Inst::kIdKmovb;
3rdparty/asmjit/src/asmjit/x86/x86instapi.cpp (109 changed lines, vendored)
@@ -776,6 +776,15 @@ static ASMJIT_FORCE_INLINE Error rwHandleAVX512(const BaseInst& inst, const Inst
   return kErrorOk;
 }

+static ASMJIT_FORCE_INLINE bool hasSameRegType(const BaseReg* regs, size_t opCount) noexcept {
+  ASMJIT_ASSERT(opCount > 0);
+  RegType regType = regs[0].type();
+  for (size_t i = 1; i < opCount; i++)
+    if (regs[i].type() != regType)
+      return false;
+  return true;
+}
+
 Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept {
   // Only called when `arch` matches X86 family.
   ASMJIT_ASSERT(Environment::isFamilyX86(arch));
@@ -801,13 +810,14 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
     : InstDB::rwInfoB[InstDB::rwInfoIndexB[instId]];
   const InstDB::RWInfoRm& instRmInfo = InstDB::rwInfoRm[instRwInfo.rmInfo];

-  out->_instFlags = 0;
+  out->_instFlags = InstDB::_instFlagsTable[additionalInfo._instFlagsIndex];
   out->_opCount = uint8_t(opCount);
   out->_rmFeature = instRmInfo.rmFeature;
   out->_extraReg.reset();
   out->_readFlags = CpuRWFlags(rwFlags.readFlags);
   out->_writeFlags = CpuRWFlags(rwFlags.writeFlags);

+  uint32_t opTypeMask = 0u;
   uint32_t nativeGpSize = Environment::registerSizeFromArch(arch);

   constexpr OpRWFlags R = OpRWFlags::kRead;
@@ -827,6 +837,8 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
       const Operand_& srcOp = operands[i];
       const InstDB::RWInfoOp& rwOpData = InstDB::rwInfoOp[instRwInfo.opInfoIndex[i]];

+      opTypeMask |= Support::bitMask(srcOp.opType());
+
       if (!srcOp.isRegOrMem()) {
         op.reset();
         continue;
@@ -878,6 +890,35 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
       }
     }

+    // Only keep kMovOp if the instruction is actually register to register move of the same kind.
+    if (out->hasInstFlag(InstRWFlags::kMovOp)) {
+      if (!(opCount >= 2 && opTypeMask == Support::bitMask(OperandType::kReg) && hasSameRegType(reinterpret_cast<const BaseReg*>(operands), opCount)))
+        out->_instFlags &= ~InstRWFlags::kMovOp;
+    }
+
+    // Special cases require more logic.
+    if (instRmInfo.flags & (InstDB::RWInfoRm::kFlagMovssMovsd | InstDB::RWInfoRm::kFlagPextrw | InstDB::RWInfoRm::kFlagFeatureIfRMI)) {
+      if (instRmInfo.flags & InstDB::RWInfoRm::kFlagMovssMovsd) {
+        if (opCount == 2) {
+          if (operands[0].isReg() && operands[1].isReg()) {
+            // Doesn't zero extend the destination.
+            out->_operands[0]._extendByteMask = 0;
+          }
+        }
+      }
+      else if (instRmInfo.flags & InstDB::RWInfoRm::kFlagPextrw) {
+        if (opCount == 3 && Reg::isMm(operands[1])) {
+          out->_rmFeature = 0;
+          rmOpsMask = 0;
+        }
+      }
+      else if (instRmInfo.flags & InstDB::RWInfoRm::kFlagFeatureIfRMI) {
+        if (opCount != 3 || !operands[2].isImm()) {
+          out->_rmFeature = 0;
+        }
+      }
+    }
+
     rmOpsMask &= instRmInfo.rmOpsMask;
     if (rmOpsMask) {
       Support::BitWordIterator<uint32_t> it(rmOpsMask);
@@ -916,6 +957,9 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
       // used to move between GP, segment, control and debug registers. Moving between GP registers also allow to
      // use memory operand.

+      // We will again set the flag if it's actually a move from GP to GP register, otherwise this flag cannot be set.
+      out->_instFlags &= ~InstRWFlags::kMovOp;
+
       if (opCount == 2) {
         if (operands[0].isReg() && operands[1].isReg()) {
           const Reg& o0 = operands[0].as<Reg>();

@@ -926,6 +970,7 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
           out->_operands[1].reset(R | RegM, operands[1].size());

           rwZeroExtendGp(out->_operands[0], operands[0].as<Gp>(), nativeGpSize);
+          out->_instFlags |= InstRWFlags::kMovOp;
           return kErrorOk;
         }
@@ -1618,6 +1663,68 @@ UNIT(x86_inst_api_text) {
       "Instructions do not match \"%s\" (#%u) != \"%s\" (#%u)", aName.data(), a, bName.data(), b);
   }
 }

+template<typename... Args>
+static Error queryRWInfoSimple(InstRWInfo* out, Arch arch, InstId instId, InstOptions options, Args&&... args) {
+  BaseInst inst(instId);
+  inst.addOptions(options);
+  Operand_ opArray[] = { std::forward<Args>(args)... };
+  return InstInternal::queryRWInfo(arch, inst, opArray, sizeof...(args), out);
+}
+
+UNIT(x86_inst_api_rm_feature) {
+  INFO("Verifying whether RM/feature is reported correctly for PEXTRW instruction");
+  {
+    InstRWInfo rwi;
+
+    queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdPextrw, InstOptions::kNone, eax, mm1, imm(1));
+    EXPECT(rwi.rmFeature() == 0);
+
+    queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdPextrw, InstOptions::kNone, eax, xmm1, imm(1));
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kSSE4_1);
+  }
+
+  INFO("Verifying whether RM/feature is reported correctly for AVX512 shift instructions");
+  {
+    InstRWInfo rwi;
+
+    queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpslld, InstOptions::kNone, xmm1, xmm2, imm(8));
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
+
+    queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsllq, InstOptions::kNone, ymm1, ymm2, imm(8));
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
+
+    queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrad, InstOptions::kNone, xmm1, xmm2, imm(8));
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
+
+    queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrld, InstOptions::kNone, ymm1, ymm2, imm(8));
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
+
+    queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrlq, InstOptions::kNone, xmm1, xmm2, imm(8));
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
+
+    queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpslldq, InstOptions::kNone, xmm1, xmm2, imm(8));
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
+
+    queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsllw, InstOptions::kNone, ymm1, ymm2, imm(8));
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
+
+    queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsraw, InstOptions::kNone, xmm1, xmm2, imm(8));
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
+
+    queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrldq, InstOptions::kNone, ymm1, ymm2, imm(8));
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
+
+    queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrlw, InstOptions::kNone, xmm1, xmm2, imm(8));
+    EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
+
+    queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpslld, InstOptions::kNone, xmm1, xmm2, xmm3);
+    EXPECT(rwi.rmFeature() == 0);
+
+    queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsllw, InstOptions::kNone, xmm1, xmm2, xmm3);
+    EXPECT(rwi.rmFeature() == 0);
+  }
+}
 #endif

 ASMJIT_END_SUB_NAMESPACE
3rdparty/asmjit/src/asmjit/x86/x86instdb.cpp (3022 changed lines, vendored)

File diff suppressed because it is too large.
3rdparty/asmjit/src/asmjit/x86/x86instdb.h (2 changed lines, vendored)
@@ -461,7 +461,7 @@ struct InstInfo {
   //! \name Accessors
   //! \{

-  //! Returns common information, see `CommonInfo`.
+  //! Returns common information, see \ref CommonInfo.
   inline const CommonInfo& commonInfo() const noexcept { return _commonInfoTable[_commonInfoIndex]; }

   //! Returns instruction flags, see \ref Flags.
3rdparty/asmjit/src/asmjit/x86/x86instdb_p.h (17 changed lines, vendored)
@@ -189,12 +189,12 @@ enum EncodingId : uint32_t {

 //! Additional information table, provides CPU extensions required to execute an instruction and RW flags.
 struct AdditionalInfo {
-  //! Features vector.
-  uint8_t _features[6];
+  //! Index to `_instFlagsTable`.
+  uint8_t _instFlagsIndex;
   //! Index to `_rwFlagsTable`.
   uint8_t _rwFlagsIndex;
   //! Reserved for future use.
   uint8_t _reserved;
+  //! Features vector.
+  uint8_t _features[6];

   inline const uint8_t* featuresBegin() const noexcept { return _features; }
   inline const uint8_t* featuresEnd() const noexcept { return _features + ASMJIT_ARRAY_SIZE(_features); }
@@ -259,7 +259,13 @@ struct RWInfoRm {
   };

   enum Flags : uint8_t {
-    kFlagAmbiguous = 0x01
+    kFlagAmbiguous = 0x01,
+    //! Special semantics for PEXTRW - memory operand can only be used with SSE4.1 instruction and it's forbidden in MMX.
+    kFlagPextrw = 0x02,
+    //! Special semantics for MOVSS and MOVSD - doesn't zero extend the destination if the operation is a reg to reg move.
+    kFlagMovssMovsd = 0x04,
+    //! Special semantics for AVX shift instructions that do not provide reg/mem in AVX/AVX2 mode (AVX-512 is required).
+    kFlagFeatureIfRMI = 0x08
   };

   uint8_t category;
@@ -283,6 +289,7 @@ extern const RWInfo rwInfoB[];
 extern const RWInfoOp rwInfoOp[];
 extern const RWInfoRm rwInfoRm[];
 extern const RWFlagsInfoTable _rwFlagsInfoTable[];
+extern const InstRWFlags _instFlagsTable[];

 extern const uint32_t _mainOpcodeTable[];
 extern const uint32_t _altOpcodeTable[];
3rdparty/asmjit/src/asmjit/x86/x86rapass.cpp (48 changed lines, vendored)
@@ -126,6 +126,12 @@ Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& cf, RAInstBuilder& i
   bool hasGpbHiConstraint = false;
   uint32_t singleRegOps = 0;

+  // Copy instruction RW flags to instruction builder except kMovOp, which is propagated manually later.
+  ib.addInstRWFlags(rwInfo.instFlags() & ~InstRWFlags::kMovOp);
+
+  // Mask of all operand types used by the instruction - can be used as an optimization later.
+  uint32_t opTypesMask = 0u;
+
   if (opCount) {
     // The mask is for all registers, but we are mostly interested in AVX-512 registers at the moment. The mask
     // will be combined with all available registers of the Compiler at the end so we it never use more registers
@@ -167,6 +173,8 @@ Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& cf, RAInstBuilder& i
       const Operand& op = opArray[i];
       const OpRWInfo& opRwInfo = rwInfo.operand(i);

+      opTypesMask |= 1u << uint32_t(op.opType());
+
       if (op.isReg()) {
         // Register Operand
         // ----------------
@@ -394,6 +402,24 @@ Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& cf, RAInstBuilder& i
       }
     }

+    // If this instruction has move semantics then check whether it could be eliminated if all virtual registers
+    // are allocated into the same register. Take into account the virtual size of the destination register as that's
+    // more important than a physical register size in this case.
+    if (rwInfo.hasInstFlag(InstRWFlags::kMovOp) && !inst->hasExtraReg() && Support::bitTest(opTypesMask, uint32_t(OperandType::kReg))) {
+      // AVX+ move instructions have 3 operand form - the first two operands must be the same to guarantee move semantics.
+      if (opCount == 2 || (opCount == 3 && opArray[0] == opArray[1])) {
+        uint32_t vIndex = Operand::virtIdToIndex(opArray[0].as<Reg>().id());
+        if (vIndex < Operand::kVirtIdCount) {
+          const VirtReg* vReg = _cc->virtRegByIndex(vIndex);
+          const OpRWInfo& opRwInfo = rwInfo.operand(0);
+
+          uint64_t remainingByteMask = vReg->workReg()->regByteMask() & ~opRwInfo.writeByteMask();
+          if (remainingByteMask == 0u || (remainingByteMask & opRwInfo.extendByteMask()) == 0)
+            ib.addInstRWFlags(InstRWFlags::kMovOp);
+        }
+      }
+    }
+
     // Handle X86 constraints.
     if (hasGpbHiConstraint) {
       for (RATiedReg& tiedReg : ib) {
@@ -1251,6 +1277,10 @@ ASMJIT_FAVOR_SPEED Error X86RAPass::_rewrite(BaseNode* first, BaseNode* stop) no

     // Rewrite virtual registers into physical registers.
     if (raInst) {
+      // This data is allocated by Zone passed to `runOnFunction()`, which will be reset after the RA pass finishes.
+      // So reset this data to prevent having a dead pointer after the RA pass is complete.
+      node->resetPassData();
+
       // If the instruction contains pass data (raInst) then it was a subject for register allocation and must be
       // rewritten to use physical regs.
       RATiedReg* tiedRegs = raInst->tiedRegs();
@@ -1274,16 +1304,25 @@ ASMJIT_FAVOR_SPEED Error X86RAPass::_rewrite(BaseNode* first, BaseNode* stop) no
         }
       }

+      // Transform VEX instruction to EVEX when necessary.
       if (raInst->isTransformable()) {
         if (maxRegId > 15) {
-          // Transform VEX instruction to EVEX.
           inst->setId(transformVexToEvex(inst->id()));
         }
       }

-      // This data is allocated by Zone passed to `runOnFunction()`, which will be reset after the RA pass finishes.
-      // So reset this data to prevent having a dead pointer after the RA pass is complete.
-      node->resetPassData();
+      // Remove moves that do not do anything.
+      //
+      // Usually these moves are inserted during code generation and originally they used different registers. If RA
+      // allocated these into the same register such redundant mov would appear.
+      if (raInst->hasInstRWFlag(InstRWFlags::kMovOp) && !inst->hasExtraReg()) {
+        if (inst->opCount() == 2) {
+          if (inst->op(0) == inst->op(1)) {
+            cc()->removeNode(node);
+            goto Next;
+          }
+        }
+      }

     if (ASMJIT_UNLIKELY(node->type() != NodeType::kInst)) {
       // FuncRet terminates the flow, it must either be removed if the exit label is next to it (optimization) or
@@ -1327,6 +1366,7 @@ ASMJIT_FAVOR_SPEED Error X86RAPass::_rewrite(BaseNode* first, BaseNode* stop) no
       }
     }

+Next:
     node = next;
   }
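A hypothetical Compiler fragment (not part of the commit) showing the effect of the new rewrite step, assuming an x86::Compiler named `cc`:

    x86::Gp a = cc.newUInt32("a");
    x86::Gp b = cc.newUInt32("b");

    cc.mov(b, a);  // Tagged kMovOp; if RA assigns 'a' and 'b' the same physical
                   // register, _rewrite() removes this node entirely.
    cc.add(b, 1);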
3rdparty/asmjit/test/asmjit_test_compiler_x86.cpp (119 changed lines, vendored)
@ -3819,6 +3819,124 @@ public:
static uint32_t calledFunc(uint32_t x) { return x + 1; }
};

// x86::Compiler - X86Test_FuncCallAVXClobber
// ==========================================

class X86Test_FuncCallAVXClobber : public X86TestCase {
public:
X86Test_FuncCallAVXClobber() : X86TestCase("FuncCallAVXClobber") {}

static void add(TestApp& app) {
const CpuInfo& cpuInfo = CpuInfo::host();

if (cpuInfo.features().x86().hasAVX2() && sizeof(void*) == 8)
app.add(new X86Test_FuncCallAVXClobber());
}

virtual void compile(x86::Compiler& cc) {
FuncNode* mainFunc = cc.addFunc(FuncSignatureT<void, void*, const void*, const void*>(CallConvId::kHost));
mainFunc->frame().setAvxEnabled();
mainFunc->frame().setAvxCleanup();

// We need a Windows calling convention to test this properly even on a non-Windows machine.
FuncNode* helperFunc = cc.newFunc(FuncSignatureT<void, void*, const void*>(CallConvId::kX64Windows));
helperFunc->frame().setAvxEnabled();
helperFunc->frame().setAvxCleanup();

{
size_t i;

x86::Gp dPtr = cc.newIntPtr("dPtr");
x86::Gp aPtr = cc.newIntPtr("aPtr");
x86::Gp bPtr = cc.newIntPtr("bPtr");
x86::Gp tPtr = cc.newIntPtr("tPtr");
x86::Ymm acc[8];
x86::Mem stack = cc.newStack(32, 1, "stack");

mainFunc->setArg(0, dPtr);
mainFunc->setArg(1, aPtr);
mainFunc->setArg(2, bPtr);

cc.lea(tPtr, stack);
for (i = 0; i < 8; i++) {
acc[i] = cc.newYmm("acc%zu", i);
cc.vmovdqu(acc[i], x86::ptr(aPtr));
}

InvokeNode* invokeNode;
cc.invoke(&invokeNode,
helperFunc->label(),
FuncSignatureT<void, void*, const void*>(CallConvId::kX64Windows));
invokeNode->setArg(0, tPtr);
invokeNode->setArg(1, bPtr);

for (i = 1; i < 8; i++) {
cc.vpaddd(acc[0], acc[0], acc[i]);
}

cc.vpaddd(acc[0], acc[0], x86::ptr(tPtr));
cc.vmovdqu(x86::ptr(dPtr), acc[0]);

cc.endFunc();
}

{
cc.addFunc(helperFunc);

x86::Gp dPtr = cc.newIntPtr("dPtr");
x86::Gp aPtr = cc.newIntPtr("aPtr");

helperFunc->setArg(0, dPtr);
helperFunc->setArg(1, aPtr);

x86::Gp tmp = cc.newIntPtr("tmp");
x86::Ymm acc = cc.newYmm("acc");

cc.mov(tmp, 1);
cc.vmovd(acc.xmm(), tmp);
cc.vpbroadcastd(acc, acc.xmm());
cc.vpaddd(acc, acc, x86::ptr(aPtr));
cc.vmovdqu(x86::ptr(dPtr), acc);

cc.endFunc();
}
}

virtual bool run(void* _func, String& result, String& expect) {
typedef void (*Func)(void*, const void*, const void*);
Func func = ptr_as_func<Func>(_func);

size_t i;

static const uint32_t aData[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
static const uint32_t bData[8] = { 6, 3, 5, 9, 1, 8, 7, 2 };

uint32_t resultData[8];
uint32_t expectData[8];

for (i = 0; i < 8; i++)
expectData[i] = aData[i] * 8 + bData[i] + 1;

func(resultData, aData, bData);

result.assign("{");
expect.assign("{");

for (i = 0; i < 8; i++) {
result.appendFormat("%u", resultData[i]);
expect.appendFormat("%u", expectData[i]);

if (i != 7) result.append(", ");
if (i != 7) expect.append(", ");
}

result.append("}");
expect.append("}");

return result == expect;
}
};
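As an aside, a hedged sketch of the usual Compiler-to-JitRuntime round trip that drives a test like this one; the real framework's plumbing differs and error handling is omitted. The expected values in run() follow from acc[0] accumulating eight copies of aData while the helper writes bData + 1 into the stack slot that the main function adds at the end.

#include <asmjit/x86.h>
using namespace asmjit;

typedef void (*TestFunc)(void*, const void*, const void*);

// `build` stands for a test's compile() body, e.g. the one shown above.
TestFunc jitCompile(JitRuntime& rt, void (*build)(x86::Compiler&)) {
  CodeHolder code;
  code.init(rt.environment());  // Match the JIT's target environment.
  x86::Compiler cc(&code);
  build(cc);
  cc.finalize();                // Runs register allocation and serializes code.
  TestFunc fn = nullptr;
  rt.add(&fn, &code);           // Relocates and makes the code executable.
  return fn;
}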

// x86::Compiler - X86Test_MiscLocalConstPool
// ==========================================

@ -4186,6 +4304,7 @@ void compiler_add_x86_tests(TestApp& app) {
app.addT<X86Test_FuncCallMisc4>();
app.addT<X86Test_FuncCallMisc5>();
app.addT<X86Test_FuncCallMisc6>();
app.addT<X86Test_FuncCallAVXClobber>();

// Miscellaneous tests.
app.addT<X86Test_MiscLocalConstPool>();
78
3rdparty/asmjit/tools/tablegen-x86.js
vendored
@ -1786,6 +1786,7 @@ class AdditionalInfoTable extends core.Task {
run() {
const insts = this.ctx.insts;
const rwInfoTable = new IndexedArray();
const instFlagsTable = new IndexedArray();
const additionaInfoTable = new IndexedArray();

// If the instruction doesn't read any flags it should point to the first index.
@ -1800,9 +1801,48 @@ class AdditionalInfoTable extends core.Task {
var [r, w] = this.rwFlagsOf(dbInsts);
const rData = r.map(function(flag) { return `FLAG(${flag})`; }).join(" | ") || "0";
const wData = w.map(function(flag) { return `FLAG(${flag})`; }).join(" | ") || "0";
const rwDataIndex = rwInfoTable.addIndexed(`{ ${rData}, ${wData} }`);
const instFlags = Object.create(null);

inst.additionalInfoIndex = additionaInfoTable.addIndexed(`{ { ${features} }, ${rwDataIndex}, 0 }`);
switch (inst.name) {
case "kmovb":
case "kmovd":
case "kmovq":
case "kmovw":
case "mov":
case "movq":
case "movsd":
case "movss":
case "movapd":
case "movaps":
case "movdqa":
case "movdqu":
case "movupd":
case "movups":
case "vmovapd":
case "vmovaps":
case "vmovdqa":
case "vmovdqa8":
case "vmovdqa16":
case "vmovdqa32":
case "vmovdqa64":
case "vmovdqu":
case "vmovdqu8":
case "vmovdqu16":
case "vmovdqu32":
case "vmovdqu64":
case "vmovq":
case "vmovsd":
case "vmovss":
case "vmovupd":
case "vmovups":
instFlags["MovOp"] = true;
break;
}

const instFlagsIndex = instFlagsTable.addIndexed("InstRWFlags(" + CxxUtils.flags(instFlags, (f) => { return `FLAG(${f})`; }, "FLAG(None)") + ")");
const rwInfoIndex = rwInfoTable.addIndexed(`{ ${rData}, ${wData} }`);

inst.additionalInfoIndex = additionaInfoTable.addIndexed(`{ ${instFlagsIndex}, ${rwInfoIndex}, { ${features} } }`);
});

var s = `#define EXT(VAL) uint32_t(CpuFeatures::X86::k##VAL)\n` +
@ -1811,8 +1851,12 @@ class AdditionalInfoTable extends core.Task {
`\n` +
`#define FLAG(VAL) uint32_t(CpuRWFlags::kX86_##VAL)\n` +
`const InstDB::RWFlagsInfoTable InstDB::_rwFlagsInfoTable[] = {\n${StringUtils.format(rwInfoTable, kIndent, true)}\n};\n` +
`#undef FLAG\n` +
`\n` +
`#define FLAG(VAL) uint32_t(InstRWFlags::k##VAL)\n` +
`const InstRWFlags InstDB::_instFlagsTable[] = {\n${StringUtils.format(instFlagsTable, kIndent, true)}\n};\n` +
`#undef FLAG\n`;
this.inject("AdditionalInfoTable", disclaimer(s), additionaInfoTable.length * 8 + rwInfoTable.length * 8);
this.inject("AdditionalInfoTable", disclaimer(s), additionaInfoTable.length * 8 + rwInfoTable.length * 8 + instFlagsTable.length * 4);
}
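For orientation, a hedged C++ sketch of the consumer side: the generated _instFlagsTable is reached through an instruction's additional-info record, which is how queryRWInfo() ends up exposing flags such as kMovOp to the register allocator. The struct layout below is an assumption inferred from the emitted text, not asmjit's actual definition.

#include <asmjit/x86.h>
using namespace asmjit;

// Assumed shape of one `{ instFlagsIndex, rwInfoIndex, { features } }` record.
struct AdditionalInfoRecord {
  uint8_t instFlagsIndex;  // Index into _instFlagsTable.
  uint8_t rwInfoIndex;     // Index into _rwFlagsInfoTable.
  uint8_t features[6];     // EXT(...) CPU feature ids.
};

InstRWFlags instFlagsOf(const AdditionalInfoRecord* additionalInfo,
                        const InstRWFlags* instFlagsTable,
                        uint32_t additionalInfoIndex) {
  return instFlagsTable[additionalInfo[additionalInfoIndex].instFlagsIndex];
}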
rwFlagsOf(dbInsts) {
@ -2030,7 +2074,12 @@ class InstRWInfoTable extends core.Task {
"InstDB::RWInfoRm::kCategory" + rmInfo.category.padEnd(10),
StringUtils.decToHex(rmInfo.rmIndexes, 2),
String(Math.max(rmInfo.memFixed, 0)).padEnd(2),
CxxUtils.flags({ "InstDB::RWInfoRm::kFlagAmbiguous": Boolean(rmInfo.memAmbiguous) }),
CxxUtils.flags({
"InstDB::RWInfoRm::kFlagAmbiguous": Boolean(rmInfo.memAmbiguous),
"InstDB::RWInfoRm::kFlagMovssMovsd": Boolean(inst.name === "movss" || inst.name === "movsd"),
"InstDB::RWInfoRm::kFlagPextrw": Boolean(inst.name === "pextrw"),
"InstDB::RWInfoRm::kFlagFeatureIfRMI": Boolean(rmInfo.memExtensionIfRMI)
}),
rmInfo.memExtension === "None" ? "0" : "uint32_t(CpuFeatures::X86::k" + rmInfo.memExtension + ")"
);

@ -2284,7 +2333,8 @@ class InstRWInfoTable extends core.Task {
memFixed: this.rmFixedSize(dbInsts),
memAmbiguous: this.rmIsAmbiguous(dbInsts),
memConsistent: this.rmIsConsistent(dbInsts),
memExtension: this.rmExtension(dbInsts)
memExtension: this.rmExtension(dbInsts),
memExtensionIfRMI: this.rmExtensionIfRMI(dbInsts)
};

if (info.memFixed !== -1)
@ -2493,14 +2543,32 @@ class InstRWInfoTable extends core.Task {
case "pextrw":
return "SSE4_1";

case "vpslld":
case "vpsllq":
case "vpsrad":
case "vpsrld":
case "vpsrlq":
return "AVX512_F";

case "vpslldq":
case "vpsllw":
case "vpsraw":
case "vpsrldq":
case "vpsrlw":
return "AVX512_BW";

default:
return "None";
}
}

rmExtensionIfRMI(dbInsts) {
if (!dbInsts.length)
return 0;

const name = dbInsts[0].name;
return /^(vpslld|vpsllq|vpsrad|vpsrld|vpsrlq|vpslldq|vpsllw|vpsraw|vpsrldq|vpsrlw)$/.test(name);
}
}
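A hedged illustration of what kFlagFeatureIfRMI encodes: for these shifts the (reg, mem, imm) form exists only as an EVEX encoding, so shifting a memory operand by an immediate silently raises the feature requirement to AVX512_F or AVX512_BW even though the register form needs only AVX/AVX2. Assuming the emitter accepts both operand forms:

#include <asmjit/x86.h>
using namespace asmjit;

void shiftForms(x86::Assembler& a) {
  a.vpslld(x86::xmm0, x86::xmm1, 4);           // (reg, reg, imm): VEX, plain AVX.
  a.vpslld(x86::xmm0, x86::ptr(x86::rax), 4);  // (reg, mem, imm): EVEX-only, AVX512_F.
}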
// ============================================================================