diff --git a/3rdparty/asmjit/CMakeLists.txt b/3rdparty/asmjit/CMakeLists.txt index 9d459f00cad..aa6df9d6111 100644 --- a/3rdparty/asmjit/CMakeLists.txt +++ b/3rdparty/asmjit/CMakeLists.txt @@ -1,38 +1,16 @@ -cmake_minimum_required(VERSION 3.8 FATAL_ERROR) +cmake_minimum_required(VERSION 3.19 FATAL_ERROR) -cmake_policy(PUSH) - -if (POLICY CMP0063) - cmake_policy(SET CMP0063 NEW) # Honor visibility properties. -endif() - -if (POLICY CMP0092) - cmake_policy(SET CMP0092 NEW) # Don't add -W3 warning level by default. -endif() - -# Don't create a project if it was already created by another CMakeLists.txt. -# This allows one library to embed another library without making a collision. +# Don't create a project if it was already created by another CMakeLists.txt. This makes +# it possible to support both add_subdirectory() and include() ways of using AsmJit as a +# dependency. if (NOT CMAKE_PROJECT_NAME OR "${CMAKE_PROJECT_NAME}" STREQUAL "asmjit") project(asmjit CXX) endif() include(CheckCXXCompilerFlag) -INCLUDE(CheckCXXSourceCompiles) +include(CheckCXXSourceCompiles) include(GNUInstallDirs) -# AsmJit - Deprecated -# =================== - -if (DEFINED ASMJIT_BUILD_EMBED) - message(DEPRECATION "ASMJIT_BUILD_EMBED is deprecated, use ASMJIT_EMBED") - set(ASMJIT_EMBED "${ASMJIT_BUILD_EMBED}") -endif() - -if (DEFINED ASMJIT_BUILD_STATIC) - message(DEPRECATION "ASMJIT_BUILD_STATIC is deprecated, use ASMJIT_STATIC") - set(ASMJIT_STATIC "${ASMJIT_BUILD_STATIC}") -endif() - # AsmJit - Configuration - Build # ============================== @@ -212,18 +190,14 @@ function(asmjit_add_target target target_type) add_library(${target} ${target_type} ${X_SOURCES}) endif() - set_target_properties(${target} PROPERTIES DEFINE_SYMBOL "") - target_link_libraries(${target} PRIVATE ${X_LIBRARIES}) - - # target_link_options was added in cmake v3.13, don't use it for now... - foreach(link_flag ${ASMJIT_PRIVATE_LFLAGS}) - set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS " ${link_flag}") - endforeach() - - target_compile_features(${target} PUBLIC cxx_std_11) - set_property(TARGET ${target} PROPERTY CXX_EXTENSIONS NO) - set_property(TARGET ${target} PROPERTY CXX_VISIBILITY_PRESET hidden) + set_target_properties(${target} + PROPERTIES + DEFINE_SYMBOL "" + CXX_VISIBILITY_PRESET hidden) target_compile_options(${target} PRIVATE ${X_CFLAGS} ${ASMJIT_SANITIZE_CFLAGS} $<$<CONFIG:Debug>:${X_CFLAGS_DBG}> $<$<NOT:$<CONFIG:Debug>>:${X_CFLAGS_REL}>) + target_compile_features(${target} PUBLIC cxx_std_11) + target_link_options(${target} PRIVATE ${ASMJIT_PRIVATE_LFLAGS}) + target_link_libraries(${target} PRIVATE ${X_LIBRARIES}) if ("${target_type}" STREQUAL "TEST") add_test(NAME ${target} COMMAND ${target}) @@ -590,10 +564,8 @@ if (NOT ASMJIT_EMBED) $ $) - # Add blend2d::blend2d alias. + # Create an asmjit::asmjit alias. add_library(asmjit::asmjit ALIAS asmjit) - # TODO: [CMAKE] Deprecated alias - we use projectname::libraryname convention now. - add_library(AsmJit::AsmJit ALIAS asmjit) # Add AsmJit install instructions (library and public headers).
if (NOT ASMJIT_NO_INSTALL) @@ -713,5 +685,3 @@ if (NOT ASMJIT_EMBED) endif() endif() - -cmake_policy(POP) diff --git a/3rdparty/asmjit/src/asmjit/arm/a64assembler.cpp b/3rdparty/asmjit/src/asmjit/arm/a64assembler.cpp index c926766af6f..32514b9bad9 100644 --- a/3rdparty/asmjit/src/asmjit/arm/a64assembler.cpp +++ b/3rdparty/asmjit/src/asmjit/arm/a64assembler.cpp @@ -53,6 +53,21 @@ static constexpr uint32_t kWX = InstDB::kWX; static const uint8_t armShiftOpToLdStOptMap[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) }; #undef VALUE +// a64::Assembler - ExtendOpToRegType +// ================================== + +static inline RegType extendOptionToRegType(uint32_t option) noexcept { + uint32_t pred = (uint32_t(RegType::kARM_GpW) << (0x0 * 4)) | // 0b000 - UXTB. + (uint32_t(RegType::kARM_GpW) << (0x1 * 4)) | // 0b001 - UXTH. + (uint32_t(RegType::kARM_GpW) << (0x2 * 4)) | // 0b010 - UXTW. + (uint32_t(RegType::kARM_GpX) << (0x3 * 4)) | // 0b011 - UXTX|LSL. + (uint32_t(RegType::kARM_GpW) << (0x4 * 4)) | // 0b100 - SXTB. + (uint32_t(RegType::kARM_GpW) << (0x5 * 4)) | // 0b101 - SXTH. + (uint32_t(RegType::kARM_GpW) << (0x6 * 4)) | // 0b110 - SXTW. + (uint32_t(RegType::kARM_GpX) << (0x7 * 4)) ; // 0b111 - SXTX. + return RegType((pred >> (option * 4u)) & 0xFu); +} + // asmjit::a64::Assembler - SizeOp // =============================== @@ -1228,9 +1243,6 @@ Error Assembler::_emit(InstId instId, const Operand_& o0, const Operand_& o1, co } if (isign4 == ENC_OPS3(Reg, Reg, Reg) || isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) { - if (!checkSignature(o1, o2)) - goto InvalidInstruction; - uint32_t opSize = x ? 64 : 32; uint64_t shift = 0; uint32_t sType = uint32_t(ShiftOp::kLSL); @@ -1247,11 +1259,17 @@ Error Assembler::_emit(InstId instId, const Operand_& o0, const Operand_& o1, co if (sType <= uint32_t(ShiftOp::kASR)) { bool hasSP = o0.as<Gp>().isSP() || o1.as<Gp>().isSP(); if (!hasSP) { - if (!checkGpId(o0, o1, kZR)) - goto InvalidPhysId; + if (!checkSignature(o1, o2)) { + goto InvalidInstruction; + } - if (shift >= opSize) + if (!checkGpId(o0, o1, kZR)) { + goto InvalidPhysId; + } + + if (shift >= opSize) { goto InvalidImmediate; + } opcode.reset(uint32_t(opData.shiftedOp) << 21); opcode.addImm(x, 31); @@ -1264,8 +1282,10 @@ Error Assembler::_emit(InstId instId, const Operand_& o0, const Operand_& o1, co } // SP register can only be used with LSL or Extend. - if (sType != uint32_t(ShiftOp::kLSL)) + if (sType != uint32_t(ShiftOp::kLSL)) { goto InvalidImmediate; + } + sType = x ? uint32_t(ShiftOp::kUXTX) : uint32_t(ShiftOp::kUXTW); } @@ -1273,8 +1293,9 @@ Error Assembler::_emit(InstId instId, const Operand_& o0, const Operand_& o1, co opcode.reset(uint32_t(opData.extendedOp) << 21); sType -= uint32_t(ShiftOp::kUXTB); - if (sType > 7 || shift > 4) + if (sType > 7 || shift > 4) { goto InvalidImmediate; + } if (!(opcode.get() & B(29))) { // ADD|SUB (extend) - ZR is not allowed. @@ -1287,6 +1308,11 @@ Error Assembler::_emit(InstId instId, const Operand_& o0, const Operand_& o1, co goto InvalidPhysId; } + // Validate whether the register operands match extend option. + if (o2.as<Reg>().type() != extendOptionToRegType(sType) || o1.as<Reg>().type() < o2.as<Reg>().type()) { + goto InvalidInstruction; + } + opcode.addImm(x, 31); opcode.addReg(o2, 16); opcode.addImm(sType, 13); @@ -1412,9 +1438,6 @@ Error Assembler::_emit(InstId instId, const Operand_& o0, const Operand_& o1, co } if (isign4 == ENC_OPS2(Reg, Reg) || isign4 == ENC_OPS3(Reg, Reg, Imm)) { - if (!checkSignature(o0, o1)) - goto InvalidInstruction; - uint32_t opSize = x ?
64 : 32; uint32_t sType = 0; uint64_t shift = 0; @@ -1429,8 +1452,13 @@ Error Assembler::_emit(InstId instId, const Operand_& o0, const Operand_& o1, co // Shift operation - LSL, LSR, ASR. if (sType <= uint32_t(ShiftOp::kASR)) { if (!hasSP) { - if (shift >= opSize) + if (!checkSignature(o0, o1)) { + goto InvalidInstruction; + } + + if (shift >= opSize) { goto InvalidImmediate; + } opcode.reset(uint32_t(opData.shiftedOp) << 21); opcode.addImm(x, 31); @@ -1451,8 +1479,14 @@ Error Assembler::_emit(InstId instId, const Operand_& o0, const Operand_& o1, co // Extend operation - UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX. sType -= uint32_t(ShiftOp::kUXTB); - if (sType > 7 || shift > 4) + if (sType > 7 || shift > 4) { goto InvalidImmediate; + } + + // Validate whether the register operands match extend option. + if (o1.as<Reg>().type() != extendOptionToRegType(sType) || o0.as<Reg>().type() < o1.as<Reg>().type()) { + goto InvalidInstruction; + } opcode.reset(uint32_t(opData.extendedOp) << 21); opcode.addImm(x, 31); diff --git a/3rdparty/asmjit/src/asmjit/arm/a64emitter.h b/3rdparty/asmjit/src/asmjit/arm/a64emitter.h index 43484344526..15c91e0ba4a 100644 --- a/3rdparty/asmjit/src/asmjit/arm/a64emitter.h +++ b/3rdparty/asmjit/src/asmjit/arm/a64emitter.h @@ -84,6 +84,17 @@ struct EmitterExplicitT { //! \endcond + + //! \name Native Registers + //! \{ + + //! Returns either 32-bit or 64-bit GP register of the given `id` depending on the emitter's architecture. + inline Gp gpz(uint32_t id) const noexcept { return Gp(_emitter()->_gpSignature, id); } + //! Clones the given `reg` to either 32-bit or 64-bit GP register depending on the emitter's architecture. + inline Gp gpz(const Gp& reg) const noexcept { return Gp(_emitter()->_gpSignature, reg.id()); } + + //! \} + //! \name General Purpose Instructions //! \{ diff --git a/3rdparty/asmjit/src/asmjit/arm/a64func.cpp b/3rdparty/asmjit/src/asmjit/arm/a64func.cpp index a88c4d88506..a33a2f2d9d7 100644 --- a/3rdparty/asmjit/src/asmjit/arm/a64func.cpp +++ b/3rdparty/asmjit/src/asmjit/arm/a64func.cpp @@ -13,7 +13,7 @@ ASMJIT_BEGIN_SUB_NAMESPACE(a64) namespace FuncInternal { -static inline bool shouldThreatAsCDecl(CallConvId ccId) noexcept { +static inline bool shouldTreatAsCDecl(CallConvId ccId) noexcept { return ccId == CallConvId::kCDecl || ccId == CallConvId::kStdCall || ccId == CallConvId::kFastCall || @@ -53,7 +53,7 @@ ASMJIT_FAVOR_SIZE Error initCallConv(CallConv& cc, CallConvId ccId, const Enviro cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5, 6, 7); cc.setNaturalStackAlignment(16); - if (shouldThreatAsCDecl(ccId)) { + if (shouldTreatAsCDecl(ccId)) { // ARM doesn't have that many calling conventions as we can find in X86 world, treat most conventions as __cdecl. cc.setId(CallConvId::kCDecl); cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(Gp::kIdOs, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)); diff --git a/3rdparty/asmjit/src/asmjit/arm/a64globals.h b/3rdparty/asmjit/src/asmjit/arm/a64globals.h index 8093885b32c..720b6f151ce 100644 --- a/3rdparty/asmjit/src/asmjit/arm/a64globals.h +++ b/3rdparty/asmjit/src/asmjit/arm/a64globals.h @@ -21,7 +21,7 @@ ASMJIT_BEGIN_SUB_NAMESPACE(a64) //! AArch64 instruction. //! //! \note Only used to hold ARM-specific enumerations and static functions. -struct Inst { +namespace Inst { //! Instruction id.
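The new extendOptionToRegType() helper above avoids both a branch chain and a memory table: each of the eight 3-bit extend options maps to a 4-bit RegType value, and all eight entries are packed into a single 32-bit constant, so a lookup is one shift plus one mask. A standalone sketch of the same packing trick (names here are illustrative, not asmjit API):

    #include <cstdint>
    #include <cstdio>

    // Two register kinds stand in for RegType::kARM_GpW / kARM_GpX.
    enum class RegKind : uint32_t { kW = 1, kX = 2 };

    // Pack one 4-bit entry per 3-bit extend option into a single constant.
    static constexpr uint32_t pack(RegKind kind, uint32_t slot) noexcept {
      return uint32_t(kind) << (slot * 4u);
    }

    static constexpr uint32_t kExtendToKind =
      pack(RegKind::kW, 0) | // 0b000 - UXTB.
      pack(RegKind::kW, 1) | // 0b001 - UXTH.
      pack(RegKind::kW, 2) | // 0b010 - UXTW.
      pack(RegKind::kX, 3) | // 0b011 - UXTX|LSL.
      pack(RegKind::kW, 4) | // 0b100 - SXTB.
      pack(RegKind::kW, 5) | // 0b101 - SXTH.
      pack(RegKind::kW, 6) | // 0b110 - SXTW.
      pack(RegKind::kX, 7);  // 0b111 - SXTX.

    // The whole lookup is a shift and a mask - no branches, no table in memory.
    static inline RegKind extendToKind(uint32_t option) noexcept {
      return RegKind((kExtendToKind >> (option * 4u)) & 0xFu);
    }

    int main() {
      for (uint32_t option = 0; option < 8; option++)
        printf("option %u expects a %s register\n", option,
               extendToKind(option) == RegKind::kX ? "64-bit" : "32-bit");
      return 0;
    }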
enum Id : uint32_t { // ${InstId:Begin} diff --git a/3rdparty/asmjit/src/asmjit/arm/a64instapi.cpp b/3rdparty/asmjit/src/asmjit/arm/a64instapi.cpp index 97e23fd9452..023be05573e 100644 --- a/3rdparty/asmjit/src/asmjit/arm/a64instapi.cpp +++ b/3rdparty/asmjit/src/asmjit/arm/a64instapi.cpp @@ -137,11 +137,13 @@ Error queryRWInfo(const BaseInst& inst, const Operand_* operands, size_t opCount if (memOp.hasBase()) { op.addOpFlags(OpRWFlags::kMemBaseRead); + if ((memOp.hasIndex() || memOp.hasOffset()) && memOp.isPreOrPost()) { + op.addOpFlags(OpRWFlags::kMemBaseWrite); + } } if (memOp.hasIndex()) { op.addOpFlags(OpRWFlags::kMemIndexRead); - op.addOpFlags(memOp.isPreOrPost() ? OpRWFlags::kMemIndexWrite : OpRWFlags::kNone); } } } @@ -191,10 +193,13 @@ Error queryRWInfo(const BaseInst& inst, const Operand_* operands, size_t opCount if (memOp.hasBase()) { op.addOpFlags(OpRWFlags::kMemBaseRead); + if ((memOp.hasIndex() || memOp.hasOffset()) && memOp.isPreOrPost()) { + op.addOpFlags(OpRWFlags::kMemBaseWrite); + } } if (memOp.hasIndex()) { - op.addOpFlags(memOp.isPreOrPost() ? OpRWFlags::kMemIndexRW : OpRWFlags::kMemIndexRead); + op.addOpFlags(OpRWFlags::kMemIndexRead); } } } diff --git a/3rdparty/asmjit/src/asmjit/arm/a64instdb.cpp b/3rdparty/asmjit/src/asmjit/arm/a64instdb.cpp index b19f8808a63..bc6646e895e 100644 --- a/3rdparty/asmjit/src/asmjit/arm/a64instdb.cpp +++ b/3rdparty/asmjit/src/asmjit/arm/a64instdb.cpp @@ -210,7 +210,7 @@ const InstInfo _instInfoTable[] = { INST(Ldlarb , BaseRM_NoImm , (0b0000100011011111011111, kW , kZR, 0 ) , kRWI_W , 0 , 8 ), // #149 INST(Ldlarh , BaseRM_NoImm , (0b0100100011011111011111, kW , kZR, 0 ) , kRWI_W , 0 , 9 ), // #150 INST(Ldnp , BaseLdpStp , (0b0010100001, 0 , kWX, 31, 2) , kRWI_WW , 0 , 0 ), // #151 - INST(Ldp , BaseLdpStp , (0b0010100101, 0b0010100011, kWX, 31, 2) , kRWI_W , 0 , 1 ), // #152 + INST(Ldp , BaseLdpStp , (0b0010100101, 0b0010100011, kWX, 31, 2) , kRWI_WW , 0 , 1 ), // #152 INST(Ldpsw , BaseLdpStp , (0b0110100101, 0b0110100011, kX , 0 , 2) , kRWI_WW , 0 , 2 ), // #153 INST(Ldr , BaseLdSt , (0b1011100101, 0b10111000010, 0b10111000011, 0b00011000, kWX, 30, 2, Inst::kIdLdur) , kRWI_W , 0 , 0 ), // #154 INST(Ldraa , BaseRM_SImm10 , (0b1111100000100000000001, kX , kZR, 0, 3) , kRWI_W , 0 , 0 ), // #155 diff --git a/3rdparty/asmjit/src/asmjit/arm/a64operand.h b/3rdparty/asmjit/src/asmjit/arm/a64operand.h index c64f20eb445..9e233062b69 100644 --- a/3rdparty/asmjit/src/asmjit/arm/a64operand.h +++ b/3rdparty/asmjit/src/asmjit/arm/a64operand.h @@ -129,17 +129,45 @@ public: //! Resets vector element type to none. 
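Two related RW-info fixes appear in the a64instapi.cpp and a64instdb.cpp hunks above: pre/post-indexed memory operands now mark the base register as written (it is the base that receives the updated address on AArch64, not the index), and LDP's metadata changes from kRWI_W to kRWI_WW because a load-pair writes two destination registers. A self-contained sketch of the base/index rule, using illustrative types rather than asmjit's:

    #include <cstdint>
    #include <cstdio>

    // Illustrative flags and memory-operand model; not asmjit's types.
    enum RWFlags : uint32_t {
      kMemBaseRead  = 1u << 0,
      kMemBaseWrite = 1u << 1, // base receives the updated address
      kMemIndexRead = 1u << 2
    };

    struct MemRef {
      bool hasBase, hasIndex, hasOffset, preOrPost;
    };

    // Mirrors the corrected logic: pre/post-indexed forms write the BASE
    // register back; an index register is only ever read.
    static uint32_t memOperandRW(const MemRef& m) {
      uint32_t flags = 0;
      if (m.hasBase) {
        flags |= kMemBaseRead;
        if ((m.hasIndex || m.hasOffset) && m.preOrPost)
          flags |= kMemBaseWrite;
      }
      if (m.hasIndex)
        flags |= kMemIndexRead;
      return flags;
    }

    int main() {
      MemRef postIndex = { true, false, true, true };  // ldr x0, [x1], #8 - x1 is updated
      MemRef plain     = { true, true, false, false }; // ldr x0, [x1, x2] - nothing written back
      printf("post-index base-write: %d\n", int((memOperandRW(postIndex) & kMemBaseWrite) != 0));
      printf("plain      base-write: %d\n", int((memOperandRW(plain) & kMemBaseWrite) != 0));
      return 0;
    }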
ASMJIT_INLINE_NODEBUG void resetElementType() noexcept { _signature.setField<kSignatureRegElementTypeMask>(0); } - ASMJIT_INLINE_NODEBUG constexpr bool isVecB8() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecD>::kSignature | kSignatureElementB); } - ASMJIT_INLINE_NODEBUG constexpr bool isVecH4() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecD>::kSignature | kSignatureElementH); } - ASMJIT_INLINE_NODEBUG constexpr bool isVecS2() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecD>::kSignature | kSignatureElementS); } - ASMJIT_INLINE_NODEBUG constexpr bool isVecD1() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecD>::kSignature); } + ASMJIT_INLINE_NODEBUG constexpr bool isVecB8() const noexcept { + return _signature.subset(uint32_t(kBaseSignatureMask) | uint32_t(kSignatureRegElementTypeMask)) == (RegTraits<RegType::kARM_VecD>::kSignature | kSignatureElementB); + } - ASMJIT_INLINE_NODEBUG constexpr bool isVecB16() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementB); } - ASMJIT_INLINE_NODEBUG constexpr bool isVecH8() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementH); } - ASMJIT_INLINE_NODEBUG constexpr bool isVecS4() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementS); } - ASMJIT_INLINE_NODEBUG constexpr bool isVecD2() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementD); } - ASMJIT_INLINE_NODEBUG constexpr bool isVecB4x4() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementB4); } - ASMJIT_INLINE_NODEBUG constexpr bool isVecH2x4() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementH2); } + ASMJIT_INLINE_NODEBUG constexpr bool isVecH4() const noexcept { + return _signature.subset(uint32_t(kBaseSignatureMask) | uint32_t(kSignatureRegElementTypeMask)) == (RegTraits<RegType::kARM_VecD>::kSignature | kSignatureElementH); + } + + ASMJIT_INLINE_NODEBUG constexpr bool isVecS2() const noexcept { + return _signature.subset(uint32_t(kBaseSignatureMask) | uint32_t(kSignatureRegElementTypeMask)) == (RegTraits<RegType::kARM_VecD>::kSignature | kSignatureElementS); + } + + ASMJIT_INLINE_NODEBUG constexpr bool isVecD1() const noexcept { + return _signature.subset(uint32_t(kBaseSignatureMask) | uint32_t(kSignatureRegElementTypeMask)) == (RegTraits<RegType::kARM_VecD>::kSignature); + } + + ASMJIT_INLINE_NODEBUG constexpr bool isVecB16() const noexcept { + return _signature.subset(uint32_t(kBaseSignatureMask) | uint32_t(kSignatureRegElementTypeMask)) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementB); + } + + ASMJIT_INLINE_NODEBUG constexpr bool isVecH8() const noexcept { + return _signature.subset(uint32_t(kBaseSignatureMask) | uint32_t(kSignatureRegElementTypeMask)) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementH); + } + + ASMJIT_INLINE_NODEBUG constexpr bool isVecS4() const noexcept { + return _signature.subset(uint32_t(kBaseSignatureMask) | uint32_t(kSignatureRegElementTypeMask)) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementS); + } + + ASMJIT_INLINE_NODEBUG constexpr bool
isVecD2() const noexcept { + return _signature.subset(uint32_t(kBaseSignatureMask) | uint32_t(kSignatureRegElementTypeMask)) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementD); + } + + ASMJIT_INLINE_NODEBUG constexpr bool isVecB4x4() const noexcept { + return _signature.subset(uint32_t(kBaseSignatureMask) | uint32_t(kSignatureRegElementTypeMask)) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementB4); + } + + ASMJIT_INLINE_NODEBUG constexpr bool isVecH2x4() const noexcept { + return _signature.subset(uint32_t(kBaseSignatureMask) | uint32_t(kSignatureRegElementTypeMask)) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementH2); + } //! Creates a cloned register with element access. ASMJIT_INLINE_NODEBUG Vec at(uint32_t elementIndex) const noexcept { diff --git a/3rdparty/asmjit/src/asmjit/core.h b/3rdparty/asmjit/src/asmjit/core.h index 66cbce642a3..cb19333ac38 100644 --- a/3rdparty/asmjit/src/asmjit/core.h +++ b/3rdparty/asmjit/src/asmjit/core.h @@ -145,8 +145,7 @@ namespace asmjit { //! ### Supported Backends / Architectures //! //! - **X86** and **X86_64** - Both 32-bit and 64-bit backends tested on CI. -//! - **AArch64** - AArch64 backend is currently only partially tested (there is no native AArch64 runner to test -//! AsmJit Builder/Compiler). +//! - **AArch64** - Tested on CI (Native Apple runners and Linux emulated via QEMU). //! //! ### Static Builds and Embedding //! @@ -740,15 +739,17 @@ namespace asmjit { //! JitAllocator allocator; //! //! // Allocate an executable virtual memory and handle a possible failure. -//! void* p = allocator.alloc(estimatedSize); -//! if (!p) +//! JitAllocator::Span span; +//! Error err = allocator.alloc(span, estimatedSize); +//! +//! if (err != kErrorOk) // <- NOTE: This must be checked, always! //! return 0; //! //! // Now relocate the code to the address provided by the memory allocator. -//! // Please note that this DOESN'T COPY anything to `p`. This function will -//! // store the address in CodeHolder and use relocation entries to patch the -//! // existing code in all sections to respect the base address provided. -//! code.relocateToBase((uint64_t)p); +//! // Please note that this DOESN'T COPY anything to it. This function will +//! // store the address in CodeHolder and use relocation entries to patch +//! // the existing code in all sections to respect the base address provided. +//! code.relocateToBase((uint64_t)span.rx()); //! //! // This is purely optional. There are cases in which the relocation can omit //! // unneeded data, which would shrink the size of address table. If that @@ -761,12 +762,17 @@ namespace asmjit { //! // additional options that can be used to also zero pad sections' virtual //! // size, etc. //! // -//! // With some additional features, copyFlattenData() does roughly this: -//! // for (Section* section : code.sections()) -//! // memcpy((uint8_t*)p + section->offset(), -//! // section->data(), -//! // section->bufferSize()); -//! code.copyFlattenedData(p, codeSize, CopySectionFlags::kPadSectionBuffer); +//! // With some additional features, copyFlattenData() does roughly the following: +//! // +//! // allocator.write([&](JitAllocator::Span& span) { +//! // for (Section* section : code.sections()) { +//! // uint8_t* p = (uint8_t*)span.rw() + section->offset(); +//! // memcpy(p, section->data(), section->bufferSize()); +//! // } +//! // } +//! allocator.write([&](JitAllocator::Span& span) { +//! code.copyFlattenedData(span.rw(), codeSize, CopySectionFlags::kPadSectionBuffer); +//! }); //! //! // Execute the generated function. //!
int inA[4] = { 4, 3, 2, 1 }; diff --git a/3rdparty/asmjit/src/asmjit/core/api-config.h b/3rdparty/asmjit/src/asmjit/core/api-config.h index c9199c9d7f6..bbc35067778 100644 --- a/3rdparty/asmjit/src/asmjit/core/api-config.h +++ b/3rdparty/asmjit/src/asmjit/core/api-config.h @@ -54,8 +54,6 @@ // Build Options // ============= -#define ASMJIT_STATIC - // NOTE: Doxygen cannot document macros that are not defined, that's why we have to define them and then undefine // them immediately, so it won't use the macros with its own preprocessor. #ifdef _DOXYGEN @@ -234,7 +232,7 @@ namespace asmjit { #define ASMJIT_ARCH_BITS (ASMJIT_ARCH_X86 | ASMJIT_ARCH_ARM | ASMJIT_ARCH_MIPS | ASMJIT_ARCH_RISCV) #if ASMJIT_ARCH_BITS == 0 #undef ASMJIT_ARCH_BITS - #if defined (__LP64__) || defined(_LP64) + #if defined(__LP64__) || defined(_LP64) #define ASMJIT_ARCH_BITS 64 #else #define ASMJIT_ARCH_BITS 32 diff --git a/3rdparty/asmjit/src/asmjit/core/cpuinfo.cpp b/3rdparty/asmjit/src/asmjit/core/cpuinfo.cpp index c6d35459389..88f85e0f82e 100644 --- a/3rdparty/asmjit/src/asmjit/core/cpuinfo.cpp +++ b/3rdparty/asmjit/src/asmjit/core/cpuinfo.cpp @@ -16,10 +16,6 @@ #endif #endif // ASMJIT_ARCH_X86 -#if !defined(_WIN32) - #include <unistd.h> -#endif - #if ASMJIT_ARCH_ARM // Required by various utilities that are required by features detection. #if !defined(_WIN32) @@ -53,6 +49,17 @@ #endif #endif // ASMJIT_ARCH_ARM +#if !defined(_WIN32) && (ASMJIT_ARCH_X86 || ASMJIT_ARCH_ARM) + #include <unistd.h> +#endif + +// Unfortunately when compiling in C++11 mode MSVC would warn about unused functions as +// [[maybe_unused]] attribute is not used in that case (it's used only by C++17 mode and later). +#if defined(_MSC_VER) + #pragma warning(push) + #pragma warning(disable: 4505) // unreferenced local function has been removed. +#endif // _MSC_VER + ASMJIT_BEGIN_NAMESPACE // CpuInfo - Detect - Compatibility @@ -198,7 +205,7 @@ static ASMJIT_FAVOR_SIZE void simplifyCpuBrand(char* s) noexcept { if (!c) break; - if (!(c == ' ' && (prev == '@' || s[1] == ' ' || s[1] == '@'))) { + if (!(c == ' ' && (prev == '@' || s[1] == ' ' || s[1] == '@' || s[1] == '\0'))) { *d++ = c; prev = c; } @@ -1997,4 +2004,8 @@ const CpuInfo& CpuInfo::host() noexcept { return cpuInfoGlobal; } +#if defined(_MSC_VER) + #pragma warning(pop) +#endif // _MSC_VER + ASMJIT_END_NAMESPACE diff --git a/3rdparty/asmjit/src/asmjit/core/emitter.h b/3rdparty/asmjit/src/asmjit/core/emitter.h index 09c72a68eae..3053721c9c6 100644 --- a/3rdparty/asmjit/src/asmjit/core/emitter.h +++ b/3rdparty/asmjit/src/asmjit/core/emitter.h @@ -373,6 +373,9 @@ public: //! Returns the target architecture's GP register size (4 or 8 bytes). ASMJIT_INLINE_NODEBUG uint32_t registerSize() const noexcept { return environment().registerSize(); } + //! Returns a signature of a native general purpose register (either 32-bit or 64-bit depending on the architecture). + ASMJIT_INLINE_NODEBUG OperandSignature gpSignature() const noexcept { return _gpSignature; } + //! Returns instruction alignment. //! //!
The following values are returned based on the target architecture: diff --git a/3rdparty/asmjit/src/asmjit/core/jitallocator.cpp b/3rdparty/asmjit/src/asmjit/core/jitallocator.cpp index 760826b0372..44e864105fe 100644 --- a/3rdparty/asmjit/src/asmjit/core/jitallocator.cpp +++ b/3rdparty/asmjit/src/asmjit/core/jitallocator.cpp @@ -744,26 +744,28 @@ void JitAllocator::reset(ResetPolicy resetPolicy) noexcept { JitAllocatorPool& pool = impl->pools[poolId]; JitAllocatorBlock* block = pool.blocks.first(); - JitAllocatorBlock* blockToKeep = nullptr; - if (resetPolicy != ResetPolicy::kHard && uint32_t(impl->options & JitAllocatorOptions::kImmediateRelease) == 0) { - blockToKeep = block; - block = block->next(); - } - - while (block) { - JitAllocatorBlock* next = block->next(); - JitAllocatorImpl_deleteBlock(impl, block); - block = next; - } - pool.reset(); - if (blockToKeep) { - blockToKeep->_listNodes[0] = nullptr; - blockToKeep->_listNodes[1] = nullptr; - JitAllocatorImpl_wipeOutBlock(impl, blockToKeep); - JitAllocatorImpl_insertBlock(impl, blockToKeep); - pool.emptyBlockCount = 1; + if (block) { + JitAllocatorBlock* blockToKeep = nullptr; + if (resetPolicy != ResetPolicy::kHard && uint32_t(impl->options & JitAllocatorOptions::kImmediateRelease) == 0) { + blockToKeep = block; + block = block->next(); + } + + while (block) { + JitAllocatorBlock* next = block->next(); + JitAllocatorImpl_deleteBlock(impl, block); + block = next; + } + + if (blockToKeep) { + blockToKeep->_listNodes[0] = nullptr; + blockToKeep->_listNodes[1] = nullptr; + JitAllocatorImpl_wipeOutBlock(impl, blockToKeep); + JitAllocatorImpl_insertBlock(impl, blockToKeep); + pool.emptyBlockCount = 1; + } } } } @@ -1387,6 +1389,11 @@ static void BitVectorRangeIterator_testRandom(Random& rnd, size_t count) noexcep } } +static void test_jit_allocator_reset_empty() noexcept { + JitAllocator allocator; + allocator.reset(ResetPolicy::kSoft); +} + static void test_jit_allocator_alloc_release() noexcept { size_t kCount = BrokenAPI::hasArg("--quick") ? 20000 : 100000; @@ -1553,6 +1560,7 @@ static void test_jit_allocator_query() noexcept { } UNIT(jit_allocator) { + test_jit_allocator_reset_empty(); test_jit_allocator_alloc_release(); test_jit_allocator_query(); } diff --git a/3rdparty/asmjit/src/asmjit/core/operand.h b/3rdparty/asmjit/src/asmjit/core/operand.h index 8a1eee79b2f..3626779316d 100644 --- a/3rdparty/asmjit/src/asmjit/core/operand.h +++ b/3rdparty/asmjit/src/asmjit/core/operand.h @@ -738,26 +738,24 @@ struct Operand_ { //! Returns a size of a register or an X86 memory operand. //! - //! At the moment only X86 and X86_64 memory operands have a size - other memory operands can use bits that represent - //! size as an additional payload. This means that memory size is architecture specific and should be accessed via - //! \ref x86::Mem::size(). Sometimes when the user knows that the operand is either a register or memory operand this - //! function can be helpful as it avoids casting. 
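Returning to the JitAllocator::reset() hunk above: the added `if (block)` guard matters for a freshly constructed allocator, whose pools contain no blocks yet, so `pool.blocks.first()` is null and a soft reset previously tried to keep and wipe a non-existent block. The new test_jit_allocator_reset_empty() unit covers exactly that; a minimal reproducer (assuming a JIT-enabled asmjit build) would be:

    #include <asmjit/core.h>

    int main() {
      asmjit::JitAllocator allocator;               // constructed, but nothing allocated yet
      allocator.reset(asmjit::ResetPolicy::kSoft);  // previously walked a null first block
      return 0;
    }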
- ASMJIT_INLINE_NODEBUG constexpr uint32_t x86RmSize() const noexcept { - return _signature.size(); - } - -#if !defined(ASMJIT_NO_DEPRECATED) - ASMJIT_DEPRECATED("hasSize() is no longer portable - use x86RmSize() instead, if your target is X86/X86_64") - ASMJIT_INLINE_NODEBUG constexpr bool hasSize() const noexcept { return x86RmSize() != 0u; } - - ASMJIT_DEPRECATED("hasSize() is no longer portable - use x86RmSize() instead, if your target is X86/X86_64") - ASMJIT_INLINE_NODEBUG constexpr bool hasSize(uint32_t s) const noexcept { return x86RmSize() == s; } - - ASMJIT_DEPRECATED("size() is no longer portable - use x86RmSize() instead, if your target is X86/X86_64") - ASMJIT_INLINE_NODEBUG constexpr uint32_t size() const noexcept { return _signature.getField<Signature::kSizeMask>(); } -#endif + //! \remarks At the moment only X86 and X86_64 memory operands have a size - other memory operands can use bits + //! that represent size as an additional payload. This means that memory size is architecture specific and should + //! be accessed via \ref x86::Mem::size(). Sometimes when the user knows that the operand is either a register or + //! memory operand this function can be helpful as it avoids casting, but it only works when it targets X86 and X86_64. + ASMJIT_INLINE_NODEBUG constexpr uint32_t x86RmSize() const noexcept { return _signature.size(); } //! \} + +#if !defined(ASMJIT_NO_DEPRECATED) + ASMJIT_DEPRECATED("hasSize() is no longer portable - use x86RmSize() or x86::Mem::hasSize() instead, if your target is X86/X86_64") + ASMJIT_INLINE_NODEBUG constexpr bool hasSize() const noexcept { return x86RmSize() != 0u; } + + ASMJIT_DEPRECATED("hasSize() is no longer portable - use x86RmSize() or x86::Mem::hasSize() instead, if your target is X86/X86_64") + ASMJIT_INLINE_NODEBUG constexpr bool hasSize(uint32_t s) const noexcept { return x86RmSize() == s; } + + ASMJIT_DEPRECATED("size() is no longer portable - use x86RmSize() or x86::Mem::size() instead, if your target is X86/X86_64") + ASMJIT_INLINE_NODEBUG constexpr uint32_t size() const noexcept { return _signature.getField<Signature::kSizeMask>(); } +#endif }; //! Base class representing an operand in AsmJit (default constructed version). @@ -1600,9 +1598,6 @@ public: //! Resets the memory operand's INDEX register. ASMJIT_INLINE_NODEBUG void resetIndex() noexcept { _setIndex(RegType::kNone, 0); } - //! Sets the memory operand size (in bytes). - ASMJIT_INLINE_NODEBUG void setSize(uint32_t size) noexcept { _signature.setField<Signature::kSizeMask>(size); } - //! Tests whether the memory operand has a 64-bit offset or absolute address. //! //! If this is true then `hasBase()` must always report false. @@ -1670,6 +1665,11 @@ public: ASMJIT_INLINE_NODEBUG void resetOffsetLo32() noexcept { setOffsetLo32(0); } //! \} + +#if !defined(ASMJIT_NO_DEPRECATED) + ASMJIT_DEPRECATED("setSize() is no longer portable - use setX86RmSize() or x86::Mem::setSize() instead, if your target is X86/X86_64") + ASMJIT_INLINE_NODEBUG void setSize(uint32_t size) noexcept { _signature.setField<Signature::kSizeMask>(size); } +#endif }; //! Type of the immediate value.
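For downstream code, the deprecations above are mostly a mechanical rename: operand sizes are X86-specific, so query them via x86RmSize() or go through x86::Mem, as the deprecation messages themselves suggest. A hedged before/after sketch (the function is hypothetical, the called APIs are the ones named in the diff):

    #include <asmjit/x86.h>
    using namespace asmjit;

    // Hypothetical snippet - illustrates the rename only.
    void migrate(const Operand& op, x86::Mem& mem) {
      // Before: uint32_t s = op.size();   // deprecated - not portable across backends
      uint32_t s = op.x86RmSize();         // explicitly X86/X86_64-specific query
      // Before: mem.setSize(s) through a BaseMem reference (now deprecated there)
      mem.setSize(s);                      // x86::Mem::setSize() remains available
    }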
diff --git a/3rdparty/asmjit/src/asmjit/core/radefs_p.h b/3rdparty/asmjit/src/asmjit/core/radefs_p.h index 99e79be3ebb..32503961068 100644 --- a/3rdparty/asmjit/src/asmjit/core/radefs_p.h +++ b/3rdparty/asmjit/src/asmjit/core/radefs_p.h @@ -559,7 +559,7 @@ public: ASMJIT_FORCE_INLINE Error nonOverlappingUnionOf(ZoneAllocator* allocator, const RALiveSpans& x, const RALiveSpans& y, const DataType& yData) noexcept { uint32_t finalSize = x.size() + y.size(); - ASMJIT_PROPAGATE(_data.reserve(allocator, finalSize)); + ASMJIT_PROPAGATE(_data.growingReserve(allocator, finalSize)); T* dstPtr = _data.data(); const T* xSpan = x.data(); @@ -694,7 +694,7 @@ typedef RALiveSpans LiveRegSpans; //! - LEA x{ W|Out}, [x{R|Use} + y{R|Out}] -> {x:R|W|Use|Out y:R|Use} //! //! It should be obvious from the example above how these flags get created. Each operand contains READ/WRITE -//! information, which is then merged to RATiedReg's flags. However, we also need to represent the possitility +//! information, which is then merged to RATiedReg's flags. However, we also need to represent the possibility //! to view the operation as two independent operations - USE and OUT, because the register allocator first //! allocates USE registers, and then assigns OUT registers independently of USE registers. enum class RATiedFlags : uint32_t { @@ -767,6 +767,12 @@ enum class RATiedFlags : uint32_t { // Instruction Flags (Never used by RATiedReg) // ------------------------------------------- + //! Instruction has been patched to address a memory location instead of a register. + //! + //! This is currently only possible on X86 or X86_64 targets. It informs rewriter to rewrite the instruction if + //! necessary. + kInst_RegToMemPatched = 0x40000000u, + //! Instruction is transformable to another instruction if necessary. //! //! This is flag that is only used by \ref RAInst to inform register allocator that the instruction has some diff --git a/3rdparty/asmjit/src/asmjit/core/ralocal.cpp b/3rdparty/asmjit/src/asmjit/core/ralocal.cpp index 16f11a800c5..358fbf58f8b 100644 --- a/3rdparty/asmjit/src/asmjit/core/ralocal.cpp +++ b/3rdparty/asmjit/src/asmjit/core/ralocal.cpp @@ -137,9 +137,6 @@ Error RALocalAllocator::switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, cons dst.initMaps(dstPhysToWorkMap, _tmpWorkToPhysMap); dst.assignWorkIdsFromPhysIds(); - if (tryMode) - return kErrorOk; - for (RegGroup group : RegGroupVirtValues{}) { // STEP 1 // ------ @@ -597,10 +594,14 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept { if (rmSize <= workReg->virtReg()->virtSize()) { Operand& op = node->operands()[opIndex]; op = _pass->workRegAsMem(workReg); - op.as().setSize(rmSize); + + // NOTE: We cannot use `x86::Mem::setSize()` from here, so let's manipulate the signature directly. + op._signature.setSize(rmSize); + tiedReg->_useRewriteMask = 0; tiedReg->markUseDone(); + raInst->addFlags(RATiedFlags::kInst_RegToMemPatched); usePending--; rmAllocated = true; @@ -687,7 +688,7 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept { // ------ // // ALLOCATE / SHUFFLE all registers that we marked as `willUse` and weren't allocated yet. This is a bit - // complicated as the allocation is iterative. In some cases we have to wait before allocating a particual + // complicated as the allocation is iterative. In some cases we have to wait before allocating a particular // physical register as it's still occupied by some other one, which we need to move before we can use it. 
// In this case we skip it and allocate another some other instead (making it free for another iteration). // @@ -836,7 +837,7 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept { // STEP 9 // ------ // - // Vector registers can be cloberred partially by invoke - find if that's the case and clobber when necessary. + // Vector registers can be clobbered partially by invoke - find if that's the case and clobber when necessary. if (node->isInvoke() && group == RegGroup::kVec) { const InvokeNode* invokeNode = node->as(); diff --git a/3rdparty/asmjit/src/asmjit/core/rapass_p.h b/3rdparty/asmjit/src/asmjit/core/rapass_p.h index 90d4ae49be5..967624045c5 100644 --- a/3rdparty/asmjit/src/asmjit/core/rapass_p.h +++ b/3rdparty/asmjit/src/asmjit/core/rapass_p.h @@ -335,6 +335,8 @@ public: //! Clears instruction `flags` from this RAInst. ASMJIT_INLINE_NODEBUG void clearFlags(RATiedFlags flags) noexcept { _flags &= ~flags; } + //! Tests whether one operand of this instruction has been patched from Reg to Mem. + ASMJIT_INLINE_NODEBUG bool isRegToMemPatched() const noexcept { return hasFlag(RATiedFlags::kInst_RegToMemPatched); } //! Tests whether this instruction can be transformed to another instruction if necessary. ASMJIT_INLINE_NODEBUG bool isTransformable() const noexcept { return hasFlag(RATiedFlags::kInst_IsTransformable); } diff --git a/3rdparty/asmjit/src/asmjit/core/string.cpp b/3rdparty/asmjit/src/asmjit/core/string.cpp index 123e861983c..369d060e248 100644 --- a/3rdparty/asmjit/src/asmjit/core/string.cpp +++ b/3rdparty/asmjit/src/asmjit/core/string.cpp @@ -14,9 +14,51 @@ ASMJIT_BEGIN_NAMESPACE static const char String_baseN[] = "0123456789ABCDEF"; -constexpr size_t kMinAllocSize = 64; +constexpr size_t kMinAllocSize = 128; constexpr size_t kMaxAllocSize = SIZE_MAX - Globals::kGrowThreshold; +// Based on ZoneVector_growCapacity(). +// +// NOTE: The sizes here include null terminators - that way we can have aligned allocations that are power of 2s +// initially. +static ASMJIT_FORCE_INLINE size_t String_growCapacity(size_t byteSize, size_t minimumByteSize) noexcept { + static constexpr size_t kGrowThreshold = Globals::kGrowThreshold; + + ASMJIT_ASSERT(minimumByteSize < kMaxAllocSize); + + // This is more than exponential growth at the beginning. + if (byteSize < kMinAllocSize) { + byteSize = kMinAllocSize; + } + else if (byteSize < 512) { + byteSize = 512; + } + + if (byteSize < minimumByteSize) { + // Exponential growth before we reach `kGrowThreshold`. + byteSize = Support::alignUpPowerOf2(minimumByteSize); + + // Bail to `minimumByteSize` in case of overflow - most likely whatever that is happening afterwards would just fail. + if (byteSize < minimumByteSize) { + return minimumByteSize; + } + + // Pretty much chunked growth advancing by `kGrowThreshold` after we exceed it. + if (byteSize > kGrowThreshold) { + // Align to kGrowThreshold. + size_t remainder = minimumByteSize % kGrowThreshold; + + byteSize = minimumByteSize + remainder; + + // Bail to `minimumByteSize` in case of overflow. 
+ if (byteSize < minimumByteSize) return minimumByteSize; + } + } + + return Support::min(byteSize, kMaxAllocSize); +} + // String - Clear & Reset // ====================== @@ -49,13 +91,13 @@ char* String::prepare(ModifyOp op, size_t size) noexcept { size_t curCapacity; if (isLargeOrExternal()) { - curData = this->_large.data; - curSize = this->_large.size; - curCapacity = this->_large.capacity; + curData = _large.data; + curSize = _large.size; + curCapacity = _large.capacity; } else { - curData = this->_small.data; - curSize = this->_small.type; + curData = _small.data; + curSize = _small.type; curCapacity = kSSOCapacity; } @@ -90,25 +132,20 @@ char* String::prepare(ModifyOp op, size_t size) noexcept { } else { // Prevent arithmetic overflow. - if (ASMJIT_UNLIKELY(size >= kMaxAllocSize - curSize)) + if (ASMJIT_UNLIKELY(size >= kMaxAllocSize - curSize - 1)) return nullptr; size_t newSize = size + curSize; size_t newSizePlusOne = newSize + 1; - if (newSizePlusOne > curCapacity) { - size_t newCapacity = Support::max(curCapacity + 1, kMinAllocSize); + if (newSize > curCapacity) { + size_t newCapacityPlusOne = String_growCapacity(size + 1u, newSizePlusOne); + ASMJIT_ASSERT(newCapacityPlusOne >= newSizePlusOne); - if (newCapacity < newSizePlusOne && newCapacity < Globals::kGrowThreshold) - newCapacity = Support::alignUpPowerOf2(newCapacity); - - if (newCapacity < newSizePlusOne) - newCapacity = Support::alignUp(newSizePlusOne, Globals::kGrowThreshold); - - if (ASMJIT_UNLIKELY(newCapacity < newSizePlusOne)) + if (ASMJIT_UNLIKELY(newCapacityPlusOne < newSizePlusOne)) return nullptr; - char* newData = static_cast<char*>(::malloc(newCapacity)); + char* newData = static_cast<char*>(::malloc(newCapacityPlusOne)); if (ASMJIT_UNLIKELY(!newData)) return nullptr; @@ -119,7 +156,7 @@ char* String::prepare(ModifyOp op, size_t size) noexcept { _large.type = kTypeLarge; _large.size = newSize; - _large.capacity = newCapacity - 1; + _large.capacity = newCapacityPlusOne - 1; _large.data = newData; newData[newSize] = '\0'; @@ -488,9 +525,28 @@ bool String::equals(const char* other, size_t size) const noexcept { // ============== #if defined(ASMJIT_TEST) +static void test_string_grow() noexcept { + String s; + size_t c = s.capacity(); + + INFO("Testing string grow strategy (SSO capacity: %zu)", c); + for (size_t i = 0; i < 1000000; i++) { + s.append('x'); + if (s.capacity() != c) { + c = s.capacity(); + INFO(" String reallocated to new capacity: %zu", c); + } + } + + // We don't expect a 1 million character string to occupy 4MiB, for example. So verify that!
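The effect of String_growCapacity() above is easiest to see as a capacity trace, which is what the new test_string_grow() prints: two fixed steps (kMinAllocSize, then 512), power-of-two doubling up to the growth threshold, then threshold-sized chunks. An illustrative re-implementation in plain size_t math follows; it is not the upstream helper (which also clamps to kMaxAllocSize and guards overflow), and the threshold value is a placeholder:

    #include <cstddef>
    #include <cstdio>

    // Illustrative re-implementation of the growth curve.
    static size_t growCapacity(size_t current, size_t minimum) {
      const size_t kGrowThreshold = 1024u * 1024u;  // placeholder for Globals::kGrowThreshold
      size_t byteSize = current;
      if (byteSize < 128) byteSize = 128;           // first fixed step (kMinAllocSize)
      else if (byteSize < 512) byteSize = 512;      // second fixed step
      if (byteSize < minimum) {
        byteSize = 1;                               // round `minimum` up to a power of two
        while (byteSize < minimum) byteSize <<= 1;
        if (byteSize > kGrowThreshold)              // past the threshold: threshold-sized chunks
          byteSize = (minimum + kGrowThreshold - 1) / kGrowThreshold * kGrowThreshold;
      }
      return byteSize;
    }

    int main() {
      size_t capacity = 0;
      for (size_t size = 1; size <= (size_t(8) << 20); size++) {
        if (size > capacity) {
          capacity = growCapacity(capacity, size);  // doubles early, then advances by 1 MiB
          printf("grew to %zu at size %zu\n", capacity, size);
        }
      }
      return 0;
    }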
+ EXPECT_LT(c, size_t(4 * 1024 * 1024)); +} + UNIT(core_string) { String s; + INFO("Testing string functionality"); + EXPECT_FALSE(s.isLargeOrExternal()); EXPECT_FALSE(s.isExternal()); @@ -553,6 +609,8 @@ UNIT(core_string) { EXPECT_TRUE(sTmp.isExternal()); EXPECT_EQ(sTmp.appendChars(' ', 1000), kErrorOk); EXPECT_FALSE(sTmp.isExternal()); + + test_string_grow(); } #endif diff --git a/3rdparty/asmjit/src/asmjit/core/support.h b/3rdparty/asmjit/src/asmjit/core/support.h index 345cf8cf4cd..b5be91bcd3e 100644 --- a/3rdparty/asmjit/src/asmjit/core/support.h +++ b/3rdparty/asmjit/src/asmjit/core/support.h @@ -1615,10 +1615,10 @@ public: ASMJIT_INLINE_NODEBUG bool operator>=(const ArrayReverseIterator& other) const noexcept { return _ptr >= other._ptr; } ASMJIT_INLINE_NODEBUG ArrayReverseIterator& operator++() noexcept { _ptr--; return *this; } - ASMJIT_INLINE_NODEBUG ArrayReverseIterator& operator++(int) noexcept { ArrayReverseIterator prev(*this); _ptr--; return prev; } - ASMJIT_INLINE_NODEBUG ArrayReverseIterator& operator--() noexcept { _ptr++; return *this; } - ASMJIT_INLINE_NODEBUG ArrayReverseIterator& operator--(int) noexcept { ArrayReverseIterator prev(*this); _ptr++; return prev; } + + ASMJIT_INLINE_NODEBUG ArrayReverseIterator operator++(int) noexcept { ArrayReverseIterator prev(*this); _ptr--; return prev; } + ASMJIT_INLINE_NODEBUG ArrayReverseIterator operator--(int) noexcept { ArrayReverseIterator prev(*this); _ptr++; return prev; } template<typename Diff> ASMJIT_INLINE_NODEBUG ArrayReverseIterator operator+(const Diff& n) noexcept { return ArrayReverseIterator(_ptr -= n); } template<typename Diff> ASMJIT_INLINE_NODEBUG ArrayReverseIterator operator-(const Diff& n) noexcept { return ArrayReverseIterator(_ptr += n); } diff --git a/3rdparty/asmjit/src/asmjit/core/zonestack.h b/3rdparty/asmjit/src/asmjit/core/zonestack.h index 2cf078b3d21..16d5d09dbc1 100644 --- a/3rdparty/asmjit/src/asmjit/core/zonestack.h +++ b/3rdparty/asmjit/src/asmjit/core/zonestack.h @@ -62,7 +62,9 @@ public: ASMJIT_INLINE_NODEBUG void setEnd(T* end) noexcept { _end = (void*)end; } template<typename T> - ASMJIT_INLINE_NODEBUG T* data() const noexcept { return (T*)((uint8_t*)(this) + sizeof(Block)); } + ASMJIT_INLINE_NODEBUG const T* data() const noexcept { return (const T*)((const uint8_t*)(this) + sizeof(Block)); } + template<typename T> + ASMJIT_INLINE_NODEBUG T* data() noexcept { return (T*)((uint8_t*)(this) + sizeof(Block)); } template<typename T> ASMJIT_INLINE_NODEBUG bool canPrepend() const noexcept { return _start > data<T>(); } diff --git a/3rdparty/asmjit/src/asmjit/core/zonevector.cpp b/3rdparty/asmjit/src/asmjit/core/zonevector.cpp index 24860219de2..b68e25abf65 100644 --- a/3rdparty/asmjit/src/asmjit/core/zonevector.cpp +++ b/3rdparty/asmjit/src/asmjit/core/zonevector.cpp @@ -13,8 +13,63 @@ ASMJIT_BEGIN_NAMESPACE // ZoneVectorBase - Helpers // ======================== +// ZoneVector is used as an array to hold short-lived data structures used during code generation. The growing +// strategy is simple - use small capacity at the beginning (very good for ZoneAllocator) and then grow quicker +// to prevent successive reallocations. +static ASMJIT_FORCE_INLINE uint32_t ZoneVector_growCapacity(uint32_t current, uint32_t growMinimum, uint32_t sizeOfT) noexcept { + static constexpr size_t kGrowThreshold = Globals::kGrowThreshold; + + size_t byteSize = size_t(current) * sizeOfT; + size_t minimumByteSize = size_t(growMinimum) * sizeOfT; + + // This is more than exponential growth at the beginning.
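The ArrayReverseIterator hunk above fixes a classic postfix-operator bug: operator++(int) and operator--(int) were declared to return a reference while returning the local copy `prev`, i.e. a dangling reference to a destroyed temporary. The canonical shape, sketched with a minimal reverse iterator:

    #include <cstdio>

    struct ReverseIter {
      int* p;
      ReverseIter& operator++() { p--; return *this; }  // prefix: mutate, return *this by reference
      ReverseIter operator++(int) { ReverseIter prev(*this); p--; return prev; }  // postfix: BY VALUE
      int& operator*() const { return *p; }
    };

    int main() {
      int values[3] = { 1, 2, 3 };
      ReverseIter it{ values + 2 };
      int a = *it++;  // dereferences the returned copy - safe only because it is a value
      int b = *it;    // the iterator itself has moved (backwards)
      printf("%d %d\n", a, b);  // prints: 3 2
      return 0;
    }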
+ if (byteSize < 32) { + byteSize = 32; + } + else if (byteSize < 128) { + byteSize = 128; + } + else if (byteSize < 512) { + byteSize = 512; + } + + if (byteSize < minimumByteSize) { + // Exponential growth before we reach `kGrowThreshold`. + byteSize = Support::alignUpPowerOf2(minimumByteSize); + + // Bail to `growMinimum` in case of overflow - most likely whatever that is happening afterwards would just fail. + if (byteSize < minimumByteSize) { + return growMinimum; + } + + // Pretty much chunked growth advancing by `kGrowThreshold` after we exceed it. + // This should not be a common case, so we don't really have to optimize for it. + if (byteSize > kGrowThreshold) { + // Align to kGrowThreshold. + size_t remainder = minimumByteSize % kGrowThreshold; + + byteSize = minimumByteSize + remainder; + + // Bail to `growMinimum` in case of overflow - should never happen as it's unlikely we would hit this on a 32-bit + // machine (consecutive near 4GiB allocation is impossible, and this should never happen on 64-bit machine as we + // use 32-bit size & capacity, so overflow of 64 bit integer is not possible. Added just as an extreme measure. + if (byteSize < minimumByteSize) + return growMinimum; + } + } + + size_t n = byteSize / sizeOfT; + return uint32_t(Support::min(n, 0xFFFFFFFFu)); +} + +static ASMJIT_FORCE_INLINE bool ZoneVector_byteSizeIsSafe(size_t nBytes, uint32_t n) noexcept { + if (sizeof(uint32_t) < sizeof(size_t)) + return true; // there is no problem when running on a 64-bit machine. + else + return nBytes >= size_t(n); +}; + Error ZoneVectorBase::_grow(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept { - uint32_t threshold = Globals::kGrowThreshold / sizeOfT; uint32_t capacity = _capacity; uint32_t after = _size; @@ -25,29 +80,7 @@ Error ZoneVectorBase::_grow(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t if (capacity >= after) return kErrorOk; - // ZoneVector is used as an array to hold short-lived data structures used - // during code generation. The growing strategy is simple - use small capacity - // at the beginning (very good for ZoneAllocator) and then grow quicker to - // prevent successive reallocations. 
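ZoneVector_byteSizeIsSafe() above pairs with a fix in _reserve() (visible in the next hunk below): the old `uint32_t nBytes = n * sizeOfT` could silently wrap, and its `nBytes < n` guard only catches wraps that land below `n`. Widening before multiplying, as the fixed code does with size_t, keeps the arithmetic exact. A small demonstration of the failure mode, under no particular asmjit types:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t n = 0x30000000u;  // 805,306,368 elements
      uint32_t sizeOfT = 8;      // 8 bytes each -> real total is 6 GiB

      uint32_t wrapped = n * sizeOfT;            // 32-bit multiply wraps to 0x80000000
      uint64_t exact   = uint64_t(n) * sizeOfT;  // widen first, as the fixed code does

      printf("wrapped = %u\n", wrapped);                      // 2147483648
      printf("exact   = %llu\n", (unsigned long long)exact);  // 6442450944
      printf("old guard (wrapped < n) fires: %s\n", wrapped < n ? "yes" : "no");  // no
      return 0;
    }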
- if (capacity < 4) - capacity = 4; - else if (capacity < 8) - capacity = 8; - else if (capacity < 16) - capacity = 16; - else if (capacity < 64) - capacity = 64; - else if (capacity < 256) - capacity = 256; - - while (capacity < after) { - if (capacity < threshold) - capacity *= 2; - else - capacity += threshold; - } - - return _reserve(allocator, sizeOfT, capacity); + return _reserve(allocator, sizeOfT, ZoneVector_growCapacity(capacity, after, sizeOfT)); } Error ZoneVectorBase::_reserve(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept { @@ -55,8 +88,8 @@ Error ZoneVectorBase::_reserve(ZoneAllocator* allocator, uint32_t sizeOfT, uint3 if (oldCapacity >= n) return kErrorOk; - uint32_t nBytes = n * sizeOfT; - if (ASMJIT_UNLIKELY(nBytes < n)) + size_t nBytes = size_t(n) * sizeOfT; + if (ASMJIT_UNLIKELY(!ZoneVector_byteSizeIsSafe(nBytes, n))) return DebugUtils::errored(kErrorOutOfMemory); size_t allocatedBytes; @@ -65,19 +98,28 @@ Error ZoneVectorBase::_reserve(ZoneAllocator* allocator, uint32_t sizeOfT, uint3 if (ASMJIT_UNLIKELY(!newData)) return DebugUtils::errored(kErrorOutOfMemory); + uint32_t newCapacity = uint32_t(allocatedBytes / sizeOfT); + ASMJIT_ASSERT(newCapacity >= n); + void* oldData = _data; if (oldData && _size) { memcpy(newData, oldData, size_t(_size) * sizeOfT); allocator->release(oldData, size_t(oldCapacity) * sizeOfT); } - _capacity = uint32_t(allocatedBytes / sizeOfT); - ASMJIT_ASSERT(_capacity >= n); - _data = newData; + _capacity = newCapacity; + return kErrorOk; } +Error ZoneVectorBase::_growingReserve(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept { + uint32_t capacity = _capacity; + if (capacity >= n) + return kErrorOk; + return _reserve(allocator, sizeOfT, ZoneVector_growCapacity(capacity, n, sizeOfT)); +} + Error ZoneVectorBase::_resize(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept { uint32_t size = _size; @@ -266,6 +308,8 @@ Error ZoneBitVector::_append(ZoneAllocator* allocator, bool value) noexcept { #if defined(ASMJIT_TEST) template static void test_zone_vector(ZoneAllocator* allocator, const char* typeName) { + constexpr uint32_t kMiB = 1024 * 1024; + int i; int kMax = 100000; @@ -301,12 +345,22 @@ static void test_zone_vector(ZoneAllocator* allocator, const char* typeName) { int64_t fsum = 0; int64_t rsum = 0; - for (const T& item : vec) { fsum += item; } - for (auto it = vec.rbegin(); it != vec.rend(); ++it) { rsum += *it; } + for (const T& item : vec) { + fsum += item; + } + + for (auto it = vec.rbegin(); it != vec.rend(); ++it) { + rsum += *it; + } EXPECT_EQ(fsum, rsum); - vec.release(allocator); + + INFO("ZoneBitVector::growingReserve()"); + for (uint32_t j = 0; j < 40 / sizeof(T); j += 8) { + EXPECT_EQ(vec.growingReserve(allocator, j * kMiB), kErrorOk); + EXPECT_GE(vec.capacity(), j * kMiB); + } } static void test_zone_bitvector(ZoneAllocator* allocator) { diff --git a/3rdparty/asmjit/src/asmjit/core/zonevector.h b/3rdparty/asmjit/src/asmjit/core/zonevector.h index 13d28bbefa2..f38dca583a0 100644 --- a/3rdparty/asmjit/src/asmjit/core/zonevector.h +++ b/3rdparty/asmjit/src/asmjit/core/zonevector.h @@ -58,6 +58,7 @@ protected: ASMJIT_API Error _grow(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept; ASMJIT_API Error _resize(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept; ASMJIT_API Error _reserve(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept; + ASMJIT_API Error _growingReserve(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept; inline void 
_swap(ZoneVectorBase& other) noexcept { std::swap(_data, other._data); @@ -414,7 +415,21 @@ public: //! Reallocates the internal array to fit at least `n` items. inline Error reserve(ZoneAllocator* allocator, uint32_t n) noexcept { - return n > _capacity ? ZoneVectorBase::_reserve(allocator, sizeof(T), n) : Error(kErrorOk); + if (ASMJIT_UNLIKELY(n > _capacity)) + return ZoneVectorBase::_reserve(allocator, sizeof(T), n); + else + return Error(kErrorOk); + } + + //! Reallocates the internal array to fit at least `n` items with growing semantics. + //! + //! If the vector is smaller than `n` the same growing calculations will be used as if N items were appended + //! to an empty vector, which means reserving additional space for more append operations that could follow. + inline Error growingReserve(ZoneAllocator* allocator, uint32_t n) noexcept { + if (ASMJIT_UNLIKELY(n > _capacity)) + return ZoneVectorBase::_growingReserve(allocator, sizeof(T), n); + else + return Error(kErrorOk); } inline Error willGrow(ZoneAllocator* allocator, uint32_t n = 1) noexcept { diff --git a/3rdparty/asmjit/src/asmjit/x86/x86assembler.cpp b/3rdparty/asmjit/src/asmjit/x86/x86assembler.cpp index f48c3b2c3b5..35c5502cc66 100644 --- a/3rdparty/asmjit/src/asmjit/x86/x86assembler.cpp +++ b/3rdparty/asmjit/src/asmjit/x86/x86assembler.cpp @@ -345,6 +345,10 @@ static ASMJIT_FORCE_INLINE uint32_t x86AltOpcodeOf(const InstDB::InstInfo* info) return InstDB::_altOpcodeTable[info->_altOpcodeIndex]; } +static ASMJIT_FORCE_INLINE bool x86IsMmxOrXmm(const Reg& reg) noexcept { + return reg.type() == RegType::kX86_Mm || reg.type() == RegType::kX86_Xmm; +} + // x86::Assembler - X86BufferWriter // ================================ @@ -2572,37 +2576,41 @@ CaseFpuArith_Mem: case InstDB::kEncodingExtMovd: CaseExtMovd: - opReg = o0.id(); - opcode.add66hIf(Reg::isXmm(o0)); + if (x86IsMmxOrXmm(o0.as<Reg>())) { + opReg = o0.id(); + opcode.add66hIf(Reg::isXmm(o0)); - // MM/XMM <- Gp - if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o1)) { - rbReg = o1.id(); - goto EmitX86R; - } + // MM/XMM <- Gp + if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o1)) { + rbReg = o1.id(); + goto EmitX86R; + } - // MM/XMM <- Mem - if (isign3 == ENC_OPS2(Reg, Mem)) { - rmRel = &o1; - goto EmitX86M; + // MM/XMM <- Mem + if (isign3 == ENC_OPS2(Reg, Mem)) { + rmRel = &o1; + goto EmitX86M; + } } // The following instructions use the secondary opcode. - opcode &= Opcode::kW; - opcode |= x86AltOpcodeOf(instInfo); - opReg = o1.id(); - opcode.add66hIf(Reg::isXmm(o1)); + if (x86IsMmxOrXmm(o1.as<Reg>())) { + opcode &= Opcode::kW; + opcode |= x86AltOpcodeOf(instInfo); + opReg = o1.id(); + opcode.add66hIf(Reg::isXmm(o1)); - // GP <- MM/XMM - if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o0)) { - rbReg = o0.id(); - goto EmitX86R; - } + // GP <- MM/XMM + if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o0)) { + rbReg = o0.id(); + goto EmitX86R; + } - // Mem <- MM/XMM - if (isign3 == ENC_OPS2(Mem, Reg)) { - rmRel = &o0; - goto EmitX86M; + // Mem <- MM/XMM + if (isign3 == ENC_OPS2(Mem, Reg)) { + rmRel = &o0; + goto EmitX86M; + } } break; diff --git a/3rdparty/asmjit/src/asmjit/x86/x86emitter.h b/3rdparty/asmjit/src/asmjit/x86/x86emitter.h index 4855e9957c3..60881d3ff93 100644 --- a/3rdparty/asmjit/src/asmjit/x86/x86emitter.h +++ b/3rdparty/asmjit/src/asmjit/x86/x86emitter.h @@ -147,8 +147,10 @@ struct EmitterExplicitT { //! \name Native Registers //! \{ - //! Returns either GPD or GPQ register of the given `id` depending on the emitter's architecture. + //!
Returns either 32-bit or 64-bit GP register of the given `id` depending on the emitter's architecture. inline Gp gpz(uint32_t id) const noexcept { return Gp(_emitter()->_gpSignature, id); } + //! Clones the given `reg` to either 32-bit or 64-bit GP register depending on the emitter's architecture. + inline Gp gpz(const Gp& reg) const noexcept { return Gp(_emitter()->_gpSignature, reg.id()); } inline Gp zax() const noexcept { return Gp(_emitter()->_gpSignature, Gp::kIdAx); } inline Gp zcx() const noexcept { return Gp(_emitter()->_gpSignature, Gp::kIdCx); } diff --git a/3rdparty/asmjit/src/asmjit/x86/x86func.cpp b/3rdparty/asmjit/src/asmjit/x86/x86func.cpp index db5b30f86eb..ac73aff8bdd 100644 --- a/3rdparty/asmjit/src/asmjit/x86/x86func.cpp +++ b/3rdparty/asmjit/src/asmjit/x86/x86func.cpp @@ -14,7 +14,7 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) namespace FuncInternal { -static inline bool shouldThreatAsCDeclIn64BitMode(CallConvId ccId) noexcept { +static inline bool shouldTreatAsCDeclIn64BitMode(CallConvId ccId) noexcept { return ccId == CallConvId::kCDecl || ccId == CallConvId::kStdCall || ccId == CallConvId::kThisCall || @@ -143,7 +143,7 @@ ASMJIT_FAVOR_SIZE Error initCallConv(CallConv& cc, CallConvId ccId, const Enviro // Preprocess the calling convention into a common id as many conventions are normally ignored even by C/C++ // compilers and treated as `__cdecl`. - if (shouldThreatAsCDeclIn64BitMode(ccId)) + if (shouldTreatAsCDeclIn64BitMode(ccId)) ccId = winABI ? CallConvId::kX64Windows : CallConvId::kX64SystemV; switch (ccId) { diff --git a/3rdparty/asmjit/src/asmjit/x86/x86instapi.cpp b/3rdparty/asmjit/src/asmjit/x86/x86instapi.cpp index 27671b3adc5..fc17b5cdaea 100644 --- a/3rdparty/asmjit/src/asmjit/x86/x86instapi.cpp +++ b/3rdparty/asmjit/src/asmjit/x86/x86instapi.cpp @@ -895,8 +895,10 @@ Error queryRWInfo(Arch arch, const BaseInst& inst, const Operand_* operands, siz case Inst::kIdVpternlogq: { if (opCount == 4 && operands[3].isImm()) { uint32_t predicate = operands[3].as().valueAs(); + if ((predicate >> 4) == (predicate & 0xF)) { out->_operands[0].clearOpFlags(OpRWFlags::kRead); + out->_operands[0].setReadByteMask(0); } } break; diff --git a/3rdparty/asmjit/src/asmjit/x86/x86instdb.cpp b/3rdparty/asmjit/src/asmjit/x86/x86instdb.cpp index 4269076f1c4..c31ce91019a 100644 --- a/3rdparty/asmjit/src/asmjit/x86/x86instdb.cpp +++ b/3rdparty/asmjit/src/asmjit/x86/x86instdb.cpp @@ -1236,44 +1236,44 @@ const InstDB::InstInfo InstDB::_instInfoTable[] = { INST(Vminss , VexRvm , V(F30F00,5D,_,I,I,0,2,T1S), 0 , 109, 0 , 264, 147), // #1187 INST(Vmlaunch , X86Op , O(000F01,C2,_,_,_,_,_,_ ), 0 , 23 , 0 , 31 , 67 ), // #1188 INST(Vmload , X86Op_xAX , O(000F01,DA,_,_,_,_,_,_ ), 0 , 23 , 0 , 334, 23 ), // #1189 - INST(Vmmcall , X86Op , O(000F01,D9,_,_,_,_,_,_ ), 0 , 23 , 0 , 335, 23 ), // #1190 - INST(Vmovapd , VexRmMr_Lx , V(660F00,28,_,x,I,1,4,FVM), V(660F00,29,_,x,I,1,4,FVM), 104, 86 , 336, 173), // #1191 - INST(Vmovaps , VexRmMr_Lx , V(000F00,28,_,x,I,0,4,FVM), V(000F00,29,_,x,I,0,4,FVM), 106, 87 , 336, 173), // #1192 - INST(Vmovd , VexMovdMovq , V(660F00,6E,_,0,0,0,2,T1S), V(660F00,7E,_,0,0,0,2,T1S), 198, 88 , 337, 147), // #1193 - INST(Vmovddup , VexRm_Lx , V(F20F00,12,_,x,I,1,3,DUP), 0 , 199, 0 , 338, 145), // #1194 - INST(Vmovdqa , VexRmMr_Lx , V(660F00,6F,_,x,I,_,_,_ ), V(660F00,7F,_,x,I,_,_,_ ), 71 , 89 , 339, 174), // #1195 - INST(Vmovdqa32 , VexRmMr_Lx , E(660F00,6F,_,x,_,0,4,FVM), E(660F00,7F,_,x,_,0,4,FVM), 200, 90 , 340, 175), // #1196 - INST(Vmovdqa64 , VexRmMr_Lx , 
diff --git a/3rdparty/asmjit/src/asmjit/x86/x86instdb.cpp b/3rdparty/asmjit/src/asmjit/x86/x86instdb.cpp
index 4269076f1c4..c31ce91019a 100644
--- a/3rdparty/asmjit/src/asmjit/x86/x86instdb.cpp
+++ b/3rdparty/asmjit/src/asmjit/x86/x86instdb.cpp
@@ -1236,44 +1236,44 @@ const InstDB::InstInfo InstDB::_instInfoTable[] = {
 INST(Vminss , VexRvm , V(F30F00,5D,_,I,I,0,2,T1S), 0 , 109, 0 , 264, 147), // #1187
 INST(Vmlaunch , X86Op , O(000F01,C2,_,_,_,_,_,_ ), 0 , 23 , 0 , 31 , 67 ), // #1188
 INST(Vmload , X86Op_xAX , O(000F01,DA,_,_,_,_,_,_ ), 0 , 23 , 0 , 334, 23 ), // #1189
- INST(Vmmcall , X86Op , O(000F01,D9,_,_,_,_,_,_ ), 0 , 23 , 0 , 335, 23 ), // #1190
- INST(Vmovapd , VexRmMr_Lx , V(660F00,28,_,x,I,1,4,FVM), V(660F00,29,_,x,I,1,4,FVM), 104, 86 , 336, 173), // #1191
- INST(Vmovaps , VexRmMr_Lx , V(000F00,28,_,x,I,0,4,FVM), V(000F00,29,_,x,I,0,4,FVM), 106, 87 , 336, 173), // #1192
- INST(Vmovd , VexMovdMovq , V(660F00,6E,_,0,0,0,2,T1S), V(660F00,7E,_,0,0,0,2,T1S), 198, 88 , 337, 147), // #1193
- INST(Vmovddup , VexRm_Lx , V(F20F00,12,_,x,I,1,3,DUP), 0 , 199, 0 , 338, 145), // #1194
- INST(Vmovdqa , VexRmMr_Lx , V(660F00,6F,_,x,I,_,_,_ ), V(660F00,7F,_,x,I,_,_,_ ), 71 , 89 , 339, 174), // #1195
- INST(Vmovdqa32 , VexRmMr_Lx , E(660F00,6F,_,x,_,0,4,FVM), E(660F00,7F,_,x,_,0,4,FVM), 200, 90 , 340, 175), // #1196
- INST(Vmovdqa64 , VexRmMr_Lx , E(660F00,6F,_,x,_,1,4,FVM), E(660F00,7F,_,x,_,1,4,FVM), 136, 91 , 340, 175), // #1197
- INST(Vmovdqu , VexRmMr_Lx , V(F30F00,6F,_,x,I,_,_,_ ), V(F30F00,7F,_,x,I,_,_,_ ), 201, 92 , 339, 174), // #1198
- INST(Vmovdqu16 , VexRmMr_Lx , E(F20F00,6F,_,x,_,1,4,FVM), E(F20F00,7F,_,x,_,1,4,FVM), 167, 93 , 340, 176), // #1199
- INST(Vmovdqu32 , VexRmMr_Lx , E(F30F00,6F,_,x,_,0,4,FVM), E(F30F00,7F,_,x,_,0,4,FVM), 202, 94 , 340, 175), // #1200
- INST(Vmovdqu64 , VexRmMr_Lx , E(F30F00,6F,_,x,_,1,4,FVM), E(F30F00,7F,_,x,_,1,4,FVM), 150, 95 , 340, 175), // #1201
- INST(Vmovdqu8 , VexRmMr_Lx , E(F20F00,6F,_,x,_,0,4,FVM), E(F20F00,7F,_,x,_,0,4,FVM), 165, 96 , 340, 176), // #1202
- INST(Vmovhlps , VexRvm , V(000F00,12,_,0,I,0,_,_ ), 0 , 74 , 0 , 341, 147), // #1203
- INST(Vmovhpd , VexRvmMr , V(660F00,16,_,0,I,1,3,T1S), V(660F00,17,_,0,I,1,3,T1S), 126, 97 , 342, 147), // #1204
- INST(Vmovhps , VexRvmMr , V(000F00,16,_,0,I,0,3,T2 ), V(000F00,17,_,0,I,0,3,T2 ), 203, 98 , 342, 147), // #1205
- INST(Vmovlhps , VexRvm , V(000F00,16,_,0,I,0,_,_ ), 0 , 74 , 0 , 341, 147), // #1206
- INST(Vmovlpd , VexRvmMr , V(660F00,12,_,0,I,1,3,T1S), V(660F00,13,_,0,I,1,3,T1S), 126, 99 , 342, 147), // #1207
- INST(Vmovlps , VexRvmMr , V(000F00,12,_,0,I,0,3,T2 ), V(000F00,13,_,0,I,0,3,T2 ), 203, 100, 342, 147), // #1208
- INST(Vmovmskpd , VexRm_Lx , V(660F00,50,_,x,I,_,_,_ ), 0 , 71 , 0 , 343, 149), // #1209
- INST(Vmovmskps , VexRm_Lx , V(000F00,50,_,x,I,_,_,_ ), 0 , 74 , 0 , 343, 149), // #1210
- INST(Vmovntdq , VexMr_Lx , V(660F00,E7,_,x,I,0,4,FVM), 0 , 145, 0 , 344, 145), // #1211
- INST(Vmovntdqa , VexRm_Lx , V(660F38,2A,_,x,I,0,4,FVM), 0 , 111, 0 , 345, 157), // #1212
- INST(Vmovntpd , VexMr_Lx , V(660F00,2B,_,x,I,1,4,FVM), 0 , 104, 0 , 344, 145), // #1213
- INST(Vmovntps , VexMr_Lx , V(000F00,2B,_,x,I,0,4,FVM), 0 , 106, 0 , 344, 145), // #1214
- INST(Vmovq , VexMovdMovq , V(660F00,6E,_,0,I,1,3,T1S), V(660F00,7E,_,0,I,1,3,T1S), 126, 101, 346, 177), // #1215
- INST(Vmovsd , VexMovssMovsd , V(F20F00,10,_,I,I,1,3,T1S), V(F20F00,11,_,I,I,1,3,T1S), 107, 102, 347, 177), // #1216
- INST(Vmovsh , VexMovssMovsd , E(F3MAP5,10,_,I,_,0,1,T1S), E(F3MAP5,11,_,I,_,0,1,T1S), 108, 103, 348, 148), // #1217
- INST(Vmovshdup , VexRm_Lx , V(F30F00,16,_,x,I,0,4,FVM), 0 , 162, 0 , 349, 145), // #1218
- INST(Vmovsldup , VexRm_Lx , V(F30F00,12,_,x,I,0,4,FVM), 0 , 162, 0 , 349, 145), // #1219
- INST(Vmovss , VexMovssMovsd , V(F30F00,10,_,I,I,0,2,T1S), V(F30F00,11,_,I,I,0,2,T1S), 109, 104, 350, 177), // #1220
- INST(Vmovupd , VexRmMr_Lx , V(660F00,10,_,x,I,1,4,FVM), V(660F00,11,_,x,I,1,4,FVM), 104, 105, 336, 173), // #1221
- INST(Vmovups , VexRmMr_Lx , V(000F00,10,_,x,I,0,4,FVM), V(000F00,11,_,x,I,0,4,FVM), 106, 106, 336, 173), // #1222
- INST(Vmovw , VexMovdMovq , E(66MAP5,6E,_,0,_,I,1,T1S), E(66MAP5,7E,_,0,_,I,1,T1S), 204, 107, 351, 148), // #1223
+ INST(Vmmcall , X86Op , O(000F01,D9,_,_,_,_,_,_ ), 0 , 23 , 0 , 31 , 23 ), // #1190
+ INST(Vmovapd , VexRmMr_Lx , V(660F00,28,_,x,I,1,4,FVM), V(660F00,29,_,x,I,1,4,FVM), 104, 86 , 335, 173), // #1191
+ INST(Vmovaps , VexRmMr_Lx , V(000F00,28,_,x,I,0,4,FVM), V(000F00,29,_,x,I,0,4,FVM), 106, 87 , 335, 173), // #1192
+ INST(Vmovd , VexMovdMovq , V(660F00,6E,_,0,0,0,2,T1S), V(660F00,7E,_,0,0,0,2,T1S), 198, 88 , 336, 147), // #1193
+ INST(Vmovddup , VexRm_Lx , V(F20F00,12,_,x,I,1,3,DUP), 0 , 199, 0 , 337, 145), // #1194
+ INST(Vmovdqa , VexRmMr_Lx , V(660F00,6F,_,x,I,_,_,_ ), V(660F00,7F,_,x,I,_,_,_ ), 71 , 89 , 338, 174), // #1195
+ INST(Vmovdqa32 , VexRmMr_Lx , E(660F00,6F,_,x,_,0,4,FVM), E(660F00,7F,_,x,_,0,4,FVM), 200, 90 , 339, 175), // #1196
+ INST(Vmovdqa64 , VexRmMr_Lx , E(660F00,6F,_,x,_,1,4,FVM), E(660F00,7F,_,x,_,1,4,FVM), 136, 91 , 339, 175), // #1197
+ INST(Vmovdqu , VexRmMr_Lx , V(F30F00,6F,_,x,I,_,_,_ ), V(F30F00,7F,_,x,I,_,_,_ ), 201, 92 , 338, 174), // #1198
+ INST(Vmovdqu16 , VexRmMr_Lx , E(F20F00,6F,_,x,_,1,4,FVM), E(F20F00,7F,_,x,_,1,4,FVM), 167, 93 , 339, 176), // #1199
+ INST(Vmovdqu32 , VexRmMr_Lx , E(F30F00,6F,_,x,_,0,4,FVM), E(F30F00,7F,_,x,_,0,4,FVM), 202, 94 , 339, 175), // #1200
+ INST(Vmovdqu64 , VexRmMr_Lx , E(F30F00,6F,_,x,_,1,4,FVM), E(F30F00,7F,_,x,_,1,4,FVM), 150, 95 , 339, 175), // #1201
+ INST(Vmovdqu8 , VexRmMr_Lx , E(F20F00,6F,_,x,_,0,4,FVM), E(F20F00,7F,_,x,_,0,4,FVM), 165, 96 , 339, 176), // #1202
+ INST(Vmovhlps , VexRvm , V(000F00,12,_,0,I,0,_,_ ), 0 , 74 , 0 , 340, 147), // #1203
+ INST(Vmovhpd , VexRvmMr , V(660F00,16,_,0,I,1,3,T1S), V(660F00,17,_,0,I,1,3,T1S), 126, 97 , 341, 147), // #1204
+ INST(Vmovhps , VexRvmMr , V(000F00,16,_,0,I,0,3,T2 ), V(000F00,17,_,0,I,0,3,T2 ), 203, 98 , 341, 147), // #1205
+ INST(Vmovlhps , VexRvm , V(000F00,16,_,0,I,0,_,_ ), 0 , 74 , 0 , 340, 147), // #1206
+ INST(Vmovlpd , VexRvmMr , V(660F00,12,_,0,I,1,3,T1S), V(660F00,13,_,0,I,1,3,T1S), 126, 99 , 341, 147), // #1207
+ INST(Vmovlps , VexRvmMr , V(000F00,12,_,0,I,0,3,T2 ), V(000F00,13,_,0,I,0,3,T2 ), 203, 100, 341, 147), // #1208
+ INST(Vmovmskpd , VexRm_Lx , V(660F00,50,_,x,I,_,_,_ ), 0 , 71 , 0 , 342, 149), // #1209
+ INST(Vmovmskps , VexRm_Lx , V(000F00,50,_,x,I,_,_,_ ), 0 , 74 , 0 , 342, 149), // #1210
+ INST(Vmovntdq , VexMr_Lx , V(660F00,E7,_,x,I,0,4,FVM), 0 , 145, 0 , 343, 145), // #1211
+ INST(Vmovntdqa , VexRm_Lx , V(660F38,2A,_,x,I,0,4,FVM), 0 , 111, 0 , 344, 157), // #1212
+ INST(Vmovntpd , VexMr_Lx , V(660F00,2B,_,x,I,1,4,FVM), 0 , 104, 0 , 343, 145), // #1213
+ INST(Vmovntps , VexMr_Lx , V(000F00,2B,_,x,I,0,4,FVM), 0 , 106, 0 , 343, 145), // #1214
+ INST(Vmovq , VexMovdMovq , V(660F00,6E,_,0,I,1,3,T1S), V(660F00,7E,_,0,I,1,3,T1S), 126, 101, 345, 177), // #1215
+ INST(Vmovsd , VexMovssMovsd , V(F20F00,10,_,I,I,1,3,T1S), V(F20F00,11,_,I,I,1,3,T1S), 107, 102, 346, 177), // #1216
+ INST(Vmovsh , VexMovssMovsd , E(F3MAP5,10,_,I,_,0,1,T1S), E(F3MAP5,11,_,I,_,0,1,T1S), 108, 103, 347, 148), // #1217
+ INST(Vmovshdup , VexRm_Lx , V(F30F00,16,_,x,I,0,4,FVM), 0 , 162, 0 , 348, 145), // #1218
+ INST(Vmovsldup , VexRm_Lx , V(F30F00,12,_,x,I,0,4,FVM), 0 , 162, 0 , 348, 145), // #1219
+ INST(Vmovss , VexMovssMovsd , V(F30F00,10,_,I,I,0,2,T1S), V(F30F00,11,_,I,I,0,2,T1S), 109, 104, 349, 177), // #1220
+ INST(Vmovupd , VexRmMr_Lx , V(660F00,10,_,x,I,1,4,FVM), V(660F00,11,_,x,I,1,4,FVM), 104, 105, 335, 173), // #1221
+ INST(Vmovups , VexRmMr_Lx , V(000F00,10,_,x,I,0,4,FVM), V(000F00,11,_,x,I,0,4,FVM), 106, 106, 335, 173), // #1222
+ INST(Vmovw , VexMovdMovq , E(66MAP5,6E,_,0,_,I,1,T1S), E(66MAP5,7E,_,0,_,I,1,T1S), 204, 107, 350, 148), // #1223
 INST(Vmpsadbw , VexRvmi_Lx , V(660F3A,42,_,x,I,_,_,_ ), 0 , 75 , 0 , 219, 178), // #1224
 INST(Vmptrld , X86M_Only , O(000F00,C7,6,_,_,_,_,_ ), 0 , 82 , 0 , 33 , 67 ), // #1225
 INST(Vmptrst , X86M_Only , O(000F00,C7,7,_,_,_,_,_ ), 0 , 24 , 0 , 33 , 67 ), // #1226
- INST(Vmread , X86Mr_NoSize , O(000F00,78,_,_,_,_,_,_ ), 0 , 5 , 0 , 352, 67 ), // #1227
+ INST(Vmread , X86Mr_NoSize , O(000F00,78,_,_,_,_,_,_ ), 0 , 5 , 0 , 351, 67 ), // #1227
 INST(Vmresume , X86Op , O(000F01,C3,_,_,_,_,_,_ ), 0 , 23 , 0 , 31 , 67 ), // #1228
 INST(Vmrun , X86Op_xAX , O(000F01,D8,_,_,_,_,_,_ ), 0 , 23 , 0 , 334, 23 ), // #1229
 INST(Vmsave , X86Op_xAX , O(000F01,DB,_,_,_,_,_,_ ), 0 , 23 , 0 , 334, 23 ), // #1230
@@ -1283,19 +1283,19 @@ const InstDB::InstInfo InstDB::_instInfoTable[] = {
 INST(Vmulsd , VexRvm , V(F20F00,59,_,I,I,1,3,T1S), 0 , 107, 0 , 203, 147), // #1234
 INST(Vmulsh , VexRvm , E(F3MAP5,59,_,_,_,0,1,T1S), 0 , 108, 0 , 204, 148), // #1235
 INST(Vmulss , VexRvm , V(F30F00,59,_,I,I,0,2,T1S), 0 , 109, 0 , 205, 147), // #1236
- INST(Vmwrite , X86Rm_NoSize , O(000F00,79,_,_,_,_,_,_ ), 0 , 5 , 0 , 353, 67 ), // #1237
+ INST(Vmwrite , X86Rm_NoSize , O(000F00,79,_,_,_,_,_,_ ), 0 , 5 , 0 , 352, 67 ), // #1237
 INST(Vmxoff , X86Op , O(000F01,C4,_,_,_,_,_,_ ), 0 , 23 , 0 , 31 , 67 ), // #1238
 INST(Vmxon , X86M_Only , O(F30F00,C7,6,_,_,_,_,_ ), 0 , 26 , 0 , 33 , 67 ), // #1239
 INST(Vorpd , VexRvm_Lx , V(660F00,56,_,x,I,1,4,FV ), 0 , 104, 0 , 214, 153), // #1240
 INST(Vorps , VexRvm_Lx , V(000F00,56,_,x,I,0,4,FV ), 0 , 106, 0 , 215, 153), // #1241
- INST(Vp2intersectd , VexRvm_Lx_2xK , E(F20F38,68,_,_,_,0,4,FV ), 0 , 132, 0 , 354, 179), // #1242
- INST(Vp2intersectq , VexRvm_Lx_2xK , E(F20F38,68,_,_,_,1,4,FV ), 0 , 205, 0 , 355, 179), // #1243
+ INST(Vp2intersectd , VexRvm_Lx_2xK , E(F20F38,68,_,_,_,0,4,FV ), 0 , 132, 0 , 353, 179), // #1242
+ INST(Vp2intersectq , VexRvm_Lx_2xK , E(F20F38,68,_,_,_,1,4,FV ), 0 , 205, 0 , 354, 179), // #1243
 INST(Vp4dpwssd , VexRm_T1_4X , E(F20F38,52,_,2,_,0,4,T4X), 0 , 102, 0 , 198, 180), // #1244
 INST(Vp4dpwssds , VexRm_T1_4X , E(F20F38,53,_,2,_,0,4,T4X), 0 , 102, 0 , 198, 180), // #1245
- INST(Vpabsb , VexRm_Lx , V(660F38,1C,_,x,I,_,4,FVM), 0 , 111, 0 , 349, 181), // #1246
- INST(Vpabsd , VexRm_Lx , V(660F38,1E,_,x,I,0,4,FV ), 0 , 111, 0 , 356, 157), // #1247
- INST(Vpabsq , VexRm_Lx , E(660F38,1F,_,x,_,1,4,FV ), 0 , 114, 0 , 357, 152), // #1248
- INST(Vpabsw , VexRm_Lx , V(660F38,1D,_,x,I,_,4,FVM), 0 , 111, 0 , 349, 181), // #1249
+ INST(Vpabsb , VexRm_Lx , V(660F38,1C,_,x,I,_,4,FVM), 0 , 111, 0 , 348, 181), // #1246
+ INST(Vpabsd , VexRm_Lx , V(660F38,1E,_,x,I,0,4,FV ), 0 , 111, 0 , 355, 157), // #1247
+ INST(Vpabsq , VexRm_Lx , E(660F38,1F,_,x,_,1,4,FV ), 0 , 114, 0 , 356, 152), // #1248
+ INST(Vpabsw , VexRm_Lx , V(660F38,1D,_,x,I,_,4,FVM), 0 , 111, 0 , 348, 181), // #1249
 INST(Vpackssdw , VexRvm_Lx , V(660F00,6B,_,x,I,0,4,FV ), 0 , 145, 0 , 213, 181), // #1250
 INST(Vpacksswb , VexRvm_Lx , V(660F00,63,_,x,I,I,4,FVM), 0 , 145, 0 , 322, 181), // #1251
 INST(Vpackusdw , VexRvm_Lx , V(660F38,2B,_,x,I,0,4,FV ), 0 , 111, 0 , 213, 181), // #1252
@@ -1309,49 +1309,49 @@ const InstDB::InstInfo InstDB::_instInfoTable[] = {
 INST(Vpaddusw , VexRvm_Lx , V(660F00,DD,_,x,I,I,4,FVM), 0 , 145, 0 , 322, 181), // #1260
 INST(Vpaddw , VexRvm_Lx , V(660F00,FD,_,x,I,I,4,FVM), 0 , 145, 0 , 322, 181), // #1261
 INST(Vpalignr , VexRvmi_Lx , V(660F3A,0F,_,x,I,I,4,FVM), 0 , 206, 0 , 321, 181), // #1262
- INST(Vpand , VexRvm_Lx , V(660F00,DB,_,x,I,_,_,_ ), 0 , 71 , 0 , 358, 178), // #1263
- INST(Vpandd , VexRvm_Lx , E(660F00,DB,_,x,_,0,4,FV ), 0 , 200, 0 , 359, 152), // #1264
- INST(Vpandn , VexRvm_Lx , V(660F00,DF,_,x,I,_,_,_ ), 0 , 71 , 0 , 360, 178), // #1265
- INST(Vpandnd , VexRvm_Lx , E(660F00,DF,_,x,_,0,4,FV ), 0 , 200, 0 , 361, 152), // #1266
- INST(Vpandnq , VexRvm_Lx , E(660F00,DF,_,x,_,1,4,FV ), 0 , 136, 0 , 362, 152), // #1267
- INST(Vpandq , VexRvm_Lx , E(660F00,DB,_,x,_,1,4,FV ), 0 , 136, 0 , 363, 152), // #1268
+ INST(Vpand , VexRvm_Lx , V(660F00,DB,_,x,I,_,_,_ ), 0 , 71 , 0 , 357, 178), // #1263
+ INST(Vpandd , VexRvm_Lx , E(660F00,DB,_,x,_,0,4,FV ), 0 , 200, 0 , 358, 152), // #1264
+ INST(Vpandn , VexRvm_Lx , V(660F00,DF,_,x,I,_,_,_ ), 0 , 71 , 0 , 359, 178), // #1265
+ INST(Vpandnd , VexRvm_Lx , E(660F00,DF,_,x,_,0,4,FV ), 0 , 200, 0 , 360, 152), // #1266
+ INST(Vpandnq , VexRvm_Lx , E(660F00,DF,_,x,_,1,4,FV ), 0 , 136, 0 , 361, 152), // #1267
+ INST(Vpandq , VexRvm_Lx , E(660F00,DB,_,x,_,1,4,FV ), 0 , 136, 0 , 362, 152), // #1268
 INST(Vpavgb , VexRvm_Lx , V(660F00,E0,_,x,I,I,4,FVM), 0 , 145, 0 , 322, 181), // #1269
 INST(Vpavgw , VexRvm_Lx , V(660F00,E3,_,x,I,I,4,FVM), 0 , 145, 0 , 322, 181), // #1270
 INST(Vpblendd , VexRvmi_Lx , V(660F3A,02,_,x,0,_,_,_ ), 0 , 75 , 0 , 219, 156), // #1271
- INST(Vpblendmb , VexRvm_Lx , E(660F38,66,_,x,_,0,4,FVM), 0 , 115, 0 , 364, 163), // #1272
+ INST(Vpblendmb , VexRvm_Lx , E(660F38,66,_,x,_,0,4,FVM), 0 , 115, 0 , 363, 163), // #1272
 INST(Vpblendmd , VexRvm_Lx , E(660F38,64,_,x,_,0,4,FV ), 0 , 115, 0 , 218, 152), // #1273
 INST(Vpblendmq , VexRvm_Lx , E(660F38,64,_,x,_,1,4,FV ), 0 , 114, 0 , 217, 152), // #1274
- INST(Vpblendmw , VexRvm_Lx , E(660F38,66,_,x,_,1,4,FVM), 0 , 114, 0 , 364, 163), // #1275
+ INST(Vpblendmw , VexRvm_Lx , E(660F38,66,_,x,_,1,4,FVM), 0 , 114, 0 , 363, 163), // #1275
 INST(Vpblendvb , VexRvmr_Lx , V(660F3A,4C,_,x,0,_,_,_ ), 0 , 75 , 0 , 220, 178), // #1276
 INST(Vpblendw , VexRvmi_Lx , V(660F3A,0E,_,x,I,_,_,_ ), 0 , 75 , 0 , 219, 178), // #1277
- INST(Vpbroadcastb , VexRm_Lx_Bcst , V(660F38,78,_,x,0,0,0,T1S), E(660F38,7A,_,x,0,0,0,T1S), 30 , 108, 365, 182), // #1278
- INST(Vpbroadcastd , VexRm_Lx_Bcst , V(660F38,58,_,x,0,0,2,T1S), E(660F38,7C,_,x,0,0,0,T1S), 123, 109, 366, 169), // #1279
- INST(Vpbroadcastmb2q , VexRm_Lx , E(F30F38,2A,_,x,_,1,_,_ ), 0 , 207, 0 , 367, 183), // #1280
- INST(Vpbroadcastmw2d , VexRm_Lx , E(F30F38,3A,_,x,_,0,_,_ ), 0 , 208, 0 , 367, 183), // #1281
- INST(Vpbroadcastq , VexRm_Lx_Bcst , V(660F38,59,_,x,0,1,3,T1S), E(660F38,7C,_,x,0,1,0,T1S), 122, 110, 368, 169), // #1282
- INST(Vpbroadcastw , VexRm_Lx_Bcst , V(660F38,79,_,x,0,0,1,T1S), E(660F38,7B,_,x,0,0,0,T1S), 209, 111, 369, 182), // #1283
- INST(Vpclmulqdq , VexRvmi_Lx , V(660F3A,44,_,x,I,_,4,FVM), 0 , 206, 0 , 370, 184), // #1284
+ INST(Vpbroadcastb , VexRm_Lx_Bcst , V(660F38,78,_,x,0,0,0,T1S), E(660F38,7A,_,x,0,0,0,T1S), 30 , 108, 364, 182), // #1278
+ INST(Vpbroadcastd , VexRm_Lx_Bcst , V(660F38,58,_,x,0,0,2,T1S), E(660F38,7C,_,x,0,0,0,T1S), 123, 109, 365, 169), // #1279
+ INST(Vpbroadcastmb2q , VexRm_Lx , E(F30F38,2A,_,x,_,1,_,_ ), 0 , 207, 0 , 366, 183), // #1280
+ INST(Vpbroadcastmw2d , VexRm_Lx , E(F30F38,3A,_,x,_,0,_,_ ), 0 , 208, 0 , 366, 183), // #1281
+ INST(Vpbroadcastq , VexRm_Lx_Bcst , V(660F38,59,_,x,0,1,3,T1S), E(660F38,7C,_,x,0,1,0,T1S), 122, 110, 367, 169), // #1282
+ INST(Vpbroadcastw , VexRm_Lx_Bcst , V(660F38,79,_,x,0,0,1,T1S), E(660F38,7B,_,x,0,0,0,T1S), 209, 111, 368, 182), // #1283
+ INST(Vpclmulqdq , VexRvmi_Lx , V(660F3A,44,_,x,I,_,4,FVM), 0 , 206, 0 , 369, 184), // #1284
 INST(Vpcmov , VexRvrmRvmr_Lx , V(XOP_M8,A2,_,x,x,_,_,_ ), 0 , 210, 0 , 296, 168), // #1285
- INST(Vpcmpb , VexRvmi_Lx , E(660F3A,3F,_,x,_,0,4,FVM), 0 , 112, 0 , 371, 163), // #1286
- INST(Vpcmpd , VexRvmi_Lx , E(660F3A,1F,_,x,_,0,4,FV ), 0 , 112, 0 , 372, 152), // #1287
- INST(Vpcmpeqb , VexRvm_Lx_KEvex , V(660F00,74,_,x,I,I,4,FV ), 0 , 145, 0 , 373, 181), // #1288
- INST(Vpcmpeqd , VexRvm_Lx_KEvex , V(660F00,76,_,x,I,0,4,FVM), 0 , 145, 0 , 374, 157), // #1289
- INST(Vpcmpeqq , VexRvm_Lx_KEvex , V(660F38,29,_,x,I,1,4,FVM), 0 , 211, 0 , 375, 157), // #1290
- INST(Vpcmpeqw , VexRvm_Lx_KEvex , V(660F00,75,_,x,I,I,4,FV ), 0 , 145, 0 , 373, 181), // #1291
- INST(Vpcmpestri , VexRmi , V(660F3A,61,_,0,I,_,_,_ ), 0 , 75 , 0 , 376, 185), // #1292
- INST(Vpcmpestrm , VexRmi , V(660F3A,60,_,0,I,_,_,_ ), 0 , 75 , 0 , 377, 185), // #1293
- INST(Vpcmpgtb , VexRvm_Lx_KEvex , V(660F00,64,_,x,I,I,4,FV ), 0 , 145, 0 , 373, 181), // #1294
- INST(Vpcmpgtd , VexRvm_Lx_KEvex , V(660F00,66,_,x,I,0,4,FVM), 0 , 145, 0 , 374, 157), // #1295
- INST(Vpcmpgtq , VexRvm_Lx_KEvex , V(660F38,37,_,x,I,1,4,FVM), 0 , 211, 0 , 375, 157), // #1296
- INST(Vpcmpgtw , VexRvm_Lx_KEvex , V(660F00,65,_,x,I,I,4,FV ), 0 , 145, 0 , 373, 181), // #1297
- INST(Vpcmpistri , VexRmi , V(660F3A,63,_,0,I,_,_,_ ), 0 , 75 , 0 , 378, 185), // #1298
- INST(Vpcmpistrm , VexRmi , V(660F3A,62,_,0,I,_,_,_ ), 0 , 75 , 0 , 379, 185), // #1299
- INST(Vpcmpq , VexRvmi_Lx , E(660F3A,1F,_,x,_,1,4,FV ), 0 , 113, 0 , 380, 152), // #1300
- INST(Vpcmpub , VexRvmi_Lx , E(660F3A,3E,_,x,_,0,4,FVM), 0 , 112, 0 , 371, 163), // #1301
- INST(Vpcmpud , VexRvmi_Lx , E(660F3A,1E,_,x,_,0,4,FV ), 0 , 112, 0 , 372, 152), // #1302
- INST(Vpcmpuq , VexRvmi_Lx , E(660F3A,1E,_,x,_,1,4,FV ), 0 , 113, 0 , 380, 152), // #1303
- INST(Vpcmpuw , VexRvmi_Lx , E(660F3A,3E,_,x,_,1,4,FVM), 0 , 113, 0 , 380, 163), // #1304
- INST(Vpcmpw , VexRvmi_Lx , E(660F3A,3F,_,x,_,1,4,FVM), 0 , 113, 0 , 380, 163), // #1305
+ INST(Vpcmpb , VexRvmi_Lx , E(660F3A,3F,_,x,_,0,4,FVM), 0 , 112, 0 , 370, 163), // #1286
+ INST(Vpcmpd , VexRvmi_Lx , E(660F3A,1F,_,x,_,0,4,FV ), 0 , 112, 0 , 371, 152), // #1287
+ INST(Vpcmpeqb , VexRvm_Lx_KEvex , V(660F00,74,_,x,I,I,4,FV ), 0 , 145, 0 , 372, 181), // #1288
+ INST(Vpcmpeqd , VexRvm_Lx_KEvex , V(660F00,76,_,x,I,0,4,FVM), 0 , 145, 0 , 373, 157), // #1289
+ INST(Vpcmpeqq , VexRvm_Lx_KEvex , V(660F38,29,_,x,I,1,4,FVM), 0 , 211, 0 , 374, 157), // #1290
+ INST(Vpcmpeqw , VexRvm_Lx_KEvex , V(660F00,75,_,x,I,I,4,FV ), 0 , 145, 0 , 372, 181), // #1291
+ INST(Vpcmpestri , VexRmi , V(660F3A,61,_,0,I,_,_,_ ), 0 , 75 , 0 , 375, 185), // #1292
+ INST(Vpcmpestrm , VexRmi , V(660F3A,60,_,0,I,_,_,_ ), 0 , 75 , 0 , 376, 185), // #1293
+ INST(Vpcmpgtb , VexRvm_Lx_KEvex , V(660F00,64,_,x,I,I,4,FV ), 0 , 145, 0 , 372, 181), // #1294
+ INST(Vpcmpgtd , VexRvm_Lx_KEvex , V(660F00,66,_,x,I,0,4,FVM), 0 , 145, 0 , 373, 157), // #1295
+ INST(Vpcmpgtq , VexRvm_Lx_KEvex , V(660F38,37,_,x,I,1,4,FVM), 0 , 211, 0 , 374, 157), // #1296
+ INST(Vpcmpgtw , VexRvm_Lx_KEvex , V(660F00,65,_,x,I,I,4,FV ), 0 , 145, 0 , 372, 181), // #1297
+ INST(Vpcmpistri , VexRmi , V(660F3A,63,_,0,I,_,_,_ ), 0 , 75 , 0 , 377, 185), // #1298
+ INST(Vpcmpistrm , VexRmi , V(660F3A,62,_,0,I,_,_,_ ), 0 , 75 , 0 , 378, 185), // #1299
+ INST(Vpcmpq , VexRvmi_Lx , E(660F3A,1F,_,x,_,1,4,FV ), 0 , 113, 0 , 379, 152), // #1300
+ INST(Vpcmpub , VexRvmi_Lx , E(660F3A,3E,_,x,_,0,4,FVM), 0 , 112, 0 , 370, 163), // #1301
+ INST(Vpcmpud , VexRvmi_Lx , E(660F3A,1E,_,x,_,0,4,FV ), 0 , 112, 0 , 371, 152), // #1302
+ INST(Vpcmpuq , VexRvmi_Lx , E(660F3A,1E,_,x,_,1,4,FV ), 0 , 113, 0 , 379, 152), // #1303
+ INST(Vpcmpuw , VexRvmi_Lx , E(660F3A,3E,_,x,_,1,4,FVM), 0 , 113, 0 , 379, 163), // #1304
+ INST(Vpcmpw , VexRvmi_Lx , E(660F3A,3F,_,x,_,1,4,FVM), 0 , 113, 0 , 379, 163), // #1305
 INST(Vpcomb , VexRvmi , V(XOP_M8,CC,_,0,0,_,_,_ ), 0 , 210, 0 , 283, 168), // #1306
 INST(Vpcomd , VexRvmi , V(XOP_M8,CE,_,0,0,_,_,_ ), 0 , 210, 0 , 283, 168), // #1307
 INST(Vpcompressb , VexMr_Lx , E(660F38,63,_,x,_,0,0,T1S), 0 , 212, 0 , 237, 186), // #1308
@@ -1364,56 +1364,56 @@ const InstDB::InstInfo InstDB::_instInfoTable[] = {
 INST(Vpcomuq , VexRvmi , V(XOP_M8,EF,_,0,0,_,_,_ ), 0 , 210, 0 , 283, 168), // #1315
 INST(Vpcomuw , VexRvmi , V(XOP_M8,ED,_,0,0,_,_,_ ), 0 , 210, 0 , 283, 168), // #1316
 INST(Vpcomw , VexRvmi , V(XOP_M8,CD,_,0,0,_,_,_ ), 0 , 210, 0 , 283, 168), // #1317
- INST(Vpconflictd , VexRm_Lx , E(660F38,C4,_,x,_,0,4,FV ), 0 , 115, 0 , 381, 183), // #1318
- INST(Vpconflictq , VexRm_Lx , E(660F38,C4,_,x,_,1,4,FV ), 0 , 114, 0 , 381, 183), // #1319
+ INST(Vpconflictd , VexRm_Lx , E(660F38,C4,_,x,_,0,4,FV ), 0 , 115, 0 , 380, 183), // #1318
+ INST(Vpconflictq , VexRm_Lx , E(660F38,C4,_,x,_,1,4,FV ), 0 , 114, 0 , 380, 183), // #1319
 INST(Vpdpbssd , VexRvm_Lx , V(F20F38,50,_,x,0,_,_,_ ), 0 , 85 , 0 , 206, 187), // #1320
 INST(Vpdpbssds , VexRvm_Lx , V(F20F38,51,_,x,0,_,_,_ ), 0 , 85 , 0 , 206, 187), // #1321
 INST(Vpdpbsud , VexRvm_Lx , V(F30F38,50,_,x,0,_,_,_ ), 0 , 89 , 0 , 206, 187), // #1322
 INST(Vpdpbsuds , VexRvm_Lx , V(F30F38,51,_,x,0,_,_,_ ), 0 , 89 , 0 , 206, 187), // #1323
- INST(Vpdpbusd , VexRvm_Lx , V(660F38,50,_,x,_,0,4,FV ), 0 , 111, 0 , 382, 188), // #1324
- INST(Vpdpbusds , VexRvm_Lx , V(660F38,51,_,x,_,0,4,FV ), 0 , 111, 0 , 382, 188), // #1325
+ INST(Vpdpbusd , VexRvm_Lx , V(660F38,50,_,x,_,0,4,FV ), 0 , 111, 0 , 381, 188), // #1324
+ INST(Vpdpbusds , VexRvm_Lx , V(660F38,51,_,x,_,0,4,FV ), 0 , 111, 0 , 381, 188), // #1325
 INST(Vpdpbuud , VexRvm_Lx , V(000F38,50,_,x,0,_,_,_ ), 0 , 11 , 0 , 206, 187), // #1326
 INST(Vpdpbuuds , VexRvm_Lx , V(000F38,51,_,x,0,_,_,_ ), 0 , 11 , 0 , 206, 187), // #1327
- INST(Vpdpwssd , VexRvm_Lx , V(660F38,52,_,x,_,0,4,FV ), 0 , 111, 0 , 382, 188), // #1328
- INST(Vpdpwssds , VexRvm_Lx , V(660F38,53,_,x,_,0,4,FV ), 0 , 111, 0 , 382, 188), // #1329
+ INST(Vpdpwssd , VexRvm_Lx , V(660F38,52,_,x,_,0,4,FV ), 0 , 111, 0 , 381, 188), // #1328
+ INST(Vpdpwssds , VexRvm_Lx , V(660F38,53,_,x,_,0,4,FV ), 0 , 111, 0 , 381, 188), // #1329
 INST(Vpdpwsud , VexRvm_Lx , V(F30F38,D2,_,x,0,_,_,_ ), 0 , 89 , 0 , 206, 189), // #1330
 INST(Vpdpwsuds , VexRvm_Lx , V(F30F38,D3,_,x,0,_,_,_ ), 0 , 89 , 0 , 206, 189), // #1331
 INST(Vpdpwusd , VexRvm_Lx , V(660F38,D2,_,x,0,_,_,_ ), 0 , 30 , 0 , 206, 189), // #1332
 INST(Vpdpwusds , VexRvm_Lx , V(660F38,D3,_,x,0,_,_,_ ), 0 , 30 , 0 , 206, 189), // #1333
 INST(Vpdpwuud , VexRvm_Lx , V(000F38,D2,_,x,0,_,_,_ ), 0 , 11 , 0 , 206, 189), // #1334
 INST(Vpdpwuuds , VexRvm_Lx , V(000F38,D3,_,x,0,_,_,_ ), 0 , 11 , 0 , 206, 189), // #1335
- INST(Vperm2f128 , VexRvmi , V(660F3A,06,_,1,0,_,_,_ ), 0 , 174, 0 , 383, 149), // #1336
- INST(Vperm2i128 , VexRvmi , V(660F3A,46,_,1,0,_,_,_ ), 0 , 174, 0 , 383, 156), // #1337
- INST(Vpermb , VexRvm_Lx , E(660F38,8D,_,x,_,0,4,FVM), 0 , 115, 0 , 364, 190), // #1338
- INST(Vpermd , VexRvm_Lx , V(660F38,36,_,x,0,0,4,FV ), 0 , 111, 0 , 384, 169), // #1339
- INST(Vpermi2b , VexRvm_Lx , E(660F38,75,_,x,_,0,4,FVM), 0 , 115, 0 , 364, 190), // #1340
+ INST(Vperm2f128 , VexRvmi , V(660F3A,06,_,1,0,_,_,_ ), 0 , 174, 0 , 382, 149), // #1336
+ INST(Vperm2i128 , VexRvmi , V(660F3A,46,_,1,0,_,_,_ ), 0 , 174, 0 , 382, 156), // #1337
+ INST(Vpermb , VexRvm_Lx , E(660F38,8D,_,x,_,0,4,FVM), 0 , 115, 0 , 363, 190), // #1338
+ INST(Vpermd , VexRvm_Lx , V(660F38,36,_,x,0,0,4,FV ), 0 , 111, 0 , 383, 169), // #1339
+ INST(Vpermi2b , VexRvm_Lx , E(660F38,75,_,x,_,0,4,FVM), 0 , 115, 0 , 363, 190), // #1340
 INST(Vpermi2d , VexRvm_Lx , E(660F38,76,_,x,_,0,4,FV ), 0 , 115, 0 , 218, 152), // #1341
 INST(Vpermi2pd , VexRvm_Lx , E(660F38,77,_,x,_,1,4,FV ), 0 , 114, 0 , 217, 152), // #1342
 INST(Vpermi2ps , VexRvm_Lx , E(660F38,77,_,x,_,0,4,FV ), 0 , 115, 0 , 218, 152), // #1343
 INST(Vpermi2q , VexRvm_Lx , E(660F38,76,_,x,_,1,4,FV ), 0 , 114, 0 , 217, 152), // #1344
- INST(Vpermi2w , VexRvm_Lx , E(660F38,75,_,x,_,1,4,FVM), 0 , 114, 0 , 364, 163), // #1345
- INST(Vpermil2pd , VexRvrmiRvmri_Lx , V(660F3A,49,_,x,x,_,_,_ ), 0 , 75 , 0 , 385, 168), // #1346
- INST(Vpermil2ps , VexRvrmiRvmri_Lx , V(660F3A,48,_,x,x,_,_,_ ), 0 , 75 , 0 , 385, 168), // #1347
- INST(Vpermilpd , VexRvmRmi_Lx , V(660F38,0D,_,x,0,1,4,FV ), V(660F3A,05,_,x,0,1,4,FV ), 211, 112, 386, 145), // #1348
- INST(Vpermilps , VexRvmRmi_Lx , V(660F38,0C,_,x,0,0,4,FV ), V(660F3A,04,_,x,0,0,4,FV ), 111, 113, 387, 145), // #1349
- INST(Vpermpd , VexRvmRmi_Lx , E(660F38,16,_,x,1,1,4,FV ), V(660F3A,01,_,x,1,1,4,FV ), 214, 114, 388, 169), // #1350
- INST(Vpermps , VexRvm_Lx , V(660F38,16,_,x,0,0,4,FV ), 0 , 111, 0 , 384, 169), // #1351
- INST(Vpermq , VexRvmRmi_Lx , E(660F38,36,_,x,_,1,4,FV ), V(660F3A,00,_,x,1,1,4,FV ), 114, 115, 388, 169), // #1352
- INST(Vpermt2b , VexRvm_Lx , E(660F38,7D,_,x,_,0,4,FVM), 0 , 115, 0 , 364, 190), // #1353
+ INST(Vpermi2w , VexRvm_Lx , E(660F38,75,_,x,_,1,4,FVM), 0 , 114, 0 , 363, 163), // #1345
+ INST(Vpermil2pd , VexRvrmiRvmri_Lx , V(660F3A,49,_,x,x,_,_,_ ), 0 , 75 , 0 , 384, 168), // #1346
+ INST(Vpermil2ps , VexRvrmiRvmri_Lx , V(660F3A,48,_,x,x,_,_,_ ), 0 , 75 , 0 , 384, 168), // #1347
+ INST(Vpermilpd , VexRvmRmi_Lx , V(660F38,0D,_,x,0,1,4,FV ), V(660F3A,05,_,x,0,1,4,FV ), 211, 112, 385, 145), // #1348
+ INST(Vpermilps , VexRvmRmi_Lx , V(660F38,0C,_,x,0,0,4,FV ), V(660F3A,04,_,x,0,0,4,FV ), 111, 113, 386, 145), // #1349
+ INST(Vpermpd , VexRvmRmi_Lx , E(660F38,16,_,x,1,1,4,FV ), V(660F3A,01,_,x,1,1,4,FV ), 214, 114, 387, 169), // #1350
+ INST(Vpermps , VexRvm_Lx , V(660F38,16,_,x,0,0,4,FV ), 0 , 111, 0 , 383, 169), // #1351
+ INST(Vpermq , VexRvmRmi_Lx , E(660F38,36,_,x,_,1,4,FV ), V(660F3A,00,_,x,1,1,4,FV ), 114, 115, 387, 169), // #1352
+ INST(Vpermt2b , VexRvm_Lx , E(660F38,7D,_,x,_,0,4,FVM), 0 , 115, 0 , 363, 190), // #1353
 INST(Vpermt2d , VexRvm_Lx , E(660F38,7E,_,x,_,0,4,FV ), 0 , 115, 0 , 218, 152), // #1354
 INST(Vpermt2pd , VexRvm_Lx , E(660F38,7F,_,x,_,1,4,FV ), 0 , 114, 0 , 217, 152), // #1355
 INST(Vpermt2ps , VexRvm_Lx , E(660F38,7F,_,x,_,0,4,FV ), 0 , 115, 0 , 218, 152), // #1356
 INST(Vpermt2q , VexRvm_Lx , E(660F38,7E,_,x,_,1,4,FV ), 0 , 114, 0 , 217, 152), // #1357
- INST(Vpermt2w , VexRvm_Lx , E(660F38,7D,_,x,_,1,4,FVM), 0 , 114, 0 , 364, 163), // #1358
- INST(Vpermw , VexRvm_Lx , E(660F38,8D,_,x,_,1,4,FVM), 0 , 114, 0 , 364, 163), // #1359
+ INST(Vpermt2w , VexRvm_Lx , E(660F38,7D,_,x,_,1,4,FVM), 0 , 114, 0 , 363, 163), // #1358
+ INST(Vpermw , VexRvm_Lx , E(660F38,8D,_,x,_,1,4,FVM), 0 , 114, 0 , 363, 163), // #1359
 INST(Vpexpandb , VexRm_Lx , E(660F38,62,_,x,_,0,0,T1S), 0 , 212, 0 , 286, 186), // #1360
 INST(Vpexpandd , VexRm_Lx , E(660F38,89,_,x,_,0,2,T1S), 0 , 130, 0 , 286, 152), // #1361
 INST(Vpexpandq , VexRm_Lx , E(660F38,89,_,x,_,1,3,T1S), 0 , 129, 0 , 286, 152), // #1362
 INST(Vpexpandw , VexRm_Lx , E(660F38,62,_,x,_,1,1,T1S), 0 , 213, 0 , 286, 186), // #1363
- INST(Vpextrb , VexMri , V(660F3A,14,_,0,0,I,0,T1S), 0 , 75 , 0 , 389, 191), // #1364
+ INST(Vpextrb , VexMri , V(660F3A,14,_,0,0,I,0,T1S), 0 , 75 , 0 , 388, 191), // #1364
 INST(Vpextrd , VexMri , V(660F3A,16,_,0,0,0,2,T1S), 0 , 179, 0 , 290, 192), // #1365
- INST(Vpextrq , VexMri , V(660F3A,16,_,0,1,1,3,T1S), 0 , 215, 0 , 390, 192), // #1366
- INST(Vpextrw , VexMri_Vpextrw , V(660F3A,15,_,0,0,I,1,T1S), 0 , 216, 0 , 391, 191), // #1367
+ INST(Vpextrq , VexMri , V(660F3A,16,_,0,1,1,3,T1S), 0 , 215, 0 , 389, 192), // #1366
+ INST(Vpextrw , VexMri_Vpextrw , V(660F3A,15,_,0,0,I,1,T1S), 0 , 216, 0 , 390, 191), // #1367
 INST(Vpgatherdd , VexRmvRm_VM , V(660F38,90,_,x,0,_,_,_ ), E(660F38,90,_,x,_,0,2,T1S), 30 , 116, 309, 169), // #1368
 INST(Vpgatherdq , VexRmvRm_VM , V(660F38,90,_,x,1,_,_,_ ), E(660F38,90,_,x,_,1,3,T1S), 191, 117, 308, 169), // #1369
 INST(Vpgatherqd , VexRmvRm_VM , V(660F38,91,_,x,0,_,_,_ ), E(660F38,91,_,x,_,0,2,T1S), 30 , 118, 314, 169), // #1370
@@ -1440,85 +1440,85 @@ const InstDB::InstInfo InstDB::_instInfoTable[] = {
 INST(Vphsubsw , VexRvm_Lx , V(660F38,07,_,x,I,_,_,_ ), 0 , 30 , 0 , 206, 178), // #1391
 INST(Vphsubw , VexRvm_Lx , V(660F38,05,_,x,I,_,_,_ ), 0 , 30 , 0 , 206, 178), // #1392
 INST(Vphsubwd , VexRm , V(XOP_M9,E2,_,0,0,_,_,_ ), 0 , 81 , 0 , 208, 168), // #1393
- INST(Vpinsrb , VexRvmi , V(660F3A,20,_,0,0,I,0,T1S), 0 , 75 , 0 , 392, 191), // #1394
- INST(Vpinsrd , VexRvmi , V(660F3A,22,_,0,0,0,2,T1S), 0 , 179, 0 , 393, 192), // #1395
- INST(Vpinsrq , VexRvmi , V(660F3A,22,_,0,1,1,3,T1S), 0 , 215, 0 , 394, 192), // #1396
- INST(Vpinsrw , VexRvmi , V(660F00,C4,_,0,0,I,1,T1S), 0 , 217, 0 , 395, 191), // #1397
- INST(Vplzcntd , VexRm_Lx , E(660F38,44,_,x,_,0,4,FV ), 0 , 115, 0 , 381, 183), // #1398
- INST(Vplzcntq , VexRm_Lx , E(660F38,44,_,x,_,1,4,FV ), 0 , 114, 0 , 357, 183), // #1399
- INST(Vpmacsdd , VexRvmr , V(XOP_M8,9E,_,0,0,_,_,_ ), 0 , 210, 0 , 396, 168), // #1400
- INST(Vpmacsdqh , VexRvmr , V(XOP_M8,9F,_,0,0,_,_,_ ), 0 , 210, 0 , 396, 168), // #1401
- INST(Vpmacsdql , VexRvmr , V(XOP_M8,97,_,0,0,_,_,_ ), 0 , 210, 0 , 396, 168), // #1402
- INST(Vpmacssdd , VexRvmr , V(XOP_M8,8E,_,0,0,_,_,_ ), 0 , 210, 0 , 396, 168), // #1403
- INST(Vpmacssdqh , VexRvmr , V(XOP_M8,8F,_,0,0,_,_,_ ), 0 , 210, 0 , 396, 168), // #1404
- INST(Vpmacssdql , VexRvmr , V(XOP_M8,87,_,0,0,_,_,_ ), 0 , 210, 0 , 396, 168), // #1405
- INST(Vpmacsswd , VexRvmr , V(XOP_M8,86,_,0,0,_,_,_ ), 0 , 210, 0 , 396, 168), // #1406
- INST(Vpmacssww , VexRvmr , V(XOP_M8,85,_,0,0,_,_,_ ), 0 , 210, 0 , 396, 168), // #1407
- INST(Vpmacswd , VexRvmr , V(XOP_M8,96,_,0,0,_,_,_ ), 0 , 210, 0 , 396, 168), // #1408
- INST(Vpmacsww , VexRvmr , V(XOP_M8,95,_,0,0,_,_,_ ), 0 , 210, 0 , 396, 168), // #1409
- INST(Vpmadcsswd , VexRvmr , V(XOP_M8,A6,_,0,0,_,_,_ ), 0 , 210, 0 , 396, 168), // #1410
- INST(Vpmadcswd , VexRvmr , V(XOP_M8,B6,_,0,0,_,_,_ ), 0 , 210, 0 , 396, 168), // #1411
- INST(Vpmadd52huq , VexRvm_Lx , V(660F38,B5,_,x,1,1,4,FV ), 0 , 184, 0 , 397, 193), // #1412
- INST(Vpmadd52luq , VexRvm_Lx , V(660F38,B4,_,x,1,1,4,FV ), 0 , 184, 0 , 397, 193), // #1413
+ INST(Vpinsrb , VexRvmi , V(660F3A,20,_,0,0,I,0,T1S), 0 , 75 , 0 , 391, 191), // #1394
+ INST(Vpinsrd , VexRvmi , V(660F3A,22,_,0,0,0,2,T1S), 0 , 179, 0 , 392, 192), // #1395
+ INST(Vpinsrq , VexRvmi , V(660F3A,22,_,0,1,1,3,T1S), 0 , 215, 0 , 393, 192), // #1396
+ INST(Vpinsrw , VexRvmi , V(660F00,C4,_,0,0,I,1,T1S), 0 , 217, 0 , 394, 191), // #1397
+ INST(Vplzcntd , VexRm_Lx , E(660F38,44,_,x,_,0,4,FV ), 0 , 115, 0 , 380, 183), // #1398
+ INST(Vplzcntq , VexRm_Lx , E(660F38,44,_,x,_,1,4,FV ), 0 , 114, 0 , 356, 183), // #1399
+ INST(Vpmacsdd , VexRvmr , V(XOP_M8,9E,_,0,0,_,_,_ ), 0 , 210, 0 , 395, 168), // #1400
+ INST(Vpmacsdqh , VexRvmr , V(XOP_M8,9F,_,0,0,_,_,_ ), 0 , 210, 0 , 395, 168), // #1401
+ INST(Vpmacsdql , VexRvmr , V(XOP_M8,97,_,0,0,_,_,_ ), 0 , 210, 0 , 395, 168), // #1402
+ INST(Vpmacssdd , VexRvmr , V(XOP_M8,8E,_,0,0,_,_,_ ), 0 , 210, 0 , 395, 168), // #1403
+ INST(Vpmacssdqh , VexRvmr , V(XOP_M8,8F,_,0,0,_,_,_ ), 0 , 210, 0 , 395, 168), // #1404
+ INST(Vpmacssdql , VexRvmr , V(XOP_M8,87,_,0,0,_,_,_ ), 0 , 210, 0 , 395, 168), // #1405
+ INST(Vpmacsswd , VexRvmr , V(XOP_M8,86,_,0,0,_,_,_ ), 0 , 210, 0 , 395, 168), // #1406
+ INST(Vpmacssww , VexRvmr , V(XOP_M8,85,_,0,0,_,_,_ ), 0 , 210, 0 , 395, 168), // #1407
+ INST(Vpmacswd , VexRvmr , V(XOP_M8,96,_,0,0,_,_,_ ), 0 , 210, 0 , 395, 168), // #1408
+ INST(Vpmacsww , VexRvmr , V(XOP_M8,95,_,0,0,_,_,_ ), 0 , 210, 0 , 395, 168), // #1409
+ INST(Vpmadcsswd , VexRvmr , V(XOP_M8,A6,_,0,0,_,_,_ ), 0 , 210, 0 , 395, 168), // #1410
+ INST(Vpmadcswd , VexRvmr , V(XOP_M8,B6,_,0,0,_,_,_ ), 0 , 210, 0 , 395, 168), // #1411
+ INST(Vpmadd52huq , VexRvm_Lx , V(660F38,B5,_,x,1,1,4,FV ), 0 , 184, 0 , 396, 193), // #1412
+ INST(Vpmadd52luq , VexRvm_Lx , V(660F38,B4,_,x,1,1,4,FV ), 0 , 184, 0 , 396, 193), // #1413
 INST(Vpmaddubsw , VexRvm_Lx , V(660F38,04,_,x,I,I,4,FVM), 0 , 111, 0 , 322, 181), // #1414
 INST(Vpmaddwd , VexRvm_Lx , V(660F00,F5,_,x,I,I,4,FVM), 0 , 145, 0 , 322, 181), // #1415
 INST(Vpmaskmovd , VexRvmMvr_Lx , V(660F38,8C,_,x,0,_,_,_ ), V(660F38,8E,_,x,0,_,_,_ ), 30 , 120, 329, 156), // #1416
 INST(Vpmaskmovq , VexRvmMvr_Lx , V(660F38,8C,_,x,1,_,_,_ ), V(660F38,8E,_,x,1,_,_,_ ), 191, 121, 329, 156), // #1417
- INST(Vpmaxsb , VexRvm_Lx , V(660F38,3C,_,x,I,I,4,FVM), 0 , 111, 0 , 398, 181), // #1418
+ INST(Vpmaxsb , VexRvm_Lx , V(660F38,3C,_,x,I,I,4,FVM), 0 , 111, 0 , 397, 181), // #1418
 INST(Vpmaxsd , VexRvm_Lx , V(660F38,3D,_,x,I,0,4,FV ), 0 , 111, 0 , 215, 157), // #1419
 INST(Vpmaxsq , VexRvm_Lx , E(660F38,3D,_,x,_,1,4,FV ), 0 , 114, 0 , 217, 152), // #1420
- INST(Vpmaxsw , VexRvm_Lx , V(660F00,EE,_,x,I,I,4,FVM), 0 , 145, 0 , 398, 181), // #1421
- INST(Vpmaxub , VexRvm_Lx , V(660F00,DE,_,x,I,I,4,FVM), 0 , 145, 0 , 398, 181), // #1422
+ INST(Vpmaxsw , VexRvm_Lx , V(660F00,EE,_,x,I,I,4,FVM), 0 , 145, 0 , 397, 181), // #1421
+ INST(Vpmaxub , VexRvm_Lx , V(660F00,DE,_,x,I,I,4,FVM), 0 , 145, 0 , 397, 181), // #1422
 INST(Vpmaxud , VexRvm_Lx , V(660F38,3F,_,x,I,0,4,FV ), 0 , 111, 0 , 215, 157), // #1423
 INST(Vpmaxuq , VexRvm_Lx , E(660F38,3F,_,x,_,1,4,FV ), 0 , 114, 0 , 217, 152), // #1424
- INST(Vpmaxuw , VexRvm_Lx , V(660F38,3E,_,x,I,I,4,FVM), 0 , 111, 0 , 398, 181), // #1425
- INST(Vpminsb , VexRvm_Lx , V(660F38,38,_,x,I,I,4,FVM), 0 , 111, 0 , 398, 181), // #1426
+ INST(Vpmaxuw , VexRvm_Lx , V(660F38,3E,_,x,I,I,4,FVM), 0 , 111, 0 , 397, 181), // #1425
+ INST(Vpminsb , VexRvm_Lx , V(660F38,38,_,x,I,I,4,FVM), 0 , 111, 0 , 397, 181), // #1426
 INST(Vpminsd , VexRvm_Lx , V(660F38,39,_,x,I,0,4,FV ), 0 , 111, 0 , 215, 157), // #1427
 INST(Vpminsq , VexRvm_Lx , E(660F38,39,_,x,_,1,4,FV ), 0 , 114, 0 , 217, 152), // #1428
- INST(Vpminsw , VexRvm_Lx , V(660F00,EA,_,x,I,I,4,FVM), 0 , 145, 0 , 398, 181), // #1429
- INST(Vpminub , VexRvm_Lx , V(660F00,DA,_,x,I,_,4,FVM), 0 , 145, 0 , 398, 181), // #1430
+ INST(Vpminsw , VexRvm_Lx , V(660F00,EA,_,x,I,I,4,FVM), 0 , 145, 0 , 397, 181), // #1429
+ INST(Vpminub , VexRvm_Lx , V(660F00,DA,_,x,I,_,4,FVM), 0 , 145, 0 , 397, 181), // #1430
 INST(Vpminud , VexRvm_Lx , V(660F38,3B,_,x,I,0,4,FV ), 0 , 111, 0 , 215, 157), // #1431
 INST(Vpminuq , VexRvm_Lx , E(660F38,3B,_,x,_,1,4,FV ), 0 , 114, 0 , 217, 152), // #1432
- INST(Vpminuw , VexRvm_Lx , V(660F38,3A,_,x,I,_,4,FVM), 0 , 111, 0 , 398, 181), // #1433
- INST(Vpmovb2m , VexRm_Lx , E(F30F38,29,_,x,_,0,_,_ ), 0 , 208, 0 , 399, 163), // #1434
- INST(Vpmovd2m , VexRm_Lx , E(F30F38,39,_,x,_,0,_,_ ), 0 , 208, 0 , 399, 155), // #1435
- INST(Vpmovdb , VexMr_Lx , E(F30F38,31,_,x,_,0,2,QVM), 0 , 218, 0 , 400, 152), // #1436
- INST(Vpmovdw , VexMr_Lx , E(F30F38,33,_,x,_,0,3,HVM), 0 , 219, 0 , 401, 152), // #1437
- INST(Vpmovm2b , VexRm_Lx , E(F30F38,28,_,x,_,0,_,_ ), 0 , 208, 0 , 367, 163), // #1438
- INST(Vpmovm2d , VexRm_Lx , E(F30F38,38,_,x,_,0,_,_ ), 0 , 208, 0 , 367, 155), // #1439
- INST(Vpmovm2q , VexRm_Lx , E(F30F38,38,_,x,_,1,_,_ ), 0 , 207, 0 , 367, 155), // #1440
- INST(Vpmovm2w , VexRm_Lx , E(F30F38,28,_,x,_,1,_,_ ), 0 , 207, 0 , 367, 163), // #1441
- INST(Vpmovmskb , VexRm_Lx , V(660F00,D7,_,x,I,_,_,_ ), 0 , 71 , 0 , 343, 178), // #1442
- INST(Vpmovq2m , VexRm_Lx , E(F30F38,39,_,x,_,1,_,_ ), 0 , 207, 0 , 399, 155), // #1443
- INST(Vpmovqb , VexMr_Lx , E(F30F38,32,_,x,_,0,1,OVM), 0 , 220, 0 , 402, 152), // #1444
- INST(Vpmovqd , VexMr_Lx , E(F30F38,35,_,x,_,0,3,HVM), 0 , 219, 0 , 401, 152), // #1445
- INST(Vpmovqw , VexMr_Lx , E(F30F38,34,_,x,_,0,2,QVM), 0 , 218, 0 , 400, 152), // #1446
- INST(Vpmovsdb , VexMr_Lx , E(F30F38,21,_,x,_,0,2,QVM), 0 , 218, 0 , 400, 152), // #1447
- INST(Vpmovsdw , VexMr_Lx , E(F30F38,23,_,x,_,0,3,HVM), 0 , 219, 0 , 401, 152), // #1448
- INST(Vpmovsqb , VexMr_Lx , E(F30F38,22,_,x,_,0,1,OVM), 0 , 220, 0 , 402, 152), // #1449
- INST(Vpmovsqd , VexMr_Lx , E(F30F38,25,_,x,_,0,3,HVM), 0 , 219, 0 , 401, 152), // #1450
- INST(Vpmovsqw , VexMr_Lx , E(F30F38,24,_,x,_,0,2,QVM), 0 , 218, 0 , 400, 152), // #1451
- INST(Vpmovswb , VexMr_Lx , E(F30F38,20,_,x,_,0,3,HVM), 0 , 219, 0 , 401, 163), // #1452
- INST(Vpmovsxbd , VexRm_Lx , V(660F38,21,_,x,I,I,2,QVM), 0 , 221, 0 , 403, 157), // #1453
- INST(Vpmovsxbq , VexRm_Lx , V(660F38,22,_,x,I,I,1,OVM), 0 , 222, 0 , 404, 157), // #1454
- INST(Vpmovsxbw , VexRm_Lx , V(660F38,20,_,x,I,I,3,HVM), 0 , 140, 0 , 405, 181), // #1455
- INST(Vpmovsxdq , VexRm_Lx , V(660F38,25,_,x,I,0,3,HVM), 0 , 140, 0 , 405, 157), // #1456
- INST(Vpmovsxwd , VexRm_Lx , V(660F38,23,_,x,I,I,3,HVM), 0 , 140, 0 , 405, 157), // #1457
- INST(Vpmovsxwq , VexRm_Lx , V(660F38,24,_,x,I,I,2,QVM), 0 , 221, 0 , 403, 157), // #1458
- INST(Vpmovusdb , VexMr_Lx , E(F30F38,11,_,x,_,0,2,QVM), 0 , 218, 0 , 400, 152), // #1459
- INST(Vpmovusdw , VexMr_Lx , E(F30F38,13,_,x,_,0,3,HVM), 0 , 219, 0 , 401, 152), // #1460
- INST(Vpmovusqb , VexMr_Lx , E(F30F38,12,_,x,_,0,1,OVM), 0 , 220, 0 , 402, 152), // #1461
- INST(Vpmovusqd , VexMr_Lx , E(F30F38,15,_,x,_,0,3,HVM), 0 , 219, 0 , 401, 152), // #1462
- INST(Vpmovusqw , VexMr_Lx , E(F30F38,14,_,x,_,0,2,QVM), 0 , 218, 0 , 400, 152), // #1463
- INST(Vpmovuswb , VexMr_Lx , E(F30F38,10,_,x,_,0,3,HVM), 0 , 219, 0 , 401, 163), // #1464
- INST(Vpmovw2m , VexRm_Lx , E(F30F38,29,_,x,_,1,_,_ ), 0 , 207, 0 , 399, 163), // #1465
- INST(Vpmovwb , VexMr_Lx , E(F30F38,30,_,x,_,0,3,HVM), 0 , 219, 0 , 401, 163), // #1466
- INST(Vpmovzxbd , VexRm_Lx , V(660F38,31,_,x,I,I,2,QVM), 0 , 221, 0 , 403, 157), // #1467
- INST(Vpmovzxbq , VexRm_Lx , V(660F38,32,_,x,I,I,1,OVM), 0 , 222, 0 , 404, 157), // #1468
- INST(Vpmovzxbw , VexRm_Lx , V(660F38,30,_,x,I,I,3,HVM), 0 , 140, 0 , 405, 181), // #1469
- INST(Vpmovzxdq , VexRm_Lx , V(660F38,35,_,x,I,0,3,HVM), 0 , 140, 0 , 405, 157), // #1470
- INST(Vpmovzxwd , VexRm_Lx , V(660F38,33,_,x,I,I,3,HVM), 0 , 140, 0 , 405, 157), // #1471
- INST(Vpmovzxwq , VexRm_Lx , V(660F38,34,_,x,I,I,2,QVM), 0 , 221, 0 , 403, 157), // #1472
+ INST(Vpminuw , VexRvm_Lx , V(660F38,3A,_,x,I,_,4,FVM), 0 , 111, 0 , 397, 181), // #1433
+ INST(Vpmovb2m , VexRm_Lx , E(F30F38,29,_,x,_,0,_,_ ), 0 , 208, 0 , 398, 163), // #1434
+ INST(Vpmovd2m , VexRm_Lx , E(F30F38,39,_,x,_,0,_,_ ), 0 , 208, 0 , 398, 155), // #1435
+ INST(Vpmovdb , VexMr_Lx , E(F30F38,31,_,x,_,0,2,QVM), 0 , 218, 0 , 399, 152), // #1436
+ INST(Vpmovdw , VexMr_Lx , E(F30F38,33,_,x,_,0,3,HVM), 0 , 219, 0 , 400, 152), // #1437
+ INST(Vpmovm2b , VexRm_Lx , E(F30F38,28,_,x,_,0,_,_ ), 0 , 208, 0 , 366, 163), // #1438
+ INST(Vpmovm2d , VexRm_Lx , E(F30F38,38,_,x,_,0,_,_ ), 0 , 208, 0 , 366, 155), // #1439
+ INST(Vpmovm2q , VexRm_Lx , E(F30F38,38,_,x,_,1,_,_ ), 0 , 207, 0 , 366, 155), // #1440
+ INST(Vpmovm2w , VexRm_Lx , E(F30F38,28,_,x,_,1,_,_ ), 0 , 207, 0 , 366, 163), // #1441
+ INST(Vpmovmskb , VexRm_Lx , V(660F00,D7,_,x,I,_,_,_ ), 0 , 71 , 0 , 342, 178), // #1442
+ INST(Vpmovq2m , VexRm_Lx , E(F30F38,39,_,x,_,1,_,_ ), 0 , 207, 0 , 398, 155), // #1443
+ INST(Vpmovqb , VexMr_Lx , E(F30F38,32,_,x,_,0,1,OVM), 0 , 220, 0 , 401, 152), // #1444
+ INST(Vpmovqd , VexMr_Lx , E(F30F38,35,_,x,_,0,3,HVM), 0 , 219, 0 , 400, 152), // #1445
+ INST(Vpmovqw , VexMr_Lx , E(F30F38,34,_,x,_,0,2,QVM), 0 , 218, 0 , 399, 152), // #1446
+ INST(Vpmovsdb , VexMr_Lx , E(F30F38,21,_,x,_,0,2,QVM), 0 , 218, 0 , 399, 152), // #1447
+ INST(Vpmovsdw , VexMr_Lx , E(F30F38,23,_,x,_,0,3,HVM), 0 , 219, 0 , 400, 152), // #1448
+ INST(Vpmovsqb , VexMr_Lx , E(F30F38,22,_,x,_,0,1,OVM), 0 , 220, 0 , 401, 152), // #1449
+ INST(Vpmovsqd , VexMr_Lx , E(F30F38,25,_,x,_,0,3,HVM), 0 , 219, 0 , 400, 152), // #1450
+ INST(Vpmovsqw , VexMr_Lx , E(F30F38,24,_,x,_,0,2,QVM), 0 , 218, 0 , 399, 152), // #1451
+ INST(Vpmovswb , VexMr_Lx , E(F30F38,20,_,x,_,0,3,HVM), 0 , 219, 0 , 400, 163), // #1452
+ INST(Vpmovsxbd , VexRm_Lx , V(660F38,21,_,x,I,I,2,QVM), 0 , 221, 0 , 402, 157), // #1453
+ INST(Vpmovsxbq , VexRm_Lx , V(660F38,22,_,x,I,I,1,OVM), 0 , 222, 0 , 403, 157), // #1454
+ INST(Vpmovsxbw , VexRm_Lx , V(660F38,20,_,x,I,I,3,HVM), 0 , 140, 0 , 404, 181), // #1455
+ INST(Vpmovsxdq , VexRm_Lx , V(660F38,25,_,x,I,0,3,HVM), 0 , 140, 0 , 404, 157), // #1456
+ INST(Vpmovsxwd , VexRm_Lx , V(660F38,23,_,x,I,I,3,HVM), 0 , 140, 0 , 404, 157), // #1457
+ INST(Vpmovsxwq , VexRm_Lx , V(660F38,24,_,x,I,I,2,QVM), 0 , 221, 0 , 402, 157), // #1458
+ INST(Vpmovusdb , VexMr_Lx , E(F30F38,11,_,x,_,0,2,QVM), 0 , 218, 0 , 399, 152), // #1459
+ INST(Vpmovusdw , VexMr_Lx , E(F30F38,13,_,x,_,0,3,HVM), 0 , 219, 0 , 400, 152), // #1460
+ INST(Vpmovusqb , VexMr_Lx , E(F30F38,12,_,x,_,0,1,OVM), 0 , 220, 0 , 401, 152), // #1461
+ INST(Vpmovusqd , VexMr_Lx , E(F30F38,15,_,x,_,0,3,HVM), 0 , 219, 0 , 400, 152), // #1462
+ INST(Vpmovusqw , VexMr_Lx , E(F30F38,14,_,x,_,0,2,QVM), 0 , 218, 0 , 399, 152), // #1463
+ INST(Vpmovuswb , VexMr_Lx , E(F30F38,10,_,x,_,0,3,HVM), 0 , 219, 0 , 400, 163), // #1464
+ INST(Vpmovw2m , VexRm_Lx , E(F30F38,29,_,x,_,1,_,_ ), 0 , 207, 0 , 398, 163), // #1465
+ INST(Vpmovwb , VexMr_Lx , E(F30F38,30,_,x,_,0,3,HVM), 0 , 219, 0 , 400, 163), // #1466
+ INST(Vpmovzxbd , VexRm_Lx , V(660F38,31,_,x,I,I,2,QVM), 0 , 221, 0 , 402, 157), // #1467
+ INST(Vpmovzxbq , VexRm_Lx , V(660F38,32,_,x,I,I,1,OVM), 0 , 222, 0 , 403, 157), // #1468
+ INST(Vpmovzxbw , VexRm_Lx , V(660F38,30,_,x,I,I,3,HVM), 0 , 140, 0 , 404, 181), // #1469
+ INST(Vpmovzxdq , VexRm_Lx , V(660F38,35,_,x,I,0,3,HVM), 0 , 140, 0 , 404, 157), // #1470
+ INST(Vpmovzxwd , VexRm_Lx , V(660F38,33,_,x,I,I,3,HVM), 0 , 140, 0 , 404, 157), // #1471
+ INST(Vpmovzxwq , VexRm_Lx , V(660F38,34,_,x,I,I,2,QVM), 0 , 221, 0 , 402, 157), // #1472
 INST(Vpmuldq , VexRvm_Lx , V(660F38,28,_,x,I,1,4,FV ), 0 , 211, 0 , 212, 157), // #1473
 INST(Vpmulhrsw , VexRvm_Lx , V(660F38,0B,_,x,I,I,4,FVM), 0 , 111, 0 , 322, 181), // #1474
 INST(Vpmulhuw , VexRvm_Lx , V(660F00,E4,_,x,I,I,4,FVM), 0 , 145, 0 , 322, 181), // #1475
@@ -1529,97 +1529,97 @@ const InstDB::InstInfo InstDB::_instInfoTable[] = {
 INST(Vpmultishiftqb , VexRvm_Lx , E(660F38,83,_,x,_,1,4,FV ), 0 , 114, 0 , 217, 190), // #1480
 INST(Vpmuludq , VexRvm_Lx , V(660F00,F4,_,x,I,1,4,FV ), 0 , 104, 0 , 212, 157), // #1481
 INST(Vpopcntb , VexRm_Lx , E(660F38,54,_,x,_,0,4,FV ), 0 , 115, 0 , 286, 194), // #1482
- INST(Vpopcntd , VexRm_Lx , E(660F38,55,_,x,_,0,4,FVM), 0 , 115, 0 , 381, 195), // #1483
- INST(Vpopcntq , VexRm_Lx , E(660F38,55,_,x,_,1,4,FVM), 0 , 114, 0 , 357, 195), // #1484
+ INST(Vpopcntd , VexRm_Lx , E(660F38,55,_,x,_,0,4,FVM), 0 , 115, 0 , 380, 195), // #1483
+ INST(Vpopcntq , VexRm_Lx , E(660F38,55,_,x,_,1,4,FVM), 0 , 114, 0 , 356, 195), // #1484
 INST(Vpopcntw , VexRm_Lx , E(660F38,54,_,x,_,1,4,FV ), 0 , 114, 0 , 286, 194), // #1485
- INST(Vpor , VexRvm_Lx , V(660F00,EB,_,x,I,_,_,_ ), 0 , 71 , 0 , 358, 178), // #1486
- INST(Vpord , VexRvm_Lx , E(660F00,EB,_,x,_,0,4,FV ), 0 , 200, 0 , 359, 152), // #1487
- INST(Vporq , VexRvm_Lx , E(660F00,EB,_,x,_,1,4,FV ), 0 , 136, 0 , 363, 152), // #1488
- INST(Vpperm , VexRvrmRvmr , V(XOP_M8,A3,_,0,x,_,_,_ ), 0 , 210, 0 , 406, 168), // #1489
- INST(Vprold , VexVmi_Lx , E(660F00,72,1,x,_,0,4,FV ), 0 , 223, 0 , 407, 152), // #1490
- INST(Vprolq , VexVmi_Lx , E(660F00,72,1,x,_,1,4,FV ), 0 , 224, 0 , 408, 152), // #1491
+ INST(Vpor , VexRvm_Lx , V(660F00,EB,_,x,I,_,_,_ ), 0 , 71 , 0 , 357, 178), // #1486
+ INST(Vpord , VexRvm_Lx , E(660F00,EB,_,x,_,0,4,FV ), 0 , 200, 0 , 358, 152), // #1487
+ INST(Vporq , VexRvm_Lx , E(660F00,EB,_,x,_,1,4,FV ), 0 , 136, 0 , 362, 152), // #1488
+ INST(Vpperm , VexRvrmRvmr , V(XOP_M8,A3,_,0,x,_,_,_ ), 0 , 210, 0 , 405, 168), // #1489
+ INST(Vprold , VexVmi_Lx , E(660F00,72,1,x,_,0,4,FV ), 0 , 223, 0 , 406, 152), // #1490
+ INST(Vprolq , VexVmi_Lx , E(660F00,72,1,x,_,1,4,FV ), 0 , 224, 0 , 407, 152), // #1491
 INST(Vprolvd , VexRvm_Lx , E(660F38,15,_,x,_,0,4,FV ), 0 , 115, 0 , 218, 152), // #1492
 INST(Vprolvq , VexRvm_Lx , E(660F38,15,_,x,_,1,4,FV ), 0 , 114, 0 , 217, 152), // #1493
- INST(Vprord , VexVmi_Lx , E(660F00,72,0,x,_,0,4,FV ), 0 , 200, 0 , 407, 152), // #1494
- INST(Vprorq , VexVmi_Lx , E(660F00,72,0,x,_,1,4,FV ), 0 , 136, 0 , 408, 152), // #1495
+ INST(Vprord , VexVmi_Lx , E(660F00,72,0,x,_,0,4,FV ), 0 , 200, 0 , 406, 152), // #1494
+ INST(Vprorq , VexVmi_Lx , E(660F00,72,0,x,_,1,4,FV ), 0 , 136, 0 , 407, 152), // #1495
 INST(Vprorvd , VexRvm_Lx , E(660F38,14,_,x,_,0,4,FV ), 0 , 115, 0 , 218, 152), // #1496
 INST(Vprorvq , VexRvm_Lx , E(660F38,14,_,x,_,1,4,FV ), 0 , 114, 0 , 217, 152), // #1497
- INST(Vprotb , VexRvmRmvRmi , V(XOP_M9,90,_,0,x,_,_,_ ), V(XOP_M8,C0,_,0,x,_,_,_ ), 81 , 122, 409, 168), // #1498
- INST(Vprotd , VexRvmRmvRmi , V(XOP_M9,92,_,0,x,_,_,_ ), V(XOP_M8,C2,_,0,x,_,_,_ ), 81 , 123, 409, 168), // #1499
- INST(Vprotq , VexRvmRmvRmi , V(XOP_M9,93,_,0,x,_,_,_ ), V(XOP_M8,C3,_,0,x,_,_,_ ), 81 , 124, 409, 168), // #1500
- INST(Vprotw , VexRvmRmvRmi , V(XOP_M9,91,_,0,x,_,_,_ ), V(XOP_M8,C1,_,0,x,_,_,_ ), 81 , 125, 409, 168), // #1501
+ INST(Vprotb , VexRvmRmvRmi , V(XOP_M9,90,_,0,x,_,_,_ ), V(XOP_M8,C0,_,0,x,_,_,_ ), 81 , 122, 408, 168), // #1498
+ INST(Vprotd , VexRvmRmvRmi , V(XOP_M9,92,_,0,x,_,_,_ ), V(XOP_M8,C2,_,0,x,_,_,_ ), 81 , 123, 408, 168), // #1499
+ INST(Vprotq , VexRvmRmvRmi , V(XOP_M9,93,_,0,x,_,_,_ ), V(XOP_M8,C3,_,0,x,_,_,_ ), 81 , 124, 408, 168), // #1500
+ INST(Vprotw , VexRvmRmvRmi , V(XOP_M9,91,_,0,x,_,_,_ ), V(XOP_M8,C1,_,0,x,_,_,_ ), 81 , 125, 408, 168), // #1501
 INST(Vpsadbw , VexRvm_Lx , V(660F00,F6,_,x,I,I,4,FVM), 0 , 145, 0 , 207, 181), // #1502
- INST(Vpscatterdd , VexMr_VM , E(660F38,A0,_,x,_,0,2,T1S), 0 , 130, 0 , 410, 152), // #1503
- INST(Vpscatterdq , VexMr_VM , E(660F38,A0,_,x,_,1,3,T1S), 0 , 129, 0 , 411, 152), // #1504
- INST(Vpscatterqd , VexMr_VM , E(660F38,A1,_,x,_,0,2,T1S), 0 , 130, 0 , 412, 152), // #1505
- INST(Vpscatterqq , VexMr_VM , E(660F38,A1,_,x,_,1,3,T1S), 0 , 129, 0 , 413, 152), // #1506
- INST(Vpshab , VexRvmRmv , V(XOP_M9,98,_,0,x,_,_,_ ), 0 , 81 , 0 , 414, 168), // #1507
- INST(Vpshad , VexRvmRmv , V(XOP_M9,9A,_,0,x,_,_,_ ), 0 , 81 , 0 , 414, 168), // #1508
- INST(Vpshaq , VexRvmRmv , V(XOP_M9,9B,_,0,x,_,_,_ ), 0 , 81 , 0 , 414, 168), // #1509
- INST(Vpshaw , VexRvmRmv , V(XOP_M9,99,_,0,x,_,_,_ ), 0 , 81 , 0 , 414, 168), // #1510
- INST(Vpshlb , VexRvmRmv , V(XOP_M9,94,_,0,x,_,_,_ ), 0 , 81 , 0 , 414, 168), // #1511
- INST(Vpshld , VexRvmRmv , V(XOP_M9,96,_,0,x,_,_,_ ), 0 , 81 , 0 , 414, 168), // #1512
+ INST(Vpscatterdd , VexMr_VM , E(660F38,A0,_,x,_,0,2,T1S), 0 , 130, 0 , 409, 152), // #1503
+ INST(Vpscatterdq , VexMr_VM , E(660F38,A0,_,x,_,1,3,T1S), 0 , 129, 0 , 410, 152), // #1504
+ INST(Vpscatterqd , VexMr_VM , E(660F38,A1,_,x,_,0,2,T1S), 0 , 130, 0 , 411, 152), // #1505
+ INST(Vpscatterqq , VexMr_VM , E(660F38,A1,_,x,_,1,3,T1S), 0 , 129, 0 , 412, 152), // #1506
+ INST(Vpshab , VexRvmRmv , V(XOP_M9,98,_,0,x,_,_,_ ), 0 , 81 , 0 , 413, 168), // #1507
+ INST(Vpshad , VexRvmRmv , V(XOP_M9,9A,_,0,x,_,_,_ ), 0 , 81 , 0 , 413, 168), // #1508
+ INST(Vpshaq , VexRvmRmv , V(XOP_M9,9B,_,0,x,_,_,_ ), 0 , 81 , 0 , 413, 168), // #1509
+ INST(Vpshaw , VexRvmRmv , V(XOP_M9,99,_,0,x,_,_,_ ), 0 , 81 , 0 , 413, 168), // #1510
+ INST(Vpshlb , VexRvmRmv , V(XOP_M9,94,_,0,x,_,_,_ ), 0 , 81 , 0 , 413, 168), // #1511
+ INST(Vpshld , VexRvmRmv , V(XOP_M9,96,_,0,x,_,_,_ ), 0 , 81 , 0 , 413, 168), // #1512
 INST(Vpshldd , VexRvmi_Lx , E(660F3A,71,_,x,_,0,4,FV ), 0 , 112, 0 , 210, 186), // #1513
 INST(Vpshldq , VexRvmi_Lx , E(660F3A,71,_,x,_,1,4,FV ), 0 , 113, 0 , 211, 186), // #1514
 INST(Vpshldvd , VexRvm_Lx , E(660F38,71,_,x,_,0,4,FV ), 0 , 115, 0 , 218, 186), // #1515
 INST(Vpshldvq , VexRvm_Lx , E(660F38,71,_,x,_,1,4,FV ), 0 , 114, 0 , 217, 186), // #1516
- INST(Vpshldvw , VexRvm_Lx , E(660F38,70,_,x,_,1,4,FVM), 0 , 114, 0 , 364, 186), // #1517
+ INST(Vpshldvw , VexRvm_Lx , E(660F38,70,_,x,_,1,4,FVM), 0 , 114, 0 , 363, 186), // #1517
 INST(Vpshldw , VexRvmi_Lx , E(660F3A,70,_,x,_,1,4,FVM), 0 , 113, 0 , 282, 186), // #1518
- INST(Vpshlq , VexRvmRmv , V(XOP_M9,97,_,0,x,_,_,_ ), 0 , 81 , 0 , 414, 168), // #1519
- INST(Vpshlw , VexRvmRmv , V(XOP_M9,95,_,0,x,_,_,_ ), 0 , 81 , 0 , 414, 168), // #1520
+ INST(Vpshlq , VexRvmRmv , V(XOP_M9,97,_,0,x,_,_,_ ), 0 , 81 , 0 , 413, 168), // #1519
+ INST(Vpshlw , VexRvmRmv , V(XOP_M9,95,_,0,x,_,_,_ ), 0 , 81 , 0 , 413, 168), // #1520
 INST(Vpshrdd , VexRvmi_Lx , E(660F3A,73,_,x,_,0,4,FV ), 0 , 112, 0 , 210, 186), // #1521
 INST(Vpshrdq , VexRvmi_Lx , E(660F3A,73,_,x,_,1,4,FV ), 0 , 113, 0 , 211, 186), // #1522
 INST(Vpshrdvd , VexRvm_Lx , E(660F38,73,_,x,_,0,4,FV ), 0 , 115, 0 , 218, 186), // #1523
 INST(Vpshrdvq , VexRvm_Lx , E(660F38,73,_,x,_,1,4,FV ), 0 , 114, 0 , 217, 186), // #1524
- INST(Vpshrdvw , VexRvm_Lx , E(660F38,72,_,x,_,1,4,FVM), 0 , 114, 0 , 364, 186), // #1525
+ INST(Vpshrdvw , VexRvm_Lx , E(660F38,72,_,x,_,1,4,FVM), 0 , 114, 0 , 363, 186), // #1525
 INST(Vpshrdw , VexRvmi_Lx , E(660F3A,72,_,x,_,1,4,FVM), 0 , 113, 0 , 282, 186), // #1526
 INST(Vpshufb , VexRvm_Lx , V(660F38,00,_,x,I,I,4,FVM), 0 , 111, 0 , 322, 181), // #1527
- INST(Vpshufbitqmb , VexRvm_Lx , E(660F38,8F,_,x,0,0,4,FVM), 0 , 115, 0 , 415, 194), // #1528
- INST(Vpshufd , VexRmi_Lx , V(660F00,70,_,x,I,0,4,FV ), 0 , 145, 0 , 416, 157), // #1529
- INST(Vpshufhw , VexRmi_Lx , V(F30F00,70,_,x,I,I,4,FVM), 0 , 162, 0 , 417, 181), // #1530
- INST(Vpshuflw , VexRmi_Lx , V(F20F00,70,_,x,I,I,4,FVM), 0 , 225, 0 , 417, 181), // #1531
+ INST(Vpshufbitqmb , VexRvm_Lx , E(660F38,8F,_,x,0,0,4,FVM), 0 , 115, 0 , 414, 194), // #1528
+ INST(Vpshufd , VexRmi_Lx , V(660F00,70,_,x,I,0,4,FV ), 0 , 145, 0 , 415, 157), // #1529
+ INST(Vpshufhw , VexRmi_Lx , V(F30F00,70,_,x,I,I,4,FVM), 0 , 162, 0 , 416, 181), // #1530
+ INST(Vpshuflw , VexRmi_Lx , V(F20F00,70,_,x,I,I,4,FVM), 0 , 225, 0 , 416, 181), // #1531
 INST(Vpsignb , VexRvm_Lx , V(660F38,08,_,x,I,_,_,_ ), 0 , 30 , 0 , 206, 178), // #1532
 INST(Vpsignd , VexRvm_Lx , V(660F38,0A,_,x,I,_,_,_ ), 0 , 30 , 0 , 206, 178), // #1533
 INST(Vpsignw , VexRvm_Lx , V(660F38,09,_,x,I,_,_,_ ), 0 , 30 , 0 , 206, 178), // #1534
- INST(Vpslld , VexRvmVmi_Lx_MEvex , V(660F00,F2,_,x,I,0,4,128), V(660F00,72,6,x,I,0,4,FV ), 226, 126, 418, 157), // #1535
- INST(Vpslldq , VexVmi_Lx_MEvex , V(660F00,73,7,x,I,I,4,FVM), 0 , 227, 0 , 419, 181), // #1536
- INST(Vpsllq , VexRvmVmi_Lx_MEvex , V(660F00,F3,_,x,I,1,4,128), V(660F00,73,6,x,I,1,4,FV ), 228, 127, 420, 157), // #1537
+ INST(Vpslld , VexRvmVmi_Lx_MEvex , V(660F00,F2,_,x,I,0,4,128), V(660F00,72,6,x,I,0,4,FV ), 226, 126, 417, 157), // #1535
+ INST(Vpslldq , VexVmi_Lx_MEvex , V(660F00,73,7,x,I,I,4,FVM), 0 , 227, 0 , 418, 181), // #1536
+ INST(Vpsllq , VexRvmVmi_Lx_MEvex , V(660F00,F3,_,x,I,1,4,128), V(660F00,73,6,x,I,1,4,FV ), 228, 127, 419, 157), // #1537
 INST(Vpsllvd , VexRvm_Lx , V(660F38,47,_,x,0,0,4,FV ), 0 , 111, 0 , 213, 169), // #1538
 INST(Vpsllvq , VexRvm_Lx , V(660F38,47,_,x,1,1,4,FV ), 0 , 184, 0 , 212, 169), // #1539
- INST(Vpsllvw , VexRvm_Lx , E(660F38,12,_,x,_,1,4,FVM), 0 , 114, 0 , 364, 163), // #1540
- INST(Vpsllw , VexRvmVmi_Lx_MEvex , V(660F00,F1,_,x,I,I,4,128), V(660F00,71,6,x,I,I,4,FVM), 226, 128, 421, 181), // #1541
- INST(Vpsrad , VexRvmVmi_Lx_MEvex , V(660F00,E2,_,x,I,0,4,128), V(660F00,72,4,x,I,0,4,FV ), 226, 129, 418, 157), // #1542
- INST(Vpsraq , VexRvmVmi_Lx_MEvex , E(660F00,E2,_,x,_,1,4,128), E(660F00,72,4,x,_,1,4,FV ), 229, 130, 422, 152), // #1543
+ INST(Vpsllvw , VexRvm_Lx , E(660F38,12,_,x,_,1,4,FVM), 0 , 114, 0 , 363, 163), // #1540
+ INST(Vpsllw , VexRvmVmi_Lx_MEvex , V(660F00,F1,_,x,I,I,4,128), V(660F00,71,6,x,I,I,4,FVM), 226, 128, 420, 181), // #1541
+ INST(Vpsrad , VexRvmVmi_Lx_MEvex , V(660F00,E2,_,x,I,0,4,128), V(660F00,72,4,x,I,0,4,FV ), 226, 129, 417, 157), // #1542
+ INST(Vpsraq , VexRvmVmi_Lx_MEvex , E(660F00,E2,_,x,_,1,4,128), E(660F00,72,4,x,_,1,4,FV ), 229, 130, 421, 152), // #1543
 INST(Vpsravd , VexRvm_Lx , V(660F38,46,_,x,0,0,4,FV ), 0 , 111, 0 , 213, 169), // #1544
 INST(Vpsravq , VexRvm_Lx , E(660F38,46,_,x,_,1,4,FV ), 0 , 114, 0 , 217, 152), // #1545
- INST(Vpsravw , VexRvm_Lx , E(660F38,11,_,x,_,1,4,FVM), 0 , 114, 0 , 364, 163), // #1546
- INST(Vpsraw , VexRvmVmi_Lx_MEvex , V(660F00,E1,_,x,I,I,4,128), V(660F00,71,4,x,I,I,4,FVM), 226, 131, 421, 181), // #1547
- INST(Vpsrld , VexRvmVmi_Lx_MEvex , V(660F00,D2,_,x,I,0,4,128), V(660F00,72,2,x,I,0,4,FV ), 226, 132, 418, 157), // #1548
- INST(Vpsrldq , VexVmi_Lx_MEvex , V(660F00,73,3,x,I,I,4,FVM), 0 , 230, 0 , 419, 181), // #1549
- INST(Vpsrlq , VexRvmVmi_Lx_MEvex , V(660F00,D3,_,x,I,1,4,128), V(660F00,73,2,x,I,1,4,FV ), 228, 133, 420, 157), // #1550
+ INST(Vpsravw , VexRvm_Lx , E(660F38,11,_,x,_,1,4,FVM), 0 , 114, 0 , 363, 163), // #1546
+ INST(Vpsraw , VexRvmVmi_Lx_MEvex , V(660F00,E1,_,x,I,I,4,128), V(660F00,71,4,x,I,I,4,FVM), 226, 131, 420, 181), // #1547
+ INST(Vpsrld , VexRvmVmi_Lx_MEvex , V(660F00,D2,_,x,I,0,4,128), V(660F00,72,2,x,I,0,4,FV ), 226, 132, 417, 157), // #1548
+ INST(Vpsrldq , VexVmi_Lx_MEvex , V(660F00,73,3,x,I,I,4,FVM), 0 , 230, 0 , 418, 181), // #1549
+ INST(Vpsrlq , VexRvmVmi_Lx_MEvex , V(660F00,D3,_,x,I,1,4,128), V(660F00,73,2,x,I,1,4,FV ), 228, 133, 419, 157), // #1550
 INST(Vpsrlvd , VexRvm_Lx , V(660F38,45,_,x,0,0,4,FV ), 0 , 111, 0 , 213, 169), // #1551
 INST(Vpsrlvq , VexRvm_Lx , V(660F38,45,_,x,1,1,4,FV ), 0 , 184, 0 , 212, 169), // #1552
- INST(Vpsrlvw , VexRvm_Lx , E(660F38,10,_,x,_,1,4,FVM), 0 , 114, 0 , 364, 163), // #1553
- INST(Vpsrlw , VexRvmVmi_Lx_MEvex , V(660F00,D1,_,x,I,I,4,128), V(660F00,71,2,x,I,I,4,FVM), 226, 134, 421, 181), // #1554
- INST(Vpsubb , VexRvm_Lx , V(660F00,F8,_,x,I,I,4,FVM), 0 , 145, 0 , 423, 181), // #1555
- INST(Vpsubd , VexRvm_Lx , V(660F00,FA,_,x,I,0,4,FV ), 0 , 145, 0 , 424, 157), // #1556
- INST(Vpsubq , VexRvm_Lx , V(660F00,FB,_,x,I,1,4,FV ), 0 , 104, 0 , 425, 157), // #1557
- INST(Vpsubsb , VexRvm_Lx , V(660F00,E8,_,x,I,I,4,FVM), 0 , 145, 0 , 423, 181), // #1558
- INST(Vpsubsw , VexRvm_Lx , V(660F00,E9,_,x,I,I,4,FVM), 0 , 145, 0 , 423, 181), // #1559
- INST(Vpsubusb , VexRvm_Lx , V(660F00,D8,_,x,I,I,4,FVM), 0 , 145, 0 , 423, 181), // #1560
- INST(Vpsubusw , VexRvm_Lx , V(660F00,D9,_,x,I,I,4,FVM), 0 , 145, 0 , 423, 181), // #1561
- INST(Vpsubw , VexRvm_Lx , V(660F00,F9,_,x,I,I,4,FVM), 0 , 145, 0 , 423, 181), // #1562
+ INST(Vpsrlvw , VexRvm_Lx , E(660F38,10,_,x,_,1,4,FVM), 0 , 114, 0 , 363, 163), // #1553
+ INST(Vpsrlw , VexRvmVmi_Lx_MEvex , V(660F00,D1,_,x,I,I,4,128), V(660F00,71,2,x,I,I,4,FVM), 226, 134, 420, 181), // #1554
+ INST(Vpsubb , VexRvm_Lx , V(660F00,F8,_,x,I,I,4,FVM), 0 , 145, 0 , 422, 181), // #1555
+ INST(Vpsubd , VexRvm_Lx , V(660F00,FA,_,x,I,0,4,FV ), 0 , 145, 0 , 423, 157), // #1556
+ INST(Vpsubq , VexRvm_Lx , V(660F00,FB,_,x,I,1,4,FV ), 0 , 104, 0 , 424, 157), // #1557
+ INST(Vpsubsb , VexRvm_Lx , V(660F00,E8,_,x,I,I,4,FVM), 0 , 145, 0 , 422, 181), // #1558
+ INST(Vpsubsw , VexRvm_Lx , V(660F00,E9,_,x,I,I,4,FVM), 0 , 145, 0 , 422, 181), // #1559
+ INST(Vpsubusb , VexRvm_Lx , V(660F00,D8,_,x,I,I,4,FVM), 0 , 145, 0 , 422, 181), // #1560
+ INST(Vpsubusw , VexRvm_Lx , V(660F00,D9,_,x,I,I,4,FVM), 0 , 145, 0 , 422, 181), // #1561
+ INST(Vpsubw , VexRvm_Lx , V(660F00,F9,_,x,I,I,4,FVM), 0 , 145, 0 , 422, 181), // #1562
 INST(Vpternlogd , VexRvmi_Lx , E(660F3A,25,_,x,_,0,4,FV ), 0 , 112, 0 , 210, 152), // #1563
 INST(Vpternlogq , VexRvmi_Lx , E(660F3A,25,_,x,_,1,4,FV ), 0 , 113, 0 , 211, 152), // #1564
 INST(Vptest , VexRm_Lx , V(660F38,17,_,x,I,_,_,_ ), 0 , 30 , 0 , 305, 185), // #1565
- INST(Vptestmb , VexRvm_Lx , E(660F38,26,_,x,_,0,4,FVM), 0 , 115, 0 , 415, 163), // #1566
- INST(Vptestmd , VexRvm_Lx , E(660F38,27,_,x,_,0,4,FV ), 0 , 115, 0 , 426, 152), // #1567
- INST(Vptestmq , VexRvm_Lx , E(660F38,27,_,x,_,1,4,FV ), 0 , 114, 0 , 427, 152), // #1568
- INST(Vptestmw , VexRvm_Lx , E(660F38,26,_,x,_,1,4,FVM), 0 , 114, 0 , 415, 163), // #1569
- INST(Vptestnmb , VexRvm_Lx , E(F30F38,26,_,x,_,0,4,FVM), 0 , 171, 0 , 415, 163), // #1570
- INST(Vptestnmd , VexRvm_Lx , E(F30F38,27,_,x,_,0,4,FV ), 0 , 171, 0 , 426, 152), // #1571
- INST(Vptestnmq , VexRvm_Lx , E(F30F38,27,_,x,_,1,4,FV ), 0 , 231, 0 , 427, 152), // #1572
- INST(Vptestnmw , VexRvm_Lx , E(F30F38,26,_,x,_,1,4,FVM), 0 , 231, 0 , 415, 163), // #1573
+ INST(Vptestmb , VexRvm_Lx , E(660F38,26,_,x,_,0,4,FVM), 0 , 115, 0 , 414, 163), // #1566
+ INST(Vptestmd , VexRvm_Lx , E(660F38,27,_,x,_,0,4,FV ), 0 , 115, 0 , 425, 152), // #1567
+ INST(Vptestmq , VexRvm_Lx , E(660F38,27,_,x,_,1,4,FV ), 0 , 114, 0 , 426, 152), // #1568
+ INST(Vptestmw , VexRvm_Lx , E(660F38,26,_,x,_,1,4,FVM), 0 , 114, 0 , 414, 163), // #1569
+ INST(Vptestnmb , VexRvm_Lx , E(F30F38,26,_,x,_,0,4,FVM), 0 , 171, 0 , 414, 163), // #1570
+ INST(Vptestnmd , VexRvm_Lx , E(F30F38,27,_,x,_,0,4,FV ), 0 , 171, 0 , 425, 152), // #1571
+ INST(Vptestnmq , VexRvm_Lx , E(F30F38,27,_,x,_,1,4,FV ), 0 , 231, 0 , 426, 152), // #1572
+ INST(Vptestnmw , VexRvm_Lx , E(F30F38,26,_,x,_,1,4,FVM), 0 , 231, 0 , 414, 163), // #1573
 INST(Vpunpckhbw , VexRvm_Lx , V(660F00,68,_,x,I,I,4,FVM), 0 , 145, 0 , 322, 181), // #1574
 INST(Vpunpckhdq , VexRvm_Lx , V(660F00,6A,_,x,I,0,4,FV ), 0 , 145, 0 , 213, 157), // #1575
 INST(Vpunpckhqdq , VexRvm_Lx , V(660F00,6D,_,x,I,1,4,FV ), 0 , 104, 0 , 212, 157), // #1576
@@ -1628,61 +1628,61 @@ const InstDB::InstInfo InstDB::_instInfoTable[] = {
 INST(Vpunpckldq , VexRvm_Lx , V(660F00,62,_,x,I,0,4,FV ), 0 , 145, 0 , 213, 157), // #1579
 INST(Vpunpcklqdq , VexRvm_Lx , V(660F00,6C,_,x,I,1,4,FV ), 0 , 104, 0 , 212, 157), // #1580
 INST(Vpunpcklwd , VexRvm_Lx , V(660F00,61,_,x,I,I,4,FVM), 0 , 145, 0 , 322, 181), // #1581
- INST(Vpxor , VexRvm_Lx , V(660F00,EF,_,x,I,_,_,_ ), 0 , 71 , 0 , 360, 178), // #1582
- INST(Vpxord , VexRvm_Lx , E(660F00,EF,_,x,_,0,4,FV ), 0 , 200, 0 , 361, 152), // #1583
- INST(Vpxorq , VexRvm_Lx , E(660F00,EF,_,x,_,1,4,FV ), 0 , 136, 0 , 362, 152), // #1584
+ INST(Vpxor , VexRvm_Lx , V(660F00,EF,_,x,I,_,_,_ ), 0 , 71 , 0 , 359, 178), // #1582
+ INST(Vpxord , VexRvm_Lx , E(660F00,EF,_,x,_,0,4,FV ), 0 , 200, 0 , 360, 152), // #1583
+ INST(Vpxorq , VexRvm_Lx , E(660F00,EF,_,x,_,1,4,FV ), 0 , 136, 0 , 361, 152), // #1584
 INST(Vrangepd , VexRvmi_Lx , E(660F3A,50,_,x,_,1,4,FV ), 0 , 113, 0 , 292, 155), // #1585
 INST(Vrangeps , VexRvmi_Lx , E(660F3A,50,_,x,_,0,4,FV ), 0 , 112, 0 , 293, 155), // #1586
 INST(Vrangesd , VexRvmi , E(660F3A,51,_,I,_,1,3,T1S), 0 , 182, 0 , 294, 76 ), // #1587
 INST(Vrangess , VexRvmi , E(660F3A,51,_,I,_,0,2,T1S), 0 , 183, 0 , 295, 76 ), // #1588
- INST(Vrcp14pd , VexRm_Lx , E(660F38,4C,_,x,_,1,4,FV ), 0 , 114, 0 , 357, 152), // #1589
- INST(Vrcp14ps , VexRm_Lx , E(660F38,4C,_,x,_,0,4,FV ), 0 , 115, 0 , 381, 152), // #1590
- INST(Vrcp14sd , VexRvm , E(660F38,4D,_,I,_,1,3,T1S), 0 , 129, 0 , 428, 78 ), // #1591
- INST(Vrcp14ss , VexRvm , E(660F38,4D,_,I,_,0,2,T1S), 0 , 130, 0 , 429, 78 ), // #1592
+ INST(Vrcp14pd , VexRm_Lx , E(660F38,4C,_,x,_,1,4,FV ), 0 , 114, 0 , 356, 152), // #1589
+ INST(Vrcp14ps , VexRm_Lx , E(660F38,4C,_,x,_,0,4,FV ), 0 , 115, 0 , 380, 152), // #1590
+ INST(Vrcp14sd , VexRvm , E(660F38,4D,_,I,_,1,3,T1S), 0 , 129, 0 , 427, 78 ), // #1591
+ INST(Vrcp14ss , VexRvm , E(660F38,4D,_,I,_,0,2,T1S), 0 , 130, 0 , 428, 78 ), // #1592
 INST(Vrcp28pd , VexRm , E(660F38,CA,_,2,_,1,4,FV ), 0 , 172, 0 , 284, 164), // #1593
 INST(Vrcp28ps , VexRm , E(660F38,CA,_,2,_,0,4,FV ), 0 , 173, 0 , 285, 164), // #1594
 INST(Vrcp28sd , VexRvm , E(660F38,CB,_,I,_,1,3,T1S), 0 , 129, 0 , 315, 164), // #1595
 INST(Vrcp28ss , VexRvm , E(660F38,CB,_,I,_,0,2,T1S), 0 , 130, 0 , 316, 164), // #1596
- INST(Vrcpph , VexRm_Lx , E(66MAP6,4C,_,_,_,0,4,FV ), 0 , 185, 0 , 430, 148), // #1597
+ INST(Vrcpph , VexRm_Lx , E(66MAP6,4C,_,_,_,0,4,FV ), 0 , 185, 0 , 429, 148), // #1597
 INST(Vrcpps , VexRm_Lx , V(000F00,53,_,x,I,_,_,_ ), 0 , 74 , 0 , 305, 149), // #1598
- INST(Vrcpsh , VexRvm , E(66MAP6,4D,_,_,_,0,1,T1S), 0 , 187, 0 , 431, 148), // #1599
- INST(Vrcpss , VexRvm , V(F30F00,53,_,I,I,_,_,_ ), 0 , 201, 0 , 432, 149), // #1600
- INST(Vreducepd , VexRmi_Lx , E(660F3A,56,_,x,_,1,4,FV ), 0 , 113, 0 , 408, 155), // #1601
+ INST(Vrcpsh , VexRvm , E(66MAP6,4D,_,_,_,0,1,T1S), 0 , 187, 0 , 430, 148), // #1599
+ INST(Vrcpss , VexRvm , V(F30F00,53,_,I,I,_,_,_ ), 0 , 201, 0 , 431, 149), // #1600
+ INST(Vreducepd , VexRmi_Lx , E(660F3A,56,_,x,_,1,4,FV ), 0 , 113, 0 , 407, 155), // #1601
 INST(Vreduceph , VexRmi_Lx , E(000F3A,56,_,_,_,0,4,FV ), 0 , 124, 0 , 318, 146), // #1602
- INST(Vreduceps , VexRmi_Lx , E(660F3A,56,_,x,_,0,4,FV ), 0 , 112, 0 , 407, 155), // #1603
- INST(Vreducesd , VexRvmi , E(660F3A,57,_,I,_,1,3,T1S), 0 , 182, 0 , 433, 76 ), // #1604
+ INST(Vreduceps , VexRmi_Lx , E(660F3A,56,_,x,_,0,4,FV ), 0 , 112, 0 , 406, 155), // #1603
+ INST(Vreducesd , VexRvmi , E(660F3A,57,_,I,_,1,3,T1S), 0 , 182, 0 , 432, 76 ), // #1604
 INST(Vreducesh , VexRvmi , E(000F3A,57,_,_,_,0,1,T1S), 0 , 190, 0 , 320, 148), // #1605
- INST(Vreducess , VexRvmi , E(660F3A,57,_,I,_,0,2,T1S), 0 , 183, 0 , 434, 76 ), // #1606
+ INST(Vreducess , VexRvmi , E(660F3A,57,_,I,_,0,2,T1S), 0 , 183, 0 , 433, 76 ), // #1606
 INST(Vrndscalepd , VexRmi_Lx , E(660F3A,09,_,x,_,1,4,FV ), 0 , 113, 0 , 317, 152), // #1607
 INST(Vrndscaleph , VexRmi_Lx , E(000F3A,08,_,_,_,0,4,FV ), 0 , 124, 0 , 318, 146), // #1608
 INST(Vrndscaleps , VexRmi_Lx , E(660F3A,08,_,x,_,0,4,FV ), 0 , 112, 0 , 319, 152), // #1609
 INST(Vrndscalesd , VexRvmi , E(660F3A,0B,_,I,_,1,3,T1S), 0 , 182, 0 , 294, 78 ), // #1610
 INST(Vrndscalesh , VexRvmi , E(000F3A,0A,_,_,_,0,1,T1S), 0 , 190, 0 , 320, 148), // #1611
 INST(Vrndscaless , VexRvmi , E(660F3A,0A,_,I,_,0,2,T1S), 0 , 183, 0 , 295, 78 ), // #1612
- INST(Vroundpd , VexRmi_Lx , V(660F3A,09,_,x,I,_,_,_ ), 0 , 75 , 0 , 435, 149), // #1613
- INST(Vroundps , VexRmi_Lx , V(660F3A,08,_,x,I,_,_,_ ), 0 , 75 , 0 , 435, 149), // #1614
- INST(Vroundsd , VexRvmi , V(660F3A,0B,_,I,I,_,_,_ ), 0 , 75 , 0 , 436, 149), // #1615
- INST(Vroundss , VexRvmi , V(660F3A,0A,_,I,I,_,_,_ ), 0 , 75 , 0 , 437, 149), // #1616
- INST(Vrsqrt14pd , VexRm_Lx , E(660F38,4E,_,x,_,1,4,FV ), 0 , 114, 0 , 357, 152), // #1617
- INST(Vrsqrt14ps , VexRm_Lx , E(660F38,4E,_,x,_,0,4,FV ), 0 , 115, 0 , 381, 152), // #1618
- INST(Vrsqrt14sd , VexRvm , E(660F38,4F,_,I,_,1,3,T1S), 0 , 129, 0 , 428, 78 ), // #1619
- INST(Vrsqrt14ss , VexRvm , E(660F38,4F,_,I,_,0,2,T1S), 0 , 130, 0 , 429, 78 ), // #1620
+ INST(Vroundpd , VexRmi_Lx , V(660F3A,09,_,x,I,_,_,_ ), 0 , 75 , 0 , 434, 149), // #1613
+ INST(Vroundps , VexRmi_Lx , V(660F3A,08,_,x,I,_,_,_ ), 0 , 75 , 0 , 434, 149), // #1614
+ INST(Vroundsd , VexRvmi , V(660F3A,0B,_,I,I,_,_,_ ), 0 , 75 , 0 , 435, 149), // #1615
+ INST(Vroundss , VexRvmi , V(660F3A,0A,_,I,I,_,_,_ ), 0 , 75 , 0 , 436, 149), // #1616
+ INST(Vrsqrt14pd , VexRm_Lx , E(660F38,4E,_,x,_,1,4,FV ), 0 , 114, 0 , 356, 152), // #1617
+ INST(Vrsqrt14ps , VexRm_Lx , E(660F38,4E,_,x,_,0,4,FV ), 0 , 115, 0 , 380, 152), // #1618
+ INST(Vrsqrt14sd , VexRvm , E(660F38,4F,_,I,_,1,3,T1S), 0 , 129, 0 , 427, 78 ), // #1619
+ INST(Vrsqrt14ss , VexRvm , E(660F38,4F,_,I,_,0,2,T1S), 0 , 130, 0 , 428, 78 ), // #1620
 INST(Vrsqrt28pd , VexRm , E(660F38,CC,_,2,_,1,4,FV ), 0 , 172, 0 , 284, 164), // #1621
 INST(Vrsqrt28ps , VexRm , E(660F38,CC,_,2,_,0,4,FV ), 0 , 173, 0 , 285, 164), // #1622
 INST(Vrsqrt28sd , VexRvm , E(660F38,CD,_,I,_,1,3,T1S), 0 , 129, 0 , 315, 164), // #1623
 INST(Vrsqrt28ss , VexRvm , E(660F38,CD,_,I,_,0,2,T1S), 0 , 130, 0 , 316, 164), // #1624
- INST(Vrsqrtph , VexRm_Lx , E(66MAP6,4E,_,_,_,0,4,FV ), 0 , 185, 0 , 430, 146), // #1625
+ INST(Vrsqrtph , VexRm_Lx , E(66MAP6,4E,_,_,_,0,4,FV ), 0 , 185, 0 , 429, 146), // #1625
 INST(Vrsqrtps , VexRm_Lx , V(000F00,52,_,x,I,_,_,_ ), 0 , 74 , 0 , 305, 149), // #1626
- INST(Vrsqrtsh , VexRvm , E(66MAP6,4F,_,_,_,0,1,T1S), 0 , 187, 0 , 431, 148), // #1627
- INST(Vrsqrtss , VexRvm , V(F30F00,52,_,I,I,_,_,_ ), 0 , 201, 0 , 432, 149), // #1628
- INST(Vscalefpd , VexRvm_Lx , E(660F38,2C,_,x,_,1,4,FV ), 0 , 114, 0 , 438, 152), // #1629
+ INST(Vrsqrtsh , VexRvm , E(66MAP6,4F,_,_,_,0,1,T1S), 0 , 187, 0 , 430, 148), // #1627
+ INST(Vrsqrtss , VexRvm , V(F30F00,52,_,I,I,_,_,_ ), 0 , 201, 0 , 431, 149), // #1628
+ INST(Vscalefpd , VexRvm_Lx , E(660F38,2C,_,x,_,1,4,FV ), 0 , 114, 0 , 437, 152), // #1629
 INST(Vscalefph , VexRvm_Lx , E(66MAP6,2C,_,_,_,0,4,FV ), 0 , 185, 0 , 201, 146), // #1630
 INST(Vscalefps , VexRvm_Lx , E(660F38,2C,_,x,_,0,4,FV ), 0 , 115, 0 , 291, 152), // #1631
 INST(Vscalefsd , VexRvm , E(660F38,2D,_,I,_,1,3,T1S), 0 , 129, 0 , 257, 78 ), // #1632
 INST(Vscalefsh , VexRvm , E(66MAP6,2D,_,_,_,0,1,T1S), 0 , 187, 0 , 204, 148), // #1633
 INST(Vscalefss , VexRvm , E(660F38,2D,_,I,_,0,2,T1S), 0 , 130, 0 , 265, 78 ), // #1634
- INST(Vscatterdpd , VexMr_VM , E(660F38,A2,_,x,_,1,3,T1S), 0 , 129, 0 , 411, 152), // #1635
- INST(Vscatterdps , VexMr_VM , E(660F38,A2,_,x,_,0,2,T1S), 0 , 130, 0 , 410, 152), // #1636
+ INST(Vscatterdpd , VexMr_VM , E(660F38,A2,_,x,_,1,3,T1S), 0 , 129, 0 , 410, 152), // #1635
+ INST(Vscatterdps , VexMr_VM , E(660F38,A2,_,x,_,0,2,T1S), 0 , 130, 0 , 409, 152), // #1636
 INST(Vscatterpf0dpd , VexM_VM , E(660F38,C6,5,2,_,1,3,T1S), 0 , 232, 0 , 310, 170), // #1637
 INST(Vscatterpf0dps , VexM_VM , E(660F38,C6,5,2,_,0,2,T1S), 0 , 233, 0 , 311, 170), // #1638
 INST(Vscatterpf0qpd , VexM_VM , E(660F38,C7,5,2,_,1,3,T1S), 0 , 232, 0 , 312, 170), // #1639
@@ -1691,23 +1691,23 @@ const InstDB::InstInfo InstDB::_instInfoTable[] = {
 INST(Vscatterpf1dps , VexM_VM , E(660F38,C6,6,2,_,0,2,T1S), 0 , 235, 0 , 311, 170), // #1642
 INST(Vscatterpf1qpd , VexM_VM , E(660F38,C7,6,2,_,1,3,T1S), 0 , 234, 0 , 312, 170), // #1643
 INST(Vscatterpf1qps , VexM_VM , E(660F38,C7,6,2,_,0,2,T1S), 0 , 235, 0 , 312, 170), // #1644
- INST(Vscatterqpd , VexMr_VM , E(660F38,A3,_,x,_,1,3,T1S), 0 , 129, 0 , 413, 152), // #1645
- INST(Vscatterqps , VexMr_VM , E(660F38,A3,_,x,_,0,2,T1S), 0 , 130, 0 , 412, 152), // #1646
- INST(Vsha512msg1 , VexRm , V(F20F38,CC,_,1,0,_,_,_ ), 0 , 236, 0 , 439, 196), // #1647
- INST(Vsha512msg2 , VexRm , V(F20F38,CD,_,1,0,_,_,_ ), 0 , 236, 0 , 440, 196), // #1648
- INST(Vsha512rnds2 , VexRvm , V(F20F38,CB,_,1,0,_,_,_ ), 0 , 236, 0 , 441, 196), // #1649
- INST(Vshuff32x4 , VexRvmi_Lx , E(660F3A,23,_,x,_,0,4,FV ), 0 , 112, 0 , 442, 152), // #1650
- INST(Vshuff64x2 , VexRvmi_Lx , E(660F3A,23,_,x,_,1,4,FV ), 0 , 113, 0 , 443, 152), // #1651
- INST(Vshufi32x4 , VexRvmi_Lx , E(660F3A,43,_,x,_,0,4,FV ), 0 , 112, 0 , 442, 152), // #1652
- INST(Vshufi64x2 , VexRvmi_Lx , E(660F3A,43,_,x,_,1,4,FV ), 0 , 113, 0 , 443, 152), // #1653
- INST(Vshufpd , VexRvmi_Lx , V(660F00,C6,_,x,I,1,4,FV ), 0 , 104, 0 , 444, 145), // #1654
- INST(Vshufps , VexRvmi_Lx , V(000F00,C6,_,x,I,0,4,FV ), 0 , 106, 0 , 445, 145), // #1655
- INST(Vsm3msg1 , VexRvm , V(000F38,DA,_,0,0,_,_,_ ), 0 , 11 , 0 , 446, 197), // #1656
- INST(Vsm3msg2 , VexRvm , V(660F38,DA,_,0,0,_,_,_ ), 0 , 30 , 0 , 446, 197), // #1657
+ INST(Vscatterqpd , VexMr_VM , E(660F38,A3,_,x,_,1,3,T1S), 0 , 129, 0 , 412, 152), // #1645
+ INST(Vscatterqps , VexMr_VM , E(660F38,A3,_,x,_,0,2,T1S), 0 , 130, 0 , 411, 152), // #1646
+ INST(Vsha512msg1 , VexRm , V(F20F38,CC,_,1,0,_,_,_ ), 0 , 236, 0 , 438, 196), // #1647
+ INST(Vsha512msg2 , VexRm , V(F20F38,CD,_,1,0,_,_,_ ), 0 , 236, 0 , 439, 196), // #1648
+ INST(Vsha512rnds2 , VexRvm , V(F20F38,CB,_,1,0,_,_,_ ), 0 , 236, 0 , 440, 196), // #1649
+ INST(Vshuff32x4 , VexRvmi_Lx , E(660F3A,23,_,x,_,0,4,FV ), 0 , 112, 0 , 441, 152), // #1650
+ INST(Vshuff64x2 , VexRvmi_Lx , E(660F3A,23,_,x,_,1,4,FV ), 0 , 113, 0 , 442, 152), // #1651
+ INST(Vshufi32x4 , VexRvmi_Lx , E(660F3A,43,_,x,_,0,4,FV ), 0 , 112, 0 , 441, 152), // #1652
+ INST(Vshufi64x2 , VexRvmi_Lx , E(660F3A,43,_,x,_,1,4,FV ), 0 , 113, 0 , 442, 152), // #1653
+ INST(Vshufpd , VexRvmi_Lx , V(660F00,C6,_,x,I,1,4,FV ), 0 , 104, 0 , 443, 145), // #1654
+ INST(Vshufps , VexRvmi_Lx , V(000F00,C6,_,x,I,0,4,FV ), 0 , 106, 0 , 444, 145), // #1655
+ INST(Vsm3msg1 , VexRvm , V(000F38,DA,_,0,0,_,_,_ ), 0 , 11 , 0 , 445, 197), // #1656
+ INST(Vsm3msg2 , VexRvm , V(660F38,DA,_,0,0,_,_,_ ), 0 , 30 , 0 , 445, 197), // #1657
 INST(Vsm3rnds2 , VexRvmi , V(660F3A,DE,_,0,0,_,_,_ ), 0 , 75 , 0 , 283, 197), // #1658
 INST(Vsm4key4 , VexRvm_Lx , V(F30F38,DA,_,x,0,_,_,_ ), 0 , 89 , 0 , 206, 198), // #1659
 INST(Vsm4rnds4 , VexRvm_Lx , V(F20F38,DA,_,x,0,_,_,_ ), 0 , 85 , 0 , 206, 198), // #1660
- INST(Vsqrtpd , VexRm_Lx , V(660F00,51,_,x,I,1,4,FV ), 0 , 104, 0 , 447, 145), // #1661
+ INST(Vsqrtpd , VexRm_Lx , V(660F00,51,_,x,I,1,4,FV ), 0 , 104, 0 , 446, 145), // #1661
 INST(Vsqrtph , VexRm_Lx , E(00MAP5,51,_,_,_,0,4,FV ), 0 , 105, 0 , 252, 146), // #1662
 INST(Vsqrtps , VexRm_Lx , V(000F00,51,_,x,I,0,4,FV ), 0 , 106, 0 , 240, 145), // #1663
 INST(Vsqrtsd , VexRvm , V(F20F00,51,_,I,I,1,3,T1S), 0 , 107, 0 , 203, 147), // #1664
@@ -1729,23 +1729,23 @@ const InstDB::InstInfo InstDB::_instInfoTable[] = {
 INST(Vunpckhps , VexRvm_Lx , V(000F00,15,_,x,I,0,4,FV ), 0 , 106, 0 , 213, 145), // #1680
 INST(Vunpcklpd , VexRvm_Lx , V(660F00,14,_,x,I,1,4,FV ), 0 , 104, 0 , 212, 145), // #1681
 INST(Vunpcklps , VexRvm_Lx , V(000F00,14,_,x,I,0,4,FV ), 0 , 106, 0 , 213, 145), // #1682
- INST(Vxorpd , VexRvm_Lx , V(660F00,57,_,x,I,1,4,FV ), 0 , 104, 0 , 425, 153), // #1683
- INST(Vxorps , VexRvm_Lx , V(000F00,57,_,x,I,0,4,FV ), 0 , 106, 0 , 424, 153), // #1684
- INST(Vzeroall , VexOp , V(000F00,77,_,1,I,_,_,_ ), 0 , 70 , 0 , 448, 149), // #1685
- INST(Vzeroupper , VexOp , V(000F00,77,_,0,I,_,_,_ ), 0 , 74 , 0 , 448, 149), // #1686
+ INST(Vxorpd , VexRvm_Lx , V(660F00,57,_,x,I,1,4,FV ), 0 , 104, 0 , 424, 153), // #1683
+ INST(Vxorps , VexRvm_Lx , V(000F00,57,_,x,I,0,4,FV ), 0 , 106, 0 , 423, 153), // #1684
+ INST(Vzeroall , VexOp , V(000F00,77,_,1,I,_,_,_ ), 0 , 70 , 0 , 447, 149), // #1685
+ INST(Vzeroupper , VexOp , V(000F00,77,_,0,I,_,_,_ ), 0 , 74 , 0 , 447, 149), // #1686
 INST(Wbinvd , X86Op , O(000F00,09,_,_,_,_,_,_ ), 0 , 5 , 0 , 31 , 45 ), // #1687
 INST(Wbnoinvd , X86Op , O(F30F00,09,_,_,_,_,_,_ ), 0 , 7 , 0 , 31 , 199), // #1688
 INST(Wrfsbase , X86M , O(F30F00,AE,2,_,x,_,_,_ ), 0 , 238, 0 , 177, 122), // #1689
 INST(Wrgsbase , X86M , O(F30F00,AE,3,_,x,_,_,_ ), 0 , 239, 0 , 177, 122), // #1690
 INST(Wrmsr , X86Op , O(000F00,30,_,_,_,_,_,_ ), 0 , 5 , 0 , 178, 123), // #1691
- INST(Wrssd , X86Mr , O(000F38,F6,_,_,_,_,_,_ ), 0 , 1 , 0 , 449, 65 ), // #1692
- INST(Wrssq , X86Mr , O(000F38,F6,_,_,1,_,_,_ ), 0 , 240, 0 , 450, 65 ), // #1693
- INST(Wrussd , X86Mr , O(660F38,F5,_,_,_,_,_,_ ), 0 , 2 , 0 , 449, 65 ),
// #1694 - INST(Wrussq , X86Mr , O(660F38,F5,_,_,1,_,_,_ ), 0 , 241, 0 , 450, 65 ), // #1695 + INST(Wrssd , X86Mr , O(000F38,F6,_,_,_,_,_,_ ), 0 , 1 , 0 , 448, 65 ), // #1692 + INST(Wrssq , X86Mr , O(000F38,F6,_,_,1,_,_,_ ), 0 , 240, 0 , 449, 65 ), // #1693 + INST(Wrussd , X86Mr , O(660F38,F5,_,_,_,_,_,_ ), 0 , 2 , 0 , 448, 65 ), // #1694 + INST(Wrussq , X86Mr , O(660F38,F5,_,_,1,_,_,_ ), 0 , 241, 0 , 449, 65 ), // #1695 INST(Xabort , X86Op_Mod11RM_I8 , O(000000,C6,7,_,_,_,_,_ ), 0 , 29 , 0 , 84 , 200), // #1696 - INST(Xadd , X86Xadd , O(000F00,C0,_,_,x,_,_,_ ), 0 , 5 , 0 , 451, 40 ), // #1697 - INST(Xbegin , X86JmpRel , O(000000,C7,7,_,_,_,_,_ ), 0 , 29 , 0 , 452, 200), // #1698 - INST(Xchg , X86Xchg , O(000000,86,_,_,x,_,_,_ ), 0 , 0 , 0 , 453, 0 ), // #1699 + INST(Xadd , X86Xadd , O(000F00,C0,_,_,x,_,_,_ ), 0 , 5 , 0 , 450, 40 ), // #1697 + INST(Xbegin , X86JmpRel , O(000000,C7,7,_,_,_,_,_ ), 0 , 29 , 0 , 451, 200), // #1698 + INST(Xchg , X86Xchg , O(000000,86,_,_,x,_,_,_ ), 0 , 0 , 0 , 452, 0 ), // #1699 INST(Xend , X86Op , O(000F01,D5,_,_,_,_,_,_ ), 0 , 23 , 0 , 31 , 200), // #1700 INST(Xgetbv , X86Op , O(000F01,D0,_,_,_,_,_,_ ), 0 , 23 , 0 , 178, 201), // #1701 INST(Xlatb , X86Op , O(000000,D7,_,_,_,_,_,_ ), 0 , 0 , 0 , 31 , 0 ), // #1702 @@ -1753,18 +1753,18 @@ const InstDB::InstInfo InstDB::_instInfoTable[] = { INST(Xorpd , ExtRm , O(660F00,57,_,_,_,_,_,_ ), 0 , 4 , 0 , 155, 5 ), // #1704 INST(Xorps , ExtRm , O(000F00,57,_,_,_,_,_,_ ), 0 , 5 , 0 , 155, 6 ), // #1705 INST(Xresldtrk , X86Op , O(F20F01,E9,_,_,_,_,_,_ ), 0 , 93 , 0 , 31 , 202), // #1706 - INST(Xrstor , X86M_Only_EDX_EAX , O(000F00,AE,5,_,_,_,_,_ ), 0 , 79 , 0 , 454, 201), // #1707 - INST(Xrstor64 , X86M_Only_EDX_EAX , O(000F00,AE,5,_,1,_,_,_ ), 0 , 242, 0 , 455, 201), // #1708 - INST(Xrstors , X86M_Only_EDX_EAX , O(000F00,C7,3,_,_,_,_,_ ), 0 , 80 , 0 , 454, 203), // #1709 - INST(Xrstors64 , X86M_Only_EDX_EAX , O(000F00,C7,3,_,1,_,_,_ ), 0 , 243, 0 , 455, 203), // #1710 - INST(Xsave , X86M_Only_EDX_EAX , O(000F00,AE,4,_,_,_,_,_ ), 0 , 98 , 0 , 454, 201), // #1711 - INST(Xsave64 , X86M_Only_EDX_EAX , O(000F00,AE,4,_,1,_,_,_ ), 0 , 244, 0 , 455, 201), // #1712 - INST(Xsavec , X86M_Only_EDX_EAX , O(000F00,C7,4,_,_,_,_,_ ), 0 , 98 , 0 , 454, 204), // #1713 - INST(Xsavec64 , X86M_Only_EDX_EAX , O(000F00,C7,4,_,1,_,_,_ ), 0 , 244, 0 , 455, 204), // #1714 - INST(Xsaveopt , X86M_Only_EDX_EAX , O(000F00,AE,6,_,_,_,_,_ ), 0 , 82 , 0 , 454, 205), // #1715 - INST(Xsaveopt64 , X86M_Only_EDX_EAX , O(000F00,AE,6,_,1,_,_,_ ), 0 , 245, 0 , 455, 205), // #1716 - INST(Xsaves , X86M_Only_EDX_EAX , O(000F00,C7,5,_,_,_,_,_ ), 0 , 79 , 0 , 454, 203), // #1717 - INST(Xsaves64 , X86M_Only_EDX_EAX , O(000F00,C7,5,_,1,_,_,_ ), 0 , 242, 0 , 455, 203), // #1718 + INST(Xrstor , X86M_Only_EDX_EAX , O(000F00,AE,5,_,_,_,_,_ ), 0 , 79 , 0 , 453, 201), // #1707 + INST(Xrstor64 , X86M_Only_EDX_EAX , O(000F00,AE,5,_,1,_,_,_ ), 0 , 242, 0 , 454, 201), // #1708 + INST(Xrstors , X86M_Only_EDX_EAX , O(000F00,C7,3,_,_,_,_,_ ), 0 , 80 , 0 , 453, 203), // #1709 + INST(Xrstors64 , X86M_Only_EDX_EAX , O(000F00,C7,3,_,1,_,_,_ ), 0 , 243, 0 , 454, 203), // #1710 + INST(Xsave , X86M_Only_EDX_EAX , O(000F00,AE,4,_,_,_,_,_ ), 0 , 98 , 0 , 453, 201), // #1711 + INST(Xsave64 , X86M_Only_EDX_EAX , O(000F00,AE,4,_,1,_,_,_ ), 0 , 244, 0 , 454, 201), // #1712 + INST(Xsavec , X86M_Only_EDX_EAX , O(000F00,C7,4,_,_,_,_,_ ), 0 , 98 , 0 , 453, 204), // #1713 + INST(Xsavec64 , X86M_Only_EDX_EAX , O(000F00,C7,4,_,1,_,_,_ ), 0 , 244, 0 , 454, 204), // #1714 + INST(Xsaveopt , 
X86M_Only_EDX_EAX , O(000F00,AE,6,_,_,_,_,_ ), 0 , 82 , 0 , 453, 205), // #1715 + INST(Xsaveopt64 , X86M_Only_EDX_EAX , O(000F00,AE,6,_,1,_,_,_ ), 0 , 245, 0 , 454, 205), // #1716 + INST(Xsaves , X86M_Only_EDX_EAX , O(000F00,C7,5,_,_,_,_,_ ), 0 , 79 , 0 , 453, 203), // #1717 + INST(Xsaves64 , X86M_Only_EDX_EAX , O(000F00,C7,5,_,1,_,_,_ ), 0 , 242, 0 , 454, 203), // #1718 INST(Xsetbv , X86Op , O(000F01,D1,_,_,_,_,_,_ ), 0 , 23 , 0 , 178, 201), // #1719 INST(Xsusldtrk , X86Op , O(F20F01,E8,_,_,_,_,_,_ ), 0 , 93 , 0 , 31 , 202), // #1720 INST(Xtest , X86Op , O(000F01,D6,_,_,_,_,_,_ ), 0 , 23 , 0 , 31 , 206) // #1721 @@ -2187,8 +2187,8 @@ const uint32_t InstDB::_altOpcodeTable[] = { #define SAME_REG_HINT(VAL) uint8_t(InstSameRegHint::k##VAL) const InstDB::CommonInfo InstDB::_commonInfoTable[] = { { 0 , 0 , 0 , 0 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #0 [ref=1x] - { 0 , 0 , 457, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #1 [ref=4x] - { 0 , 0 , 458, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #2 [ref=2x] + { 0 , 0 , 455, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #1 [ref=4x] + { 0 , 0 , 456, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #2 [ref=2x] { 0 , 0 , 108, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #3 [ref=6x] { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 20 , 13, CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #4 [ref=2x] { 0 , 0 , 50 , 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #5 [ref=2x] @@ -2199,199 +2199,199 @@ const InstDB::CommonInfo InstDB::_commonInfoTable[] = { { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 33 , 12, CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #10 [ref=1x] { F(Vex) , 0 , 325, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #11 [ref=3x] { F(Vec) , 0 , 72 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #12 [ref=12x] - { 0 , 0 , 459, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #13 [ref=1x] + { 0 , 0 , 457, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #13 [ref=1x] { F(Vex) , 0 , 327, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #14 [ref=5x] { F(Vex) , 0 , 50 , 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #15 [ref=12x] - { F(Vec) , 0 , 460, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #16 [ref=4x] + { F(Vec) , 0 , 458, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #16 [ref=4x] { 0 , 0 , 329, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #17 [ref=3x] - { F(Mib) , 0 , 461, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #18 [ref=1x] - { 0 , 0 , 462, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #19 [ref=1x] + { F(Mib) , 0 , 459, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #18 [ref=1x] + { 0 , 0 , 460, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #19 [ref=1x] { 0 , 0 , 331, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #20 [ref=1x] - { F(Mib) , 0 , 463, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #21 [ref=1x] + { F(Mib) , 0 , 461, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #21 [ref=1x] { 0 , 0 , 333, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #22 [ref=1x] { 0 , 0 , 49 , 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #23 [ref=35x] { 0 , 0 , 335, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #24 [ref=3x] { 0 , 0 , 134, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #25 [ref=1x] { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 134, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #26 [ref=3x] { F(Rep)|F(RepIgnored) , 0 , 235, 3 , CONTROL_FLOW(Call), SAME_REG_HINT(None)}, // #27 [ref=1x] - { 0 , 0 , 
464, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #28 [ref=1x] - { 0 , 0 , 465, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #29 [ref=2x] + { 0 , 0 , 462, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #28 [ref=1x] + { 0 , 0 , 463, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #29 [ref=2x] { 0 , 0 , 436, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #30 [ref=1x] - { 0 , 0 , 110, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #31 [ref=87x] - { 0 , 0 , 466, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #32 [ref=24x] - { 0 , 0 , 467, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #33 [ref=6x] - { 0 , 0 , 468, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #34 [ref=14x] - { 0 , 0 , 469, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #35 [ref=1x] + { 0 , 0 , 110, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #31 [ref=88x] + { 0 , 0 , 464, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #32 [ref=24x] + { 0 , 0 , 465, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #33 [ref=6x] + { 0 , 0 , 466, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #34 [ref=14x] + { 0 , 0 , 467, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #35 [ref=1x] { 0 , 0 , 20 , 13, CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #36 [ref=1x] { F(Vex) , 0 , 337, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #37 [ref=16x] { F(Rep) , 0 , 179, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #38 [ref=1x] - { F(Vec) , 0 , 470, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #39 [ref=2x] - { F(Vec) , 0 , 471, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #40 [ref=3x] + { F(Vec) , 0 , 468, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #39 [ref=2x] + { F(Vec) , 0 , 469, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #40 [ref=3x] { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 183, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #41 [ref=1x] - { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 472, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #42 [ref=1x] - { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 473, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #43 [ref=1x] - { 0 , 0 , 474, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #44 [ref=1x] - { 0 , 0 , 475, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #45 [ref=1x] + { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 470, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #42 [ref=1x] + { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 471, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #43 [ref=1x] + { 0 , 0 , 472, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #44 [ref=1x] + { 0 , 0 , 473, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #45 [ref=1x] { 0 , 0 , 339, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #46 [ref=1x] - { F(Mmx)|F(Vec) , 0 , 476, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #47 [ref=2x] - { F(Mmx)|F(Vec) , 0 , 477, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #48 [ref=2x] - { F(Mmx)|F(Vec) , 0 , 478, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #49 [ref=2x] + { F(Mmx)|F(Vec) , 0 , 474, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #47 [ref=2x] + { F(Mmx)|F(Vec) , 0 , 475, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #48 [ref=2x] + { F(Mmx)|F(Vec) , 0 , 476, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #49 [ref=2x] { F(Vec) , 0 , 341, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #50 [ref=2x] { F(Vec) , 0 , 343, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #51 [ref=1x] { F(Vec) , 0 , 345, 2 , 
CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #52 [ref=1x] { F(Vec) , 0 , 347, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #53 [ref=1x] { F(Vec) , 0 , 349, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #54 [ref=1x] - { 0 , 0 , 479, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #55 [ref=1x] - { 0 , 0 , 480, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #56 [ref=3x] + { 0 , 0 , 477, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #55 [ref=1x] + { 0 , 0 , 478, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #56 [ref=3x] { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 238, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #57 [ref=1x] { 0 , 0 , 45 , 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #58 [ref=3x] { F(Mmx) , 0 , 110, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #59 [ref=1x] { 0 , 0 , 351, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #60 [ref=2x] - { 0 , 0 , 481, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #61 [ref=1x] - { F(Vec) , 0 , 482, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #62 [ref=2x] + { 0 , 0 , 479, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #61 [ref=1x] + { F(Vec) , 0 , 480, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #62 [ref=2x] { F(Vec) , 0 , 353, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #63 [ref=1x] { F(FpuM32)|F(FpuM64) , 0 , 241, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #64 [ref=6x] { 0 , 0 , 355, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #65 [ref=9x] - { F(FpuM80) , 0 , 483, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #66 [ref=2x] + { F(FpuM80) , 0 , 481, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #66 [ref=2x] { 0 , 0 , 356, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #67 [ref=13x] { F(FpuM32)|F(FpuM64) , 0 , 357, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #68 [ref=2x] - { F(FpuM16)|F(FpuM32) , 0 , 484, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #69 [ref=9x] - { F(FpuM16)|F(FpuM32)|F(FpuM64) , 0 , 485, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #70 [ref=3x] - { F(FpuM32)|F(FpuM64)|F(FpuM80) , 0 , 486, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #71 [ref=2x] - { F(FpuM16) , 0 , 487, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #72 [ref=3x] - { F(FpuM16) , 0 , 488, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #73 [ref=2x] + { F(FpuM16)|F(FpuM32) , 0 , 482, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #69 [ref=9x] + { F(FpuM16)|F(FpuM32)|F(FpuM64) , 0 , 483, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #70 [ref=3x] + { F(FpuM32)|F(FpuM64)|F(FpuM80) , 0 , 484, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #71 [ref=2x] + { F(FpuM16) , 0 , 485, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #72 [ref=3x] + { F(FpuM16) , 0 , 486, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #73 [ref=2x] { F(FpuM32)|F(FpuM64) , 0 , 358, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #74 [ref=1x] - { 0 , 0 , 489, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #75 [ref=4x] - { 0 , 0 , 490, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #76 [ref=1x] + { 0 , 0 , 487, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #75 [ref=4x] + { 0 , 0 , 488, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #76 [ref=1x] { 0 , 0 , 45 , 10, CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #77 [ref=1x] - { 0 , 0 , 491, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #78 [ref=1x] + { 0 , 0 , 489, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #78 [ref=1x] { F(Lock) 
, 0 , 238, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #79 [ref=1x] { 0 , 0 , 379, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #80 [ref=2x] { 0 , 0 , 336, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #81 [ref=3x] - { F(Rep) , 0 , 492, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #82 [ref=1x] + { F(Rep) , 0 , 490, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #82 [ref=1x] { F(Vec) , 0 , 359, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #83 [ref=1x] - { 0 , 0 , 493, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #84 [ref=2x] - { 0 , 0 , 494, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #85 [ref=8x] + { 0 , 0 , 491, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #84 [ref=2x] + { 0 , 0 , 492, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #85 [ref=8x] { 0 , 0 , 361, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #86 [ref=3x] { 0 , 0 , 363, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #87 [ref=1x] { 0 , 0 , 365, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #88 [ref=1x] { 0 , 0 , 110, 1 , CONTROL_FLOW(Return), SAME_REG_HINT(None)}, // #89 [ref=2x] - { 0 , 0 , 468, 1 , CONTROL_FLOW(Return), SAME_REG_HINT(None)}, // #90 [ref=1x] + { 0 , 0 , 466, 1 , CONTROL_FLOW(Return), SAME_REG_HINT(None)}, // #90 [ref=1x] { F(Rep) , 0 , 244, 1 , CONTROL_FLOW(Branch), SAME_REG_HINT(None)}, // #91 [ref=30x] { F(Rep) , 0 , 367, 2 , CONTROL_FLOW(Branch), SAME_REG_HINT(None)}, // #92 [ref=1x] { F(Rep) , 0 , 244, 3 , CONTROL_FLOW(Jump), SAME_REG_HINT(None)}, // #93 [ref=1x] - { F(Vex) , 0 , 495, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #94 [ref=19x] + { F(Vex) , 0 , 493, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #94 [ref=19x] { F(Vex) , 0 , 369, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #95 [ref=1x] { F(Vex) , 0 , 371, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #96 [ref=1x] { F(Vex) , 0 , 187, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #97 [ref=1x] { F(Vex) , 0 , 373, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #98 [ref=1x] - { F(Vex) , 0 , 496, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #99 [ref=12x] - { F(Vex) , 0 , 497, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #100 [ref=8x] - { F(Vex) , 0 , 495, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #101 [ref=8x] - { 0 , 0 , 498, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #102 [ref=2x] + { F(Vex) , 0 , 494, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #99 [ref=12x] + { F(Vex) , 0 , 495, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #100 [ref=8x] + { F(Vex) , 0 , 493, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #101 [ref=8x] + { 0 , 0 , 496, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #102 [ref=2x] { 0 , 0 , 253, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #103 [ref=1x] { 0 , 0 , 247, 3 , CONTROL_FLOW(Call), SAME_REG_HINT(None)}, // #104 [ref=1x] { F(Vec) , 0 , 169, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #105 [ref=2x] - { 0 , 0 , 499, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #106 [ref=2x] + { 0 , 0 , 497, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #106 [ref=2x] { 0 , 0 , 375, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #107 [ref=2x] - { F(Vex) , 0 , 500, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #108 [ref=2x] + { F(Vex) , 0 , 498, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #108 [ref=2x] { 0 , 0 , 377, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #109 [ref=1x] { 0 , 0 , 250, 3 , CONTROL_FLOW(Regular), 
SAME_REG_HINT(None)}, // #110 [ref=3x] { 0 , 0 , 247, 3 , CONTROL_FLOW(Jump), SAME_REG_HINT(None)}, // #111 [ref=1x] - { 0 , 0 , 501, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #112 [ref=5x] + { 0 , 0 , 499, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #112 [ref=5x] { F(Vex) , 0 , 379, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #113 [ref=2x] { F(Rep) , 0 , 191, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #114 [ref=1x] { 0 , 0 , 367, 2 , CONTROL_FLOW(Branch), SAME_REG_HINT(None)}, // #115 [ref=3x] { 0 , 0 , 253, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #116 [ref=1x] { F(Vex) , 0 , 381, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #117 [ref=2x] - { F(Vec) , 0 , 502, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #118 [ref=1x] - { F(Mmx) , 0 , 503, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #119 [ref=1x] - { 0 , 0 , 504, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #120 [ref=2x] + { F(Vec) , 0 , 500, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #118 [ref=1x] + { F(Mmx) , 0 , 501, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #119 [ref=1x] + { 0 , 0 , 502, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #120 [ref=2x] { F(XRelease) , 0 , 0 , 20, CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #121 [ref=1x] { 0 , 0 , 55 , 9 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #122 [ref=1x] { F(Vec) , 0 , 72 , 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #123 [ref=6x] { 0 , 0 , 104, 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #124 [ref=1x] { F(Mmx)|F(Vec) , 0 , 383, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #125 [ref=1x] { 0 , 0 , 385, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #126 [ref=1x] - { F(Mmx)|F(Vec) , 0 , 505, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #127 [ref=1x] + { F(Mmx)|F(Vec) , 0 , 503, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #127 [ref=1x] { F(Vec) , 0 , 354, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #128 [ref=2x] { F(Vec) , 0 , 80 , 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #129 [ref=4x] - { F(Vec) , 0 , 506, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #130 [ref=2x] + { F(Vec) , 0 , 504, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #130 [ref=2x] { F(Vec) , 0 , 73 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #131 [ref=3x] - { F(Mmx) , 0 , 507, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #132 [ref=1x] + { F(Mmx) , 0 , 505, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #132 [ref=1x] { F(Vec) , 0 , 80 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #133 [ref=1x] { F(Vec) , 0 , 88 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #134 [ref=1x] { F(Mmx)|F(Vec) , 0 , 139, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #135 [ref=1x] - { F(Mmx)|F(Vec) , 0 , 508, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #136 [ref=1x] + { F(Mmx)|F(Vec) , 0 , 506, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #136 [ref=1x] { F(Rep) , 0 , 195, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #137 [ref=1x] { F(Vec) , 0 , 387, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #138 [ref=1x] { F(Vec) , 0 , 389, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #139 [ref=1x] { 0 , 0 , 256, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #140 [ref=2x] { 0 , 0 , 391, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #141 [ref=1x] { F(Vex) , 0 , 393, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #142 [ref=1x] - { 0 , 0 , 509, 1 , CONTROL_FLOW(Regular), 
SAME_REG_HINT(None)}, // #143 [ref=1x] - { 0 , 0 , 510, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #144 [ref=1x] + { 0 , 0 , 507, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #143 [ref=1x] + { 0 , 0 , 508, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #144 [ref=1x] { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 239, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #145 [ref=2x] { 0 , 0 , 110, 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #146 [ref=1x] { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 20 , 13, CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #147 [ref=1x] - { 0 , 0 , 511, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #148 [ref=1x] - { F(Rep) , 0 , 512, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #149 [ref=1x] + { 0 , 0 , 509, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #148 [ref=1x] + { F(Rep) , 0 , 510, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #149 [ref=1x] { F(Mmx)|F(Vec) , 0 , 395, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #150 [ref=37x] { F(Mmx)|F(Vec) , 0 , 397, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #151 [ref=1x] { F(Mmx)|F(Vec) , 0 , 395, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #152 [ref=6x] { F(Mmx)|F(Vec) , 0 , 395, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #153 [ref=16x] { F(Mmx) , 0 , 139, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #154 [ref=26x] { F(Vec) , 0 , 72 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #155 [ref=4x] - { F(Vec) , 0 , 513, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #156 [ref=1x] - { F(Vec) , 0 , 514, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #157 [ref=1x] - { F(Vec) , 0 , 515, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #158 [ref=1x] - { F(Vec) , 0 , 516, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #159 [ref=1x] - { F(Vec) , 0 , 517, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #160 [ref=1x] - { F(Vec) , 0 , 518, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #161 [ref=1x] + { F(Vec) , 0 , 511, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #156 [ref=1x] + { F(Vec) , 0 , 512, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #157 [ref=1x] + { F(Vec) , 0 , 513, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #158 [ref=1x] + { F(Vec) , 0 , 514, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #159 [ref=1x] + { F(Vec) , 0 , 515, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #160 [ref=1x] + { F(Vec) , 0 , 516, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #161 [ref=1x] { F(Mmx)|F(Vec) , 0 , 399, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #162 [ref=1x] - { F(Vec) , 0 , 519, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #163 [ref=1x] - { F(Vec) , 0 , 520, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #164 [ref=1x] - { F(Vec) , 0 , 521, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #165 [ref=1x] - { F(Mmx)|F(Vec) , 0 , 522, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #166 [ref=1x] - { F(Mmx)|F(Vec) , 0 , 523, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #167 [ref=1x] + { F(Vec) , 0 , 517, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #163 [ref=1x] + { F(Vec) , 0 , 518, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #164 [ref=1x] + { F(Vec) , 0 , 519, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #165 [ref=1x] + { F(Mmx)|F(Vec) , 0 , 520, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #166 [ref=1x] + { F(Mmx)|F(Vec) , 0 , 521, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #167 [ref=1x] { F(Vec) , 0 , 313, 1 , 
CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #168 [ref=2x] { 0 , 0 , 144, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #169 [ref=1x] { F(Mmx) , 0 , 397, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #170 [ref=1x] { F(Mmx)|F(Vec) , 0 , 401, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #171 [ref=8x] - { F(Vec) , 0 , 524, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #172 [ref=2x] + { F(Vec) , 0 , 522, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #172 [ref=2x] { 0 , 0 , 403, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #173 [ref=1x] { F(Mmx)|F(Vec) , 0 , 405, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #174 [ref=3x] { 0 , 0 , 149, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #175 [ref=1x] { 0 , 0 , 407, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #176 [ref=8x] - { 0 , 0 , 525, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #177 [ref=4x] - { 0 , 0 , 526, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #178 [ref=8x] + { 0 , 0 , 523, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #177 [ref=4x] + { 0 , 0 , 524, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #178 [ref=8x] { 0 , 0 , 409, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #179 [ref=1x] { F(Rep)|F(RepIgnored) , 0 , 411, 2 , CONTROL_FLOW(Return), SAME_REG_HINT(None)}, // #180 [ref=1x] { 0 , 0 , 411, 2 , CONTROL_FLOW(Return), SAME_REG_HINT(None)}, // #181 [ref=1x] { F(Vex) , 0 , 413, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #182 [ref=1x] { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 20 , 13, CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #183 [ref=3x] { F(Rep) , 0 , 199, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #184 [ref=1x] - { 0 , 0 , 527, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #185 [ref=30x] + { 0 , 0 , 525, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #185 [ref=30x] { 0 , 0 , 259, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #186 [ref=2x] { 0 , 0 , 415, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #187 [ref=3x] { F(Rep) , 0 , 203, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #188 [ref=1x] - { F(Vex) , 0 , 528, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #189 [ref=8x] + { F(Vex) , 0 , 526, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #189 [ref=8x] { 0 , 0 , 64 , 8 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #190 [ref=1x] - { F(Tsib)|F(Vex) , 0 , 529, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #191 [ref=2x] - { F(Vex) , 0 , 468, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #192 [ref=1x] - { F(Tsib)|F(Vex) , 0 , 530, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #193 [ref=1x] - { F(Vex) , 0 , 531, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #194 [ref=1x] - { 0 , 0 , 532, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #195 [ref=2x] + { F(Tsib)|F(Vex) , 0 , 527, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #191 [ref=2x] + { F(Vex) , 0 , 466, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #192 [ref=1x] + { F(Tsib)|F(Vex) , 0 , 528, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #193 [ref=1x] + { F(Vex) , 0 , 529, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #194 [ref=1x] + { 0 , 0 , 530, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #195 [ref=2x] { 0 , 0 , 50 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #196 [ref=2x] { 0 , 0 , 417, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #197 [ref=1x] - { F(Evex)|F(Vec) , X(K)|X(T4X)|X(Z) , 533, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #198 [ref=4x] 
- { F(Evex)|F(Vec) , X(K)|X(T4X)|X(Z) , 534, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #199 [ref=2x] + { F(Evex)|F(Vec) , X(K)|X(T4X)|X(Z) , 531, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #198 [ref=4x] + { F(Evex)|F(Vec) , X(K)|X(T4X)|X(Z) , 532, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #199 [ref=2x] { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #200 [ref=22x] { F(Evex)|F(Vec) , X(B16)|X(ER)|X(K)|X(SAE)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #201 [ref=23x] { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(ER)|X(K)|X(SAE)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #202 [ref=22x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(ER)|X(K)|X(SAE)|X(Z) , 535, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #203 [ref=18x] - { F(Evex)|F(Vec) , X(ER)|X(K)|X(SAE)|X(Z) , 536, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #204 [ref=18x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(ER)|X(K)|X(SAE)|X(Z) , 537, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #205 [ref=17x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(ER)|X(K)|X(SAE)|X(Z) , 533, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #203 [ref=18x] + { F(Evex)|F(Vec) , X(ER)|X(K)|X(SAE)|X(Z) , 534, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #204 [ref=18x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(ER)|X(K)|X(SAE)|X(Z) , 535, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #205 [ref=17x] { F(Vec)|F(Vex) , 0 , 262, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #206 [ref=29x] { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #207 [ref=5x] { F(Vec)|F(Vex) , 0 , 72 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #208 [ref=17x] @@ -2402,24 +2402,24 @@ const InstDB::CommonInfo InstDB::_commonInfoTable[] = { { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #213 [ref=12x] { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #214 [ref=2x] { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #215 [ref=6x] - { F(Vec)|F(Vex) , 0 , 538, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #216 [ref=2x] + { F(Vec)|F(Vex) , 0 , 536, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #216 [ref=2x] { F(Evex)|F(Vec) , X(B64)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #217 [ref=17x] { F(Evex)|F(Vec) , X(B32)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #218 [ref=12x] { F(Vec)|F(Vex) , 0 , 265, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #219 [ref=6x] { F(Vec)|F(Vex) , 0 , 419, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #220 [ref=3x] - { F(EvexTransformable)|F(Vec)|F(Vex) , 0 , 539, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #221 [ref=2x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 540, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #222 [ref=1x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 541, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #223 [ref=4x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 542, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #224 [ref=4x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 447, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #225 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 540, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #226 
[ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 543, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #227 [ref=1x] - { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex) , X(B64)|X(K)|X(SAE) , 268, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #228 [ref=1x] - { F(Evex)|F(Vec) , X(B16)|X(K)|X(SAE) , 271, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #229 [ref=1x] - { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex) , X(B32)|X(K)|X(SAE) , 268, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #230 [ref=1x] - { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex) , X(K)|X(SAE) , 544, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #231 [ref=1x] - { F(Evex)|F(Vec) , X(K)|X(SAE) , 545, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #232 [ref=1x] - { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex) , X(K)|X(SAE) , 546, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #233 [ref=1x] + { F(EvexTransformable)|F(Vec)|F(Vex) , 0 , 537, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #221 [ref=2x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 538, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #222 [ref=1x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 539, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #223 [ref=4x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 540, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #224 [ref=4x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 445, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #225 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 538, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #226 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 541, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #227 [ref=1x] + { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex) , X(B64)|X(ImplicitZ)|X(K)|X(SAE), 268, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #228 [ref=1x] + { F(Evex)|F(Vec) , X(B16)|X(ImplicitZ)|X(K)|X(SAE), 271, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #229 [ref=1x] + { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex) , X(B32)|X(ImplicitZ)|X(K)|X(SAE), 268, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #230 [ref=1x] + { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex) , X(ImplicitZ)|X(K)|X(SAE) , 542, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #231 [ref=1x] + { F(Evex)|F(Vec) , X(ImplicitZ)|X(K)|X(SAE) , 543, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #232 [ref=1x] + { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex) , X(ImplicitZ)|X(K)|X(SAE) , 544, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #233 [ref=1x] { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(SAE) , 143, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #234 [ref=2x] { F(Evex)|F(Vec) , X(SAE) , 313, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #235 [ref=2x] { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(SAE) , 283, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #236 [ref=2x] @@ -2430,7 +2430,7 @@ const InstDB::CommonInfo InstDB::_commonInfoTable[] = { { F(Vec)|F(Vex) , 0 , 169, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #241 [ref=5x] { F(Evex)|F(EvexCompat)|F(PreferEvex)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 421, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #242 [ref=1x] { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 421, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #243 [ref=2x] - { F(Evex)|F(Vec) , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 547, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #244 [ref=3x] + { F(Evex)|F(Vec) , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 545, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #244 [ref=3x] { F(Evex)|F(Vec) , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 
280, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #245 [ref=4x] { F(Evex)|F(Vec) , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 421, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #246 [ref=3x] { F(Evex)|F(Vec) , X(B16)|X(ER)|X(K)|X(SAE)|X(Z) , 277, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #247 [ref=2x] @@ -2443,15 +2443,15 @@ const InstDB::CommonInfo InstDB::_commonInfoTable[] = { { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(SAE)|X(Z) , 286, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #254 [ref=1x] { F(Evex)|F(Vec) , X(B32)|X(ER)|X(K)|X(SAE)|X(Z) , 277, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #255 [ref=2x] { F(Evex)|F(Vec) , X(B32)|X(ER)|X(K)|X(SAE)|X(Z) , 280, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #256 [ref=2x] - { F(Evex)|F(Vec) , X(ER)|X(K)|X(SAE)|X(Z) , 535, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #257 [ref=2x] + { F(Evex)|F(Vec) , X(ER)|X(K)|X(SAE)|X(Z) , 533, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #257 [ref=2x] { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(ER)|X(SAE) , 341, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #258 [ref=1x] { F(Evex)|F(Vec) , X(ER)|X(SAE) , 341, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #259 [ref=1x] - { F(Evex)|F(Vec) , X(K)|X(SAE)|X(Z) , 536, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #260 [ref=5x] + { F(Evex)|F(Vec) , X(K)|X(SAE)|X(Z) , 534, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #260 [ref=5x] { F(Evex)|F(Vec) , X(ER)|X(SAE) , 423, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #261 [ref=2x] { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(ER)|X(SAE) , 425, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #262 [ref=2x] { F(Evex)|F(Vec) , X(ER)|X(SAE) , 427, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #263 [ref=2x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(SAE)|X(Z) , 537, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #264 [ref=3x] - { F(Evex)|F(Vec) , X(ER)|X(K)|X(SAE)|X(Z) , 537, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #265 [ref=6x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(SAE)|X(Z) , 535, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #264 [ref=3x] + { F(Evex)|F(Vec) , X(ER)|X(K)|X(SAE)|X(Z) , 535, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #265 [ref=6x] { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(ER)|X(SAE) , 347, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #266 [ref=1x] { F(Evex)|F(Vec) , X(ER)|X(SAE) , 347, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #267 [ref=1x] { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(K)|X(SAE)|X(Z) , 421, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #268 [ref=1x] @@ -2474,174 +2474,173 @@ const InstDB::CommonInfo InstDB::_commonInfoTable[] = { { F(Evex)|F(Vec) , X(B32)|X(K)|X(SAE)|X(Z) , 78 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #285 [ref=3x] { F(Evex)|F(Vec) , X(K)|X(Z) , 280, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #286 [ref=8x] { F(EvexTransformable)|F(Vec)|F(Vex) , 0 , 287, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #287 [ref=2x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 548, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #288 [ref=4x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 546, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #288 [ref=4x] { F(Evex)|F(Vec) , X(K)|X(Z) , 288, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #289 [ref=4x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 482, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #290 [ref=2x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 480, 1 , 
CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #290 [ref=2x] { F(Evex)|F(Vec) , X(B32)|X(ER)|X(K)|X(SAE)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #291 [ref=5x] { F(Evex)|F(Vec) , X(B64)|X(K)|X(SAE)|X(Z) , 265, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #292 [ref=2x] { F(Evex)|F(Vec) , X(B32)|X(K)|X(SAE)|X(Z) , 265, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #293 [ref=2x] - { F(Evex)|F(Vec) , X(K)|X(SAE)|X(Z) , 549, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #294 [ref=4x] - { F(Evex)|F(Vec) , X(K)|X(SAE)|X(Z) , 550, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #295 [ref=4x] + { F(Evex)|F(Vec) , X(K)|X(SAE)|X(Z) , 547, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #294 [ref=4x] + { F(Evex)|F(Vec) , X(K)|X(SAE)|X(Z) , 548, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #295 [ref=4x] { F(Vec)|F(Vex) , 0 , 207, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #296 [ref=13x] { F(Vec)|F(Vex) , 0 , 429, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #297 [ref=4x] { F(Vec)|F(Vex) , 0 , 431, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #298 [ref=4x] - { F(Evex)|F(Vec) , X(B64)|X(K) , 551, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #299 [ref=1x] - { F(Evex)|F(Vec) , X(B16)|X(K) , 551, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #300 [ref=1x] - { F(Evex)|F(Vec) , X(B32)|X(K) , 551, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #301 [ref=1x] - { F(Evex)|F(Vec) , X(K) , 552, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #302 [ref=1x] - { F(Evex)|F(Vec) , X(K) , 553, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #303 [ref=1x] - { F(Evex)|F(Vec) , X(K) , 554, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #304 [ref=1x] + { F(Evex)|F(Vec) , X(B64)|X(ImplicitZ)|X(K) , 549, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #299 [ref=1x] + { F(Evex)|F(Vec) , X(B16)|X(K) , 549, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #300 [ref=1x] + { F(Evex)|F(Vec) , X(B32)|X(ImplicitZ)|X(K) , 549, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #301 [ref=1x] + { F(Evex)|F(Vec) , X(ImplicitZ)|X(K) , 550, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #302 [ref=1x] + { F(Evex)|F(Vec) , X(K) , 551, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #303 [ref=1x] + { F(Evex)|F(Vec) , X(ImplicitZ)|X(K) , 552, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #304 [ref=1x] { F(Vec)|F(Vex) , 0 , 280, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #305 [ref=7x] { F(Vec)|F(Vex) , 0 , 143, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #306 [ref=1x] { F(Vec)|F(Vex) , 0 , 283, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #307 [ref=1x] { F(Evex)|F(EvexTwoOp)|F(Vec)|F(Vex)|F(Vsib) , X(K) , 211, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #308 [ref=2x] { F(Evex)|F(EvexTwoOp)|F(Vec)|F(Vex)|F(Vsib) , X(K) , 154, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #309 [ref=2x] - { F(Evex)|F(Vsib) , X(K) , 555, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #310 [ref=4x] - { F(Evex)|F(Vsib) , X(K) , 556, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #311 [ref=4x] - { F(Evex)|F(Vsib) , X(K) , 557, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #312 [ref=8x] + { F(Evex)|F(Vsib) , X(K) , 553, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #310 [ref=4x] + { F(Evex)|F(Vsib) , X(K) , 554, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #311 [ref=4x] + { F(Evex)|F(Vsib) , X(K) , 555, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #312 [ref=8x] { 
F(Evex)|F(EvexTwoOp)|F(Vec)|F(Vex)|F(Vsib) , X(K) , 159, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #313 [ref=2x] { F(Evex)|F(EvexTwoOp)|F(Vec)|F(Vex)|F(Vsib) , X(K) , 289, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #314 [ref=2x] - { F(Evex)|F(Vec) , X(K)|X(SAE)|X(Z) , 535, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #315 [ref=3x] - { F(Evex)|F(Vec) , X(K)|X(SAE)|X(Z) , 537, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #316 [ref=3x] + { F(Evex)|F(Vec) , X(K)|X(SAE)|X(Z) , 533, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #315 [ref=3x] + { F(Evex)|F(Vec) , X(K)|X(SAE)|X(Z) , 535, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #316 [ref=3x] { F(Evex)|F(Vec) , X(B64)|X(K)|X(SAE)|X(Z) , 292, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #317 [ref=2x] { F(Evex)|F(Vec) , X(B16)|X(K)|X(SAE)|X(Z) , 292, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #318 [ref=3x] { F(Evex)|F(Vec) , X(B32)|X(K)|X(SAE)|X(Z) , 292, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #319 [ref=2x] - { F(Evex)|F(Vec) , X(K)|X(SAE)|X(Z) , 558, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #320 [ref=3x] + { F(Evex)|F(Vec) , X(K)|X(SAE)|X(Z) , 556, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #320 [ref=3x] { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 265, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #321 [ref=3x] { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #322 [ref=22x] { F(EvexTransformable)|F(Vec)|F(Vex) , 0 , 433, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #323 [ref=2x] { F(Evex)|F(Vec) , X(K)|X(Z) , 433, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #324 [ref=4x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 559, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #325 [ref=4x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 550, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #326 [ref=1x] - { F(Vex) , 0 , 499, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #327 [ref=2x] - { F(Vec)|F(Vex) , 0 , 502, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #328 [ref=1x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 557, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #325 [ref=4x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 548, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #326 [ref=1x] + { F(Vex) , 0 , 497, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #327 [ref=2x] + { F(Vec)|F(Vex) , 0 , 500, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #328 [ref=1x] { F(Vec)|F(Vex) , 0 , 215, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #329 [ref=4x] { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(K)|X(SAE)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #330 [ref=2x] { F(Evex)|F(Vec) , X(B16)|X(K)|X(SAE)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #331 [ref=2x] { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(SAE)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #332 [ref=2x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(SAE)|X(Z) , 535, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #333 [ref=2x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(SAE)|X(Z) , 533, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #333 [ref=2x] { 0 , 0 , 435, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #334 [ref=3x] - { 0 , 0 , 437, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #335 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 72 , 8 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // 
#336 [ref=4x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 439, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #337 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 295, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #338 [ref=1x] - { F(EvexTransformable)|F(Vec)|F(Vex) , 0 , 72 , 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #339 [ref=2x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 116, 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #340 [ref=6x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 82 , 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #341 [ref=2x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 219, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #342 [ref=4x] - { F(Vec)|F(Vex) , 0 , 560, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #343 [ref=3x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 164, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #344 [ref=3x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 169, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #345 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 174, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #346 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 80 , 8 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #347 [ref=1x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 223, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #348 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 280, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #349 [ref=4x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 88 , 8 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #350 [ref=1x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 441, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #351 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 72 , 8 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #335 [ref=4x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 437, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #336 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 295, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #337 [ref=1x] + { F(EvexTransformable)|F(Vec)|F(Vex) , 0 , 72 , 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #338 [ref=2x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 116, 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #339 [ref=6x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 82 , 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #340 [ref=2x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 219, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #341 [ref=4x] + { F(Vec)|F(Vex) , 0 , 558, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #342 [ref=3x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 164, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #343 [ref=3x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 169, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #344 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 174, 5 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #345 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 80 , 8 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #346 [ref=1x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 223, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #347 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 280, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #348 [ref=4x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 88 , 8 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #349 [ref=1x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 439, 2 , CONTROL_FLOW(Regular), 
SAME_REG_HINT(None)}, // #350 [ref=1x] + { 0 , 0 , 441, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #351 [ref=1x] { 0 , 0 , 443, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #352 [ref=1x] - { 0 , 0 , 445, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #353 [ref=1x] - { F(Evex)|F(Vec) , X(B32) , 298, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #354 [ref=1x] - { F(Evex)|F(Vec) , X(B64) , 298, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #355 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 280, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #356 [ref=1x] - { F(Evex)|F(Vec) , X(B64)|X(K)|X(Z) , 280, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #357 [ref=5x] - { F(EvexTransformable)|F(Vec)|F(Vex) , 0 , 262, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #358 [ref=2x] - { F(Evex)|F(Vec) , X(B32)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #359 [ref=2x] - { F(EvexTransformable)|F(Vec)|F(Vex) , 0 , 262, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #360 [ref=2x] - { F(Evex)|F(Vec) , X(B32)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #361 [ref=2x] - { F(Evex)|F(Vec) , X(B64)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #362 [ref=2x] - { F(Evex)|F(Vec) , X(B64)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #363 [ref=2x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #364 [ref=13x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 561, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #365 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 562, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #366 [ref=1x] - { F(Evex)|F(Vec) , 0 , 563, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #367 [ref=6x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 447, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #368 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 564, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #369 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 265, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #370 [ref=1x] - { F(Evex)|F(Vec) , X(K) , 271, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #371 [ref=2x] - { F(Evex)|F(Vec) , X(B32)|X(K) , 271, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #372 [ref=2x] - { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex) , X(K) , 301, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #373 [ref=4x] - { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex) , X(B32)|X(K) , 301, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #374 [ref=2x] - { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex) , X(B64)|X(K) , 301, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #375 [ref=2x] - { F(Vec)|F(Vex) , 0 , 513, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #376 [ref=1x] - { F(Vec)|F(Vex) , 0 , 514, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #377 [ref=1x] - { F(Vec)|F(Vex) , 0 , 515, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #378 [ref=1x] - { F(Vec)|F(Vex) , 0 , 516, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #379 [ref=1x] - { F(Evex)|F(Vec) , X(B64)|X(K) , 271, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #380 [ref=4x] - { F(Evex)|F(Vec) , X(B32)|X(K)|X(Z) , 280, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #381 [ref=6x] - { F(Evex)|F(EvexCompat)|F(PreferEvex)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #382 [ref=4x] - { F(Vec)|F(Vex) , 0 , 266, 1 , 
CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #383 [ref=2x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 263, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #384 [ref=2x] - { F(Vec)|F(Vex) , 0 , 227, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #385 [ref=2x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(K)|X(Z) , 96 , 8 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #386 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 96 , 8 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #387 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(K)|X(Z) , 231, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #388 [ref=2x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 517, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #389 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 518, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #390 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 565, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #391 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 566, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #392 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 567, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #393 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 568, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #394 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 569, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #395 [ref=1x] - { F(Vec)|F(Vex) , 0 , 419, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #396 [ref=12x] - { F(Evex)|F(EvexCompat)|F(PreferEvex)|F(Vec)|F(Vex) , X(B64)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #397 [ref=2x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #398 [ref=8x] - { F(Evex)|F(Vec) , 0 , 570, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #399 [ref=4x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 304, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #400 [ref=6x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 307, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #401 [ref=9x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 310, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #402 [ref=3x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 283, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #403 [ref=4x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 313, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #404 [ref=2x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 277, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #405 [ref=6x] - { F(Vec)|F(Vex) , 0 , 207, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #406 [ref=1x] - { F(Evex)|F(Vec) , X(B32)|X(K)|X(Z) , 292, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #407 [ref=3x] - { F(Evex)|F(Vec) , X(B64)|X(K)|X(Z) , 292, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #408 [ref=3x] - { F(Vec)|F(Vex) , 0 , 449, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #409 [ref=4x] - { F(Evex)|F(Vec)|F(Vsib) , X(K) , 316, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #410 [ref=2x] + { F(Evex)|F(Vec) , X(B32) , 298, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #353 [ref=1x] + { F(Evex)|F(Vec) , X(B64) , 298, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #354 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 280, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #355 [ref=1x] + { 
F(Evex)|F(Vec) , X(B64)|X(K)|X(Z) , 280, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #356 [ref=5x] + { F(EvexTransformable)|F(Vec)|F(Vex) , 0 , 262, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #357 [ref=2x] + { F(Evex)|F(Vec) , X(B32)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #358 [ref=2x] + { F(EvexTransformable)|F(Vec)|F(Vex) , 0 , 262, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #359 [ref=2x] + { F(Evex)|F(Vec) , X(B32)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #360 [ref=2x] + { F(Evex)|F(Vec) , X(B64)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #361 [ref=2x] + { F(Evex)|F(Vec) , X(B64)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #362 [ref=2x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #363 [ref=13x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 559, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #364 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 560, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #365 [ref=1x] + { F(Evex)|F(Vec) , 0 , 561, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #366 [ref=6x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 445, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #367 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 562, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #368 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 265, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #369 [ref=1x] + { F(Evex)|F(Vec) , X(ImplicitZ)|X(K) , 271, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #370 [ref=2x] + { F(Evex)|F(Vec) , X(B32)|X(ImplicitZ)|X(K) , 271, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #371 [ref=2x] + { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex) , X(ImplicitZ)|X(K) , 301, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #372 [ref=4x] + { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex) , X(B32)|X(ImplicitZ)|X(K) , 301, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #373 [ref=2x] + { F(Evex)|F(EvexKReg)|F(Vec)|F(Vex) , X(B64)|X(ImplicitZ)|X(K) , 301, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #374 [ref=2x] + { F(Vec)|F(Vex) , 0 , 511, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #375 [ref=1x] + { F(Vec)|F(Vex) , 0 , 512, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #376 [ref=1x] + { F(Vec)|F(Vex) , 0 , 513, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #377 [ref=1x] + { F(Vec)|F(Vex) , 0 , 514, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #378 [ref=1x] + { F(Evex)|F(Vec) , X(B64)|X(ImplicitZ)|X(K) , 271, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #379 [ref=4x] + { F(Evex)|F(Vec) , X(B32)|X(K)|X(Z) , 280, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #380 [ref=6x] + { F(Evex)|F(EvexCompat)|F(PreferEvex)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #381 [ref=4x] + { F(Vec)|F(Vex) , 0 , 266, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #382 [ref=2x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 263, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #383 [ref=2x] + { F(Vec)|F(Vex) , 0 , 227, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #384 [ref=2x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(K)|X(Z) , 96 , 8 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #385 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 96 , 8 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #386 
[ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(K)|X(Z) , 231, 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #387 [ref=2x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 515, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #388 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 516, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #389 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 563, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #390 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 564, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #391 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 565, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #392 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 566, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #393 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 567, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #394 [ref=1x] + { F(Vec)|F(Vex) , 0 , 419, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #395 [ref=12x] + { F(Evex)|F(EvexCompat)|F(PreferEvex)|F(Vec)|F(Vex) , X(B64)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #396 [ref=2x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #397 [ref=8x] + { F(Evex)|F(Vec) , 0 , 568, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #398 [ref=4x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 304, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #399 [ref=6x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 307, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #400 [ref=9x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 310, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #401 [ref=3x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 283, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #402 [ref=4x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 313, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #403 [ref=2x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 277, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #404 [ref=6x] + { F(Vec)|F(Vex) , 0 , 207, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #405 [ref=1x] + { F(Evex)|F(Vec) , X(B32)|X(K)|X(Z) , 292, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #406 [ref=3x] + { F(Evex)|F(Vec) , X(B64)|X(K)|X(Z) , 292, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #407 [ref=3x] + { F(Vec)|F(Vex) , 0 , 447, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #408 [ref=4x] + { F(Evex)|F(Vec)|F(Vsib) , X(K) , 316, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #409 [ref=2x] + { F(Evex)|F(Vec)|F(Vsib) , X(K) , 449, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #410 [ref=2x] { F(Evex)|F(Vec)|F(Vsib) , X(K) , 451, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #411 [ref=2x] - { F(Evex)|F(Vec)|F(Vsib) , X(K) , 453, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #412 [ref=2x] - { F(Evex)|F(Vec)|F(Vsib) , X(K) , 319, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #413 [ref=2x] - { F(Vec)|F(Vex) , 0 , 455, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #414 [ref=8x] - { F(Evex)|F(Vec) , X(K) , 322, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #415 [ref=5x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 292, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #416 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 292, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #417 [ref=2x] - { 
F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 122, 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #418 [ref=3x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 292, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #419 [ref=2x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(K)|X(Z) , 122, 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #420 [ref=2x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 122, 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #421 [ref=3x] - { F(Evex)|F(Vec) , X(B64)|X(K)|X(Z) , 128, 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #422 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #423 [ref=6x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #424 [ref=2x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #425 [ref=2x] - { F(Evex)|F(Vec) , X(B32)|X(K) , 322, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #426 [ref=2x] - { F(Evex)|F(Vec) , X(B64)|X(K) , 322, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #427 [ref=2x] + { F(Evex)|F(Vec)|F(Vsib) , X(K) , 319, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #412 [ref=2x] + { F(Vec)|F(Vex) , 0 , 453, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #413 [ref=8x] + { F(Evex)|F(Vec) , X(ImplicitZ)|X(K) , 322, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #414 [ref=5x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 292, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #415 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 292, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #416 [ref=2x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 122, 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #417 [ref=3x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , 0 , 292, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #418 [ref=2x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(K)|X(Z) , 122, 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #419 [ref=2x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 122, 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #420 [ref=3x] + { F(Evex)|F(Vec) , X(B64)|X(K)|X(Z) , 128, 6 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #421 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #422 [ref=6x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #423 [ref=2x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(K)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(WO)}, // #424 [ref=2x] + { F(Evex)|F(Vec) , X(B32)|X(ImplicitZ)|X(K) , 322, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #425 [ref=2x] + { F(Evex)|F(Vec) , X(B64)|X(ImplicitZ)|X(K) , 322, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #426 [ref=2x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 533, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #427 [ref=2x] { F(Evex)|F(Vec) , X(K)|X(Z) , 535, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #428 [ref=2x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 537, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #429 [ref=2x] - { F(Evex)|F(Vec) , X(B16)|X(K)|X(Z) , 280, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #430 [ref=2x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 536, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #431 [ref=2x] - { 
F(Vec)|F(Vex) , 0 , 537, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #432 [ref=2x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 549, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #433 [ref=1x] - { F(Evex)|F(Vec) , X(K)|X(Z) , 550, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #434 [ref=1x] - { F(EvexTransformable)|F(Vec)|F(Vex) , 0 , 292, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #435 [ref=2x] - { F(EvexTransformable)|F(Vec)|F(Vex) , 0 , 549, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #436 [ref=1x] - { F(EvexTransformable)|F(Vec)|F(Vex) , 0 , 550, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #437 [ref=1x] - { F(Evex)|F(Vec) , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #438 [ref=1x] - { F(Vec)|F(Vex) , 0 , 571, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #439 [ref=1x] - { F(Vec)|F(Vex) , 0 , 572, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #440 [ref=1x] - { F(Vec)|F(Vex) , 0 , 573, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #441 [ref=1x] - { F(Evex)|F(Vec) , X(B32)|X(K)|X(Z) , 266, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #442 [ref=2x] - { F(Evex)|F(Vec) , X(B64)|X(K)|X(Z) , 266, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #443 [ref=2x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(K)|X(Z) , 265, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #444 [ref=1x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 265, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #445 [ref=1x] - { F(Vec)|F(Vex) , 0 , 262, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #446 [ref=2x] - { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 280, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #447 [ref=1x] - { F(Vec)|F(Vex) , 0 , 110, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #448 [ref=2x] - { 0 , 0 , 27 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #449 [ref=2x] - { 0 , 0 , 28 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #450 [ref=2x] - { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 25 , 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #451 [ref=1x] - { 0 , 0 , 236, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #452 [ref=1x] - { F(XAcquire) , 0 , 25 , 8 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #453 [ref=1x] - { 0 , 0 , 574, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #454 [ref=6x] - { 0 , 0 , 575, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)} // #455 [ref=6x] + { F(Evex)|F(Vec) , X(B16)|X(K)|X(Z) , 280, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #429 [ref=2x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 534, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #430 [ref=2x] + { F(Vec)|F(Vex) , 0 , 535, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #431 [ref=2x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 547, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #432 [ref=1x] + { F(Evex)|F(Vec) , X(K)|X(Z) , 548, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #433 [ref=1x] + { F(EvexTransformable)|F(Vec)|F(Vex) , 0 , 292, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #434 [ref=2x] + { F(EvexTransformable)|F(Vec)|F(Vex) , 0 , 547, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #435 [ref=1x] + { F(EvexTransformable)|F(Vec)|F(Vex) , 0 , 548, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #436 [ref=1x] + { F(Evex)|F(Vec) , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 262, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #437 [ref=1x] + { F(Vec)|F(Vex) , 0 , 569, 1 , CONTROL_FLOW(Regular), 
SAME_REG_HINT(None)}, // #438 [ref=1x] + { F(Vec)|F(Vex) , 0 , 570, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #439 [ref=1x] + { F(Vec)|F(Vex) , 0 , 571, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #440 [ref=1x] + { F(Evex)|F(Vec) , X(B32)|X(K)|X(Z) , 266, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #441 [ref=2x] + { F(Evex)|F(Vec) , X(B64)|X(K)|X(Z) , 266, 2 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #442 [ref=2x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(K)|X(Z) , 265, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #443 [ref=1x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B32)|X(K)|X(Z) , 265, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #444 [ref=1x] + { F(Vec)|F(Vex) , 0 , 262, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #445 [ref=2x] + { F(Evex)|F(EvexCompat)|F(Vec)|F(Vex) , X(B64)|X(ER)|X(K)|X(SAE)|X(Z) , 280, 3 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #446 [ref=1x] + { F(Vec)|F(Vex) , 0 , 110, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #447 [ref=2x] + { 0 , 0 , 27 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #448 [ref=2x] + { 0 , 0 , 28 , 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #449 [ref=2x] + { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 25 , 4 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #450 [ref=1x] + { 0 , 0 , 236, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #451 [ref=1x] + { F(XAcquire) , 0 , 25 , 8 , CONTROL_FLOW(Regular), SAME_REG_HINT(RO)}, // #452 [ref=1x] + { 0 , 0 , 572, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)}, // #453 [ref=6x] + { 0 , 0 , 573, 1 , CONTROL_FLOW(Regular), SAME_REG_HINT(None)} // #454 [ref=6x] }; #undef SAME_REG_HINT #undef CONTROL_FLOW @@ -5160,145 +5159,143 @@ const InstDB::InstSignature InstDB::_instSignatureTable[] = { ROW(4, 1, 1, 0, 55 , 55 , 50 , 10 , 0 , 0 ), // {zmm, zmm, xmm|m128|mem, i8|u8} ROW(1, 1, 0, 1, 39 , 0 , 0 , 0 , 0 , 0 ), // #435 {} ROW(1, 0, 1, 1, 41 , 0 , 0 , 0 , 0 , 0 ), // #436 {} - ROW(0, 1, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #437 {} - ROW(0, 1, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // {} - ROW(2, 1, 1, 0, 28 , 49 , 0 , 0 , 0 , 0 ), // #439 {r32|m32|mem, xmm} + ROW(2, 1, 1, 0, 28 , 49 , 0 , 0 , 0 , 0 ), // #437 {r32|m32|mem, xmm} ROW(2, 1, 1, 0, 49 , 28 , 0 , 0 , 0 , 0 ), // {xmm, r32|m32|mem} - ROW(2, 1, 1, 0, 111, 49 , 0 , 0 , 0 , 0 ), // #441 {r32|m16|mem, xmm} + ROW(2, 1, 1, 0, 111, 49 , 0 , 0 , 0 , 0 ), // #439 {r32|m16|mem, xmm} ROW(2, 1, 1, 0, 49 , 111, 0 , 0 , 0 , 0 ), // {xmm, r32|m16|mem} - ROW(2, 1, 0, 0, 28 , 6 , 0 , 0 , 0 , 0 ), // #443 {r32|m32|mem, r32} + ROW(2, 1, 0, 0, 28 , 6 , 0 , 0 , 0 , 0 ), // #441 {r32|m32|mem, r32} ROW(2, 0, 1, 0, 29 , 8 , 0 , 0 , 0 , 0 ), // {r64|m64|mem, r64} - ROW(2, 1, 0, 0, 6 , 28 , 0 , 0 , 0 , 0 ), // #445 {r32, r32|m32|mem} + ROW(2, 1, 0, 0, 6 , 28 , 0 , 0 , 0 , 0 ), // #443 {r32, r32|m32|mem} ROW(2, 0, 1, 0, 8 , 29 , 0 , 0 , 0 , 0 ), // {r64, r64|m64|mem} - ROW(2, 1, 1, 0, 144, 64 , 0 , 0 , 0 , 0 ), // #447 {xmm|ymm|zmm, xmm|m64|mem} + ROW(2, 1, 1, 0, 144, 64 , 0 , 0 , 0 , 0 ), // #445 {xmm|ymm|zmm, xmm|m64|mem} ROW(2, 0, 1, 0, 144, 8 , 0 , 0 , 0 , 0 ), // {xmm|ymm|zmm, r64} - ROW(3, 1, 1, 0, 49 , 49 , 58 , 0 , 0 , 0 ), // #449 {xmm, xmm, xmm|m128|mem|i8|u8} + ROW(3, 1, 1, 0, 49 , 49 , 58 , 0 , 0 , 0 ), // #447 {xmm, xmm, xmm|m128|mem|i8|u8} ROW(3, 1, 1, 0, 49 , 51 , 145, 0 , 0 , 0 ), // {xmm, m128|mem, i8|u8|xmm} - ROW(2, 1, 1, 0, 71 , 96 , 0 , 0 , 0 , 0 ), // #451 {vm32x, xmm|ymm} + ROW(2, 1, 1, 0, 71 , 96 , 0 , 0 , 0 , 0 ), // #449 {vm32x, xmm|ymm} ROW(2, 1, 1, 0, 72 , 55 , 0 
, 0 , 0 , 0 ), // {vm32y, zmm} - ROW(2, 1, 1, 0, 118, 49 , 0 , 0 , 0 , 0 ), // #453 {vm64x|vm64y, xmm} + ROW(2, 1, 1, 0, 118, 49 , 0 , 0 , 0 , 0 ), // #451 {vm64x|vm64y, xmm} ROW(2, 1, 1, 0, 76 , 52 , 0 , 0 , 0 , 0 ), // {vm64z, ymm} - ROW(3, 1, 1, 0, 49 , 49 , 50 , 0 , 0 , 0 ), // #455 {xmm, xmm, xmm|m128|mem} + ROW(3, 1, 1, 0, 49 , 49 , 50 , 0 , 0 , 0 ), // #453 {xmm, xmm, xmm|m128|mem} ROW(3, 1, 1, 0, 49 , 51 , 49 , 0 , 0 , 0 ), // {xmm, m128|mem, xmm} - ROW(1, 1, 0, 1, 36 , 0 , 0 , 0 , 0 , 0 ), // #457 {} - ROW(2, 1, 0, 1, 36 , 10 , 0 , 0 , 0 , 0 ), // #458 {, i8|u8} - ROW(2, 1, 0, 0, 27 , 4 , 0 , 0 , 0 , 0 ), // #459 {r16|m16|mem, r16} - ROW(3, 1, 1, 1, 49 , 50 , 146, 0 , 0 , 0 ), // #460 {xmm, xmm|m128|mem, } - ROW(2, 1, 1, 0, 120, 147, 0 , 0 , 0 , 0 ), // #461 {bnd, mib} - ROW(2, 1, 1, 0, 120, 122, 0 , 0 , 0 , 0 ), // #462 {bnd, mem} - ROW(2, 1, 1, 0, 147, 120, 0 , 0 , 0 , 0 ), // #463 {mib, bnd} - ROW(1, 1, 1, 1, 36 , 0 , 0 , 0 , 0 , 0 ), // #464 {} - ROW(2, 1, 1, 2, 38 , 39 , 0 , 0 , 0 , 0 ), // #465 {, } - ROW(1, 1, 1, 0, 122, 0 , 0 , 0 , 0 , 0 ), // #466 {mem} - ROW(1, 1, 1, 0, 31 , 0 , 0 , 0 , 0 , 0 ), // #467 {m64|mem} - ROW(0, 0, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #468 {} - ROW(1, 1, 1, 1, 148, 0 , 0 , 0 , 0 , 0 ), // #469 {} - ROW(3, 1, 1, 0, 49 , 64 , 10 , 0 , 0 , 0 ), // #470 {xmm, xmm|m64|mem, i8|u8} - ROW(3, 1, 1, 0, 49 , 117, 10 , 0 , 0 , 0 ), // #471 {xmm, xmm|m32|mem, i8|u8} - ROW(5, 0, 1, 4, 51 , 40 , 41 , 149, 150, 0 ), // #472 {m128|mem, , , , } - ROW(5, 1, 1, 4, 31 , 38 , 39 , 126, 151, 0 ), // #473 {m64|mem, , , , } - ROW(4, 1, 1, 4, 39 , 151, 126, 38 , 0 , 0 ), // #474 {, , , } - ROW(2, 0, 1, 2, 40 , 41 , 0 , 0 , 0 , 0 ), // #475 {, } - ROW(2, 1, 1, 0, 61 , 50 , 0 , 0 , 0 , 0 ), // #476 {mm, xmm|m128|mem} - ROW(2, 1, 1, 0, 49 , 62 , 0 , 0 , 0 , 0 ), // #477 {xmm, mm|m64|mem} - ROW(2, 1, 1, 0, 61 , 64 , 0 , 0 , 0 , 0 ), // #478 {mm, xmm|m64|mem} - ROW(2, 1, 1, 2, 37 , 36 , 0 , 0 , 0 , 0 ), // #479 {, } - ROW(1, 1, 1, 1, 39 , 0 , 0 , 0 , 0 , 0 ), // #480 {} - ROW(2, 1, 1, 0, 12 , 10 , 0 , 0 , 0 , 0 ), // #481 {i16|u16, i8|u8} - ROW(3, 1, 1, 0, 28 , 49 , 10 , 0 , 0 , 0 ), // #482 {r32|m32|mem, xmm, i8|u8} - ROW(1, 1, 1, 0, 109, 0 , 0 , 0 , 0 , 0 ), // #483 {m80|mem} - ROW(1, 1, 1, 0, 152, 0 , 0 , 0 , 0 , 0 ), // #484 {m16|m32} - ROW(1, 1, 1, 0, 153, 0 , 0 , 0 , 0 , 0 ), // #485 {m16|m32|m64} - ROW(1, 1, 1, 0, 154, 0 , 0 , 0 , 0 , 0 ), // #486 {m32|m64|m80|st} - ROW(1, 1, 1, 0, 21 , 0 , 0 , 0 , 0 , 0 ), // #487 {m16|mem} - ROW(1, 1, 1, 0, 155, 0 , 0 , 0 , 0 , 0 ), // #488 {ax|m16|mem} - ROW(1, 0, 1, 0, 122, 0 , 0 , 0 , 0 , 0 ), // #489 {mem} - ROW(2, 1, 1, 1, 10 , 39 , 0 , 0 , 0 , 0 ), // #490 {i8|u8, } - ROW(2, 1, 1, 0, 156, 157, 0 , 0 , 0 , 0 ), // #491 {al|ax|eax, i8|u8|dx} - ROW(2, 1, 1, 0, 158, 159, 0 , 0 , 0 , 0 ), // #492 {es:[memBase|zdi|m8|m16|m32], dx} - ROW(1, 1, 1, 0, 10 , 0 , 0 , 0 , 0 , 0 ), // #493 {i8|u8} - ROW(0, 1, 0, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #494 {} - ROW(3, 1, 1, 0, 86 , 86 , 86 , 0 , 0 , 0 ), // #495 {k, k, k} - ROW(2, 1, 1, 0, 86 , 86 , 0 , 0 , 0 , 0 ), // #496 {k, k} - ROW(3, 1, 1, 0, 86 , 86 , 10 , 0 , 0 , 0 ), // #497 {k, k, i8|u8} - ROW(1, 1, 1, 1, 160, 0 , 0 , 0 , 0 , 0 ), // #498 {} - ROW(1, 1, 1, 0, 30 , 0 , 0 , 0 , 0 , 0 ), // #499 {m32|mem} - ROW(1, 0, 1, 0, 57 , 0 , 0 , 0 , 0 , 0 ), // #500 {m512|mem} - ROW(1, 1, 1, 0, 27 , 0 , 0 , 0 , 0 , 0 ), // #501 {r16|m16|mem} - ROW(3, 1, 1, 1, 49 , 49 , 161, 0 , 0 , 0 ), // #502 {xmm, xmm, } - ROW(3, 1, 1, 1, 61 , 61 , 162, 0 , 0 , 0 ), // #503 {mm, mm, } - ROW(3, 1, 1, 3, 163, 126, 38 , 0 
, 0 , 0 ), // #504 {, , } - ROW(2, 1, 1, 0, 61 , 49 , 0 , 0 , 0 , 0 ), // #505 {mm, xmm} - ROW(2, 1, 1, 0, 6 , 49 , 0 , 0 , 0 , 0 ), // #506 {r32, xmm} - ROW(2, 1, 1, 0, 31 , 61 , 0 , 0 , 0 , 0 ), // #507 {m64|mem, mm} - ROW(2, 1, 1, 0, 49 , 61 , 0 , 0 , 0 , 0 ), // #508 {xmm, mm} - ROW(2, 1, 1, 2, 39 , 126, 0 , 0 , 0 , 0 ), // #509 {, } - ROW(3, 1, 1, 3, 39 , 126, 151, 0 , 0 , 0 ), // #510 {, , } - ROW(2, 1, 1, 0, 164, 156, 0 , 0 , 0 , 0 ), // #511 {u8|dx, al|ax|eax} - ROW(2, 1, 1, 0, 159, 165, 0 , 0 , 0 , 0 ), // #512 {dx, ds:[memBase|zsi|m8|m16|m32]} - ROW(6, 1, 1, 3, 49 , 50 , 10 , 126, 39 , 38 ), // #513 {xmm, xmm|m128|mem, i8|u8, , , } - ROW(6, 1, 1, 3, 49 , 50 , 10 , 146, 39 , 38 ), // #514 {xmm, xmm|m128|mem, i8|u8, , , } - ROW(4, 1, 1, 1, 49 , 50 , 10 , 126, 0 , 0 ), // #515 {xmm, xmm|m128|mem, i8|u8, } - ROW(4, 1, 1, 1, 49 , 50 , 10 , 146, 0 , 0 ), // #516 {xmm, xmm|m128|mem, i8|u8, } - ROW(3, 1, 1, 0, 132, 49 , 10 , 0 , 0 , 0 ), // #517 {r32|m8|mem, xmm, i8|u8} - ROW(3, 0, 1, 0, 29 , 49 , 10 , 0 , 0 , 0 ), // #518 {r64|m64|mem, xmm, i8|u8} - ROW(3, 1, 1, 0, 49 , 132, 10 , 0 , 0 , 0 ), // #519 {xmm, r32|m8|mem, i8|u8} - ROW(3, 1, 1, 0, 49 , 28 , 10 , 0 , 0 , 0 ), // #520 {xmm, r32|m32|mem, i8|u8} - ROW(3, 0, 1, 0, 49 , 29 , 10 , 0 , 0 , 0 ), // #521 {xmm, r64|m64|mem, i8|u8} - ROW(3, 1, 1, 0, 63 , 111, 10 , 0 , 0 , 0 ), // #522 {mm|xmm, r32|m16|mem, i8|u8} - ROW(2, 1, 1, 0, 6 , 63 , 0 , 0 , 0 , 0 ), // #523 {r32, mm|xmm} - ROW(2, 1, 1, 0, 49 , 10 , 0 , 0 , 0 , 0 ), // #524 {xmm, i8|u8} - ROW(1, 0, 1, 0, 136, 0 , 0 , 0 , 0 , 0 ), // #525 {r32|r64} - ROW(3, 1, 1, 3, 38 , 39 , 126, 0 , 0 , 0 ), // #526 {, , } - ROW(1, 1, 1, 0, 1 , 0 , 0 , 0 , 0 , 0 ), // #527 {r8lo|r8hi|m8|mem} - ROW(3, 0, 1, 0, 166, 166, 166, 0 , 0 , 0 ), // #528 {tmm, tmm, tmm} - ROW(2, 0, 1, 0, 166, 167, 0 , 0 , 0 , 0 ), // #529 {tmm, tmem} - ROW(2, 0, 1, 0, 167, 166, 0 , 0 , 0 , 0 ), // #530 {tmem, tmm} - ROW(1, 0, 1, 0, 166, 0 , 0 , 0 , 0 , 0 ), // #531 {tmm} - ROW(3, 1, 1, 2, 6 , 38 , 39 , 0 , 0 , 0 ), // #532 {r32, , } - ROW(6, 1, 1, 0, 55 , 55 , 55 , 55 , 55 , 51 ), // #533 {zmm, zmm, zmm, zmm, zmm, m128|mem} - ROW(6, 1, 1, 0, 49 , 49 , 49 , 49 , 49 , 51 ), // #534 {xmm, xmm, xmm, xmm, xmm, m128|mem} - ROW(3, 1, 1, 0, 49 , 49 , 64 , 0 , 0 , 0 ), // #535 {xmm, xmm, xmm|m64|mem} - ROW(3, 1, 1, 0, 49 , 49 , 119, 0 , 0 , 0 ), // #536 {xmm, xmm, xmm|m16|mem} - ROW(3, 1, 1, 0, 49 , 49 , 117, 0 , 0 , 0 ), // #537 {xmm, xmm, xmm|m32|mem} - ROW(2, 1, 1, 0, 96 , 21 , 0 , 0 , 0 , 0 ), // #538 {xmm|ymm, m16|mem} - ROW(2, 1, 1, 0, 52 , 51 , 0 , 0 , 0 , 0 ), // #539 {ymm, m128|mem} - ROW(2, 1, 1, 0, 168, 64 , 0 , 0 , 0 , 0 ), // #540 {ymm|zmm, xmm|m64|mem} - ROW(2, 1, 1, 0, 168, 51 , 0 , 0 , 0 , 0 ), // #541 {ymm|zmm, m128|mem} - ROW(2, 1, 1, 0, 55 , 54 , 0 , 0 , 0 , 0 ), // #542 {zmm, m256|mem} - ROW(2, 1, 1, 0, 144, 117, 0 , 0 , 0 , 0 ), // #543 {xmm|ymm|zmm, m32|mem|xmm} - ROW(4, 1, 1, 0, 115, 49 , 64 , 10 , 0 , 0 ), // #544 {xmm|k, xmm, xmm|m64|mem, i8|u8} - ROW(4, 1, 1, 0, 86 , 49 , 119, 10 , 0 , 0 ), // #545 {k, xmm, xmm|m16|mem, i8|u8} - ROW(4, 1, 1, 0, 115, 49 , 117, 10 , 0 , 0 ), // #546 {xmm|k, xmm, xmm|m32|mem, i8|u8} - ROW(2, 1, 1, 0, 49 , 169, 0 , 0 , 0 , 0 ), // #547 {xmm, xmm|m128|ymm|m256|zmm|m512} - ROW(3, 1, 1, 0, 50 , 168, 10 , 0 , 0 , 0 ), // #548 {xmm|m128|mem, ymm|zmm, i8|u8} - ROW(4, 1, 1, 0, 49 , 49 , 64 , 10 , 0 , 0 ), // #549 {xmm, xmm, xmm|m64|mem, i8|u8} - ROW(4, 1, 1, 0, 49 , 49 , 117, 10 , 0 , 0 ), // #550 {xmm, xmm, xmm|m32|mem, i8|u8} - ROW(3, 1, 1, 0, 86 , 169, 10 , 0 , 0 , 0 ), // #551 
{k, xmm|m128|ymm|m256|zmm|m512, i8|u8} - ROW(3, 1, 1, 0, 86 , 64 , 10 , 0 , 0 , 0 ), // #552 {k, xmm|m64|mem, i8|u8} - ROW(3, 1, 1, 0, 86 , 119, 10 , 0 , 0 , 0 ), // #553 {k, xmm|m16|mem, i8|u8} - ROW(3, 1, 1, 0, 86 , 117, 10 , 0 , 0 , 0 ), // #554 {k, xmm|m32|mem, i8|u8} - ROW(1, 1, 1, 0, 72 , 0 , 0 , 0 , 0 , 0 ), // #555 {vm32y} - ROW(1, 1, 1, 0, 73 , 0 , 0 , 0 , 0 , 0 ), // #556 {vm32z} - ROW(1, 1, 1, 0, 76 , 0 , 0 , 0 , 0 , 0 ), // #557 {vm64z} - ROW(4, 1, 1, 0, 49 , 49 , 119, 10 , 0 , 0 ), // #558 {xmm, xmm, xmm|m16|mem, i8|u8} - ROW(4, 1, 1, 0, 55 , 55 , 53 , 10 , 0 , 0 ), // #559 {zmm, zmm, ymm|m256|mem, i8|u8} - ROW(2, 1, 1, 0, 6 , 96 , 0 , 0 , 0 , 0 ), // #560 {r32, xmm|ymm} - ROW(2, 1, 1, 0, 144, 170, 0 , 0 , 0 , 0 ), // #561 {xmm|ymm|zmm, xmm|m8|mem|r32} - ROW(2, 1, 1, 0, 144, 171, 0 , 0 , 0 , 0 ), // #562 {xmm|ymm|zmm, xmm|m32|mem|r32} - ROW(2, 1, 1, 0, 144, 86 , 0 , 0 , 0 , 0 ), // #563 {xmm|ymm|zmm, k} - ROW(2, 1, 1, 0, 144, 172, 0 , 0 , 0 , 0 ), // #564 {xmm|ymm|zmm, xmm|m16|mem|r32} - ROW(3, 1, 1, 0, 111, 49 , 10 , 0 , 0 , 0 ), // #565 {r32|m16|mem, xmm, i8|u8} - ROW(4, 1, 1, 0, 49 , 49 , 132, 10 , 0 , 0 ), // #566 {xmm, xmm, r32|m8|mem, i8|u8} - ROW(4, 1, 1, 0, 49 , 49 , 28 , 10 , 0 , 0 ), // #567 {xmm, xmm, r32|m32|mem, i8|u8} - ROW(4, 0, 1, 0, 49 , 49 , 29 , 10 , 0 , 0 ), // #568 {xmm, xmm, r64|m64|mem, i8|u8} - ROW(4, 1, 1, 0, 49 , 49 , 111, 10 , 0 , 0 ), // #569 {xmm, xmm, r32|m16|mem, i8|u8} - ROW(2, 1, 1, 0, 86 , 144, 0 , 0 , 0 , 0 ), // #570 {k, xmm|ymm|zmm} - ROW(2, 1, 1, 0, 52 , 49 , 0 , 0 , 0 , 0 ), // #571 {ymm, xmm} - ROW(2, 1, 1, 0, 52 , 52 , 0 , 0 , 0 , 0 ), // #572 {ymm, ymm} - ROW(3, 1, 1, 0, 52 , 52 , 49 , 0 , 0 , 0 ), // #573 {ymm, ymm, xmm} - ROW(3, 1, 1, 2, 122, 38 , 39 , 0 , 0 , 0 ), // #574 {mem, , } - ROW(3, 0, 1, 2, 122, 38 , 39 , 0 , 0 , 0 ) // #575 {mem, , } + ROW(1, 1, 0, 1, 36 , 0 , 0 , 0 , 0 , 0 ), // #455 {} + ROW(2, 1, 0, 1, 36 , 10 , 0 , 0 , 0 , 0 ), // #456 {, i8|u8} + ROW(2, 1, 0, 0, 27 , 4 , 0 , 0 , 0 , 0 ), // #457 {r16|m16|mem, r16} + ROW(3, 1, 1, 1, 49 , 50 , 146, 0 , 0 , 0 ), // #458 {xmm, xmm|m128|mem, } + ROW(2, 1, 1, 0, 120, 147, 0 , 0 , 0 , 0 ), // #459 {bnd, mib} + ROW(2, 1, 1, 0, 120, 122, 0 , 0 , 0 , 0 ), // #460 {bnd, mem} + ROW(2, 1, 1, 0, 147, 120, 0 , 0 , 0 , 0 ), // #461 {mib, bnd} + ROW(1, 1, 1, 1, 36 , 0 , 0 , 0 , 0 , 0 ), // #462 {} + ROW(2, 1, 1, 2, 38 , 39 , 0 , 0 , 0 , 0 ), // #463 {, } + ROW(1, 1, 1, 0, 122, 0 , 0 , 0 , 0 , 0 ), // #464 {mem} + ROW(1, 1, 1, 0, 31 , 0 , 0 , 0 , 0 , 0 ), // #465 {m64|mem} + ROW(0, 0, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #466 {} + ROW(1, 1, 1, 1, 148, 0 , 0 , 0 , 0 , 0 ), // #467 {} + ROW(3, 1, 1, 0, 49 , 64 , 10 , 0 , 0 , 0 ), // #468 {xmm, xmm|m64|mem, i8|u8} + ROW(3, 1, 1, 0, 49 , 117, 10 , 0 , 0 , 0 ), // #469 {xmm, xmm|m32|mem, i8|u8} + ROW(5, 0, 1, 4, 51 , 40 , 41 , 149, 150, 0 ), // #470 {m128|mem, , , , } + ROW(5, 1, 1, 4, 31 , 38 , 39 , 126, 151, 0 ), // #471 {m64|mem, , , , } + ROW(4, 1, 1, 4, 39 , 151, 126, 38 , 0 , 0 ), // #472 {, , , } + ROW(2, 0, 1, 2, 40 , 41 , 0 , 0 , 0 , 0 ), // #473 {, } + ROW(2, 1, 1, 0, 61 , 50 , 0 , 0 , 0 , 0 ), // #474 {mm, xmm|m128|mem} + ROW(2, 1, 1, 0, 49 , 62 , 0 , 0 , 0 , 0 ), // #475 {xmm, mm|m64|mem} + ROW(2, 1, 1, 0, 61 , 64 , 0 , 0 , 0 , 0 ), // #476 {mm, xmm|m64|mem} + ROW(2, 1, 1, 2, 37 , 36 , 0 , 0 , 0 , 0 ), // #477 {, } + ROW(1, 1, 1, 1, 39 , 0 , 0 , 0 , 0 , 0 ), // #478 {} + ROW(2, 1, 1, 0, 12 , 10 , 0 , 0 , 0 , 0 ), // #479 {i16|u16, i8|u8} + ROW(3, 1, 1, 0, 28 , 49 , 10 , 0 , 0 , 0 ), // #480 {r32|m32|mem, xmm, i8|u8} + ROW(1, 1, 1, 
0, 109, 0 , 0 , 0 , 0 , 0 ), // #481 {m80|mem} + ROW(1, 1, 1, 0, 152, 0 , 0 , 0 , 0 , 0 ), // #482 {m16|m32} + ROW(1, 1, 1, 0, 153, 0 , 0 , 0 , 0 , 0 ), // #483 {m16|m32|m64} + ROW(1, 1, 1, 0, 154, 0 , 0 , 0 , 0 , 0 ), // #484 {m32|m64|m80|st} + ROW(1, 1, 1, 0, 21 , 0 , 0 , 0 , 0 , 0 ), // #485 {m16|mem} + ROW(1, 1, 1, 0, 155, 0 , 0 , 0 , 0 , 0 ), // #486 {ax|m16|mem} + ROW(1, 0, 1, 0, 122, 0 , 0 , 0 , 0 , 0 ), // #487 {mem} + ROW(2, 1, 1, 1, 10 , 39 , 0 , 0 , 0 , 0 ), // #488 {i8|u8, } + ROW(2, 1, 1, 0, 156, 157, 0 , 0 , 0 , 0 ), // #489 {al|ax|eax, i8|u8|dx} + ROW(2, 1, 1, 0, 158, 159, 0 , 0 , 0 , 0 ), // #490 {es:[memBase|zdi|m8|m16|m32], dx} + ROW(1, 1, 1, 0, 10 , 0 , 0 , 0 , 0 , 0 ), // #491 {i8|u8} + ROW(0, 1, 0, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #492 {} + ROW(3, 1, 1, 0, 86 , 86 , 86 , 0 , 0 , 0 ), // #493 {k, k, k} + ROW(2, 1, 1, 0, 86 , 86 , 0 , 0 , 0 , 0 ), // #494 {k, k} + ROW(3, 1, 1, 0, 86 , 86 , 10 , 0 , 0 , 0 ), // #495 {k, k, i8|u8} + ROW(1, 1, 1, 1, 160, 0 , 0 , 0 , 0 , 0 ), // #496 {} + ROW(1, 1, 1, 0, 30 , 0 , 0 , 0 , 0 , 0 ), // #497 {m32|mem} + ROW(1, 0, 1, 0, 57 , 0 , 0 , 0 , 0 , 0 ), // #498 {m512|mem} + ROW(1, 1, 1, 0, 27 , 0 , 0 , 0 , 0 , 0 ), // #499 {r16|m16|mem} + ROW(3, 1, 1, 1, 49 , 49 , 161, 0 , 0 , 0 ), // #500 {xmm, xmm, } + ROW(3, 1, 1, 1, 61 , 61 , 162, 0 , 0 , 0 ), // #501 {mm, mm, } + ROW(3, 1, 1, 3, 163, 126, 38 , 0 , 0 , 0 ), // #502 {, , } + ROW(2, 1, 1, 0, 61 , 49 , 0 , 0 , 0 , 0 ), // #503 {mm, xmm} + ROW(2, 1, 1, 0, 6 , 49 , 0 , 0 , 0 , 0 ), // #504 {r32, xmm} + ROW(2, 1, 1, 0, 31 , 61 , 0 , 0 , 0 , 0 ), // #505 {m64|mem, mm} + ROW(2, 1, 1, 0, 49 , 61 , 0 , 0 , 0 , 0 ), // #506 {xmm, mm} + ROW(2, 1, 1, 2, 39 , 126, 0 , 0 , 0 , 0 ), // #507 {, } + ROW(3, 1, 1, 3, 39 , 126, 151, 0 , 0 , 0 ), // #508 {, , } + ROW(2, 1, 1, 0, 164, 156, 0 , 0 , 0 , 0 ), // #509 {u8|dx, al|ax|eax} + ROW(2, 1, 1, 0, 159, 165, 0 , 0 , 0 , 0 ), // #510 {dx, ds:[memBase|zsi|m8|m16|m32]} + ROW(6, 1, 1, 3, 49 , 50 , 10 , 126, 39 , 38 ), // #511 {xmm, xmm|m128|mem, i8|u8, , , } + ROW(6, 1, 1, 3, 49 , 50 , 10 , 146, 39 , 38 ), // #512 {xmm, xmm|m128|mem, i8|u8, , , } + ROW(4, 1, 1, 1, 49 , 50 , 10 , 126, 0 , 0 ), // #513 {xmm, xmm|m128|mem, i8|u8, } + ROW(4, 1, 1, 1, 49 , 50 , 10 , 146, 0 , 0 ), // #514 {xmm, xmm|m128|mem, i8|u8, } + ROW(3, 1, 1, 0, 132, 49 , 10 , 0 , 0 , 0 ), // #515 {r32|m8|mem, xmm, i8|u8} + ROW(3, 0, 1, 0, 29 , 49 , 10 , 0 , 0 , 0 ), // #516 {r64|m64|mem, xmm, i8|u8} + ROW(3, 1, 1, 0, 49 , 132, 10 , 0 , 0 , 0 ), // #517 {xmm, r32|m8|mem, i8|u8} + ROW(3, 1, 1, 0, 49 , 28 , 10 , 0 , 0 , 0 ), // #518 {xmm, r32|m32|mem, i8|u8} + ROW(3, 0, 1, 0, 49 , 29 , 10 , 0 , 0 , 0 ), // #519 {xmm, r64|m64|mem, i8|u8} + ROW(3, 1, 1, 0, 63 , 111, 10 , 0 , 0 , 0 ), // #520 {mm|xmm, r32|m16|mem, i8|u8} + ROW(2, 1, 1, 0, 6 , 63 , 0 , 0 , 0 , 0 ), // #521 {r32, mm|xmm} + ROW(2, 1, 1, 0, 49 , 10 , 0 , 0 , 0 , 0 ), // #522 {xmm, i8|u8} + ROW(1, 0, 1, 0, 136, 0 , 0 , 0 , 0 , 0 ), // #523 {r32|r64} + ROW(3, 1, 1, 3, 38 , 39 , 126, 0 , 0 , 0 ), // #524 {, , } + ROW(1, 1, 1, 0, 1 , 0 , 0 , 0 , 0 , 0 ), // #525 {r8lo|r8hi|m8|mem} + ROW(3, 0, 1, 0, 166, 166, 166, 0 , 0 , 0 ), // #526 {tmm, tmm, tmm} + ROW(2, 0, 1, 0, 166, 167, 0 , 0 , 0 , 0 ), // #527 {tmm, tmem} + ROW(2, 0, 1, 0, 167, 166, 0 , 0 , 0 , 0 ), // #528 {tmem, tmm} + ROW(1, 0, 1, 0, 166, 0 , 0 , 0 , 0 , 0 ), // #529 {tmm} + ROW(3, 1, 1, 2, 6 , 38 , 39 , 0 , 0 , 0 ), // #530 {r32, , } + ROW(6, 1, 1, 0, 55 , 55 , 55 , 55 , 55 , 51 ), // #531 {zmm, zmm, zmm, zmm, zmm, m128|mem} + ROW(6, 1, 1, 0, 49 , 49 , 49 , 49 , 49 , 51 ), // 
#532 {xmm, xmm, xmm, xmm, xmm, m128|mem} + ROW(3, 1, 1, 0, 49 , 49 , 64 , 0 , 0 , 0 ), // #533 {xmm, xmm, xmm|m64|mem} + ROW(3, 1, 1, 0, 49 , 49 , 119, 0 , 0 , 0 ), // #534 {xmm, xmm, xmm|m16|mem} + ROW(3, 1, 1, 0, 49 , 49 , 117, 0 , 0 , 0 ), // #535 {xmm, xmm, xmm|m32|mem} + ROW(2, 1, 1, 0, 96 , 21 , 0 , 0 , 0 , 0 ), // #536 {xmm|ymm, m16|mem} + ROW(2, 1, 1, 0, 52 , 51 , 0 , 0 , 0 , 0 ), // #537 {ymm, m128|mem} + ROW(2, 1, 1, 0, 168, 64 , 0 , 0 , 0 , 0 ), // #538 {ymm|zmm, xmm|m64|mem} + ROW(2, 1, 1, 0, 168, 51 , 0 , 0 , 0 , 0 ), // #539 {ymm|zmm, m128|mem} + ROW(2, 1, 1, 0, 55 , 54 , 0 , 0 , 0 , 0 ), // #540 {zmm, m256|mem} + ROW(2, 1, 1, 0, 144, 117, 0 , 0 , 0 , 0 ), // #541 {xmm|ymm|zmm, m32|mem|xmm} + ROW(4, 1, 1, 0, 115, 49 , 64 , 10 , 0 , 0 ), // #542 {xmm|k, xmm, xmm|m64|mem, i8|u8} + ROW(4, 1, 1, 0, 86 , 49 , 119, 10 , 0 , 0 ), // #543 {k, xmm, xmm|m16|mem, i8|u8} + ROW(4, 1, 1, 0, 115, 49 , 117, 10 , 0 , 0 ), // #544 {xmm|k, xmm, xmm|m32|mem, i8|u8} + ROW(2, 1, 1, 0, 49 , 169, 0 , 0 , 0 , 0 ), // #545 {xmm, xmm|m128|ymm|m256|zmm|m512} + ROW(3, 1, 1, 0, 50 , 168, 10 , 0 , 0 , 0 ), // #546 {xmm|m128|mem, ymm|zmm, i8|u8} + ROW(4, 1, 1, 0, 49 , 49 , 64 , 10 , 0 , 0 ), // #547 {xmm, xmm, xmm|m64|mem, i8|u8} + ROW(4, 1, 1, 0, 49 , 49 , 117, 10 , 0 , 0 ), // #548 {xmm, xmm, xmm|m32|mem, i8|u8} + ROW(3, 1, 1, 0, 86 , 169, 10 , 0 , 0 , 0 ), // #549 {k, xmm|m128|ymm|m256|zmm|m512, i8|u8} + ROW(3, 1, 1, 0, 86 , 64 , 10 , 0 , 0 , 0 ), // #550 {k, xmm|m64|mem, i8|u8} + ROW(3, 1, 1, 0, 86 , 119, 10 , 0 , 0 , 0 ), // #551 {k, xmm|m16|mem, i8|u8} + ROW(3, 1, 1, 0, 86 , 117, 10 , 0 , 0 , 0 ), // #552 {k, xmm|m32|mem, i8|u8} + ROW(1, 1, 1, 0, 72 , 0 , 0 , 0 , 0 , 0 ), // #553 {vm32y} + ROW(1, 1, 1, 0, 73 , 0 , 0 , 0 , 0 , 0 ), // #554 {vm32z} + ROW(1, 1, 1, 0, 76 , 0 , 0 , 0 , 0 , 0 ), // #555 {vm64z} + ROW(4, 1, 1, 0, 49 , 49 , 119, 10 , 0 , 0 ), // #556 {xmm, xmm, xmm|m16|mem, i8|u8} + ROW(4, 1, 1, 0, 55 , 55 , 53 , 10 , 0 , 0 ), // #557 {zmm, zmm, ymm|m256|mem, i8|u8} + ROW(2, 1, 1, 0, 6 , 96 , 0 , 0 , 0 , 0 ), // #558 {r32, xmm|ymm} + ROW(2, 1, 1, 0, 144, 170, 0 , 0 , 0 , 0 ), // #559 {xmm|ymm|zmm, xmm|m8|mem|r32} + ROW(2, 1, 1, 0, 144, 171, 0 , 0 , 0 , 0 ), // #560 {xmm|ymm|zmm, xmm|m32|mem|r32} + ROW(2, 1, 1, 0, 144, 86 , 0 , 0 , 0 , 0 ), // #561 {xmm|ymm|zmm, k} + ROW(2, 1, 1, 0, 144, 172, 0 , 0 , 0 , 0 ), // #562 {xmm|ymm|zmm, xmm|m16|mem|r32} + ROW(3, 1, 1, 0, 111, 49 , 10 , 0 , 0 , 0 ), // #563 {r32|m16|mem, xmm, i8|u8} + ROW(4, 1, 1, 0, 49 , 49 , 132, 10 , 0 , 0 ), // #564 {xmm, xmm, r32|m8|mem, i8|u8} + ROW(4, 1, 1, 0, 49 , 49 , 28 , 10 , 0 , 0 ), // #565 {xmm, xmm, r32|m32|mem, i8|u8} + ROW(4, 0, 1, 0, 49 , 49 , 29 , 10 , 0 , 0 ), // #566 {xmm, xmm, r64|m64|mem, i8|u8} + ROW(4, 1, 1, 0, 49 , 49 , 111, 10 , 0 , 0 ), // #567 {xmm, xmm, r32|m16|mem, i8|u8} + ROW(2, 1, 1, 0, 86 , 144, 0 , 0 , 0 , 0 ), // #568 {k, xmm|ymm|zmm} + ROW(2, 1, 1, 0, 52 , 49 , 0 , 0 , 0 , 0 ), // #569 {ymm, xmm} + ROW(2, 1, 1, 0, 52 , 52 , 0 , 0 , 0 , 0 ), // #570 {ymm, ymm} + ROW(3, 1, 1, 0, 52 , 52 , 49 , 0 , 0 , 0 ), // #571 {ymm, ymm, xmm} + ROW(3, 1, 1, 2, 122, 38 , 39 , 0 , 0 , 0 ), // #572 {mem, , } + ROW(3, 0, 1, 2, 122, 38 , 39 , 0 , 0 , 0 ) // #573 {mem, , } }; #undef ROW @@ -5629,26 +5626,26 @@ const uint8_t InstDB::rwInfoIndexB[Inst::_kIdCount] = { 3, 3, 3, 3, 3, 104, 3, 0, 0, 0, 0, 0, 0, 3, 130, 105, 105, 3, 3, 3, 3, 69, 70, 3, 3, 3, 3, 71, 72, 105, 105, 105, 105, 105, 105, 118, 118, 0, 0, 0, 0, 118, 118, 118, 118, 118, 118, 0, 0, 124, 124, 124, 124, 124, 124, 124, 124, 124, - 124, 124, 124, 124, 
124, 124, 124, 161, 161, 3, 3, 3, 124, 3, 3, 124, 124, 130, - 130, 162, 162, 162, 3, 162, 3, 124, 124, 124, 124, 124, 3, 0, 0, 0, 0, 73, 23, - 74, 163, 140, 139, 141, 140, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, - 0, 3, 0, 3, 3, 0, 164, 103, 101, 102, 0, 0, 165, 165, 165, 165, 165, 165, 165, - 165, 165, 165, 165, 165, 124, 124, 3, 3, 148, 148, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 0, 0, 0, 0, 3, 3, 3, 166, 86, 86, 3, 3, 86, 86, 3, 3, 167, 167, 167, - 167, 3, 0, 0, 0, 0, 167, 167, 167, 167, 167, 167, 3, 3, 124, 124, 124, 3, 167, - 167, 3, 3, 124, 124, 124, 3, 3, 105, 86, 86, 86, 3, 3, 3, 168, 169, 168, 3, - 3, 3, 170, 168, 171, 3, 3, 3, 170, 168, 169, 168, 3, 3, 3, 170, 3, 3, 3, 3, - 3, 3, 3, 3, 172, 172, 0, 105, 105, 105, 105, 105, 105, 105, 105, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 142, 144, 0, 0, 142, 144, 0, 0, 142, 144, 0, 0, 143, - 144, 86, 86, 86, 142, 143, 144, 86, 86, 86, 142, 143, 144, 86, 86, 142, 144, - 0, 0, 142, 144, 0, 0, 142, 144, 0, 0, 143, 144, 3, 3, 3, 101, 102, 103, 0, 0, - 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 173, 3, 3, 3, 3, 3, 3, 174, 174, 174, - 3, 3, 0, 0, 0, 142, 143, 144, 94, 3, 3, 3, 101, 102, 103, 0, 0, 0, 0, 0, 3, - 3, 3, 3, 3, 3, 0, 0, 0, 0, 58, 58, 175, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, - 0, 0, 0, 176, 176, 176, 176, 177, 177, 177, 177, 177, 177, 177, 177, 175, 0, - 0 + 124, 124, 124, 124, 124, 124, 124, 161, 161, 3, 3, 124, 124, 3, 3, 124, 124, 130, + 130, 162, 162, 162, 3, 162, 124, 124, 124, 124, 124, 124, 3, 0, 0, 0, 0, 73, + 23, 74, 163, 140, 139, 141, 140, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 0, 0, 3, 0, + 0, 0, 0, 3, 0, 3, 3, 0, 164, 103, 101, 102, 0, 0, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 124, 124, 3, 3, 148, 148, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 0, 0, 0, 0, 3, 3, 3, 166, 86, 86, 3, 3, 86, 86, 3, 3, 167, 167, + 167, 167, 3, 0, 0, 0, 0, 167, 167, 167, 167, 167, 167, 3, 3, 124, 124, 124, + 3, 167, 167, 3, 3, 124, 124, 124, 3, 3, 105, 86, 86, 86, 3, 3, 3, 168, 169, 168, + 3, 3, 3, 170, 168, 171, 3, 3, 3, 170, 168, 169, 168, 3, 3, 3, 170, 3, 3, 3, + 3, 3, 3, 3, 3, 172, 172, 0, 105, 105, 105, 105, 105, 105, 105, 105, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 142, 144, 0, 0, 142, 144, 0, 0, 142, 144, 0, 0, + 143, 144, 86, 86, 86, 142, 143, 144, 86, 86, 86, 142, 143, 144, 86, 86, 142, 144, + 0, 0, 142, 144, 0, 0, 142, 144, 0, 0, 143, 144, 3, 3, 3, 101, 102, 103, 0, + 0, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 173, 3, 3, 3, 3, 3, 3, 174, 174, + 174, 3, 3, 0, 0, 0, 142, 143, 144, 94, 3, 3, 3, 101, 102, 103, 0, 0, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 58, 58, 175, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 0, + 0, 0, 0, 0, 176, 176, 176, 176, 177, 177, 177, 177, 177, 177, 177, 177, 175, + 0, 0 }; const InstDB::RWInfo InstDB::rwInfoA[] = { @@ -5798,7 +5795,7 @@ const InstDB::RWInfo InstDB::rwInfoB[] = { { InstDB::RWInfo::kCategoryGeneric , 0 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #0 [ref=791x] { InstDB::RWInfo::kCategoryGeneric , 0 , { 1 , 0 , 0 , 0 , 0 , 0 } }, // #1 [ref=5x] { InstDB::RWInfo::kCategoryGeneric , 3 , { 10, 5 , 0 , 0 , 0 , 0 } }, // #2 [ref=7x] - { InstDB::RWInfo::kCategoryGeneric , 6 , { 11, 3 , 3 , 0 , 0 , 0 } }, // #3 [ref=195x] + { InstDB::RWInfo::kCategoryGeneric , 6 , { 11, 3 , 3 , 0 , 0 , 0 
} }, // #3 [ref=193x]
 { InstDB::RWInfo::kCategoryGeneric , 2 , { 11, 3 , 3 , 0 , 0 , 0 } }, // #4 [ref=5x]
 { InstDB::RWInfo::kCategoryGeneric , 3 , { 4 , 5 , 0 , 0 , 0 , 0 } }, // #5 [ref=14x]
 { InstDB::RWInfo::kCategoryGeneric , 3 , { 4 , 5 , 14, 0 , 0 , 0 } }, // #6 [ref=4x]
@@ -5919,7 +5916,7 @@ const InstDB::RWInfo InstDB::rwInfoB[] = {
 { InstDB::RWInfo::kCategoryGeneric , 53, { 81, 44, 0 , 0 , 0 , 0 } }, // #121 [ref=4x]
 { InstDB::RWInfo::kCategoryGeneric , 6 , { 84, 3 , 3 , 0 , 0 , 0 } }, // #122 [ref=4x]
 { InstDB::RWInfo::kCategoryGeneric , 42, { 85, 5 , 5 , 0 , 0 , 0 } }, // #123 [ref=4x]
-{ InstDB::RWInfo::kCategoryGeneric , 6 , { 2 , 3 , 3 , 0 , 0 , 0 } }, // #124 [ref=88x]
+{ InstDB::RWInfo::kCategoryGeneric , 6 , { 2 , 3 , 3 , 0 , 0 , 0 } }, // #124 [ref=90x]
 { InstDB::RWInfo::kCategoryGeneric , 40, { 4 , 64, 7 , 0 , 0 , 0 } }, // #125 [ref=1x]
 { InstDB::RWInfo::kCategoryGeneric , 42, { 4 , 83, 9 , 0 , 0 , 0 } }, // #126 [ref=1x]
 { InstDB::RWInfo::kCategoryGeneric , 40, { 6 , 7 , 7 , 0 , 0 , 0 } }, // #127 [ref=11x]
@@ -5978,7 +5975,7 @@ const InstDB::RWInfo InstDB::rwInfoB[] = {
 const InstDB::RWInfoOp InstDB::rwInfoOp[] = {
 { 0x0000000000000000u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kNone }, // #0 [ref=17086x]
 { 0x0000000000000003u, 0x0000000000000003u, 0x00, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kRegPhysId }, // #1 [ref=10x]
-{ 0x0000000000000000u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kZExt }, // #2 [ref=280x]
+{ 0x0000000000000000u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kZExt }, // #2 [ref=282x]
 { 0x0000000000000000u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead }, // #3 [ref=1132x]
 { 0x000000000000FFFFu, 0x000000000000FFFFu, 0xFF, 0, { 0 }, OpRWFlags::kRW | OpRWFlags::kZExt }, // #4 [ref=107x]
 { 0x000000000000FFFFu, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead }, // #5 [ref=356x]
@@ -5987,7 +5984,7 @@ const InstDB::RWInfoOp InstDB::rwInfoOp[] = {
 { 0x000000000000000Fu, 0x000000000000000Fu, 0xFF, 0, { 0 }, OpRWFlags::kRW }, // #8 [ref=18x]
 { 0x000000000000000Fu, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead }, // #9 [ref=135x]
 { 0x0000000000000000u, 0x000000000000FFFFu, 0xFF, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt }, // #10 [ref=184x]
-{ 0x0000000000000000u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt }, // #11 [ref=461x]
+{ 0x0000000000000000u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kWrite | OpRWFlags::kZExt }, // #11 [ref=459x]
 { 0x0000000000000003u, 0x0000000000000003u, 0xFF, 0, { 0 }, OpRWFlags::kRW }, // #12 [ref=1x]
 { 0x0000000000000003u, 0x0000000000000000u, 0xFF, 0, { 0 }, OpRWFlags::kRead }, // #13 [ref=65x]
 { 0x000000000000FFFFu, 0x0000000000000000u, 0x00, 0, { 0 }, OpRWFlags::kRead | OpRWFlags::kRegPhysId }, // #14 [ref=4x]
diff --git a/3rdparty/asmjit/src/asmjit/x86/x86operand.h b/3rdparty/asmjit/src/asmjit/x86/x86operand.h
index ac56731c982..8510a9310b5 100644
--- a/3rdparty/asmjit/src/asmjit/x86/x86operand.h
+++ b/3rdparty/asmjit/src/asmjit/x86/x86operand.h
@@ -871,6 +871,9 @@ public:
   //! distinguish between 8-bit, 16-bit, 32-bit, and 64-bit increments.
   ASMJIT_INLINE_NODEBUG constexpr uint32_t size() const noexcept { return _signature.getField<Signature::kSizeMask>(); }
 
+  //! Sets the memory operand size (in bytes).
+  ASMJIT_INLINE_NODEBUG void setSize(uint32_t size) noexcept { _signature.setField<Signature::kSizeMask>(size); }
+
   //! \}
 
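Note: the x86operand.h hunk above pairs the existing size() getter with a setSize()
mutator so that a later pass can retype a memory operand in place instead of
rebuilding it. A minimal sketch of the new setter's behavior, assuming asmjit is
installed and its public <asmjit/x86.h> header is available; this is illustrative
code, not part of the patch:

    #include <asmjit/x86.h>

    using namespace asmjit;

    int main() {
      // A 32-bit stack-slot access; dword_ptr() encodes size() == 4.
      x86::Mem slot = x86::dword_ptr(x86::rsp, 8);

      // Narrow the same operand to a single byte in place - this is what the
      // register allocator does in the x86rapass.cpp hunks below before
      // rewriting a KMOVB load to MOVZX.
      slot.setSize(1);

      return slot.size() == 1 ? 0 : 1;
    }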
  //! \name Address Type

diff --git a/3rdparty/asmjit/src/asmjit/x86/x86rapass.cpp b/3rdparty/asmjit/src/asmjit/x86/x86rapass.cpp
index 88a8b39a0b0..c106c6c7e1d 100644
--- a/3rdparty/asmjit/src/asmjit/x86/x86rapass.cpp
+++ b/3rdparty/asmjit/src/asmjit/x86/x86rapass.cpp
@@ -477,6 +477,20 @@ Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& cf, RAInstBuilder& i
       }
     }
   }
+  else if (opCount == 4 && inst->op(3).isImm()) {
+    const Imm& imm = inst->op(3).as<Imm>();
+
+    switch (inst->id()) {
+      case Inst::kIdVpternlogd:
+      case Inst::kIdVpternlogq: {
+        uint32_t predicate = uint32_t(imm.value() & 0xFFu);
+        if (predicate == 0x00u || predicate == 0xFFu) {
+          ib[0]->makeWriteOnly();
+        }
+        break;
+      }
+    }
+  }
 
   switch (sameRegHint) {
     case InstSameRegHint::kNone:
@@ -1309,6 +1323,54 @@ ASMJIT_FAVOR_SPEED Error X86RAPass::_rewrite(BaseNode* first, BaseNode* stop) no
       }
     }
 
+    // If one operand was rewritten from Reg to Mem, we have to ensure that we are using the correct instruction.
+    if (raInst->isRegToMemPatched()) {
+      switch (inst->id()) {
+        case Inst::kIdKmovb: {
+          if (operands[0].isGp() && operands[1].isMem()) {
+            // Transform from KMOVB to MOVZX (byte load).
+            operands[1].as<Mem>().setSize(1);
+            inst->setId(Inst::kIdMovzx);
+          }
+          break;
+        }
+
+        case Inst::kIdVmovw: {
+          if (operands[0].isGp() && operands[1].isMem()) {
+            // Transform from VMOVW to MOVZX (word load).
+            operands[1].as<Mem>().setSize(2);
+            inst->setId(Inst::kIdMovzx);
+          }
+          break;
+        }
+
+        case Inst::kIdMovd:
+        case Inst::kIdVmovd:
+        case Inst::kIdKmovd: {
+          if (operands[0].isGp() && operands[1].isMem()) {
+            // Transform from [V]MOVD or KMOVD to MOV (dword load).
+            operands[1].as<Mem>().setSize(4);
+            inst->setId(Inst::kIdMov);
+          }
+          break;
+        }
+
+        case Inst::kIdMovq:
+        case Inst::kIdVmovq:
+        case Inst::kIdKmovq: {
+          if (operands[0].isGp() && operands[1].isMem()) {
+            // Transform from [V]MOVQ or KMOVQ to MOV (qword load).
+            operands[1].as<Mem>().setSize(8);
+            inst->setId(Inst::kIdMov);
+          }
+          break;
+        }
+
+        default:
+          break;
+      }
+    }
+
     // Transform VEX instruction to EVEX when necessary.
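Note: the first x86rapass.cpp hunk above exploits what VPTERNLOG's third immediate
is architecturally - an 8-bit truth table over the three source bits. Predicates
0x00 and 0xFF produce all-zeros or all-ones regardless of the inputs, so the
destination's previous contents are never read and the operand can be marked
write-only. A standalone scalar model of that truth table, useful for checking the
two special cases; this is illustrative C++, not asmjit code:

    #include <cassert>
    #include <cstdint>

    // Evaluate a VPTERNLOG-style predicate bitwise: at every bit position the
    // three input bits select one of the predicate's eight truth-table entries.
    static uint64_t ternlog(uint64_t a, uint64_t b, uint64_t c, uint8_t predicate) {
      uint64_t out = 0;
      for (uint32_t bit = 0; bit < 64; bit++) {
        uint32_t idx = ((uint32_t(a >> bit) & 1u) << 2) |
                       ((uint32_t(b >> bit) & 1u) << 1) |
                       ( uint32_t(c >> bit) & 1u);
        out |= uint64_t((predicate >> idx) & 1u) << bit;
      }
      return out;
    }

    int main() {
      // 0x00 and 0xFF ignore every input - exactly the two predicates the pass
      // treats as write-only.
      assert(ternlog(0x1234u, 0x5678u, 0x9ABCu, 0x00u) == 0u);
      assert(ternlog(0x1234u, 0x5678u, 0x9ABCu, 0xFFu) == ~uint64_t(0));
      return 0;
    }

Any other predicate may read one or more of the three inputs, which is why only
these two values are safe to rewrite.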
if (raInst->isTransformable()) { if (maxRegId > 15) { diff --git a/3rdparty/asmjit/test/asmjit_test_assembler_a64.cpp b/3rdparty/asmjit/test/asmjit_test_assembler_a64.cpp index 8da77b3ff50..4bb471040a3 100644 --- a/3rdparty/asmjit/test/asmjit_test_assembler_a64.cpp +++ b/3rdparty/asmjit/test/asmjit_test_assembler_a64.cpp @@ -42,6 +42,7 @@ static void ASMJIT_NOINLINE testA64AssemblerBase(AssemblerTester TEST_INSTRUCTION("E103038B", add(x1, xzr, x3)); TEST_INSTRUCTION("5F00030B", add(wzr, w2, w3)); TEST_INSTRUCTION("5F00038B", add(xzr, x2, x3)); + TEST_INSTRUCTION("4140238B", add(x1, x2, w3, uxtw(0))); TEST_INSTRUCTION("83004011", add(w3, w4, 0, lsl(12))); TEST_INSTRUCTION("83004091", add(x3, x4, 0, lsl(12))); TEST_INSTRUCTION("83005011", add(w3, w4, 1024, lsl(12))); @@ -210,7 +211,8 @@ static void ASMJIT_NOINLINE testA64AssemblerBase(AssemblerTester TEST_INSTRUCTION("3F00022B", cmn(w1, w2)); TEST_INSTRUCTION("3F0002AB", cmn(x1, x2)); TEST_INSTRUCTION("3F08222B", cmn(w1, w2, uxtb(2))); - TEST_INSTRUCTION("3F0822AB", cmn(x1, x2, uxtb(2))); + TEST_INSTRUCTION("3F0822AB", cmn(x1, w2, uxtb(2))); + TEST_INSTRUCTION("5F4023AB", cmn(x2, w3, uxtw(0))); TEST_INSTRUCTION("FF43212B", cmn(wsp, w1)); TEST_INSTRUCTION("FF07212B", cmn(wsp, w1, uxtb(1))); TEST_INSTRUCTION("FF6321AB", cmn(sp, x1)); @@ -224,7 +226,8 @@ static void ASMJIT_NOINLINE testA64AssemblerBase(AssemblerTester TEST_INSTRUCTION("3F00026B", cmp(w1, w2)); TEST_INSTRUCTION("3F0002EB", cmp(x1, x2)); TEST_INSTRUCTION("3F08226B", cmp(w1, w2, uxtb(2))); - TEST_INSTRUCTION("3F0822EB", cmp(x1, x2, uxtb(2))); + TEST_INSTRUCTION("3F0822EB", cmp(x1, w2, uxtb(2))); + TEST_INSTRUCTION("5F4023EB", cmp(x2, w3, uxtw(0))); TEST_INSTRUCTION("FF43216B", cmp(wsp, w1)); TEST_INSTRUCTION("FF07216B", cmp(wsp, w1, uxtb(1))); TEST_INSTRUCTION("FF6321EB", cmp(sp, x1)); diff --git a/3rdparty/asmjit/test/asmjit_test_compiler_x86.cpp b/3rdparty/asmjit/test/asmjit_test_compiler_x86.cpp index 54442efb915..9ff6d893855 100644 --- a/3rdparty/asmjit/test/asmjit_test_compiler_x86.cpp +++ b/3rdparty/asmjit/test/asmjit_test_compiler_x86.cpp @@ -1047,643 +1047,6 @@ public: } }; -// x86::Compiler - X86Test_AllocImul1 -// ================================== - -class X86Test_AllocImul1 : public X86TestCase { -public: - X86Test_AllocImul1() : X86TestCase("AllocImul1") {} - - static void add(TestApp& app) { - app.add(new X86Test_AllocImul1()); - } - - virtual void compile(x86::Compiler& cc) { - x86::Gp dstHi = cc.newIntPtr("dstHi"); - x86::Gp dstLo = cc.newIntPtr("dstLo"); - - x86::Gp vHi = cc.newInt32("vHi"); - x86::Gp vLo = cc.newInt32("vLo"); - x86::Gp src = cc.newInt32("src"); - - FuncNode* funcNode = cc.addFunc(FuncSignature::build()); - funcNode->setArg(0, dstHi); - funcNode->setArg(1, dstLo); - funcNode->setArg(2, vLo); - funcNode->setArg(3, src); - - cc.imul(vHi, vLo, src); - cc.mov(x86::dword_ptr(dstHi), vHi); - cc.mov(x86::dword_ptr(dstLo), vLo); - cc.endFunc(); - } - - virtual bool run(void* _func, String& result, String& expect) { - typedef void (*Func)(int*, int*, int, int); - Func func = ptr_as_func(_func); - - int v0 = 4; - int v1 = 4; - - int resultHi = 0; - int resultLo = 0; - - int expectHi = 0; - int expectLo = v0 * v1; - - func(&resultHi, &resultLo, v0, v1); - - result.assignFormat("hi=%d, lo=%d", resultHi, resultLo); - expect.assignFormat("hi=%d, lo=%d", expectHi, expectLo); - - return resultHi == expectHi && resultLo == expectLo; - } -}; - -// x86::Compiler - X86Test_AllocImul2 -// ================================== - -class X86Test_AllocImul2 : public 
X86TestCase { -public: - X86Test_AllocImul2() : X86TestCase("AllocImul2") {} - - static void add(TestApp& app) { - app.add(new X86Test_AllocImul2()); - } - - virtual void compile(x86::Compiler& cc) { - x86::Gp dst = cc.newIntPtr("dst"); - x86::Gp src = cc.newIntPtr("src"); - - FuncNode* funcNode = cc.addFunc(FuncSignature::build()); - funcNode->setArg(0, dst); - funcNode->setArg(1, src); - - for (unsigned int i = 0; i < 4; i++) { - x86::Gp x = cc.newInt32("x"); - x86::Gp y = cc.newInt32("y"); - x86::Gp hi = cc.newInt32("hi"); - - cc.mov(x, x86::dword_ptr(src, 0)); - cc.mov(y, x86::dword_ptr(src, 4)); - - cc.imul(hi, x, y); - cc.add(x86::dword_ptr(dst, 0), hi); - cc.add(x86::dword_ptr(dst, 4), x); - } - - cc.endFunc(); - } - - virtual bool run(void* _func, String& result, String& expect) { - typedef void (*Func)(int*, const int*); - Func func = ptr_as_func(_func); - - int src[2] = { 4, 9 }; - int resultRet[2] = { 0, 0 }; - int expectRet[2] = { 0, (4 * 9) * 4 }; - - func(resultRet, src); - - result.assignFormat("ret={%d, %d}", resultRet[0], resultRet[1]); - expect.assignFormat("ret={%d, %d}", expectRet[0], expectRet[1]); - - return resultRet[0] == expectRet[0] && resultRet[1] == expectRet[1]; - } -}; - -// x86::Compiler - X86Test_AllocIdiv1 -// ================================== - -class X86Test_AllocIdiv1 : public X86TestCase { -public: - X86Test_AllocIdiv1() : X86TestCase("AllocIdiv1") {} - - static void add(TestApp& app) { - app.add(new X86Test_AllocIdiv1()); - } - - virtual void compile(x86::Compiler& cc) { - x86::Gp a = cc.newInt32("a"); - x86::Gp b = cc.newInt32("b"); - x86::Gp dummy = cc.newInt32("dummy"); - - FuncNode* funcNode = cc.addFunc(FuncSignature::build()); - funcNode->setArg(0, a); - funcNode->setArg(1, b); - - cc.xor_(dummy, dummy); - cc.idiv(dummy, a, b); - - cc.ret(a); - cc.endFunc(); - } - - virtual bool run(void* _func, String& result, String& expect) { - typedef int (*Func)(int, int); - Func func = ptr_as_func(_func); - - int v0 = 2999; - int v1 = 245; - - int resultRet = func(v0, v1); - int expectRet = 2999 / 245; - - result.assignFormat("result=%d", resultRet); - expect.assignFormat("result=%d", expectRet); - - return result == expect; - } -}; - -// x86::Compiler - X86Test_AllocSetz -// ================================= - -class X86Test_AllocSetz : public X86TestCase { -public: - X86Test_AllocSetz() : X86TestCase("AllocSetz") {} - - static void add(TestApp& app) { - app.add(new X86Test_AllocSetz()); - } - - virtual void compile(x86::Compiler& cc) { - x86::Gp src0 = cc.newInt32("src0"); - x86::Gp src1 = cc.newInt32("src1"); - x86::Gp dst0 = cc.newIntPtr("dst0"); - - FuncNode* funcNode = cc.addFunc(FuncSignature::build()); - funcNode->setArg(0, src0); - funcNode->setArg(1, src1); - funcNode->setArg(2, dst0); - - cc.cmp(src0, src1); - cc.setz(x86::byte_ptr(dst0)); - - cc.endFunc(); - } - - virtual bool run(void* _func, String& result, String& expect) { - typedef void (*Func)(int, int, char*); - Func func = ptr_as_func(_func); - - char resultBuf[4] {}; - char expectBuf[4] = { 1, 0, 0, 1 }; - - func(0, 0, &resultBuf[0]); // We are expecting 1 (0 == 0). - func(0, 1, &resultBuf[1]); // We are expecting 0 (0 != 1). - func(1, 0, &resultBuf[2]); // We are expecting 0 (1 != 0). - func(1, 1, &resultBuf[3]); // We are expecting 1 (1 == 1). 
- - result.assignFormat("out={%d, %d, %d, %d}", resultBuf[0], resultBuf[1], resultBuf[2], resultBuf[3]); - expect.assignFormat("out={%d, %d, %d, %d}", expectBuf[0], expectBuf[1], expectBuf[2], expectBuf[3]); - - return resultBuf[0] == expectBuf[0] && - resultBuf[1] == expectBuf[1] && - resultBuf[2] == expectBuf[2] && - resultBuf[3] == expectBuf[3] ; - } -}; - -// x86::Compiler - X86Test_AllocShlRor -// =================================== - -class X86Test_AllocShlRor : public X86TestCase { -public: - X86Test_AllocShlRor() : X86TestCase("AllocShlRor") {} - - static void add(TestApp& app) { - app.add(new X86Test_AllocShlRor()); - } - - virtual void compile(x86::Compiler& cc) { - x86::Gp dst = cc.newIntPtr("dst"); - x86::Gp var = cc.newInt32("var"); - x86::Gp vShlParam = cc.newInt32("vShlParam"); - x86::Gp vRorParam = cc.newInt32("vRorParam"); - - FuncNode* funcNode = cc.addFunc(FuncSignature::build()); - funcNode->setArg(0, dst); - funcNode->setArg(1, var); - funcNode->setArg(2, vShlParam); - funcNode->setArg(3, vRorParam); - - cc.shl(var, vShlParam); - cc.ror(var, vRorParam); - cc.mov(x86::dword_ptr(dst), var); - cc.endFunc(); - } - - virtual bool run(void* _func, String& result, String& expect) { - typedef void (*Func)(int*, int, int, int); - Func func = ptr_as_func(_func); - - int v0 = 0x000000FF; - - int resultRet = 0; - int expectRet = 0x0000FF00; - - func(&resultRet, v0, 16, 8); - - result.assignFormat("ret=%d", resultRet); - expect.assignFormat("ret=%d", expectRet); - - return result == expect; - } -}; - -// x86::Compiler - X86Test_AllocGpbLo -// ================================== - -class X86Test_AllocGpbLo1 : public X86TestCase { -public: - X86Test_AllocGpbLo1() : X86TestCase("AllocGpbLo1") {} - - enum : uint32_t { kCount = 32 }; - - static void add(TestApp& app) { - app.add(new X86Test_AllocGpbLo1()); - } - - virtual void compile(x86::Compiler& cc) { - x86::Gp rPtr = cc.newUIntPtr("rPtr"); - x86::Gp rSum = cc.newUInt32("rSum"); - x86::Gp x[kCount]; - - FuncNode* funcNode = cc.addFunc(FuncSignature::build()); - funcNode->setArg(0, rPtr); - - for (uint32_t i = 0; i < kCount; i++) { - x[i] = cc.newUInt32("x%u", i); - } - - // Init pseudo-regs with values from our array. - for (uint32_t i = 0; i < kCount; i++) { - cc.mov(x[i], x86::dword_ptr(rPtr, int(i * 4))); - } - - for (uint32_t i = 2; i < kCount; i++) { - // Add and truncate to 8 bit; no purpose, just mess with jit. - cc.add (x[i ], x[i-1]); - cc.movzx(x[i ], x[i ].r8()); - cc.movzx(x[i-2], x[i-1].r8()); - cc.movzx(x[i-1], x[i-2].r8()); - } - - // Sum up all computed values. - cc.mov(rSum, 0); - for (uint32_t i = 0; i < kCount; i++) { - cc.add(rSum, x[i]); - } - - // Return the sum. 
- cc.ret(rSum); - cc.endFunc(); - } - - virtual bool run(void* _func, String& result, String& expect) { - typedef uint32_t (*Func)(uint32_t*); - Func func = ptr_as_func(_func); - - uint32_t i; - uint32_t buf[kCount]; - uint32_t resultRet = 0; - uint32_t expectRet = 0; - - for (i = 0; i < kCount; i++) { - buf[i] = 1; - } - - for (i = 2; i < kCount; i++) { - buf[i ]+= buf[i-1]; - buf[i ] = buf[i ] & 0xFF; - buf[i-2] = buf[i-1] & 0xFF; - buf[i-1] = buf[i-2] & 0xFF; - } - - for (i = 0; i < kCount; i++) { - expectRet += buf[i]; - } - - for (i = 0; i < kCount; i++) { - buf[i] = 1; - } - resultRet = func(buf); - - result.assignFormat("ret=%d", resultRet); - expect.assignFormat("ret=%d", expectRet); - - return result == expect; - } -}; - -// x86::Compiler - X86Test_AllocGpbLo2 -// =================================== - -class X86Test_AllocGpbLo2 : public X86TestCase { -public: - X86Test_AllocGpbLo2() : X86TestCase("AllocGpbLo2") {} - - static void add(TestApp& app) { - app.add(new X86Test_AllocGpbLo2()); - } - - virtual void compile(x86::Compiler& cc) { - x86::Gp v = cc.newUInt32("v"); - - FuncNode* funcNode = cc.addFunc(FuncSignature::build()); - funcNode->setArg(0, v); - - cc.mov(v.r8(), 0xFF); - cc.ret(v); - cc.endFunc(); - } - - virtual bool run(void* _func, String& result, String& expect) { - typedef uint32_t (*Func)(uint32_t); - Func func = ptr_as_func(_func); - - uint32_t resultRet = func(0x12345678u); - uint32_t expectRet = 0x123456FFu; - - result.assignFormat("ret=%d", resultRet); - expect.assignFormat("ret=%d", expectRet); - - return result == expect; - } -}; - -// x86::Compiler - X86Test_AllocRepMovsb -// ===================================== - -class X86Test_AllocRepMovsb : public X86TestCase { -public: - X86Test_AllocRepMovsb() : X86TestCase("AllocRepMovsb") {} - - static void add(TestApp& app) { - app.add(new X86Test_AllocRepMovsb()); - } - - virtual void compile(x86::Compiler& cc) { - x86::Gp dst = cc.newIntPtr("dst"); - x86::Gp src = cc.newIntPtr("src"); - x86::Gp cnt = cc.newIntPtr("cnt"); - - FuncNode* funcNode = cc.addFunc(FuncSignature::build()); - funcNode->setArg(0, dst); - funcNode->setArg(1, src); - funcNode->setArg(2, cnt); - - cc.rep(cnt).movs(x86::byte_ptr(dst), x86::byte_ptr(src)); - cc.endFunc(); - } - - virtual bool run(void* _func, String& result, String& expect) { - typedef void (*Func)(void*, void*, size_t); - Func func = ptr_as_func(_func); - - char dst[20] = { 0 }; - char src[20] = "Hello AsmJit!"; - func(dst, src, strlen(src) + 1); - - result.assignFormat("ret=\"%s\"", dst); - expect.assignFormat("ret=\"%s\"", src); - - return result == expect; - } -}; - -// x86::Compiler - X86Test_AllocIfElse1 -// ==================================== - -class X86Test_AllocIfElse1 : public X86TestCase { -public: - X86Test_AllocIfElse1() : X86TestCase("AllocIfElse1") {} - - static void add(TestApp& app) { - app.add(new X86Test_AllocIfElse1()); - } - - virtual void compile(x86::Compiler& cc) { - x86::Gp v1 = cc.newInt32("v1"); - x86::Gp v2 = cc.newInt32("v2"); - - Label L_1 = cc.newLabel(); - Label L_2 = cc.newLabel(); - - FuncNode* funcNode = cc.addFunc(FuncSignature::build()); - funcNode->setArg(0, v1); - funcNode->setArg(1, v2); - - cc.cmp(v1, v2); - cc.jg(L_1); - - cc.mov(v1, 1); - cc.jmp(L_2); - - cc.bind(L_1); - cc.mov(v1, 2); - - cc.bind(L_2); - cc.ret(v1); - cc.endFunc(); - } - - virtual bool run(void* _func, String& result, String& expect) { - typedef int (*Func)(int, int); - Func func = ptr_as_func(_func); - - int a = func(0, 1); - int b = func(1, 0); - - 
result.appendFormat("ret={%d, %d}", a, b); - expect.appendFormat("ret={%d, %d}", 1, 2); - - return result == expect; - } -}; - -// x86::Compiler - X86Test_AllocIfElse2 -// ==================================== - -class X86Test_AllocIfElse2 : public X86TestCase { -public: - X86Test_AllocIfElse2() : X86TestCase("AllocIfElse2") {} - - static void add(TestApp& app) { - app.add(new X86Test_AllocIfElse2()); - } - - virtual void compile(x86::Compiler& cc) { - x86::Gp v1 = cc.newInt32("v1"); - x86::Gp v2 = cc.newInt32("v2"); - - Label L_1 = cc.newLabel(); - Label L_2 = cc.newLabel(); - Label L_3 = cc.newLabel(); - Label L_4 = cc.newLabel(); - - FuncNode* funcNode = cc.addFunc(FuncSignature::build()); - funcNode->setArg(0, v1); - funcNode->setArg(1, v2); - - cc.jmp(L_1); - cc.bind(L_2); - cc.jmp(L_4); - cc.bind(L_1); - - cc.cmp(v1, v2); - cc.jg(L_3); - - cc.mov(v1, 1); - cc.jmp(L_2); - - cc.bind(L_3); - cc.mov(v1, 2); - cc.jmp(L_2); - - cc.bind(L_4); - - cc.ret(v1); - cc.endFunc(); - } - - virtual bool run(void* _func, String& result, String& expect) { - typedef int (*Func)(int, int); - Func func = ptr_as_func(_func); - - int a = func(0, 1); - int b = func(1, 0); - - result.appendFormat("ret={%d, %d}", a, b); - expect.appendFormat("ret={%d, %d}", 1, 2); - - return result == expect; - } -}; - -// x86::Compiler - X86Test_AllocIfElse3 -// ==================================== - -class X86Test_AllocIfElse3 : public X86TestCase { -public: - X86Test_AllocIfElse3() : X86TestCase("AllocIfElse3") {} - - static void add(TestApp& app) { - app.add(new X86Test_AllocIfElse3()); - } - - virtual void compile(x86::Compiler& cc) { - x86::Gp v1 = cc.newInt32("v1"); - x86::Gp v2 = cc.newInt32("v2"); - x86::Gp counter = cc.newInt32("counter"); - - Label L_1 = cc.newLabel(); - Label L_Loop = cc.newLabel(); - Label L_Exit = cc.newLabel(); - - FuncNode* funcNode = cc.addFunc(FuncSignature::build()); - funcNode->setArg(0, v1); - funcNode->setArg(1, v2); - - cc.cmp(v1, v2); - cc.jg(L_1); - - cc.mov(counter, 0); - - cc.bind(L_Loop); - cc.mov(v1, counter); - - cc.inc(counter); - cc.cmp(counter, 1); - cc.jle(L_Loop); - cc.jmp(L_Exit); - - cc.bind(L_1); - cc.mov(v1, 2); - - cc.bind(L_Exit); - cc.ret(v1); - cc.endFunc(); - } - - virtual bool run(void* _func, String& result, String& expect) { - typedef int (*Func)(int, int); - Func func = ptr_as_func(_func); - - int a = func(0, 1); - int b = func(1, 0); - - result.appendFormat("ret={%d, %d}", a, b); - expect.appendFormat("ret={%d, %d}", 1, 2); - - return result == expect; - } -}; - -// x86::Compiler - X86Test_AllocIfElse4 -// ==================================== - -class X86Test_AllocIfElse4 : public X86TestCase { -public: - X86Test_AllocIfElse4() : X86TestCase("AllocIfElse4") {} - - static void add(TestApp& app) { - app.add(new X86Test_AllocIfElse4()); - } - - virtual void compile(x86::Compiler& cc) { - x86::Gp v1 = cc.newInt32("v1"); - x86::Gp v2 = cc.newInt32("v2"); - x86::Gp counter = cc.newInt32("counter"); - - Label L_1 = cc.newLabel(); - Label L_Loop1 = cc.newLabel(); - Label L_Loop2 = cc.newLabel(); - Label L_Exit = cc.newLabel(); - - FuncNode* funcNode = cc.addFunc(FuncSignature::build()); - funcNode->setArg(0, v1); - funcNode->setArg(1, v2); - - cc.mov(counter, 0); - cc.cmp(v1, v2); - cc.jg(L_1); - - cc.bind(L_Loop1); - cc.mov(v1, counter); - - cc.inc(counter); - cc.cmp(counter, 1); - cc.jle(L_Loop1); - cc.jmp(L_Exit); - - cc.bind(L_1); - cc.bind(L_Loop2); - cc.mov(v1, counter); - cc.inc(counter); - cc.cmp(counter, 2); - cc.jle(L_Loop2); - - cc.bind(L_Exit); - cc.ret(v1); 
-    cc.endFunc();
-  }
-
-  virtual bool run(void* _func, String& result, String& expect) {
-    typedef int (*Func)(int, int);
-    Func func = ptr_as_func<Func>(_func);
-
-    int a = func(0, 1);
-    int b = func(1, 0);
-
-    result.appendFormat("ret={%d, %d}", a, b);
-    expect.appendFormat("ret={%d, %d}", 1, 2);
-
-    return result == expect;
-  }
-};
-
 // x86::Compiler - X86Test_AllocInt8
 // =================================
@@ -2185,17 +1548,654 @@ public:
   }
 };
 
-// x86::Compiler - X86Test_AllocMemcpy
-// ===================================
+// x86::Compiler - X86Test_Imul1
+// =============================
 
-class X86Test_AllocMemcpy : public X86TestCase {
+class X86Test_Imul1 : public X86TestCase {
 public:
-  X86Test_AllocMemcpy() : X86TestCase("AllocMemcpy") {}
+  X86Test_Imul1() : X86TestCase("Imul1") {}
+
+  static void add(TestApp& app) {
+    app.add(new X86Test_Imul1());
+  }
+
+  virtual void compile(x86::Compiler& cc) {
+    x86::Gp dstHi = cc.newIntPtr("dstHi");
+    x86::Gp dstLo = cc.newIntPtr("dstLo");
+
+    x86::Gp vHi = cc.newInt32("vHi");
+    x86::Gp vLo = cc.newInt32("vLo");
+    x86::Gp src = cc.newInt32("src");
+
+    FuncNode* funcNode = cc.addFunc(FuncSignature::build<void, int*, int*, int, int>());
+    funcNode->setArg(0, dstHi);
+    funcNode->setArg(1, dstLo);
+    funcNode->setArg(2, vLo);
+    funcNode->setArg(3, src);
+
+    cc.imul(vHi, vLo, src);
+    cc.mov(x86::dword_ptr(dstHi), vHi);
+    cc.mov(x86::dword_ptr(dstLo), vLo);
+    cc.endFunc();
+  }
+
+  virtual bool run(void* _func, String& result, String& expect) {
+    typedef void (*Func)(int*, int*, int, int);
+    Func func = ptr_as_func<Func>(_func);
+
+    int v0 = 4;
+    int v1 = 4;
+
+    int resultHi = 0;
+    int resultLo = 0;
+
+    int expectHi = 0;
+    int expectLo = v0 * v1;
+
+    func(&resultHi, &resultLo, v0, v1);
+
+    result.assignFormat("hi=%d, lo=%d", resultHi, resultLo);
+    expect.assignFormat("hi=%d, lo=%d", expectHi, expectLo);
+
+    return resultHi == expectHi && resultLo == expectLo;
+  }
+};
+
+// x86::Compiler - X86Test_Imul2
+// =============================
+
+class X86Test_Imul2 : public X86TestCase {
+public:
+  X86Test_Imul2() : X86TestCase("Imul2") {}
+
+  static void add(TestApp& app) {
+    app.add(new X86Test_Imul2());
+  }
+
+  virtual void compile(x86::Compiler& cc) {
+    x86::Gp dst = cc.newIntPtr("dst");
+    x86::Gp src = cc.newIntPtr("src");
+
+    FuncNode* funcNode = cc.addFunc(FuncSignature::build<void, int*, const int*>());
+    funcNode->setArg(0, dst);
+    funcNode->setArg(1, src);
+
+    for (unsigned int i = 0; i < 4; i++) {
+      x86::Gp x = cc.newInt32("x");
+      x86::Gp y = cc.newInt32("y");
+      x86::Gp hi = cc.newInt32("hi");
+
+      cc.mov(x, x86::dword_ptr(src, 0));
+      cc.mov(y, x86::dword_ptr(src, 4));
+
+      cc.imul(hi, x, y);
+      cc.add(x86::dword_ptr(dst, 0), hi);
+      cc.add(x86::dword_ptr(dst, 4), x);
+    }
+
+    cc.endFunc();
+  }
+
+  virtual bool run(void* _func, String& result, String& expect) {
+    typedef void (*Func)(int*, const int*);
+    Func func = ptr_as_func<Func>(_func);
+
+    int src[2] = { 4, 9 };
+    int resultRet[2] = { 0, 0 };
+    int expectRet[2] = { 0, (4 * 9) * 4 };
+
+    func(resultRet, src);
+
+    result.assignFormat("ret={%d, %d}", resultRet[0], resultRet[1]);
+    expect.assignFormat("ret={%d, %d}", expectRet[0], expectRet[1]);
+
+    return resultRet[0] == expectRet[0] && resultRet[1] == expectRet[1];
+  }
+};
+
+// x86::Compiler - X86Test_Idiv1
+// =============================
+
+class X86Test_Idiv1 : public X86TestCase {
+public:
+  X86Test_Idiv1() : X86TestCase("Idiv1") {}
+
+  static void add(TestApp& app) {
+    app.add(new X86Test_Idiv1());
+  }
+
+  virtual void compile(x86::Compiler& cc) {
+    x86::Gp a = cc.newInt32("a");
+    x86::Gp b = cc.newInt32("b");
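+
+    // "dummy" stands in for the high half of the dividend pair (EDX in
+    // x86 idiv): it is zeroed below and receives the remainder, while "a"
+    // (the low half, EAX) ends up holding the quotient that the function
+    // returns. Zeroing instead of sign-extending (cdq) is fine here
+    // because the test only divides non-negative values.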
+    x86::Gp dummy = cc.newInt32("dummy");
+
+    FuncNode* funcNode = cc.addFunc(FuncSignature::build<int, int, int>());
+    funcNode->setArg(0, a);
+    funcNode->setArg(1, b);
+
+    cc.xor_(dummy, dummy);
+    cc.idiv(dummy, a, b);
+
+    cc.ret(a);
+    cc.endFunc();
+  }
+
+  virtual bool run(void* _func, String& result, String& expect) {
+    typedef int (*Func)(int, int);
+    Func func = ptr_as_func<Func>(_func);
+
+    int v0 = 2999;
+    int v1 = 245;
+
+    int resultRet = func(v0, v1);
+    int expectRet = 2999 / 245;
+
+    result.assignFormat("result=%d", resultRet);
+    expect.assignFormat("result=%d", expectRet);
+
+    return result == expect;
+  }
+};
+
+// x86::Compiler - X86Test_Setz
+// ============================
+
+class X86Test_Setz : public X86TestCase {
+public:
+  X86Test_Setz() : X86TestCase("Setz") {}
+
+  static void add(TestApp& app) {
+    app.add(new X86Test_Setz());
+  }
+
+  virtual void compile(x86::Compiler& cc) {
+    x86::Gp src0 = cc.newInt32("src0");
+    x86::Gp src1 = cc.newInt32("src1");
+    x86::Gp dst0 = cc.newIntPtr("dst0");
+
+    FuncNode* funcNode = cc.addFunc(FuncSignature::build<void, int, int, char*>());
+    funcNode->setArg(0, src0);
+    funcNode->setArg(1, src1);
+    funcNode->setArg(2, dst0);
+
+    cc.cmp(src0, src1);
+    cc.setz(x86::byte_ptr(dst0));
+
+    cc.endFunc();
+  }
+
+  virtual bool run(void* _func, String& result, String& expect) {
+    typedef void (*Func)(int, int, char*);
+    Func func = ptr_as_func<Func>(_func);
+
+    char resultBuf[4] {};
+    char expectBuf[4] = { 1, 0, 0, 1 };
+
+    func(0, 0, &resultBuf[0]); // We are expecting 1 (0 == 0).
+    func(0, 1, &resultBuf[1]); // We are expecting 0 (0 != 1).
+    func(1, 0, &resultBuf[2]); // We are expecting 0 (1 != 0).
+    func(1, 1, &resultBuf[3]); // We are expecting 1 (1 == 1).
+
+    result.assignFormat("out={%d, %d, %d, %d}", resultBuf[0], resultBuf[1], resultBuf[2], resultBuf[3]);
+    expect.assignFormat("out={%d, %d, %d, %d}", expectBuf[0], expectBuf[1], expectBuf[2], expectBuf[3]);
+
+    return resultBuf[0] == expectBuf[0] &&
+           resultBuf[1] == expectBuf[1] &&
+           resultBuf[2] == expectBuf[2] &&
+           resultBuf[3] == expectBuf[3] ;
+  }
+};
+
+// x86::Compiler - X86Test_ShlRor
+// ==============================
+
+class X86Test_ShlRor : public X86TestCase {
+public:
+  X86Test_ShlRor() : X86TestCase("ShlRor") {}
+
+  static void add(TestApp& app) {
+    app.add(new X86Test_ShlRor());
+  }
+
+  virtual void compile(x86::Compiler& cc) {
+    x86::Gp dst = cc.newIntPtr("dst");
+    x86::Gp var = cc.newInt32("var");
+    x86::Gp vShlParam = cc.newInt32("vShlParam");
+    x86::Gp vRorParam = cc.newInt32("vRorParam");
+
+    FuncNode* funcNode = cc.addFunc(FuncSignature::build<void, int*, int, int, int>());
+    funcNode->setArg(0, dst);
+    funcNode->setArg(1, var);
+    funcNode->setArg(2, vShlParam);
+    funcNode->setArg(3, vRorParam);
+
+    cc.shl(var, vShlParam);
+    cc.ror(var, vRorParam);
+    cc.mov(x86::dword_ptr(dst), var);
+    cc.endFunc();
+  }
+
+  virtual bool run(void* _func, String& result, String& expect) {
+    typedef void (*Func)(int*, int, int, int);
+    Func func = ptr_as_func<Func>(_func);
+
+    int v0 = 0x000000FF;
+
+    int resultRet = 0;
+    int expectRet = 0x0000FF00;
+
+    func(&resultRet, v0, 16, 8);
+
+    result.assignFormat("ret=%d", resultRet);
+    expect.assignFormat("ret=%d", expectRet);
+
+    return result == expect;
+  }
+};
+
+// x86::Compiler - X86Test_GpbLo
+// =============================
+
+class X86Test_GpbLo1 : public X86TestCase {
+public:
+  X86Test_GpbLo1() : X86TestCase("GpbLo1") {}
+
+  enum : uint32_t { kCount = 32 };
+
+  static void add(TestApp& app) {
+    app.add(new X86Test_GpbLo1());
+  }
+
+  virtual void compile(x86::Compiler& cc) {
+    x86::Gp rPtr = cc.newUIntPtr("rPtr");
+    x86::Gp rSum = cc.newUInt32("rSum");
+    x86::Gp x[kCount];
+
+    FuncNode* funcNode = cc.addFunc(FuncSignature::build<uint32_t, uint32_t*>());
+    funcNode->setArg(0, rPtr);
+
+    for (uint32_t i = 0; i < kCount; i++) {
+      x[i] = cc.newUInt32("x%u", i);
+    }
+
+    // Init pseudo-regs with values from our array.
+    for (uint32_t i = 0; i < kCount; i++) {
+      cc.mov(x[i], x86::dword_ptr(rPtr, int(i * 4)));
+    }
+
+    for (uint32_t i = 2; i < kCount; i++) {
+      // Add and truncate to 8 bits; serves no purpose other than stressing the JIT.
+      cc.add  (x[i  ], x[i-1]);
+      cc.movzx(x[i  ], x[i  ].r8());
+      cc.movzx(x[i-2], x[i-1].r8());
+      cc.movzx(x[i-1], x[i-2].r8());
+    }
+
+    // Sum up all computed values.
+    cc.mov(rSum, 0);
+    for (uint32_t i = 0; i < kCount; i++) {
+      cc.add(rSum, x[i]);
+    }
+
+    // Return the sum.
+    cc.ret(rSum);
+    cc.endFunc();
+  }
+
+  virtual bool run(void* _func, String& result, String& expect) {
+    typedef uint32_t (*Func)(uint32_t*);
+    Func func = ptr_as_func<Func>(_func);
+
+    uint32_t i;
+    uint32_t buf[kCount];
+    uint32_t resultRet = 0;
+    uint32_t expectRet = 0;
+
+    for (i = 0; i < kCount; i++) {
+      buf[i] = 1;
+    }
+
+    for (i = 2; i < kCount; i++) {
+      buf[i  ] += buf[i-1];
+      buf[i  ]  = buf[i  ] & 0xFF;
+      buf[i-2]  = buf[i-1] & 0xFF;
+      buf[i-1]  = buf[i-2] & 0xFF;
+    }
+
+    for (i = 0; i < kCount; i++) {
+      expectRet += buf[i];
+    }
+
+    for (i = 0; i < kCount; i++) {
+      buf[i] = 1;
+    }
+    resultRet = func(buf);
+
+    result.assignFormat("ret=%d", resultRet);
+    expect.assignFormat("ret=%d", expectRet);
+
+    return result == expect;
+  }
+};
+
+// x86::Compiler - X86Test_GpbLo2
+// ==============================
+
+class X86Test_GpbLo2 : public X86TestCase {
+public:
+  X86Test_GpbLo2() : X86TestCase("GpbLo2") {}
+
+  static void add(TestApp& app) {
+    app.add(new X86Test_GpbLo2());
+  }
+
+  virtual void compile(x86::Compiler& cc) {
+    x86::Gp v = cc.newUInt32("v");
+
+    FuncNode* funcNode = cc.addFunc(FuncSignature::build<uint32_t, uint32_t>());
+    funcNode->setArg(0, v);
+
+    cc.mov(v.r8(), 0xFF);
+    cc.ret(v);
+    cc.endFunc();
+  }
+
+  virtual bool run(void* _func, String& result, String& expect) {
+    typedef uint32_t (*Func)(uint32_t);
+    Func func = ptr_as_func<Func>(_func);
+
+    uint32_t resultRet = func(0x12345678u);
+    uint32_t expectRet = 0x123456FFu;
+
+    result.assignFormat("ret=%d", resultRet);
+    expect.assignFormat("ret=%d", expectRet);
+
+    return result == expect;
+  }
+};
+
+// x86::Compiler - X86Test_RepMovsb
+// ================================
+
+class X86Test_RepMovsb : public X86TestCase {
+public:
+  X86Test_RepMovsb() : X86TestCase("RepMovsb") {}
+
+  static void add(TestApp& app) {
+    app.add(new X86Test_RepMovsb());
+  }
+
+  virtual void compile(x86::Compiler& cc) {
+    x86::Gp dst = cc.newIntPtr("dst");
+    x86::Gp src = cc.newIntPtr("src");
+    x86::Gp cnt = cc.newIntPtr("cnt");
+
+    FuncNode* funcNode = cc.addFunc(FuncSignature::build<void, void*, void*, size_t>());
+    funcNode->setArg(0, dst);
+    funcNode->setArg(1, src);
+    funcNode->setArg(2, cnt);
+
+    cc.rep(cnt).movs(x86::byte_ptr(dst), x86::byte_ptr(src));
+    cc.endFunc();
+  }
+
+  virtual bool run(void* _func, String& result, String& expect) {
+    typedef void (*Func)(void*, void*, size_t);
+    Func func = ptr_as_func<Func>(_func);
+
+    char dst[20] = { 0 };
+    char src[20] = "Hello AsmJit!";
+    func(dst, src, strlen(src) + 1);
+
+    result.assignFormat("ret=\"%s\"", dst);
+    expect.assignFormat("ret=\"%s\"", src);
+
+    return result == expect;
+  }
+};
+
+// x86::Compiler - X86Test_IfElse1
+// ===============================
+
+class X86Test_IfElse1 : public X86TestCase {
+public:
+  X86Test_IfElse1() : X86TestCase("IfElse1") {}
+
+  static void add(TestApp& app) {
+    app.add(new
X86Test_IfElse1()); + } + + virtual void compile(x86::Compiler& cc) { + x86::Gp v1 = cc.newInt32("v1"); + x86::Gp v2 = cc.newInt32("v2"); + + Label L_1 = cc.newLabel(); + Label L_2 = cc.newLabel(); + + FuncNode* funcNode = cc.addFunc(FuncSignature::build()); + funcNode->setArg(0, v1); + funcNode->setArg(1, v2); + + cc.cmp(v1, v2); + cc.jg(L_1); + + cc.mov(v1, 1); + cc.jmp(L_2); + + cc.bind(L_1); + cc.mov(v1, 2); + + cc.bind(L_2); + cc.ret(v1); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int); + Func func = ptr_as_func(_func); + + int a = func(0, 1); + int b = func(1, 0); + + result.appendFormat("ret={%d, %d}", a, b); + expect.appendFormat("ret={%d, %d}", 1, 2); + + return result == expect; + } +}; + +// x86::Compiler - X86Test_IfElse2 +// =============================== + +class X86Test_IfElse2 : public X86TestCase { +public: + X86Test_IfElse2() : X86TestCase("IfElse2") {} + + static void add(TestApp& app) { + app.add(new X86Test_IfElse2()); + } + + virtual void compile(x86::Compiler& cc) { + x86::Gp v1 = cc.newInt32("v1"); + x86::Gp v2 = cc.newInt32("v2"); + + Label L_1 = cc.newLabel(); + Label L_2 = cc.newLabel(); + Label L_3 = cc.newLabel(); + Label L_4 = cc.newLabel(); + + FuncNode* funcNode = cc.addFunc(FuncSignature::build()); + funcNode->setArg(0, v1); + funcNode->setArg(1, v2); + + cc.jmp(L_1); + cc.bind(L_2); + cc.jmp(L_4); + cc.bind(L_1); + + cc.cmp(v1, v2); + cc.jg(L_3); + + cc.mov(v1, 1); + cc.jmp(L_2); + + cc.bind(L_3); + cc.mov(v1, 2); + cc.jmp(L_2); + + cc.bind(L_4); + + cc.ret(v1); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int); + Func func = ptr_as_func(_func); + + int a = func(0, 1); + int b = func(1, 0); + + result.appendFormat("ret={%d, %d}", a, b); + expect.appendFormat("ret={%d, %d}", 1, 2); + + return result == expect; + } +}; + +// x86::Compiler - X86Test_IfElse3 +// =============================== + +class X86Test_IfElse3 : public X86TestCase { +public: + X86Test_IfElse3() : X86TestCase("IfElse3") {} + + static void add(TestApp& app) { + app.add(new X86Test_IfElse3()); + } + + virtual void compile(x86::Compiler& cc) { + x86::Gp v1 = cc.newInt32("v1"); + x86::Gp v2 = cc.newInt32("v2"); + x86::Gp counter = cc.newInt32("counter"); + + Label L_1 = cc.newLabel(); + Label L_Loop = cc.newLabel(); + Label L_Exit = cc.newLabel(); + + FuncNode* funcNode = cc.addFunc(FuncSignature::build()); + funcNode->setArg(0, v1); + funcNode->setArg(1, v2); + + cc.cmp(v1, v2); + cc.jg(L_1); + + cc.mov(counter, 0); + + cc.bind(L_Loop); + cc.mov(v1, counter); + + cc.inc(counter); + cc.cmp(counter, 1); + cc.jle(L_Loop); + cc.jmp(L_Exit); + + cc.bind(L_1); + cc.mov(v1, 2); + + cc.bind(L_Exit); + cc.ret(v1); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int); + Func func = ptr_as_func(_func); + + int a = func(0, 1); + int b = func(1, 0); + + result.appendFormat("ret={%d, %d}", a, b); + expect.appendFormat("ret={%d, %d}", 1, 2); + + return result == expect; + } +}; + +// x86::Compiler - X86Test_IfElse4 +// =============================== + +class X86Test_IfElse4 : public X86TestCase { +public: + X86Test_IfElse4() : X86TestCase("IfElse4") {} + + static void add(TestApp& app) { + app.add(new X86Test_IfElse4()); + } + + virtual void compile(x86::Compiler& cc) { + x86::Gp v1 = cc.newInt32("v1"); + x86::Gp v2 = cc.newInt32("v2"); + x86::Gp counter = cc.newInt32("counter"); + + Label L_1 = 
cc.newLabel(); + Label L_Loop1 = cc.newLabel(); + Label L_Loop2 = cc.newLabel(); + Label L_Exit = cc.newLabel(); + + FuncNode* funcNode = cc.addFunc(FuncSignature::build()); + funcNode->setArg(0, v1); + funcNode->setArg(1, v2); + + cc.mov(counter, 0); + cc.cmp(v1, v2); + cc.jg(L_1); + + cc.bind(L_Loop1); + cc.mov(v1, counter); + + cc.inc(counter); + cc.cmp(counter, 1); + cc.jle(L_Loop1); + cc.jmp(L_Exit); + + cc.bind(L_1); + cc.bind(L_Loop2); + cc.mov(v1, counter); + cc.inc(counter); + cc.cmp(counter, 2); + cc.jle(L_Loop2); + + cc.bind(L_Exit); + cc.ret(v1); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int); + Func func = ptr_as_func(_func); + + int a = func(0, 1); + int b = func(1, 0); + + result.appendFormat("ret={%d, %d}", a, b); + expect.appendFormat("ret={%d, %d}", 1, 2); + + return result == expect; + } +}; + +// x86::Compiler - X86Test_Memcpy +// ============================== + +class X86Test_Memcpy : public X86TestCase { +public: + X86Test_Memcpy() : X86TestCase("Memcpy") {} enum { kCount = 32 }; static void add(TestApp& app) { - app.add(new X86Test_AllocMemcpy()); + app.add(new X86Test_Memcpy()); } virtual void compile(x86::Compiler& cc) { @@ -2266,15 +2266,15 @@ public: } }; -// x86::Compiler - X86Test_AllocExtraBlock -// ======================================= +// x86::Compiler - X86Test_ExtraBlock +// ================================== -class X86Test_AllocExtraBlock : public X86TestCase { +class X86Test_ExtraBlock : public X86TestCase { public: - X86Test_AllocExtraBlock() : X86TestCase("AllocExtraBlock") {} + X86Test_ExtraBlock() : X86TestCase("ExtraBlock") {} static void add(TestApp& app) { - app.add(new X86Test_AllocExtraBlock()); + app.add(new X86Test_ExtraBlock()); } virtual void compile(x86::Compiler& cc) { @@ -2328,17 +2328,17 @@ public: } }; -// x86::Compiler - X86Test_AllocAlphaBlend -// ======================================= +// x86::Compiler - X86Test_AlphaBlend +// ================================== -class X86Test_AllocAlphaBlend : public X86TestCase { +class X86Test_AlphaBlend : public X86TestCase { public: - X86Test_AllocAlphaBlend() : X86TestCase("AllocAlphaBlend") {} + X86Test_AlphaBlend() : X86TestCase("AlphaBlend") {} enum { kCount = 17 }; static void add(TestApp& app) { - app.add(new X86Test_AllocAlphaBlend()); + app.add(new X86Test_AlphaBlend()); } static uint32_t blendSrcOver(uint32_t d, uint32_t s) { @@ -2406,6 +2406,117 @@ public: } }; +// x86::Compiler - X86Test_AVX512_KK +// ================================= + +class X86Test_AVX512_KK : public X86TestCase { +public: + X86Test_AVX512_KK() : X86TestCase("AVX512_KK") {} + + static void add(TestApp& app) { + const CpuInfo& cpuInfo = CpuInfo::host(); + + if (cpuInfo.features().x86().hasAVX512_F()) { + app.add(new X86Test_AVX512_KK()); + } + } + + virtual void compile(x86::Compiler& cc) { + FuncNode* funcNode = cc.addFunc(FuncSignature::build()); + + x86::Gp a = cc.newIntPtr("a"); + x86::Gp b = cc.newIntPtr("b"); + x86::Gp pred = cc.newInt32("pred"); + x86::Gp result = cc.newInt32("result"); + + x86::Vec va = cc.newZmm("va"); + x86::Vec vb = cc.newZmm("vb"); + x86::KReg kIn = cc.newKd("k_in"); + x86::KReg kOut = cc.newKd("k_out"); + + funcNode->setArg(0, a); + funcNode->setArg(1, b); + funcNode->setArg(2, pred); + + cc.vmovdqu32(va, x86::ptr(a)); + cc.vmovdqu32(vb, x86::ptr(b)); + cc.kmovd(kIn, pred); + cc.k(kIn).vpcmpeqd(kOut, va, vb); + cc.kmovd(result, kOut); + cc.ret(result); + cc.endFunc(); + } + + virtual bool run(void* _func, 
String& result, String& expect) { + typedef uint32_t (*Func)(const void*, const void*, uint32_t prevK); + Func func = ptr_as_func(_func); + + static const uint32_t srcA[16] = { 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 }; + static const uint32_t srcB[16] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1 }; + + uint32_t ret = func(srcA, srcB, 0xF0F0); + + result.assignFormat("0x%08X", ret); + expect.assignFormat("0x%08X", 0xA040u); + + return result == expect; + } +}; + +// x86::Compiler - X86Test_AVX512_TernLog +// ====================================== + +class X86Test_AVX512_TernLog : public X86TestCase { +public: + X86Test_AVX512_TernLog() : X86TestCase("AVX512_TernLog") {} + + static void add(TestApp& app) { + const CpuInfo& cpuInfo = CpuInfo::host(); + + if (cpuInfo.features().x86().hasAVX512_F()) { + app.add(new X86Test_AVX512_TernLog()); + } + } + + virtual void compile(x86::Compiler& cc) { + FuncNode* funcNode = cc.addFunc(FuncSignature::build()); + + x86::Gp out = cc.newIntPtr("outPtr"); + x86::Vec vec = cc.newZmm("vec"); + + funcNode->setArg(0, out); + + cc.vpternlogd(vec, vec, vec, 0xFFu); + cc.vmovdqu8(x86::ptr(out), vec); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(void*); + Func func = ptr_as_func(_func); + + uint32_t out[16]; + func(out); + + result.assign("{"); + expect.assign("{"); + + for (uint32_t i = 0; i < 16; i++) { + if (i) { + result.append(", "); + expect.append(", "); + } + result.appendFormat("0x%08X", out[i]); + expect.appendFormat("0x%08X", 0xFFFFFFFFu); + } + + result.append("}"); + expect.append("}"); + + return result == expect; + } +}; + // x86::Compiler - X86Test_FuncArgInt8 // =================================== @@ -4024,8 +4135,9 @@ public: static void add(TestApp& app) { const CpuInfo& cpuInfo = CpuInfo::host(); - if (cpuInfo.features().x86().hasAVX2() && sizeof(void*) == 8) + if (cpuInfo.features().x86().hasAVX2() && sizeof(void*) == 8) { app.add(new X86Test_FuncCallAVXClobber()); + } } virtual void compile(x86::Compiler& cc) { @@ -4132,6 +4244,60 @@ public: } }; +// x86::Compiler - X86Test_VecToScalar +// =================================== + +class X86Test_VecToScalar : public X86TestCase { +public: + static constexpr uint32_t kVecCount = 64; + + X86Test_VecToScalar() : X86TestCase("VecToScalar") {} + + static void add(TestApp& app) { + app.add(new X86Test_VecToScalar()); + } + + virtual void compile(x86::Compiler& cc) { + FuncNode* func = cc.addFunc(FuncSignature::build()); + + x86::Gp x = cc.newInt32("x"); + x86::Gp t = cc.newInt32("t"); + x86::Xmm v[kVecCount]; + + func->setArg(0, x); + + for (size_t i = 0; i < kVecCount; i++) { + v[i] = cc.newXmm("v%d", i); + if (i != 0) + cc.add(x, 1); + cc.movd(v[i], x); + } + + cc.xor_(x, x); + + for (size_t i = 0; i < kVecCount; i++) { + cc.movd(t, v[i]); + cc.add(x, t); + } + + cc.ret(x); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef uint32_t (*Func)(uint32_t); + Func func = ptr_as_func(_func); + + uint32_t resultRet = func(1); + uint32_t expectRet = 2080; // 1 + 2 + 3 + ... + 64 + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return result == expect; + } +}; + // x86::Compiler - X86Test_MiscLocalConstPool // ========================================== @@ -4445,22 +4611,10 @@ void compiler_add_x86_tests(TestApp& app) { app.addT(); app.addT(); - // Alloc tests. + // Alloc and instruction tests. 
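+  // Each addT<T>() below registers a test through its static add() member
+  // (defined with each test class above), which is why feature-gated tests
+  // such as the AVX512 ones can consult CpuInfo::host() and decline to
+  // register themselves on hosts without the required CPU features.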
app.addT(); app.addT(); app.addT(); - app.addT(); - app.addT(); - app.addT(); - app.addT(); - app.addT(); - app.addT(); - app.addT(); - app.addT(); - app.addT(); - app.addT(); - app.addT(); - app.addT(); app.addT(); app.addT(); app.addT(); @@ -4474,9 +4628,23 @@ void compiler_add_x86_tests(TestApp& app) { app.addT(); app.addT(); app.addT(); - app.addT(); - app.addT(); - app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); // Function arguments handling tests. app.addT(); @@ -4512,6 +4680,7 @@ void compiler_add_x86_tests(TestApp& app) { app.addT(); // Miscellaneous tests. + app.addT(); app.addT(); app.addT(); app.addT(); diff --git a/3rdparty/asmjit/test/asmjit_test_instinfo.cpp b/3rdparty/asmjit/test/asmjit_test_instinfo.cpp index 466d8a55951..c8cf058513c 100644 --- a/3rdparty/asmjit/test/asmjit_test_instinfo.cpp +++ b/3rdparty/asmjit/test/asmjit_test_instinfo.cpp @@ -151,6 +151,7 @@ static void testX86Arch() { Arch arch = Arch::kX64; printInfoSimple(arch, Inst::kIdAdd, InstOptions::kNone, eax, ebx); + printInfoSimple(arch, Inst::kIdXor, InstOptions::kNone, eax, eax); printInfoSimple(arch, Inst::kIdLods, InstOptions::kNone, eax, dword_ptr(rsi)); printInfoSimple(arch, Inst::kIdPshufd, InstOptions::kNone, xmm0, xmm1, imm(0)); @@ -167,6 +168,9 @@ static void testX86Arch() { printInfoSimple(arch, Inst::kIdVaddpd, InstOptions::kNone, ymm0, ymm30, ymm31); printInfoSimple(arch, Inst::kIdVaddpd, InstOptions::kNone, zmm0, zmm1, zmm2); + printInfoSimple(arch, Inst::kIdVpternlogd, InstOptions::kNone, zmm0, zmm0, zmm0, imm(0xFF)); + printInfoSimple(arch, Inst::kIdVpternlogq, InstOptions::kNone, zmm0, zmm1, zmm2, imm(0x33)); + printInfoExtra(arch, Inst::kIdVaddpd, InstOptions::kNone, k1, zmm0, zmm1, zmm2); printInfoExtra(arch, Inst::kIdVaddpd, InstOptions::kX86_ZMask, k1, zmm0, zmm1, zmm2); #endif // !ASMJIT_NO_X86 diff --git a/3rdparty/asmjit/test/asmjit_test_perf_a64.cpp b/3rdparty/asmjit/test/asmjit_test_perf_a64.cpp index 36d23a42d9b..938aa9b2c50 100644 --- a/3rdparty/asmjit/test/asmjit_test_perf_a64.cpp +++ b/3rdparty/asmjit/test/asmjit_test_perf_a64.cpp @@ -177,13 +177,13 @@ static void generateGpSequenceInternal( cc.cmn(wA, wB); cc.cmn(xA, xB); cc.cmn(wA, wB, uxtb(2)); - cc.cmn(xA, xB, uxtb(2)); + cc.cmn(xA, wB, uxtb(2)); cc.cmp(wA, 33); cc.cmp(xA, 33); cc.cmp(wA, wB); cc.cmp(xA, xB); cc.cmp(wA, wB, uxtb(2)); - cc.cmp(xA, xB, uxtb(2)); + cc.cmp(xA, wB, uxtb(2)); cc.crc32b(wA, wB, wC); cc.crc32b(wzr, wB, wC); cc.crc32b(wA, wzr, wC); diff --git a/3rdparty/asmjit/tools/configure-makefiles.sh b/3rdparty/asmjit/tools/configure-makefiles.sh old mode 100644 new mode 100755 diff --git a/3rdparty/asmjit/tools/configure-ninja.sh b/3rdparty/asmjit/tools/configure-ninja.sh old mode 100644 new mode 100755 diff --git a/3rdparty/asmjit/tools/configure-sanitizers.sh b/3rdparty/asmjit/tools/configure-sanitizers.sh old mode 100644 new mode 100755 diff --git a/3rdparty/asmjit/tools/configure-xcode.sh b/3rdparty/asmjit/tools/configure-xcode.sh old mode 100644 new mode 100755 diff --git a/3rdparty/asmjit/tools/enumgen.sh b/3rdparty/asmjit/tools/enumgen.sh old mode 100644 new mode 100755 diff --git a/3rdparty/asmjit/tools/tablegen-a64.sh b/3rdparty/asmjit/tools/tablegen-a64.sh old mode 100644 new mode 100755 diff --git a/3rdparty/asmjit/tools/tablegen-x86.sh b/3rdparty/asmjit/tools/tablegen-x86.sh old mode 100644 new mode 
100755 diff --git a/3rdparty/asmjit/tools/tablegen.sh b/3rdparty/asmjit/tools/tablegen.sh old mode 100644 new mode 100755 diff --git a/makefile b/makefile index c3ffd8c0721..176f9855a35 100644 --- a/makefile +++ b/makefile @@ -390,16 +390,15 @@ endif ifeq ($(findstring arm,$(UNAME)),arm) ARCHITECTURE := -ifndef FORCE_DRC_C_BACKEND - FORCE_DRC_C_BACKEND := 1 +ifneq ($(PLATFORM),arm64) + ifndef FORCE_DRC_C_BACKEND + FORCE_DRC_C_BACKEND := 1 + endif endif endif ifeq ($(findstring aarch64,$(UNAME)),aarch64) ARCHITECTURE := -ifndef FORCE_DRC_C_BACKEND - FORCE_DRC_C_BACKEND := 1 -endif endif ifeq ($(findstring s390x,$(UNAME)),s390x) diff --git a/scripts/genie.lua b/scripts/genie.lua index 76ffc77a6e2..89b60a6366e 100644 --- a/scripts/genie.lua +++ b/scripts/genie.lua @@ -710,16 +710,28 @@ end if not _OPTIONS["FORCE_DRC_C_BACKEND"] then if _OPTIONS["BIGENDIAN"]~="1" then - configuration { "x64" } - defines { - "NATIVE_DRC=drcbe_x64", - } - configuration { "x32" } - defines { - "NATIVE_DRC=drcbe_x86", - } - configuration { } + if (_OPTIONS["PLATFORM"]=="arm64") then + configuration { } + defines { + "NATIVE_DRC=drcbe_arm64", + } + else + configuration { "x64" } + defines { + "NATIVE_DRC=drcbe_x64", + } + configuration { "x32" } + defines { + "NATIVE_DRC=drcbe_x86", + } + configuration { } + end end + + configuration { } + defines { + "ASMJIT_STATIC", + } end defines { diff --git a/scripts/src/3rdparty.lua b/scripts/src/3rdparty.lua index 29ea1676d0b..785daa4a93b 100755 --- a/scripts/src/3rdparty.lua +++ b/scripts/src/3rdparty.lua @@ -1947,6 +1947,9 @@ project "asmjit" end configuration { } + defines { + "ASMJIT_STATIC", + } if _OPTIONS["targetos"]=="macosx" and _OPTIONS["gcc"]~=nil then if string.find(_OPTIONS["gcc"], "clang") and (version < 80000) then @@ -1956,14 +1959,16 @@ project "asmjit" end end + if (_OPTIONS["PLATFORM"]=="arm64") then + configuration { } + defines { + "ASMJIT_NO_X86", + } + end + files { MAME_DIR .. "3rdparty/asmjit/src/asmjit/a64.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/asmjit-scope-begin.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/asmjit-scope-end.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/asmjit.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/core.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64archtraits_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64assembler.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64assembler.h", @@ -1971,28 +1976,32 @@ project "asmjit" MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64builder.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64compiler.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64compiler.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64emithelper.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64emithelper_p.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64emithelper.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64emitter.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64formatter.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64formatter_p.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64func.cpp", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64formatter.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64func_p.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64func.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64globals.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64instapi.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64instapi_p.h", + MAME_DIR .. 
"3rdparty/asmjit/src/asmjit/arm/a64instapi.cpp", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64instdb_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64instdb.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64instdb.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64instdb_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64operand.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64operand.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64rapass.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64rapass_p.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/armformatter.cpp", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/a64rapass.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/armformatter_p.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/armformatter.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/armglobals.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/armoperand.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/arm/armutils.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/asmjit-scope-begin.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/asmjit-scope-end.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/asmjit.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/core.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/api-build_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/api-config.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/archcommons.h", @@ -2000,14 +2009,14 @@ project "asmjit" MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/archtraits.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/assembler.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/assembler.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/builder_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/builder.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/builder.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/builder_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/codebuffer.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/codeholder.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/codeholder.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/codewriter.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/codewriter_p.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/codewriter.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/compiler.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/compiler.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/compilerdefs.h", @@ -2015,29 +2024,29 @@ project "asmjit" MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/constpool.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/cpuinfo.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/cpuinfo.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/emithelper.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/emithelper_p.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/emithelper.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/emitter.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/emitter.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/emitterutils.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/emitterutils_p.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/emitterutils.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/environment.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/environment.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/errorhandler.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/errorhandler.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/formatter_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/formatter.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/formatter.h", - MAME_DIR .. 
"3rdparty/asmjit/src/asmjit/core/formatter_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/func.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/func.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/funcargscontext.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/funcargscontext_p.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/funcargscontext.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/globals.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/globals.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/inst.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/inst.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/instdb.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/instdb_p.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/instdb.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/jitallocator.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/jitallocator.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/jitruntime.cpp", @@ -2047,23 +2056,23 @@ project "asmjit" MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/misc_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/operand.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/operand.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/osutils_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/osutils.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/osutils.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/osutils_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/raassignment_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/rabuilders_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/radefs_p.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/ralocal.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/ralocal_p.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/rapass.cpp", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/ralocal.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/rapass_p.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/rastack.cpp", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/rapass.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/rastack_p.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/rastack.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/string.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/string.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/support_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/support.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/support.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/support_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/target.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/target.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/type.cpp", @@ -2083,6 +2092,7 @@ project "asmjit" MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/zonetree.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/zonevector.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/core/zonevector.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86archtraits_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86assembler.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86assembler.h", @@ -2090,23 +2100,23 @@ project "asmjit" MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86builder.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86compiler.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86compiler.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86emithelper.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86emithelper_p.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86emithelper.cpp", MAME_DIR .. 
"3rdparty/asmjit/src/asmjit/x86/x86emitter.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86formatter.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86formatter_p.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86func.cpp", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86formatter.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86func_p.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86func.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86globals.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86instapi.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86instapi_p.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86instapi.cpp", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86instdb_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86instdb.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86instdb.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86instdb_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86opcode_p.h", MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86operand.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86operand.h", - MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86rapass.cpp", MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86rapass_p.h", + MAME_DIR .. "3rdparty/asmjit/src/asmjit/x86/x86rapass.cpp", } end diff --git a/scripts/src/cpu.lua b/scripts/src/cpu.lua index 2598688fe97..8d115a91ed0 100644 --- a/scripts/src/cpu.lua +++ b/scripts/src/cpu.lua @@ -42,12 +42,19 @@ if (CPU_INCLUDE_DRC) then MAME_DIR .. "src/devices/cpu/drcumlsh.h", } if not _OPTIONS["FORCE_DRC_C_BACKEND"] then - files { - MAME_DIR .. "src/devices/cpu/drcbex64.cpp", - MAME_DIR .. "src/devices/cpu/drcbex64.h", - MAME_DIR .. "src/devices/cpu/drcbex86.cpp", - MAME_DIR .. "src/devices/cpu/drcbex86.h", - } + if (_OPTIONS["PLATFORM"]=="arm64") then + files { + MAME_DIR .. "src/devices/cpu/drcbearm64.cpp", + MAME_DIR .. "src/devices/cpu/drcbearm64.h", + } + else + files { + MAME_DIR .. "src/devices/cpu/drcbex64.cpp", + MAME_DIR .. "src/devices/cpu/drcbex64.h", + MAME_DIR .. "src/devices/cpu/drcbex86.cpp", + MAME_DIR .. 
"src/devices/cpu/drcbex86.h", + } + end end if _OPTIONS["targetos"]=="macosx" and _OPTIONS["gcc"]~=nil then diff --git a/src/devices/cpu/drcbearm64.cpp b/src/devices/cpu/drcbearm64.cpp new file mode 100644 index 00000000000..bae953e01f7 --- /dev/null +++ b/src/devices/cpu/drcbearm64.cpp @@ -0,0 +1,4278 @@ +// license:BSD-3-Clause +// copyright-holders:windyfairy + +#include "emu.h" +#include "drcbearm64.h" + +#include "debug/debugcpu.h" +#include "emuopts.h" +#include "uml.h" + +#include + + +namespace drc { + +using namespace uml; + +using namespace asmjit; + +const uint32_t PTYPE_M = 1 << parameter::PTYPE_MEMORY; +const uint32_t PTYPE_I = 1 << parameter::PTYPE_IMMEDIATE; +const uint32_t PTYPE_R = 1 << parameter::PTYPE_INT_REGISTER; +const uint32_t PTYPE_F = 1 << parameter::PTYPE_FLOAT_REGISTER; +const uint32_t PTYPE_MR = PTYPE_M | PTYPE_R; +const uint32_t PTYPE_MRI = PTYPE_M | PTYPE_R | PTYPE_I; +const uint32_t PTYPE_MF = PTYPE_M | PTYPE_F; + +// Pass/receive value registers +const a64::Gp REG_PARAM1 = a64::x0; +const a64::Gp REG_PARAM2 = a64::x1; +const a64::Gp REG_PARAM3 = a64::x2; +const a64::Gp REG_PARAM4 = a64::x3; + +// Stable registers that can be assumed to be unchanged by internal functions +const a64::Gp TEMP_REG1 = a64::x9; +const a64::Gp TEMP_REG2 = a64::x10; +const a64::Gp TEMP_REG3 = a64::x11; + +// Temporary registers that should not be assumed to live between functions +const a64::Gp SCRATCH_REG1 = a64::x12; +const a64::Gp SCRATCH_REG2 = a64::x13; + +// Only to be used in an opcode level function. Should not be used in helper functions +const a64::Gp FUNC_SCRATCH_REG = a64::x15; + +// Temporary memory calculation register, should not be used outside of functions that calculate memory addresses +const a64::Gp MEM_SCRATCH_REG = a64::x14; + +const a64::Vec TEMPF_REG1 = a64::d16; +const a64::Vec TEMPF_REG2 = a64::d17; +const a64::Vec TEMPF_REG3 = a64::d18; + +// Base memory address +const a64::Gp BASE_REG = a64::x27; + +// Software emulated flags (bit 0 = FLAG_C, bit 4 = FLAG_U) +const a64::Gp FLAGS_REG = a64::x28; + + +#define ARM_CONDITION(a, condition) (condition_map[condition - COND_Z]) +#define ARM_NOT_CONDITION(a, condition) (negateCond(condition_map[condition - COND_Z])) + +#define assert_no_condition(inst) assert((inst).condition() == uml::COND_ALWAYS) +#define assert_any_condition(inst) assert((inst).condition() == uml::COND_ALWAYS || ((inst).condition() >= uml::COND_Z && (inst).condition() < uml::COND_MAX)) +#define assert_no_flags(inst) assert((inst).flags() == 0) +#define assert_flags(inst, valid) assert(((inst).flags() & ~(valid)) == 0) + + +drcbe_arm64::opcode_generate_func drcbe_arm64::s_opcode_table[OP_MAX]; + +static const a64::Gp::Id int_register_map[REG_I_COUNT] = +{ + a64::Gp::Id(a64::x19.id()), + a64::Gp::Id(a64::x20.id()), + a64::Gp::Id(a64::x21.id()), + a64::Gp::Id(a64::x22.id()), + a64::Gp::Id(a64::x23.id()), + a64::Gp::Id(a64::x24.id()), + a64::Gp::Id(a64::x25.id()), + a64::Gp::Id(a64::x26.id()), +}; + +static const a64::Gp::Id float_register_map[REG_F_COUNT] = +{ + a64::Gp::Id(a64::d8.id()), + a64::Gp::Id(a64::d9.id()), + a64::Gp::Id(a64::d10.id()), + a64::Gp::Id(a64::d11.id()), + a64::Gp::Id(a64::d12.id()), + a64::Gp::Id(a64::d13.id()), + a64::Gp::Id(a64::d14.id()), + a64::Gp::Id(a64::d15.id()), +}; + +// condition mapping table +static const a64::CondCode condition_map[uml::COND_MAX - uml::COND_Z] = +{ + a64::CondCode::kEQ, // COND_Z = 0x80, requires Z COND_E + a64::CondCode::kNE, // COND_NZ, requires Z COND_NE + a64::CondCode::kMI, // COND_S, 
requires S + a64::CondCode::kPL, // COND_NS, requires S + a64::CondCode::kLO, // COND_C, requires C COND_B + a64::CondCode::kHS, // COND_NC, requires C COND_AE + a64::CondCode::kVS, // COND_V, requires V + a64::CondCode::kVC, // COND_NV, requires V + a64::CondCode::kAL, // COND_U, requires U (emulated in software) + a64::CondCode::kAL, // COND_NU, requires U (emulated in software) + a64::CondCode::kHI, // COND_A, requires CZ + a64::CondCode::kLS, // COND_BE, requires CZ + a64::CondCode::kGT, // COND_G, requires SVZ + a64::CondCode::kLE, // COND_LE, requires SVZ + a64::CondCode::kLT, // COND_L, requires SV + a64::CondCode::kGE, // COND_GE, requires SV +}; + +const drcbe_arm64::opcode_table_entry drcbe_arm64::s_opcode_table_source[] = +{ + // Compile-time opcodes + { uml::OP_HANDLE, &drcbe_arm64::op_handle }, // HANDLE handle + { uml::OP_HASH, &drcbe_arm64::op_hash }, // HASH mode,pc + { uml::OP_LABEL, &drcbe_arm64::op_label }, // LABEL imm + { uml::OP_COMMENT, &drcbe_arm64::op_comment }, // COMMENT string + { uml::OP_MAPVAR, &drcbe_arm64::op_mapvar }, // MAPVAR mapvar,value + + // Control Flow Operations + { uml::OP_NOP, &drcbe_arm64::op_nop }, // NOP + { uml::OP_BREAK, &drcbe_arm64::op_break }, // BREAK + { uml::OP_DEBUG, &drcbe_arm64::op_debug }, // DEBUG pc + { uml::OP_EXIT, &drcbe_arm64::op_exit }, // EXIT src1[,c] + { uml::OP_HASHJMP, &drcbe_arm64::op_hashjmp }, // HASHJMP mode,pc,handle + { uml::OP_JMP, &drcbe_arm64::op_jmp }, // JMP imm[,c] + { uml::OP_EXH, &drcbe_arm64::op_exh }, // EXH handle,param[,c] + { uml::OP_CALLH, &drcbe_arm64::op_callh }, // CALLH handle[,c] + { uml::OP_RET, &drcbe_arm64::op_ret }, // RET [c] + { uml::OP_CALLC, &drcbe_arm64::op_callc }, // CALLC func,ptr[,c] + { uml::OP_RECOVER, &drcbe_arm64::op_recover }, // RECOVER dst,mapvar + + // Internal Register Operations + { uml::OP_SETFMOD, &drcbe_arm64::op_setfmod }, // SETFMOD src + { uml::OP_GETFMOD, &drcbe_arm64::op_getfmod }, // GETFMOD dst + { uml::OP_GETEXP, &drcbe_arm64::op_getexp }, // GETEXP dst + { uml::OP_GETFLGS, &drcbe_arm64::op_getflgs }, // GETFLGS dst[,f] + { uml::OP_SETFLGS, &drcbe_arm64::op_setflgs }, // SETFLGS dst[,f] + { uml::OP_SAVE, &drcbe_arm64::op_save }, // SAVE dst + { uml::OP_RESTORE, &drcbe_arm64::op_restore }, // RESTORE dst + + // Integer Operations + { uml::OP_LOAD, &drcbe_arm64::op_load }, // LOAD dst,base,index,size + { uml::OP_LOADS, &drcbe_arm64::op_loads }, // LOADS dst,base,index,size + { uml::OP_STORE, &drcbe_arm64::op_store }, // STORE base,index,src,size + { uml::OP_READ, &drcbe_arm64::op_read }, // READ dst,src1,spacesize + { uml::OP_READM, &drcbe_arm64::op_readm }, // READM dst,src1,mask,spacesize + { uml::OP_WRITE, &drcbe_arm64::op_write }, // WRITE dst,src1,spacesize + { uml::OP_WRITEM, &drcbe_arm64::op_writem }, // WRITEM dst,src1,spacesize + { uml::OP_CARRY, &drcbe_arm64::op_carry }, // CARRY src,bitnum + { uml::OP_SET, &drcbe_arm64::op_set }, // SET dst,c + { uml::OP_MOV, &drcbe_arm64::op_mov }, // MOV dst,src[,c] + { uml::OP_SEXT, &drcbe_arm64::op_sext }, // SEXT dst,src + { uml::OP_ROLAND, &drcbe_arm64::op_roland }, // ROLAND dst,src1,src2,src3 + { uml::OP_ROLINS, &drcbe_arm64::op_rolins }, // ROLINS dst,src1,src2,src3 + { uml::OP_ADD, &drcbe_arm64::op_add }, // ADD dst,src1,src2[,f] + { uml::OP_ADDC, &drcbe_arm64::op_add }, // ADDC dst,src1,src2[,f] + { uml::OP_SUB, &drcbe_arm64::op_sub }, // SUB dst,src1,src2[,f] + { uml::OP_SUBB, &drcbe_arm64::op_sub }, // SUBB dst,src1,src2[,f] + { uml::OP_CMP, &drcbe_arm64::op_cmp }, // CMP src1,src2[,f] + { uml::OP_MULU, 
&drcbe_arm64::op_mulu }, // MULU dst,edst,src1,src2[,f] + { uml::OP_MULULW, &drcbe_arm64::op_mululw }, // MULULW dst,src1,src2[,f] + { uml::OP_MULS, &drcbe_arm64::op_muls }, // MULS dst,edst,src1,src2[,f] + { uml::OP_MULSLW, &drcbe_arm64::op_mulslw }, // MULSLW dst,src1,src2[,f] + { uml::OP_DIVU, &drcbe_arm64::op_div }, // DIVU dst,edst,src1,src2[,f] + { uml::OP_DIVS, &drcbe_arm64::op_div }, // DIVS dst,edst,src1,src2[,f] + { uml::OP_AND, &drcbe_arm64::op_and }, // AND dst,src1,src2[,f] + { uml::OP_TEST, &drcbe_arm64::op_test }, // TEST src1,src2[,f] + { uml::OP_OR, &drcbe_arm64::op_or }, // OR dst,src1,src2[,f] + { uml::OP_XOR, &drcbe_arm64::op_xor }, // XOR dst,src1,src2[,f] + { uml::OP_LZCNT, &drcbe_arm64::op_lzcnt }, // LZCNT dst,src[,f] + { uml::OP_TZCNT, &drcbe_arm64::op_tzcnt }, // TZCNT dst,src[,f] + { uml::OP_BSWAP, &drcbe_arm64::op_bswap }, // BSWAP dst,src + { uml::OP_SHL, &drcbe_arm64::op_shift }, // SHL dst,src,count[,f] + { uml::OP_SHR, &drcbe_arm64::op_shift }, // SHR dst,src,count[,f] + { uml::OP_SAR, &drcbe_arm64::op_shift }, // SAR dst,src,count[,f] + { uml::OP_ROL, &drcbe_arm64::op_rol }, // ROL dst,src,count[,f] + { uml::OP_ROLC, &drcbe_arm64::op_rolc }, // ROLC dst,src,count[,f] + { uml::OP_ROR, &drcbe_arm64::op_shift }, // ROR dst,src,count[,f] + { uml::OP_RORC, &drcbe_arm64::op_rorc }, // RORC dst,src,count[,f] + + // Floating Point Operations + { uml::OP_FLOAD, &drcbe_arm64::op_fload }, // FLOAD dst,base,index + { uml::OP_FSTORE, &drcbe_arm64::op_fstore }, // FSTORE base,index,src + { uml::OP_FREAD, &drcbe_arm64::op_fread }, // FREAD dst,space,src1 + { uml::OP_FWRITE, &drcbe_arm64::op_fwrite }, // FWRITE space,dst,src1 + { uml::OP_FMOV, &drcbe_arm64::op_fmov }, // FMOV dst,src1[,c] + { uml::OP_FTOINT, &drcbe_arm64::op_ftoint }, // FTOINT dst,src1,size,round + { uml::OP_FFRINT, &drcbe_arm64::op_ffrint }, // FFRINT dst,src1,size + { uml::OP_FFRFLT, &drcbe_arm64::op_ffrflt }, // FFRFLT dst,src1,size + { uml::OP_FRNDS, &drcbe_arm64::op_frnds }, // FRNDS dst,src1 + { uml::OP_FADD, &drcbe_arm64::op_float_alu }, // FADD dst,src1,src2 + { uml::OP_FSUB, &drcbe_arm64::op_float_alu }, // FSUB dst,src1,src2 + { uml::OP_FCMP, &drcbe_arm64::op_fcmp }, // FCMP src1,src2 + { uml::OP_FMUL, &drcbe_arm64::op_float_alu }, // FMUL dst,src1,src2 + { uml::OP_FDIV, &drcbe_arm64::op_float_alu }, // FDIV dst,src1,src2 + { uml::OP_FNEG, &drcbe_arm64::op_float_alu2 }, // FNEG dst,src1 + { uml::OP_FABS, &drcbe_arm64::op_float_alu2 }, // FABS dst,src1 + { uml::OP_FSQRT, &drcbe_arm64::op_float_alu2 }, // FSQRT dst,src1 + { uml::OP_FRECIP, &drcbe_arm64::op_float_alu2 }, // FRECIP dst,src1 + { uml::OP_FRSQRT, &drcbe_arm64::op_float_alu2 }, // FRSQRT dst,src1 + { uml::OP_FCOPYI, &drcbe_arm64::op_fcopyi }, // FCOPYI dst,src + { uml::OP_ICOPYF, &drcbe_arm64::op_icopyf } // ICOPYF dst,src +}; + +class ThrowableErrorHandler : public ErrorHandler +{ +public: + void handleError(Error err, const char *message, BaseEmitter *origin) override + { + throw emu_fatalerror("asmjit error %d: %s", err, message); + } +}; + +drcbe_arm64::be_parameter::be_parameter(drcbe_arm64 &drcbe, const parameter ¶m, uint32_t allowed) +{ + int regnum; + + switch (param.type()) + { + // immediates pass through + case parameter::PTYPE_IMMEDIATE: + assert(allowed & PTYPE_I); + *this = param.immediate(); + break; + + // memory passes through + case parameter::PTYPE_MEMORY: + assert(allowed & PTYPE_M); + *this = make_memory(param.memory()); + break; + + // if a register maps to a register, keep it as a register; otherwise map it to 
memory + case parameter::PTYPE_INT_REGISTER: + assert(allowed & PTYPE_R); + assert(allowed & PTYPE_M); + regnum = int_register_map[param.ireg() - REG_I0]; + if (regnum != 0) + *this = make_ireg(regnum); + else + *this = make_memory(&drcbe.m_state.r[param.ireg() - REG_I0]); + break; + + // if a register maps to a register, keep it as a register; otherwise map it to memory + case parameter::PTYPE_FLOAT_REGISTER: + assert(allowed & PTYPE_F); + assert(allowed & PTYPE_M); + regnum = float_register_map[param.freg() - REG_F0]; + if (regnum != 0) + *this = make_freg(regnum); + else + *this = make_memory(&drcbe.m_state.f[param.freg() - REG_F0]); + break; + + // everything else is unexpected + default: + fatalerror("Unexpected parameter type %d\n", param.type()); + } +} + +a64::Vec drcbe_arm64::be_parameter::get_register_float(uint32_t regsize) const +{ + assert(m_type == PTYPE_FLOAT_REGISTER); + return a64::Vec::fromTypeAndId((regsize == 4) ? RegType::kARM_VecS : RegType::kARM_VecD, m_value); +} + +a64::Gp drcbe_arm64::be_parameter::get_register_int(uint32_t regsize) const +{ + assert(m_type == PTYPE_INT_REGISTER); + return a64::Gp::fromTypeAndId((regsize == 4) ? RegType::kARM_GpW : RegType::kARM_GpX, m_value); +} + +a64::Vec drcbe_arm64::be_parameter::select_register(a64::Vec const ®, uint32_t regsize) const +{ + if (m_type == PTYPE_FLOAT_REGISTER) + return get_register_float(regsize); + if (regsize == 4) + return reg.s(); + return reg.d(); +} + +a64::Gp drcbe_arm64::be_parameter::select_register(a64::Gp const ®, uint32_t regsize) const +{ + if (m_type == PTYPE_INT_REGISTER) + return get_register_int(regsize); + if (regsize == 4) + return reg.w(); + return reg.x(); +} + +a64::Vec drcbe_arm64::select_register(a64::Vec const ®, uint32_t regsize) const +{ + if (regsize == 4) + return reg.s(); + return reg.d(); +} + +a64::Gp drcbe_arm64::select_register(a64::Gp const ®, uint32_t regsize) const +{ + if (regsize == 4) + return reg.w(); + return reg.x(); +} + +bool drcbe_arm64::is_valid_immediate_mask(uint64_t val, size_t bytes) +{ + // all zeros and all ones aren't allowed, and disallow any value with bits outside of the max bit range + if (val == 0 || val == make_bitmask(bytes * 8)) + return false; + + uint32_t head = 64 - count_leading_zeros_64(val); + if (head >= (bytes * 8)) + return false; + + uint32_t tail = 0; + while (tail < head) + { + if (BIT(val, tail)) + break; + tail++; + } + + return population_count_64(val) == head - tail; +} + +bool drcbe_arm64::is_valid_immediate(uint64_t val, size_t bits) +{ + assert(bits < 64); + return val < (uint64_t(1) << bits); +} + +bool drcbe_arm64::is_valid_immediate_signed(int64_t val, size_t bits) +{ + return util::sext(val, bits) == val; +} + +arm::Mem drcbe_arm64::get_mem_absolute(a64::Assembler &a, const void *ptr) const +{ + const uint64_t codeoffs = a.code()->baseAddress() + a.offset(); + const int64_t reloffs = codeoffs - (int64_t)ptr; + if (is_valid_immediate_signed(reloffs, 21)) + { + a.adr(MEM_SCRATCH_REG, ptr); + return arm::Mem(MEM_SCRATCH_REG); + } + + const uint64_t pagebase = codeoffs & ~make_bitmask(12); + const int64_t pagerel = (int64_t)ptr - pagebase; + if (is_valid_immediate_signed(pagerel, 33)) + { + const uint64_t targetpage = (uint64_t)ptr & ~make_bitmask(12); + const uint64_t pageoffs = (uint64_t)ptr & util::make_bitmask(12); + + a.adrp(MEM_SCRATCH_REG, targetpage); + + if (is_valid_immediate_signed(pageoffs, 9)) + return arm::Mem(MEM_SCRATCH_REG, pageoffs); + else if (emit_add_optimized(a, MEM_SCRATCH_REG, MEM_SCRATCH_REG, pageoffs)) + 
return arm::Mem(MEM_SCRATCH_REG); + } + + a.mov(MEM_SCRATCH_REG, ptr); + return arm::Mem(MEM_SCRATCH_REG); +} + +bool drcbe_arm64::emit_add_optimized(a64::Assembler &a, const a64::Gp &dst, const a64::Gp &src, int64_t val) const +{ + // If the bottom 12 bits are 0s then an optimized form can be used if the remaining bits are <= 12 + if (is_valid_immediate(val, 12) || ((val & 0xfff) == 0 && is_valid_immediate(val >> 12, 12))) + { + a.add(dst, src, val); + return true; + } + + return false; +} + +bool drcbe_arm64::emit_sub_optimized(a64::Assembler &a, const a64::Gp &dst, const a64::Gp &src, int64_t val) const +{ + if (val < 0) + val = -val; + + // If the bottom 12 bits are 0s then an optimized form can be used if the remaining bits are <= 12 + if (is_valid_immediate(val, 12) || ((val & 0xfff) == 0 && is_valid_immediate(val >> 12, 12))) + { + a.sub(dst, src, val); + return true; + } + + return false; +} + +void drcbe_arm64::get_imm_relative(a64::Assembler &a, const a64::Gp ®, const uint64_t ptr) const +{ + // If a value can be expressed relative to the base register then it's worth using it instead of a mov + // which can be expanded to up to 4 instructions for large immediates + const int64_t diff = (int64_t)ptr - (int64_t)m_baseptr; + if (diff > 0 && emit_add_optimized(a, reg, BASE_REG, diff)) + return; + else if (diff < 0 && emit_sub_optimized(a, reg, BASE_REG, diff)) + return; + + const uint64_t codeoffs = a.code()->baseAddress() + a.offset(); + const int64_t reloffs = codeoffs - (int64_t)ptr; + if (is_valid_immediate_signed(reloffs, 21)) + { + a.adr(reg, ptr); + return; + } + + a.mov(reg, ptr); +} + +void drcbe_arm64::emit_ldr_str_base_mem(a64::Assembler &a, a64::Inst::Id opcode, const a64::Reg ®, const void *ptr) const +{ + // If it can fit as a constant offset + const int64_t diff = (int64_t)ptr - (int64_t)m_baseptr; + if (is_valid_immediate_signed(diff, 9)) + { + a.emit(opcode, reg, arm::Mem(BASE_REG, diff)); + return; + } + + // If it can fit as an offset relative to PC + const uint64_t codeoffs = a.code()->baseAddress() + a.offset(); + const int64_t reloffs = codeoffs - (int64_t)ptr; + if (is_valid_immediate_signed(reloffs, 21)) + { + a.adr(MEM_SCRATCH_REG, ptr); + a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG)); + return; + } + + if (diff > 0 && is_valid_immediate(diff, 16)) + { + a.mov(MEM_SCRATCH_REG, diff); + a.emit(opcode, reg, arm::Mem(BASE_REG, MEM_SCRATCH_REG)); + return; + } + + if (diff > 0 && emit_add_optimized(a, MEM_SCRATCH_REG, BASE_REG, diff)) + { + a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG)); + return; + } + else if (diff < 0 && emit_sub_optimized(a, MEM_SCRATCH_REG, BASE_REG, diff)) + { + a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG)); + return; + } + + if (diff >= 0) + { + int shift = 0; + int max_shift = 0; + + if (opcode == a64::Inst::kIdLdrb || opcode == a64::Inst::kIdLdrsb) + max_shift = 0; + else if (opcode == a64::Inst::kIdLdrh || opcode == a64::Inst::kIdLdrsh) + max_shift = 1; + else if (opcode == a64::Inst::kIdLdrsw) + max_shift = 2; + else + max_shift = (reg.isGpW() || reg.isVecS()) ? 
+void drcbe_arm64::get_imm_relative(a64::Assembler &a, const a64::Gp &reg, const uint64_t ptr) const
+{
+	// if a value can be expressed relative to the base register then it's worth using it instead of a mov,
+	// which can be expanded to up to 4 instructions for large immediates
+	const int64_t diff = (int64_t)ptr - (int64_t)m_baseptr;
+	if (diff > 0 && emit_add_optimized(a, reg, BASE_REG, diff))
+		return;
+	else if (diff < 0 && emit_sub_optimized(a, reg, BASE_REG, diff))
+		return;
+
+	const uint64_t codeoffs = a.code()->baseAddress() + a.offset();
+	const int64_t reloffs = codeoffs - (int64_t)ptr;
+	if (is_valid_immediate_signed(reloffs, 21))
+	{
+		a.adr(reg, ptr);
+		return;
+	}
+
+	a.mov(reg, ptr);
+}
+
+void drcbe_arm64::emit_ldr_str_base_mem(a64::Assembler &a, a64::Inst::Id opcode, const a64::Reg &reg, const void *ptr) const
+{
+	// if it can fit as a constant offset
+	const int64_t diff = (int64_t)ptr - (int64_t)m_baseptr;
+	if (is_valid_immediate_signed(diff, 9))
+	{
+		a.emit(opcode, reg, arm::Mem(BASE_REG, diff));
+		return;
+	}
+
+	// if it can fit as an offset relative to PC
+	const uint64_t codeoffs = a.code()->baseAddress() + a.offset();
+	const int64_t reloffs = codeoffs - (int64_t)ptr;
+	if (is_valid_immediate_signed(reloffs, 21))
+	{
+		a.adr(MEM_SCRATCH_REG, ptr);
+		a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG));
+		return;
+	}
+
+	if (diff > 0 && is_valid_immediate(diff, 16))
+	{
+		a.mov(MEM_SCRATCH_REG, diff);
+		a.emit(opcode, reg, arm::Mem(BASE_REG, MEM_SCRATCH_REG));
+		return;
+	}
+
+	if (diff > 0 && emit_add_optimized(a, MEM_SCRATCH_REG, BASE_REG, diff))
+	{
+		a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG));
+		return;
+	}
+	else if (diff < 0 && emit_sub_optimized(a, MEM_SCRATCH_REG, BASE_REG, diff))
+	{
+		a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG));
+		return;
+	}
+
+	if (diff >= 0)
+	{
+		int shift = 0;
+		int max_shift = 0;
+
+		if (opcode == a64::Inst::kIdLdrb || opcode == a64::Inst::kIdLdrsb)
+			max_shift = 0;
+		else if (opcode == a64::Inst::kIdLdrh || opcode == a64::Inst::kIdLdrsh)
+			max_shift = 1;
+		else if (opcode == a64::Inst::kIdLdrsw)
+			max_shift = 2;
+		else
+			max_shift = (reg.isGpW() || reg.isVecS()) ? 2 : 3;
+
+		for (int i = 0; i < 64 && max_shift > 0; i++)
+		{
+			if ((uint64_t)ptr & ((uint64_t)(1) << i))
+			{
+				shift = i;
+				break;
+			}
+		}
+
+		if (shift > max_shift)
+			shift = max_shift;
+
+		if (is_valid_immediate(diff >> shift, 32))
+		{
+			a.mov(MEM_SCRATCH_REG, diff >> shift);
+
+			if (shift)
+				a.emit(opcode, reg, arm::Mem(BASE_REG, MEM_SCRATCH_REG, arm::Shift(arm::ShiftOp::kLSL, shift)));
+			else
+				a.emit(opcode, reg, arm::Mem(BASE_REG, MEM_SCRATCH_REG));
+
+			return;
+		}
+	}
+
+	const uint64_t pagebase = codeoffs & ~make_bitmask<uint64_t>(12);
+	const int64_t pagerel = (int64_t)ptr - pagebase;
+	if (is_valid_immediate_signed(pagerel, 33))
+	{
+		const uint64_t targetpage = (uint64_t)ptr & ~make_bitmask<uint64_t>(12);
+		const uint64_t pageoffs = (uint64_t)ptr & util::make_bitmask<uint64_t>(12);
+
+		a.adrp(MEM_SCRATCH_REG, targetpage);
+
+		if (is_valid_immediate_signed(pageoffs, 9))
+		{
+			a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG, pageoffs));
+			return;
+		}
+		else if (emit_add_optimized(a, MEM_SCRATCH_REG, MEM_SCRATCH_REG, pageoffs))
+		{
+			a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG));
+			return;
+		}
+	}
+
+	// can't optimize it at all; most likely expands to a 4-instruction MOV sequence
+	a.mov(MEM_SCRATCH_REG, ptr);
+	a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG));
+}
+
+void drcbe_arm64::emit_ldr_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdr, reg, ptr); }
+void drcbe_arm64::emit_ldrb_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrb, reg, ptr); }
+void drcbe_arm64::emit_ldrh_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrh, reg, ptr); }
+void drcbe_arm64::emit_ldrsb_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrsb, reg, ptr); }
+void drcbe_arm64::emit_ldrsh_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrsh, reg, ptr); }
+void drcbe_arm64::emit_ldrsw_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrsw, reg, ptr); }
+void drcbe_arm64::emit_str_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStr, reg, ptr); }
+void drcbe_arm64::emit_strb_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStrb, reg, ptr); }
+void drcbe_arm64::emit_strh_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStrh, reg, ptr); }
+
+void drcbe_arm64::emit_float_ldr_mem(a64::Assembler &a, const a64::Vec &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdr_v, reg, ptr); }
+void drcbe_arm64::emit_float_str_mem(a64::Assembler &a, const a64::Vec &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStr_v, reg, ptr); }
+
+void drcbe_arm64::mov_reg_param(a64::Assembler &a, uint32_t regsize, const a64::Gp &dst, const be_parameter &src) const
+{
+	if (src.is_immediate())
+		get_imm_relative(a, select_register(dst, regsize), src.immediate());
+	else if (src.is_int_register() && dst.id() != src.ireg())
+		a.mov(select_register(dst, regsize), src.get_register_int(regsize));
+	else if (src.is_memory())
+		emit_ldr_mem(a, select_register(dst, regsize), src.memory());
+}
+
+void drcbe_arm64::mov_mem_param(a64::Assembler &a, uint32_t regsize, void *dst, const be_parameter &src) const
+{
+	const a64::Gp scratch =
select_register(SCRATCH_REG2, regsize); + + if (src.is_immediate()) + { + if (src.is_immediate_value(0)) + { + emit_str_mem(a, select_register(a64::xzr, regsize), dst); + } + else + { + get_imm_relative(a, scratch.x(), src.immediate()); + emit_str_mem(a, scratch, dst); + } + } + else if (src.is_memory()) + { + if (regsize == 4) + emit_ldrsw_mem(a, scratch.x(), src.memory()); + else + emit_ldr_mem(a, scratch.x(), src.memory()); + + emit_str_mem(a, scratch, dst); + } + else if (src.is_int_register()) + { + emit_str_mem(a, src.get_register_int(regsize), dst); + } +} + +void drcbe_arm64::mov_param_reg(a64::Assembler &a, uint32_t regsize, const be_parameter &dst, const a64::Gp &src) const +{ + assert(!dst.is_immediate()); + + if (dst.is_memory()) + emit_str_mem(a, select_register(src, regsize), dst.memory()); + else if (dst.is_int_register() && src.id() != dst.ireg()) + a.mov(dst.get_register_int(regsize), select_register(src, regsize)); +} + +void drcbe_arm64::mov_param_param(a64::Assembler &a, uint32_t regsize, const be_parameter &dst, const be_parameter &src) const +{ + assert(!dst.is_immediate()); + + if (src.is_memory()) + { + if (dst.is_int_register()) + { + mov_reg_param(a, regsize, dst.get_register_int(regsize), src); + } + else + { + mov_reg_param(a, regsize, SCRATCH_REG1, src); + mov_param_reg(a, regsize, dst, SCRATCH_REG1); + } + } + else if (src.is_int_register()) + { + mov_param_reg(a, regsize, dst, src.get_register_int(regsize)); + } + else if (src.is_immediate()) + { + mov_param_imm(a, regsize, dst, src.immediate()); + } +} + +void drcbe_arm64::mov_param_imm(a64::Assembler &a, uint32_t regsize, const be_parameter &dst, uint64_t src) const +{ + assert(!dst.is_immediate()); + + if (dst.is_memory()) + { + if (src == 0) + { + emit_str_mem(a, select_register(a64::xzr, regsize), dst.memory()); + } + else + { + const a64::Gp scratch = select_register(SCRATCH_REG2, regsize); + + get_imm_relative(a, scratch, src); + emit_str_mem(a, scratch, dst.memory()); + } + } + else if (dst.is_int_register()) + { + a.mov(dst.get_register_int(regsize), src); + } +} + +void drcbe_arm64::mov_signed_reg64_param32(a64::Assembler &a, const a64::Gp &dst, const be_parameter &src) const +{ + if (src.is_memory()) + { + emit_ldrsw_mem(a, dst.x(), src.memory()); + } + else if (src.is_immediate()) + { + get_imm_relative(a, dst.x(), src.immediate()); + + if (!src.is_immediate_value(0)) + a.sxtw(dst.x(), dst.w()); + } + else if (src.is_int_register()) + { + a.sxtw(dst.x(), src.get_register_int(4)); + } +} + +void drcbe_arm64::mov_float_reg_param(a64::Assembler &a, uint32_t regsize, a64::Vec const &dst, const be_parameter &src) const +{ + assert(!src.is_immediate()); + + if (src.is_memory()) + emit_float_ldr_mem(a, select_register(dst, regsize), src.memory()); + else if (src.is_float_register() && dst.id() != src.freg()) + a.fmov(select_register(dst, regsize), src.get_register_float(regsize)); +} + +void drcbe_arm64::mov_float_param_reg(a64::Assembler &a, uint32_t regsize, const be_parameter &dst, a64::Vec const &src) const +{ + assert(!dst.is_immediate()); + + if (dst.is_memory()) + emit_float_str_mem(a, select_register(src, regsize), dst.memory()); + else if (dst.is_float_register() && src.id() != dst.freg()) + a.fmov(dst.get_register_float(regsize), select_register(src, regsize)); +} + +void drcbe_arm64::mov_float_param_int_reg(a64::Assembler &a, uint32_t regsize, const be_parameter &dst, a64::Gp const &src) const +{ + assert(!dst.is_immediate()); + + if (dst.is_memory()) + emit_str_mem(a, src, dst.memory()); + 
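+	// (an integer source going to a memory parameter is stored directly;
+	// going to a float register it must cross register files with FMOV)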
else if (dst.is_float_register())
+		a.fmov(dst.get_register_float(regsize), src);
+}
+
+void drcbe_arm64::mov_float_param_param(a64::Assembler &a, uint32_t regsize, const be_parameter &dst, const be_parameter &src) const
+{
+	assert(!src.is_immediate());
+	assert(!dst.is_immediate());
+
+	if (dst.is_float_register())
+	{
+		mov_float_reg_param(a, regsize, dst.get_register_float(regsize), src);
+	}
+	else if (dst.is_memory())
+	{
+		if (src.is_float_register())
+		{
+			mov_float_param_reg(a, regsize, dst, src.get_register_float(regsize));
+		}
+		else if (src.is_memory())
+		{
+			const a64::Gp scratch = select_register(SCRATCH_REG2, regsize);
+			emit_ldr_mem(a, scratch, src.memory());
+			emit_str_mem(a, scratch, dst.memory());
+		}
+	}
+}
+
+void drcbe_arm64::call_arm_addr(a64::Assembler &a, const void *offs) const
+{
+	const uint64_t codeoffs = a.code()->baseAddress() + a.offset();
+	const int64_t reloffs = codeoffs - (int64_t)offs;
+	if (is_valid_immediate_signed(reloffs, 26))
+	{
+		a.bl(offs);
+	}
+	else
+	{
+		get_imm_relative(a, SCRATCH_REG1, uintptr_t(offs));
+		a.blr(SCRATCH_REG1);
+	}
+}
+
+void drcbe_arm64::check_unordered_condition(a64::Assembler &a, uml::condition_t cond, Label condition_met, bool not_equal) const
+{
+	if (cond != uml::COND_U && cond != uml::COND_NU)
+		return;
+
+	get_unordered(a, SCRATCH_REG1);
+
+	if (cond == uml::COND_U)
+	{
+		if (not_equal)
+			a.cbz(SCRATCH_REG1, condition_met);
+		else
+			a.cbnz(SCRATCH_REG1, condition_met);
+	}
+	else
+	{
+		if (not_equal)
+			a.cbnz(SCRATCH_REG1, condition_met);
+		else
+			a.cbz(SCRATCH_REG1, condition_met);
+	}
+}
+
+void drcbe_arm64::store_unordered(a64::Assembler &a) const
+{
+	a.cset(SCRATCH_REG1, a64::CondCode::kPL);
+	a.cset(SCRATCH_REG2, a64::CondCode::kNE);
+	a.and_(SCRATCH_REG1, SCRATCH_REG1, SCRATCH_REG2);
+	a.cset(SCRATCH_REG2, a64::CondCode::kCS);
+	a.and_(SCRATCH_REG1, SCRATCH_REG1, SCRATCH_REG2);
+	a.cset(SCRATCH_REG2, a64::CondCode::kVS);
+	a.and_(SCRATCH_REG1, SCRATCH_REG1, SCRATCH_REG2);
+	a.bfi(FLAGS_REG, SCRATCH_REG1, 4, 1); // insert the accumulated unordered result into bit 4
+}
+
+void drcbe_arm64::get_unordered(a64::Assembler &a, const a64::Gp &reg) const
+{
+	get_shifted_bit(a, reg.x(), FLAGS_REG, 1, 4);
+}
+
+void drcbe_arm64::store_carry_reg(a64::Assembler &a, const a64::Gp &reg) const
+{
+	a.bfi(FLAGS_REG, reg.x(), 0, 1);
+}
+
+void drcbe_arm64::store_carry(a64::Assembler &a, bool inverted) const
+{
+	if (inverted)
+		a.cset(SCRATCH_REG1, a64::CondCode::kCC);
+	else
+		a.cset(SCRATCH_REG1, a64::CondCode::kCS);
+
+	store_carry_reg(a, SCRATCH_REG1);
+}
+
+void drcbe_arm64::get_carry(a64::Assembler &a, const a64::Gp &reg, bool inverted) const
+{
+	a.and_(reg.x(), FLAGS_REG, 1);
+
+	if (inverted)
+		a.eor(reg.x(), reg.x(), 1);
+}
+
+void drcbe_arm64::load_carry(a64::Assembler &a, bool inverted) const
+{
+	a.mrs(SCRATCH_REG1, a64::Predicate::SysReg::kNZCV);
+	a.bfi(SCRATCH_REG1, FLAGS_REG, 29, 1);
+
+	if (inverted)
+		a.eor(SCRATCH_REG1, SCRATCH_REG1, 1 << 29);
+
+	a.msr(a64::Predicate::SysReg::kNZCV, SCRATCH_REG1);
+}
+
+void drcbe_arm64::get_shifted_bit(a64::Assembler &a, const a64::Gp &dst, const a64::Gp &src, uint32_t bits, uint32_t shift) const
+{
+	a.lsr(dst.x(), src.x(), shift);
+	a.and_(dst.x(), dst.x(), bits);
+}
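+// Layout of the emulated flags held in FLAGS_REG, inferred from the bfi/cset
+// usage above: bit 0 = carry (C), bit 4 = unordered (U). V, Z and S live in
+// the hardware NZCV register between instructions and are only folded into a
+// flags word by the save/restore and getflgs/setflgs paths; load_carry()
+// copies bit 0 into NZCV.C (bit 29) before instructions that consume carry.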
+void drcbe_arm64::calculate_carry_shift_left(a64::Assembler &a, const a64::Gp &reg, const a64::Gp &shift, int maxBits) const
+{
+	Label skip = a.newLabel();
+	a.cbz(shift, skip);
+
+	const a64::Gp scratch = select_register(SCRATCH_REG1, reg.isGpW() ? 4 : 8);
+
+	// carry = ((PARAM1 << (shift - 1)) >> maxBits) & 1
+	a.movz(scratch, maxBits + 1);
+	a.sub(scratch, scratch, shift);
+	a.lsr(scratch, reg, scratch);
+	store_carry_reg(a, scratch);
+
+	a.bind(skip);
+}
+
+void drcbe_arm64::calculate_carry_shift_left_imm(a64::Assembler &a, const a64::Gp &reg, const int shift, int maxBits) const
+{
+	if (shift == 0)
+		return;
+
+	const a64::Gp scratch = select_register(SCRATCH_REG1, reg.isGpW() ? 4 : 8);
+
+	// carry = ((PARAM1 << (shift - 1)) >> maxBits) & 1
+	a.lsr(scratch, reg, maxBits + 1 - shift);
+	store_carry_reg(a, scratch);
+}
+
+void drcbe_arm64::calculate_carry_shift_right(a64::Assembler &a, const a64::Gp &reg, const a64::Gp &shift) const
+{
+	Label skip = a.newLabel();
+	a.cbz(shift, skip);
+
+	const a64::Gp scratch = select_register(SCRATCH_REG1, reg.isGpW() ? 4 : 8);
+
+	// carry = (PARAM1 >> (shift - 1)) & 1
+	a.sub(scratch, shift, 1);
+	a.lsr(scratch, reg, scratch);
+	store_carry_reg(a, scratch);
+
+	a.bind(skip);
+}
+
+void drcbe_arm64::calculate_carry_shift_right_imm(a64::Assembler &a, const a64::Gp &reg, const int shift) const
+{
+	if (shift == 0)
+		return;
+
+	const a64::Gp scratch = select_register(SCRATCH_REG1, reg.isGpW() ? 4 : 8);
+
+	// carry = (PARAM1 >> (shift - 1)) & 1
+	a.lsr(scratch, reg, shift - 1);
+	store_carry_reg(a, scratch);
+}
+
+drcbe_arm64::drcbe_arm64(drcuml_state &drcuml, device_t &device, drc_cache &cache, uint32_t flags, int modes, int addrbits, int ignorebits)
+	: drcbe_interface(drcuml, cache, device)
+	, m_hash(cache, modes, addrbits, ignorebits)
+	, m_map(cache, 0xaaaaaaaa5555)
+	, m_log_asmjit(nullptr)
+	, m_entry(nullptr)
+	, m_exit(nullptr)
+	, m_nocode(nullptr)
+	, m_baseptr(cache.near() + 0x80)
+	, m_near(*(near_state *)cache.alloc_near(sizeof(m_near)))
+{
+	// get pointers to C functions we need to call
+	using debugger_hook_func = void (*)(device_debug *, offs_t);
+	static const debugger_hook_func debugger_inst_hook = [] (device_debug *dbg, offs_t pc) { dbg->instruction_hook(pc); };
+	m_near.debug_cpu_instruction_hook = (uint8_t *)debugger_inst_hook;
+	m_near.drcmap_get_value = (uint8_t *)&drc_map_variables::static_get_value;
+	m_near.emulated_flags = 0;
+
+	// build the opcode table (static but it doesn't hurt to regenerate it)
+	for (auto & elem : s_opcode_table_source)
+		s_opcode_table[elem.opcode] = elem.func;
+
+	// create the log
+	if (device.machine().options().drc_log_native())
+	{
+		m_log_asmjit = fopen(std::string("drcbearm64_asmjit_").append(device.shortname()).append(".asm").c_str(), "w");
+	}
+
+	// resolve the actual addresses of the address space handlers
+	auto const resolve_accessor =
+			[] (resolved_handler &handler, address_space &space, auto accessor)
+			{
+				if (MAME_DELEGATE_USE_TYPE == MAME_DELEGATE_TYPE_ITANIUM)
+				{
+					struct { uintptr_t ptr; ptrdiff_t adj; } equiv;
+					assert(sizeof(accessor) == sizeof(equiv));
+					*reinterpret_cast<decltype(accessor) *>(&equiv) = accessor;
+					handler.obj = uintptr_t(reinterpret_cast<uint8_t *>(&space) + (equiv.adj >> 1));
+					if (BIT(equiv.adj, 0))
+					{
+						auto const vptr = *reinterpret_cast<uint8_t const *const *>(handler.obj) + equiv.ptr;
+						handler.func = *reinterpret_cast<uint8_t *const *>(vptr);
+					}
+					else
+					{
+						handler.func = reinterpret_cast<uint8_t *>(equiv.ptr);
+					}
+				}
+				else if (MAME_DELEGATE_USE_TYPE == MAME_DELEGATE_TYPE_MSVC)
+				{
+					// interpret the pointer to member function ignoring the virtual inheritance variant
+					struct single { uintptr_t ptr; };
+					struct multi { uintptr_t ptr; int adj; };
+					struct { uintptr_t ptr; int adj; int vadj; int vindex; } unknown;
+					assert(sizeof(accessor) <= sizeof(unknown));
+					*reinterpret_cast<decltype(accessor) *>(&unknown) = accessor;
+					uint32_t const *func = reinterpret_cast<uint32_t const *>(unknown.ptr);
+					handler.obj = uintptr_t(&space);
+					if ((sizeof(unknown) == sizeof(accessor)) && unknown.vindex)
+					{
+						handler.obj += unknown.vadj;
+						auto const vptr = *reinterpret_cast<uint8_t const *const *>(handler.obj);
+						handler.obj += *reinterpret_cast<int const *>(vptr + unknown.vindex);
+					}
+					if (sizeof(single) < sizeof(accessor))
+						handler.obj += unknown.adj;
+
+					// walk past thunks
+					while (true)
+					{
+						if ((0x90000010 == (func[0] & 0x9f00001f)) && (0x91000210 == (func[1] & 0xffc003ff)) && (0xd61f0200 == func[2]))
+						{
+							// page-relative jump with +/-4GB reach - adrp xip0,... ; add xip0,xip0,#... ; br xip0
+							int64_t const page =
+									(uint64_t(func[0] & 0x60000000) >> 17) |
+									(uint64_t(func[0] & 0x00ffffe0) << 9) |
+									((func[0] & 0x00800000) ? (~std::uint64_t(0) << 33) : 0);
+							uint32_t const offset = (func[1] & 0x003ffc00) >> 10;
+							func = reinterpret_cast<uint32_t const *>(((uintptr_t(func) + page) & (~uintptr_t(0) << 12)) + offset);
+						}
+						else if ((0xf9400010 == func[0]) && (0xf9400210 == (func[1] & 0xffc003ff)) && (0xd61f0200 == func[2]))
+						{
+							// virtual function call thunk - ldr xip0,[x0] ; ldr xip0,[x0,#...] ; br xip0
+							uint32_t const *const *const vptr = *reinterpret_cast<uint32_t const *const *const *>(handler.obj);
+							func = vptr[(func[1] & 0x003ffc00) >> 10];
+						}
+						else
+						{
+							// not something we can easily bypass
+							break;
+						}
+					}
+					handler.func = reinterpret_cast<uint8_t *>(uintptr_t(func));
+				}
+			};
+
+	m_resolved_accessors.resize(m_space.size());
+	for (int space = 0; m_space.size() > space; ++space)
+	{
+		if (m_space[space])
+		{
+			resolve_accessor(m_resolved_accessors[space].read_byte, *m_space[space], static_cast<u8 (address_space::*)(offs_t)>(&address_space::read_byte));
+			resolve_accessor(m_resolved_accessors[space].read_word, *m_space[space], static_cast<u16 (address_space::*)(offs_t)>(&address_space::read_word));
+			resolve_accessor(m_resolved_accessors[space].read_word_masked, *m_space[space], static_cast<u16 (address_space::*)(offs_t, u16)>(&address_space::read_word));
+			resolve_accessor(m_resolved_accessors[space].read_dword, *m_space[space], static_cast<u32 (address_space::*)(offs_t)>(&address_space::read_dword));
+			resolve_accessor(m_resolved_accessors[space].read_dword_masked, *m_space[space], static_cast<u32 (address_space::*)(offs_t, u32)>(&address_space::read_dword));
+			resolve_accessor(m_resolved_accessors[space].read_qword, *m_space[space], static_cast<u64 (address_space::*)(offs_t)>(&address_space::read_qword));
+			resolve_accessor(m_resolved_accessors[space].read_qword_masked, *m_space[space], static_cast<u64 (address_space::*)(offs_t, u64)>(&address_space::read_qword));
+
+			resolve_accessor(m_resolved_accessors[space].write_byte, *m_space[space], static_cast<void (address_space::*)(offs_t, u8)>(&address_space::write_byte));
+			resolve_accessor(m_resolved_accessors[space].write_word, *m_space[space], static_cast<void (address_space::*)(offs_t, u16)>(&address_space::write_word));
+			resolve_accessor(m_resolved_accessors[space].write_word_masked, *m_space[space], static_cast<void (address_space::*)(offs_t, u16, u16)>(&address_space::write_word));
+			resolve_accessor(m_resolved_accessors[space].write_dword, *m_space[space], static_cast<void (address_space::*)(offs_t, u32)>(&address_space::write_dword));
+			resolve_accessor(m_resolved_accessors[space].write_dword_masked, *m_space[space], static_cast<void (address_space::*)(offs_t, u32, u32)>(&address_space::write_dword));
+			resolve_accessor(m_resolved_accessors[space].write_qword, *m_space[space], static_cast<void (address_space::*)(offs_t, u64)>(&address_space::write_qword));
+			resolve_accessor(m_resolved_accessors[space].write_qword_masked, *m_space[space], static_cast<void (address_space::*)(offs_t, u64, u64)>(&address_space::write_qword));
+		}
+	}
+}
+
+drcbe_arm64::~drcbe_arm64()
+{
+	if (m_log_asmjit)
+		fclose(m_log_asmjit);
+}
+
+size_t drcbe_arm64::emit(CodeHolder &ch)
+{
+	Error err;
+
+	size_t const alignment = ch.baseAddress() - uint64_t(m_cache.top());
+	size_t const code_size = ch.codeSize();
+
+	// test if enough room remains
in drc cache + drccodeptr *cachetop = m_cache.begin_codegen(alignment + code_size); + if (cachetop == nullptr) + return 0; + + err = ch.copyFlattenedData(drccodeptr(ch.baseAddress()), code_size, CopySectionFlags::kPadTargetBuffer); + if (err) + throw emu_fatalerror("CodeHolder::copyFlattenedData() error %d", err); + + // update the drc cache and end codegen + *cachetop += alignment + code_size; + m_cache.end_codegen(); + + return code_size; +} + +void drcbe_arm64::reset() +{ + uint8_t *dst = (uint8_t *)m_cache.top(); + + CodeHolder ch; + ch.init(Environment::host(), uint64_t(dst)); + + m_near.calldepth = 0; + m_near.hashstacksave = nullptr; + + FileLogger logger(m_log_asmjit); + if (logger.file()) + { + logger.setFlags(FormatFlags::kHexOffsets | FormatFlags::kHexImms | FormatFlags::kMachineCode); + logger.setIndentation(FormatIndentationGroup::kCode, 4); + ch.setLogger(&logger); + } + + a64::Assembler a(&ch); + if (logger.file()) + a.addDiagnosticOptions(DiagnosticOptions::kValidateIntermediate); + + // generate entry point + m_entry = (arm64_entry_point_func)dst; + a.bind(a.newNamedLabel("entry_point")); + + FuncDetail entry_point; + entry_point.init(FuncSignature::build(CallConvId::kHost), Environment::host()); + + FuncFrame frame; + frame.init(entry_point); + frame.setPreservedFP(); + frame.setAllDirty(); + + FuncArgsAssignment args(&entry_point); + args.assignAll(REG_PARAM1); + args.updateFuncFrame(frame); + + frame.finalize(); + + a.emitProlog(frame); + + a.ldr(BASE_REG, get_mem_absolute(a, &m_baseptr)); + emit_ldr_mem(a, FLAGS_REG.w(), &m_near.emulated_flags); + emit_str_mem(a, a64::wzr, &m_near.calldepth); + emit_str_mem(a, a64::xzr, &m_near.hashstacksave); + + a.emitArgsAssignment(frame, args); + + a.br(REG_PARAM1); + + // generate exit point + m_exit = dst + a.offset(); + a.bind(a.newNamedLabel("exit_point")); + + a.mov(a64::sp, a64::x29); + + a.emitEpilog(frame); + a.ret(a64::x30); + + // generate a no code point + m_nocode = dst + a.offset(); + a.bind(a.newNamedLabel("nocode_point")); + a.br(REG_PARAM1); + + // emit the generated code + emit(ch); + + // reset our hash tables + m_hash.reset(); + + m_hash.set_default_codeptr(m_nocode); +} + +int drcbe_arm64::execute(code_handle &entry) +{ + m_cache.codegen_complete(); + return (*m_entry)(entry.codeptr()); +} + +void drcbe_arm64::generate(drcuml_block &block, const instruction *instlist, uint32_t numinst) +{ + // tell all of our utility objects that a block is beginning + m_hash.block_begin(block, instlist, numinst); + m_map.block_begin(block); + + // compute the base by aligning the cache top to a cache line + auto [err, linesize] = osd_get_cache_line_size(); + uintptr_t linemask = 63; + if (err) + { + osd_printf_verbose("Error getting cache line size (%s:%d %s), assuming 64 bytes\n", err.category().name(), err.value(), err.message()); + } + else + { + assert(linesize); + linemask = linesize - 1; + for (unsigned shift = 1; linemask & (linemask + 1); ++shift) + linemask |= linemask >> shift; + } + uint8_t *dst = (uint8_t *)(uint64_t(m_cache.top() + linemask) & ~linemask); + + CodeHolder ch; + ch.init(Environment::host(), uint64_t(dst)); + ThrowableErrorHandler e; + ch.setErrorHandler(&e); + + FileLogger logger(m_log_asmjit); + if (logger.file()) + { + logger.setFlags(FormatFlags::kHexOffsets | FormatFlags::kHexImms | FormatFlags::kMachineCode); + logger.setIndentation(FormatIndentationGroup::kCode, 4); + ch.setLogger(&logger); + } + + a64::Assembler a(&ch); + if (logger.file()) + 
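+		// (asmjit's intermediate validation checks each instruction as it is
+		// encoded; it is enabled only when a native DRC log is requested,
+		// since the extra checking has some cost at code-generation time)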
a.addDiagnosticOptions(DiagnosticOptions::kValidateIntermediate); + + // generate code + for (int inum = 0; inum < numinst; inum++) + { + const instruction &inst = instlist[inum]; + assert(inst.opcode() < std::size(s_opcode_table)); + + // generate code + (this->*s_opcode_table[inst.opcode()])(a, inst); + } + + emit_str_mem(a, FLAGS_REG.w(), &m_near.emulated_flags); + + // emit the generated code + if (!emit(ch)) + block.abort(); + + // tell all of our utility objects that the block is finished + m_hash.block_end(block); + m_map.block_end(block); +} + +bool drcbe_arm64::hash_exists(uint32_t mode, uint32_t pc) +{ + return m_hash.code_exists(mode, pc); +} + +void drcbe_arm64::get_info(drcbe_info &info) +{ + for (info.direct_iregs = 0; info.direct_iregs < REG_I_COUNT; info.direct_iregs++) + { + if (int_register_map[info.direct_iregs] == 0) + break; + } + + for (info.direct_fregs = 0; info.direct_fregs < REG_F_COUNT; info.direct_fregs++) + { + if (float_register_map[info.direct_fregs] == 0) + break; + } +} + +void drcbe_arm64::op_handle(a64::Assembler &a, const uml::instruction &inst) +{ + assert_no_condition(inst); + assert_no_flags(inst); + assert(inst.numparams() == 1); + assert(inst.param(0).is_code_handle()); + + // make a label for documentation + Label handle = a.newNamedLabel(inst.param(0).handle().string()); + a.bind(handle); + + // emit a jump around the stack adjust in case code falls through here + Label skip = a.newLabel(); + a.b(skip); + + // register the current pointer for the handle + inst.param(0).handle().set_codeptr(drccodeptr(a.code()->baseAddress() + a.offset())); + + // the handle points to prolog code that creates a minimal non-leaf frame + a.stp(a64::x29, a64::x30, arm::Mem(a64::sp, -16).pre()); + a.bind(skip); +} + +void drcbe_arm64::op_hash(a64::Assembler &a, const uml::instruction &inst) +{ + assert_no_condition(inst); + assert_no_flags(inst); + assert(inst.numparams() == 2); + assert(inst.param(0).is_immediate()); + assert(inst.param(1).is_immediate()); + + const uint64_t mode = inst.param(0).immediate(); + const uint64_t pc = inst.param(1).immediate(); + + m_hash.set_codeptr(mode, pc, drccodeptr(a.code()->baseAddress() + a.offset())); +} + +void drcbe_arm64::op_label(a64::Assembler &a, const uml::instruction &inst) +{ + assert_no_condition(inst); + assert_no_flags(inst); + assert(inst.numparams() == 1); + assert(inst.param(0).is_code_label()); + + std::string labelName = util::string_format("PC$%x", inst.param(0).label()); + Label label = a.labelByName(labelName.c_str()); + if (!label.isValid()) + label = a.newNamedLabel(labelName.c_str()); + + a.bind(label); +} + +void drcbe_arm64::op_comment(a64::Assembler &a, const uml::instruction &inst) +{ + assert_no_condition(inst); + assert_no_flags(inst); + assert(inst.numparams() == 1); + assert(inst.param(0).is_string()); +} + +void drcbe_arm64::op_mapvar(a64::Assembler &a, const uml::instruction &inst) +{ + assert_no_condition(inst); + assert_no_flags(inst); + assert(inst.numparams() == 2); + assert(inst.param(0).is_mapvar()); + assert(inst.param(1).is_immediate()); + + const int mapvar = inst.param(0).mapvar(); + const uint64_t value = inst.param(1).immediate(); + + m_map.set_value(drccodeptr(a.code()->baseAddress() + a.offset()), mapvar, value); +} + +void drcbe_arm64::op_nop(a64::Assembler &a, const uml::instruction &inst) +{ + a.nop(); +} + +void drcbe_arm64::op_break(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4); + assert_no_condition(inst); + assert_no_flags(inst); + + static 
const char *const message = "break from drc"; + get_imm_relative(a, REG_PARAM1, (uintptr_t)message); + call_arm_addr(a, (const void *)&osd_break_into_debugger); +} + +void drcbe_arm64::op_debug(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4); + assert_no_condition(inst); + assert_no_flags(inst); + + const a64::Gp temp = TEMP_REG1.w(); + + if (m_device.machine().debug_flags & DEBUG_FLAG_ENABLED) + { + be_parameter pcp(*this, inst.param(0), PTYPE_MRI); + + Label skip = a.newLabel(); + + emit_ldr_mem(a, temp, &m_device.machine().debug_flags); + a.tbz(temp, 1, skip); // DEBUG_FLAG_CALL_HOOK + + get_imm_relative(a, REG_PARAM1, (uintptr_t)m_device.debug()); + mov_reg_param(a, 4, REG_PARAM2, pcp); + + emit_ldr_mem(a, TEMP_REG2, &m_near.debug_cpu_instruction_hook); + a.blr(TEMP_REG2); + + a.bind(skip); + } +} + +void drcbe_arm64::op_exit(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4); + assert_any_condition(inst); + assert_no_flags(inst); + + be_parameter retp(*this, inst.param(0), PTYPE_MRI); + + mov_reg_param(a, 4, REG_PARAM1, retp); + + if (inst.condition() == uml::COND_ALWAYS) + { + a.b(m_exit); + } + else if (inst.condition() == uml::COND_U || inst.condition() == uml::COND_NU) + { + Label skip = a.newLabel(); + check_unordered_condition(a, inst.condition(), skip, false); + a.b(m_exit); + a.bind(skip); + } + else + { + a.b(ARM_CONDITION(a, inst.condition()), m_exit); + } +} + +void drcbe_arm64::op_hashjmp(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter modep(*this, inst.param(0), PTYPE_MRI); + be_parameter pcp(*this, inst.param(1), PTYPE_MRI); + const parameter &exp = inst.param(2); + assert(exp.is_code_handle()); + + emit_str_mem(a, a64::wzr, &m_near.calldepth); + a.mov(a64::sp, a64::x29); + + if (modep.is_immediate() && m_hash.is_mode_populated(modep.immediate())) + { + if (pcp.is_immediate()) + { + const uint32_t l1val = (pcp.immediate() >> m_hash.l1shift()) & m_hash.l1mask(); + const uint32_t l2val = (pcp.immediate() >> m_hash.l2shift()) & m_hash.l2mask(); + emit_ldr_mem(a, TEMP_REG1, &m_hash.base()[modep.immediate()][l1val][l2val]); + } + else + { + mov_reg_param(a, 4, TEMP_REG2, pcp); + + get_imm_relative(a, TEMP_REG1, (uintptr_t)&m_hash.base()[modep.immediate()][0]); // TEMP_REG1 = m_base[mode] + + a.ubfx(TEMP_REG3, TEMP_REG2, m_hash.l1shift(), m_hash.l1bits()); + a.ldr(TEMP_REG3, a64::Mem(TEMP_REG1, TEMP_REG3, arm::Shift(arm::ShiftOp::kLSL, 3))); // TEMP_REG3 = m_base[mode][(pc >> m_l1shift) & m_l1mask] + + a.ubfx(TEMP_REG2, TEMP_REG2, m_hash.l2shift(), m_hash.l2bits()); + a.ldr(TEMP_REG1, a64::Mem(TEMP_REG3, TEMP_REG2, arm::Shift(arm::ShiftOp::kLSL, 3))); // TEMP_REG1 = m_base[mode][(pc >> m_l1shift) & m_l1mask][(pc >> m_l2shift) & m_l2mask] + } + } + else + { + get_imm_relative(a, TEMP_REG2, (uintptr_t)m_hash.base()); + + if (modep.is_immediate()) + { + a.ldr(TEMP_REG1, a64::Mem(TEMP_REG2, modep.immediate() * 8)); // TEMP_REG1 = m_base[modep] + } + else + { + const a64::Gp mode = modep.select_register(TEMP_REG1, 8); + mov_reg_param(a, 4, mode, modep); + a.ldr(TEMP_REG1, a64::Mem(TEMP_REG2, mode, arm::Shift(arm::ShiftOp::kLSL, 3))); // TEMP_REG1 = m_base[modep] + } + + if (pcp.is_immediate()) + { + const uint32_t l1val = ((pcp.immediate() >> m_hash.l1shift()) & m_hash.l1mask()) * 8; + const uint32_t l2val = ((pcp.immediate() >> m_hash.l2shift()) & m_hash.l2mask()) * 8; + + if (is_valid_immediate(l1val, 15)) + { + 
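+				// (l1val is a byte offset into the level-1 table; LDR with an
+				// unsigned scaled offset can reach multiples of 8 up to 32760,
+				// which is what the 15-bit check above guarantees)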
a.ldr(TEMP_REG1, a64::Mem(TEMP_REG1, l1val)); + } + else + { + a.mov(SCRATCH_REG1, l1val >> 3); + a.ldr(TEMP_REG1, a64::Mem(TEMP_REG1, SCRATCH_REG1, arm::Shift(arm::ShiftOp::kLSL, 3))); + } + + if (is_valid_immediate(l2val, 15)) + { + a.ldr(TEMP_REG1, a64::Mem(TEMP_REG1, l2val)); + } + else + { + a.mov(SCRATCH_REG1, l2val >> 3); + a.ldr(TEMP_REG1, a64::Mem(TEMP_REG1, SCRATCH_REG1, arm::Shift(arm::ShiftOp::kLSL, 3))); + } + } + else + { + const a64::Gp pc = pcp.select_register(TEMP_REG2, 8); + mov_reg_param(a, 4, pc, pcp); + + a.ubfx(TEMP_REG3, pc, m_hash.l1shift(), m_hash.l1bits()); // (pc >> m_l1shift) & m_l1mask + a.ldr(TEMP_REG3, a64::Mem(TEMP_REG1, TEMP_REG3, arm::Shift(arm::ShiftOp::kLSL, 3))); // TEMP_REG3 = m_base[mode][(pc >> m_l1shift) & m_l1mask] + + a.ubfx(TEMP_REG2, pc, m_hash.l2shift(), m_hash.l2bits()); // (pc >> m_l2shift) & m_l2mask + a.ldr(TEMP_REG1, a64::Mem(TEMP_REG3, TEMP_REG2, arm::Shift(arm::ShiftOp::kLSL, 3))); // x25 = m_base[mode][(pc >> m_l1shift) & m_l1mask][(pc >> m_l2shift) & m_l2mask] + } + } + + Label lab = a.newLabel(); + a.adr(REG_PARAM1, lab); + a.br(TEMP_REG1); + + a.bind(lab); + emit_str_mem(a, REG_PARAM1, &m_near.hashstacksave); + + a.mov(SCRATCH_REG1, 1); + emit_str_mem(a, SCRATCH_REG1.w(), &m_near.calldepth); + + mov_mem_param(a, 4, &m_state.exp, pcp); + + drccodeptr *const targetptr = exp.handle().codeptr_addr(); + if (*targetptr != nullptr) + { + call_arm_addr(a, *targetptr); + } + else + { + emit_ldr_mem(a, SCRATCH_REG1, targetptr); + a.blr(SCRATCH_REG1); + } +} + +void drcbe_arm64::op_jmp(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4); + assert_any_condition(inst); + assert_no_flags(inst); + + const parameter &labelp = inst.param(0); + assert(labelp.is_code_label()); + + std::string labelName = util::string_format("PC$%x", labelp.label()); + Label jmptarget = a.labelByName(labelName.c_str()); + if (!jmptarget.isValid()) + jmptarget = a.newNamedLabel(labelName.c_str()); + + if (inst.condition() == uml::COND_ALWAYS) + a.b(jmptarget); + else + { + if (inst.condition() == COND_C || inst.condition() == COND_NC || inst.condition() == COND_A || inst.condition() == COND_BE) + load_carry(a, true); + + if (inst.condition() == uml::COND_U || inst.condition() == uml::COND_NU) + check_unordered_condition(a, inst.condition(), jmptarget, false); + else + a.b(ARM_CONDITION(a, inst.condition()), jmptarget); + } +} + +void drcbe_arm64::op_exh(a64::Assembler &a, const uml::instruction &inst) +{ + assert_any_condition(inst); + assert_no_flags(inst); + + const parameter &handp = inst.param(0); + assert(handp.is_code_handle()); + be_parameter exp(*this, inst.param(1), PTYPE_MRI); + + // perform the exception processing + Label no_exception; + if (inst.condition() != uml::COND_ALWAYS) + { + no_exception = a.newLabel(); + + if (inst.condition() == COND_C || inst.condition() == COND_NC || inst.condition() == COND_A || inst.condition() == COND_BE) + load_carry(a, true); + + if (inst.condition() == uml::COND_U || inst.condition() == uml::COND_NU) + check_unordered_condition(a, inst.condition(), no_exception, true); + else + a.b(ARM_NOT_CONDITION(a, inst.condition()), no_exception); + } + + Label lab = a.newLabel(); + emit_ldr_mem(a, SCRATCH_REG1.w(), &m_near.calldepth); + a.cbnz(SCRATCH_REG1, lab); + a.adr(SCRATCH_REG2, lab); + emit_str_mem(a, SCRATCH_REG2, &m_near.hashstacksave); + a.bind(lab); + a.add(SCRATCH_REG1, SCRATCH_REG1, 1); + emit_str_mem(a, SCRATCH_REG1.w(), &m_near.calldepth); + + mov_mem_param(a, 4, &m_state.exp, exp); + + 
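+	// if the hash lookup found no code, the nocode stub bounces straight back
+	// here (through REG_PARAM1) and the exception handle is raised with the
+	// target PC already stored in m_state.exp; an as-yet ungenerated handle
+	// has to be fetched through its code pointer at run time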
drccodeptr *const targetptr = handp.handle().codeptr_addr(); + if (*targetptr != nullptr) + { + call_arm_addr(a, *targetptr); + } + else + { + emit_ldr_mem(a, SCRATCH_REG1, targetptr); + a.blr(SCRATCH_REG1); + } + + if (inst.condition() != uml::COND_ALWAYS) + a.bind(no_exception); +} + +void drcbe_arm64::op_callh(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4); + assert_any_condition(inst); + assert_no_flags(inst); + + const parameter &handp = inst.param(0); + assert(handp.is_code_handle()); + + Label skip; + if (inst.condition() != uml::COND_ALWAYS) + { + skip = a.newLabel(); + + if (inst.condition() == COND_C || inst.condition() == COND_NC || inst.condition() == COND_A || inst.condition() == COND_BE) + load_carry(a, true); + + if (inst.condition() == uml::COND_U || inst.condition() == uml::COND_NU) + check_unordered_condition(a, inst.condition(), skip, true); + else + a.b(ARM_NOT_CONDITION(a, inst.condition()), skip); + } + + Label lab = a.newLabel(); + emit_ldr_mem(a, SCRATCH_REG1.w(), &m_near.calldepth); + a.cbnz(SCRATCH_REG1, lab); + a.adr(SCRATCH_REG2, lab); + emit_str_mem(a, SCRATCH_REG2, &m_near.hashstacksave); + a.bind(lab); + a.add(SCRATCH_REG1, SCRATCH_REG1, 1); + emit_str_mem(a, SCRATCH_REG1.w(), &m_near.calldepth); + + drccodeptr *const targetptr = handp.handle().codeptr_addr(); + if (*targetptr != nullptr) + { + call_arm_addr(a, *targetptr); + } + else + { + emit_ldr_mem(a, SCRATCH_REG1, targetptr); + a.blr(SCRATCH_REG1); + } + + if (inst.condition() != uml::COND_ALWAYS) + a.bind(skip); +} + +void drcbe_arm64::op_ret(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4); + assert_any_condition(inst); + assert_no_flags(inst); + assert(inst.numparams() == 0); + + Label skip; + if (inst.condition() != uml::COND_ALWAYS) + { + skip = a.newLabel(); + + if (inst.condition() == COND_C || inst.condition() == COND_NC || inst.condition() == COND_A || inst.condition() == COND_BE) + load_carry(a, true); + + if (inst.condition() == uml::COND_U || inst.condition() == uml::COND_NU) + check_unordered_condition(a, inst.condition(), skip, true); + else + a.b(ARM_NOT_CONDITION(a, inst.condition()), skip); + } + + Label lab = a.newLabel(); + emit_ldr_mem(a, SCRATCH_REG1.w(), &m_near.calldepth); + a.cbz(SCRATCH_REG1, lab); + a.sub(SCRATCH_REG1, SCRATCH_REG1, 1); + emit_str_mem(a, SCRATCH_REG1.w(), &m_near.calldepth); + a.bind(lab); + + a.ldp(a64::x29, a64::x30, arm::Mem(a64::sp).post(16)); + a.ret(a64::x30); + + if (inst.condition() != uml::COND_ALWAYS) + a.bind(skip); +} + +void drcbe_arm64::op_callc(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4); + assert_any_condition(inst); + assert_no_flags(inst); + + const parameter &funcp = inst.param(0); + assert(funcp.is_c_function()); + be_parameter paramp(*this, inst.param(1), PTYPE_M); + + Label skip; + if (inst.condition() != uml::COND_ALWAYS) + { + skip = a.newLabel(); + + if (inst.condition() == COND_C || inst.condition() == COND_NC || inst.condition() == COND_A || inst.condition() == COND_BE) + load_carry(a, true); + + if (inst.condition() == uml::COND_U || inst.condition() == uml::COND_NU) + check_unordered_condition(a, inst.condition(), skip, true); + else + a.b(ARM_NOT_CONDITION(a, inst.condition()), skip); + } + + emit_str_mem(a, FLAGS_REG.w(), &m_near.emulated_flags); + + get_imm_relative(a, REG_PARAM1, (uintptr_t)paramp.memory()); + get_imm_relative(a, TEMP_REG1, (uintptr_t)funcp.cfunc()); + a.blr(TEMP_REG1); + + emit_ldr_mem(a, FLAGS_REG.w(), 
&m_near.emulated_flags); + + if (inst.condition() != uml::COND_ALWAYS) + a.bind(skip); +} + +void drcbe_arm64::op_recover(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + + get_imm_relative(a, REG_PARAM1, (uintptr_t)&m_map); + emit_ldr_mem(a, REG_PARAM2, &m_near.hashstacksave); + a.mov(REG_PARAM3, inst.param(1).mapvar()); + + emit_ldr_mem(a, TEMP_REG1, &m_near.drcmap_get_value); + + a.blr(TEMP_REG1); + + mov_param_reg(a, inst.size(), dstp, REG_PARAM1); +} + +void drcbe_arm64::op_setfmod(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter srcp(*this, inst.param(0), PTYPE_MRI); + const a64::Gp src = srcp.select_register(TEMP_REG1, inst.size()); + const a64::Gp scratch = select_register(FUNC_SCRATCH_REG, inst.size()); + + if (srcp.is_immediate()) + { + a.mov(scratch, srcp.immediate() & 3); + } + else + { + mov_reg_param(a, inst.size(), src, srcp); + a.and_(scratch, src, 3); + } + + emit_strb_mem(a, scratch.w(), &m_state.fmod); +} + +void drcbe_arm64::op_getfmod(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + const a64::Gp dst = dstp.select_register(TEMP_REG1, inst.size()); + + emit_ldrb_mem(a, dst.w(), &m_state.fmod); + mov_param_reg(a, inst.size(), dstp, dst); +} + +void drcbe_arm64::op_getexp(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + const a64::Gp dst = dstp.select_register(TEMP_REG1, inst.size()); + + emit_ldr_mem(a, dst.w(), &m_state.exp); + mov_param_reg(a, inst.size(), dstp, dst); +} + +void drcbe_arm64::op_getflgs(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter maskp(*this, inst.param(1), PTYPE_I); + assert(maskp.is_immediate()); + + const a64::Gp dst = dstp.select_register(TEMP_REG1, 8); + + a.mov(dst, a64::xzr); + + if (maskp.immediate() & FLAG_C) + a.and_(dst, FLAGS_REG, 1); + + if (maskp.immediate() & FLAG_V) + { + a.cset(SCRATCH_REG1, a64::CondCode::kVS); + a.orr(dst, dst, SCRATCH_REG1, 1); + } + + if (maskp.immediate() & FLAG_Z) + { + a.cset(SCRATCH_REG1, a64::CondCode::kEQ); + a.orr(dst, dst, SCRATCH_REG1, 2); + } + + if (maskp.immediate() & FLAG_S) + { + a.cset(SCRATCH_REG1, a64::CondCode::kMI); + a.orr(dst, dst, SCRATCH_REG1, 3); + } + + if (maskp.immediate() & FLAG_U) + { + get_unordered(a, SCRATCH_REG1); + a.orr(dst, dst, SCRATCH_REG1, 4); + } + + mov_param_reg(a, inst.size(), dstp, dst); +} + +void drcbe_arm64::op_setflgs(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4); + assert_no_condition(inst); + + be_parameter flagsp(*this, inst.param(0), PTYPE_MRI); + + mov_reg_param(a, inst.size(), FLAGS_REG, flagsp); + + a.mrs(TEMP_REG1, a64::Predicate::SysReg::kNZCV); + + a.and_(TEMP_REG2, FLAGS_REG, 0b1100); // zero + sign + a.ubfx(TEMP_REG3, FLAGS_REG, 1, 1); // overflow flag + a.orr(TEMP_REG2, TEMP_REG2, TEMP_REG3); + a.bfi(TEMP_REG1, TEMP_REG2, 28, 4); + + a.msr(a64::Predicate::SysReg::kNZCV, TEMP_REG1); +} + +void drcbe_arm64::op_save(a64::Assembler &a, const 
uml::instruction &inst) +{ + assert(inst.size() == 4); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter dstp(*this, inst.param(0), PTYPE_M); + + const a64::Gp membase = SCRATCH_REG1; + + get_imm_relative(a, membase, (uintptr_t)dstp.memory()); + + // Calculate flags to be stored + a.mrs(TEMP_REG1, a64::Predicate::SysReg::kNZCV); + a.lsr(TEMP_REG1, TEMP_REG1, 28); + + a.and_(TEMP_REG2, TEMP_REG1, 0b1100); // zero + sign + a.orr(TEMP_REG2, TEMP_REG2, FLAGS_REG); // carry + unordered flags + + a.bfi(TEMP_REG2, TEMP_REG1, 1, 1); // overflow flag + + a.strb(TEMP_REG2.w(), arm::Mem(membase, offsetof(drcuml_machine_state, flags))); + + emit_ldrb_mem(a, TEMP_REG1.w(), &m_state.fmod); + a.strb(TEMP_REG1.w(), arm::Mem(membase, offsetof(drcuml_machine_state, fmod))); + + emit_ldr_mem(a, TEMP_REG1.w(), &m_state.exp); + a.str(TEMP_REG1.w(), arm::Mem(membase, offsetof(drcuml_machine_state, exp))); + + int regoffs = offsetof(drcuml_machine_state, r); + for (int regnum = 0; regnum < std::size(m_state.r); regnum++) + { + if (int_register_map[regnum] != 0) + { + a.str(a64::Gp::fromTypeAndId(RegType::kARM_GpX, int_register_map[regnum]), arm::Mem(membase, regoffs + (8 * regnum))); + } + else + { + emit_ldr_mem(a, TEMP_REG1, &m_state.r[regnum].d); + a.str(TEMP_REG1, arm::Mem(membase, regoffs + (8 * regnum))); + } + } + + regoffs = offsetof(drcuml_machine_state, f); + for (int regnum = 0; regnum < std::size(m_state.f); regnum++) + { + if (float_register_map[regnum] != 0) + { + a.str(a64::Vec::fromTypeAndId(RegType::kARM_VecD, float_register_map[regnum]), arm::Mem(membase, regoffs + (8 * regnum))); + } + else + { + emit_ldr_mem(a, TEMP_REG1, &m_state.f[regnum].d); + a.str(TEMP_REG1, arm::Mem(membase, regoffs + (8 * regnum))); + } + } +} + +void drcbe_arm64::op_restore(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4); + assert_no_condition(inst); + + be_parameter srcp(*this, inst.param(0), PTYPE_M); + + const a64::Gp membase = SCRATCH_REG1; + + get_imm_relative(a, membase, (uintptr_t)srcp.memory()); + + int regoffs = offsetof(drcuml_machine_state, r); + for (int regnum = 0; regnum < std::size(m_state.r); regnum++) + { + if (int_register_map[regnum] != 0) + { + a.ldr(a64::Gp::fromTypeAndId(RegType::kARM_GpX, int_register_map[regnum]), arm::Mem(membase, regoffs + (8 * regnum))); + } + else + { + a.ldr(TEMP_REG1, arm::Mem(membase, regoffs + (8 * regnum))); + emit_str_mem(a, TEMP_REG1, &m_state.r[regnum].d); + } + } + + regoffs = offsetof(drcuml_machine_state, f); + for (int regnum = 0; regnum < std::size(m_state.f); regnum++) + { + if (float_register_map[regnum] != 0) + { + a.ldr(a64::Vec::fromTypeAndId(RegType::kARM_VecD, float_register_map[regnum]), arm::Mem(membase, regoffs + (8 * regnum))); + } + else + { + a.ldr(TEMP_REG1, arm::Mem(membase, regoffs + (8 * regnum))); + emit_str_mem(a, TEMP_REG1, &m_state.f[regnum].d); + } + } + + a.ldrb(TEMP_REG1.w(), arm::Mem(membase, offsetof(drcuml_machine_state, fmod))); + emit_strb_mem(a, TEMP_REG1.w(), &m_state.fmod); + + a.ldr(TEMP_REG1.w(), arm::Mem(membase, offsetof(drcuml_machine_state, exp))); + emit_str_mem(a, TEMP_REG1.w(), &m_state.exp); + + a.ldrb(FLAGS_REG.w(), arm::Mem(membase, offsetof(drcuml_machine_state, flags))); + + a.mrs(TEMP_REG1, a64::Predicate::SysReg::kNZCV); + + a.and_(TEMP_REG2, FLAGS_REG, 0b1100); // zero + sign + a.ubfx(TEMP_REG3, FLAGS_REG, 1, 1); // overflow flag + a.orr(TEMP_REG2, TEMP_REG2, TEMP_REG3); + a.bfi(TEMP_REG1, TEMP_REG2, 28, 4); + + a.msr(a64::Predicate::SysReg::kNZCV, TEMP_REG1); 
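+	// (mirrors the flag packing in op_save: Z and S move from bits 2-3 of the
+	// saved flags word into NZCV bits 30-31, V from bit 1 into NZCV bit 28,
+	// while C and U remain live in FLAGS_REG)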
+}
+
+void drcbe_arm64::op_load(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_no_flags(inst);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
+	be_parameter basep(*this, inst.param(1), PTYPE_M);
+	be_parameter indp(*this, inst.param(2), PTYPE_MRI);
+	const parameter &scalesizep = inst.param(3);
+	assert(scalesizep.is_size_scale());
+	const int size = scalesizep.size();
+
+	const a64::Gp basereg = TEMP_REG1;
+	const a64::Gp dstreg = dstp.select_register(TEMP_REG2, inst.size());
+
+	const int32_t offset = indp.is_immediate() ? indp.immediate() << scalesizep.scale() : 0;
+	if (indp.is_immediate() && is_valid_immediate(offset, 15))
+	{
+		const auto memptr = &reinterpret_cast<uint8_t *>(basep.memory())[offset];
+
+		// immediate index
+		if (size == SIZE_BYTE)
+			emit_ldrb_mem(a, dstreg.w(), memptr);
+		else if (size == SIZE_WORD)
+			emit_ldrh_mem(a, dstreg.w(), memptr);
+		else if (size == SIZE_DWORD)
+			emit_ldr_mem(a, dstreg.w(), memptr);
+		else
+			emit_ldr_mem(a, dstreg.x(), memptr);
+	}
+	else
+	{
+		get_imm_relative(a, basereg, uint64_t(basep.memory()));
+
+		const a64::Gp offsreg = indp.select_register(TEMP_REG3, 4);
+		mov_reg_param(a, 4, offsreg, indp);
+
+		// the scale needs to match the load size for shifting to be allowed
+		auto mem = arm::Mem(basereg, offsreg, arm::Shift(arm::ShiftOp::kLSL, scalesizep.scale()));
+		if (scalesizep.scale() != size)
+		{
+			if (scalesizep.scale() != 0)
+			{
+				a.add(basereg, basereg, offsreg, arm::Shift(arm::ShiftOp::kLSL, scalesizep.scale()));
+				mem = arm::Mem(basereg);
+			}
+			else
+			{
+				mem = arm::Mem(basereg, offsreg);
+			}
+		}
+
+		if (size == SIZE_BYTE)
+			a.ldrb(dstreg.w(), mem);
+		else if (size == SIZE_WORD)
+			a.ldrh(dstreg.w(), mem);
+		else if (size == SIZE_DWORD)
+			a.ldr(dstreg.w(), mem);
+		else
+			a.ldr(dstreg, mem);
+	}
+
+	mov_param_reg(a, inst.size(), dstp, dstreg);
+}
+
+void drcbe_arm64::op_loads(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_no_flags(inst);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
+	be_parameter basep(*this, inst.param(1), PTYPE_M);
+	be_parameter indp(*this, inst.param(2), PTYPE_MRI);
+	const parameter &scalesizep = inst.param(3);
+	assert(scalesizep.is_size_scale());
+	const int size = scalesizep.size();
+
+	const a64::Gp basereg = TEMP_REG1;
+	const a64::Gp dstreg = dstp.select_register(TEMP_REG2, inst.size());
+
+	const int32_t offset = indp.is_immediate() ?
indp.immediate() << scalesizep.scale() : 0; + if (indp.is_immediate() && is_valid_immediate(offset, 15)) + { + // immediate index + if (size == SIZE_BYTE) + emit_ldrsb_mem(a, dstreg.x(), (uint8_t*)basep.memory() + offset); + else if (size == SIZE_WORD) + emit_ldrsh_mem(a, dstreg.x(), (uint8_t*)basep.memory() + offset); + else if (size == SIZE_DWORD) + emit_ldrsw_mem(a, dstreg.x(), (uint8_t*)basep.memory() + offset); + else + emit_ldr_mem(a, dstreg.x(), (uint8_t*)basep.memory() + offset); + } + else + { + get_imm_relative(a, basereg, uint64_t(basep.memory())); + + const a64::Gp offsreg = indp.select_register(TEMP_REG3, 8); + mov_reg_param(a, 4, offsreg, indp); + + // the scale needs to match the load size for shifting to be allowed + auto mem = arm::Mem(basereg, offsreg, arm::Shift(arm::ShiftOp::kLSL, scalesizep.scale())); + if (scalesizep.scale() != size) + { + if (scalesizep.scale() != 0) + { + a.add(basereg, basereg, offsreg, arm::Shift(arm::ShiftOp::kLSL, scalesizep.scale())); + mem = arm::Mem(basereg); + } + else + { + mem = arm::Mem(basereg, offsreg); + } + } + + if (size == SIZE_BYTE) + a.ldrsb(dstreg, mem); + else if (size == SIZE_WORD) + a.ldrsh(dstreg, mem); + else if (size == SIZE_DWORD && inst.size() == 8) + a.ldrsw(dstreg, mem); + else + a.ldr(dstreg, mem); + } + + mov_param_reg(a, inst.size(), dstp, dstreg); +} + +void drcbe_arm64::op_store(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter basep(*this, inst.param(0), PTYPE_M); + be_parameter indp(*this, inst.param(1), PTYPE_MRI); + be_parameter srcp(*this, inst.param(2), PTYPE_MRI); + const parameter &scalesizep = inst.param(3); + const int size = scalesizep.size(); + + const a64::Gp basereg = TEMP_REG1; + + const int32_t offset = indp.is_immediate() ? 
indp.immediate() << scalesizep.scale() : 0;
+	if (indp.is_immediate() && is_valid_immediate(offset, 15))
+	{
+		const a64::Gp srcreg = srcp.select_register(TEMP_REG2, inst.size());
+		mov_reg_param(a, inst.size(), srcreg, srcp);
+
+		if (size == SIZE_BYTE)
+			emit_strb_mem(a, srcreg.w(), (uint8_t*)basep.memory() + offset);
+		else if (size == SIZE_WORD)
+			emit_strh_mem(a, srcreg.w(), (uint8_t*)basep.memory() + offset);
+		else if (size == SIZE_DWORD)
+			emit_str_mem(a, srcreg.w(), (uint8_t*)basep.memory() + offset);
+		else
+			emit_str_mem(a, srcreg.x(), (uint8_t*)basep.memory() + offset);
+	}
+	else
+	{
+		get_imm_relative(a, basereg, uint64_t(basep.memory()));
+
+		const a64::Gp srcreg = srcp.select_register(TEMP_REG2, inst.size());
+		const a64::Gp offsreg = indp.select_register(TEMP_REG3, 8);
+
+		mov_reg_param(a, inst.size(), srcreg, srcp);
+		mov_reg_param(a, 4, offsreg, indp);
+
+		// the scale needs to match the store size for shifting to be allowed
+		auto mem = arm::Mem(basereg, offsreg, arm::Shift(arm::ShiftOp::kLSL, scalesizep.scale()));
+		if (scalesizep.scale() != size)
+		{
+			if (scalesizep.scale() != 0)
+			{
+				a.add(basereg, basereg, offsreg, arm::Shift(arm::ShiftOp::kLSL, scalesizep.scale()));
+				mem = arm::Mem(basereg);
+			}
+			else
+			{
+				mem = arm::Mem(basereg, offsreg);
+			}
+		}
+
+		if (size == SIZE_BYTE)
+			a.strb(srcreg.w(), mem);
+		else if (size == SIZE_WORD)
+			a.strh(srcreg.w(), mem);
+		else if (size == SIZE_DWORD)
+			a.str(srcreg.w(), mem);
+		else
+			a.str(srcreg, mem);
+	}
+}
+
+void drcbe_arm64::op_read(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_no_flags(inst);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
+	be_parameter addrp(*this, inst.param(1), PTYPE_MRI);
+	const parameter &spacesizep = inst.param(2);
+	assert(spacesizep.is_size_space());
+
+	const auto &trampolines = m_accessors[spacesizep.space()];
+	const auto &resolved = m_resolved_accessors[spacesizep.space()];
+
+	mov_reg_param(a, 4, REG_PARAM2, addrp);
+
+	if (spacesizep.size() == SIZE_BYTE)
+	{
+		if (resolved.read_byte.func)
+		{
+			get_imm_relative(a, REG_PARAM1, resolved.read_byte.obj);
+			call_arm_addr(a, resolved.read_byte.func);
+		}
+		else
+		{
+			get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]);
+			emit_ldr_mem(a, TEMP_REG1, &trampolines.read_byte);
+			a.blr(TEMP_REG1);
+		}
+	}
+	else if (spacesizep.size() == SIZE_WORD)
+	{
+		if (resolved.read_word.func)
+		{
+			get_imm_relative(a, REG_PARAM1, resolved.read_word.obj);
+			call_arm_addr(a, resolved.read_word.func);
+		}
+		else
+		{
+			get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]);
+			emit_ldr_mem(a, TEMP_REG1, &trampolines.read_word);
+			a.blr(TEMP_REG1);
+		}
+	}
+	else if (spacesizep.size() == SIZE_DWORD)
+	{
+		if (resolved.read_dword.func)
+		{
+			get_imm_relative(a, REG_PARAM1, resolved.read_dword.obj);
+			call_arm_addr(a, resolved.read_dword.func);
+		}
+		else
+		{
+			get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]);
+			emit_ldr_mem(a, TEMP_REG1, &trampolines.read_dword);
+			a.blr(TEMP_REG1);
+		}
+	}
+	else if (spacesizep.size() == SIZE_QWORD)
+	{
+		if (resolved.read_qword.func)
+		{
+			get_imm_relative(a, REG_PARAM1, resolved.read_qword.obj);
+			call_arm_addr(a, resolved.read_qword.func);
+		}
+		else
+		{
+			get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]);
+			emit_ldr_mem(a, TEMP_REG1, &trampolines.read_qword);
+			a.blr(TEMP_REG1);
+		}
+	}
+
+	mov_param_reg(a, inst.size(), dstp, REG_PARAM1);
+}
+
+void
drcbe_arm64::op_readm(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter addrp(*this, inst.param(1), PTYPE_MRI); + be_parameter maskp(*this, inst.param(2), PTYPE_MRI); + const parameter &spacesizep = inst.param(3); + assert(spacesizep.is_size_space()); + + const auto &trampolines = m_accessors[spacesizep.space()]; + const auto &resolved = m_resolved_accessors[spacesizep.space()]; + + mov_reg_param(a, 4, REG_PARAM2, addrp); + mov_reg_param(a, inst.size(), REG_PARAM3, maskp); + + if (spacesizep.size() == SIZE_WORD) + { + if (resolved.read_word_masked.func) + { + get_imm_relative(a, REG_PARAM1, resolved.read_word_masked.obj); + call_arm_addr(a, resolved.read_word_masked.func); + } + else + { + get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]); + emit_ldr_mem(a, TEMP_REG1, &trampolines.read_word_masked); + a.blr(TEMP_REG1); + } + } + else if (spacesizep.size() == SIZE_DWORD) + { + if (resolved.read_dword_masked.func) + { + get_imm_relative(a, REG_PARAM1, resolved.read_dword_masked.obj); + call_arm_addr(a, resolved.read_dword_masked.func); + } + else + { + get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]); + emit_ldr_mem(a, TEMP_REG1, &trampolines.read_dword_masked); + a.blr(TEMP_REG1); + } + } + else if (spacesizep.size() == SIZE_QWORD) + { + if (resolved.read_qword_masked.func) + { + get_imm_relative(a, REG_PARAM1, resolved.read_qword_masked.obj); + call_arm_addr(a, resolved.read_qword_masked.func); + } + else + { + get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]); + emit_ldr_mem(a, TEMP_REG1, &trampolines.read_qword_masked); + a.blr(TEMP_REG1); + } + } + + mov_param_reg(a, inst.size(), dstp, REG_PARAM1); +} + +void drcbe_arm64::op_write(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter addrp(*this, inst.param(0), PTYPE_MRI); + be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + const parameter &spacesizep = inst.param(2); + assert(spacesizep.is_size_space()); + + const auto &trampolines = m_accessors[spacesizep.space()]; + const auto &resolved = m_resolved_accessors[spacesizep.space()]; + + mov_reg_param(a, 4, REG_PARAM2, addrp); + mov_reg_param(a, inst.size(), REG_PARAM3, srcp); + + if (spacesizep.size() == SIZE_BYTE) + { + if (resolved.write_byte.func) + { + get_imm_relative(a, REG_PARAM1, resolved.write_byte.obj); + call_arm_addr(a, resolved.write_byte.func); + } + else + { + get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]); + emit_ldr_mem(a, TEMP_REG1, &trampolines.write_byte); + a.blr(TEMP_REG1); + } + } + else if (spacesizep.size() == SIZE_WORD) + { + if (resolved.write_word.func) + { + get_imm_relative(a, REG_PARAM1, resolved.write_word.obj); + call_arm_addr(a, resolved.write_word.func); + } + else + { + get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]); + emit_ldr_mem(a, TEMP_REG1, &trampolines.write_word); + a.blr(TEMP_REG1); + } + } + else if (spacesizep.size() == SIZE_DWORD) + { + if (resolved.write_dword.func) + { + get_imm_relative(a, REG_PARAM1, resolved.write_dword.obj); + call_arm_addr(a, resolved.write_dword.func); + } + else + { + get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]); + emit_ldr_mem(a, TEMP_REG1, &trampolines.write_dword); + 
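+			// (no resolved fastpath for this handler, so call through the
+			// generic trampoline pointer cached in m_accessors)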
a.blr(TEMP_REG1); + } + } + else if (spacesizep.size() == SIZE_QWORD) + { + if (resolved.write_qword.func) + { + get_imm_relative(a, REG_PARAM1, resolved.write_qword.obj); + call_arm_addr(a, resolved.write_qword.func); + } + else + { + get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]); + emit_ldr_mem(a, TEMP_REG1, &trampolines.write_qword); + a.blr(TEMP_REG1); + } + } +} + +void drcbe_arm64::op_writem(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter addrp(*this, inst.param(0), PTYPE_MRI); + be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + be_parameter maskp(*this, inst.param(2), PTYPE_MRI); + const parameter &spacesizep = inst.param(3); + assert(spacesizep.is_size_space()); + + // set up a call to the write handler + const auto &trampolines = m_accessors[spacesizep.space()]; + const auto &resolved = m_resolved_accessors[spacesizep.space()]; + + mov_reg_param(a, 4, REG_PARAM2, addrp); + mov_reg_param(a, inst.size(), REG_PARAM3, srcp); + mov_reg_param(a, inst.size(), REG_PARAM4, maskp); + + if (spacesizep.size() == SIZE_WORD) + { + if (resolved.write_word_masked.func) + { + get_imm_relative(a, REG_PARAM1, resolved.write_word_masked.obj); + call_arm_addr(a, resolved.write_word_masked.func); + } + else + { + get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]); + emit_ldr_mem(a, TEMP_REG1, &trampolines.write_word_masked); + a.blr(TEMP_REG1); + } + } + else if (spacesizep.size() == SIZE_DWORD) + { + if (resolved.write_dword_masked.func) + { + get_imm_relative(a, REG_PARAM1, resolved.write_dword_masked.obj); + call_arm_addr(a, resolved.write_dword_masked.func); + } + else + { + get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]); + emit_ldr_mem(a, TEMP_REG1, &trampolines.write_dword_masked); + a.blr(TEMP_REG1); + } + } + else if (spacesizep.size() == SIZE_QWORD) + { + if (resolved.write_qword_masked.func) + { + get_imm_relative(a, REG_PARAM1, resolved.write_qword_masked.obj); + call_arm_addr(a, resolved.write_qword_masked.func); + } + else + { + get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]); + emit_ldr_mem(a, TEMP_REG1, &trampolines.write_qword_masked); + a.blr(TEMP_REG1); + } + } +} + +void drcbe_arm64::op_carry(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_C); + + be_parameter srcp(*this, inst.param(0), PTYPE_MRI); + be_parameter bitp(*this, inst.param(1), PTYPE_MRI); + + const a64::Gp src = srcp.select_register(TEMP_REG1, inst.size()); + const a64::Gp scratch = select_register(FUNC_SCRATCH_REG, inst.size()); + + // load non-immediate bit numbers into a register + // flags = (flags & ~FLAG_C) | ((src >> (PARAM1 & 31)) & FLAG_C) + + if (srcp.is_immediate() && bitp.is_immediate()) + { + a.mov(scratch, BIT(srcp.immediate(), bitp.immediate())); + a.bfi(FLAGS_REG, scratch.x(), 0, 1); + } + else if (bitp.is_immediate()) + { + const auto shift = bitp.immediate() % (inst.size() * 8); + + mov_reg_param(a, inst.size(), src, srcp); + + // move carry bit to lsb + if (shift != 0) + { + a.lsr(scratch, src, shift); + store_carry_reg(a, scratch); + } + else + { + store_carry_reg(a, src); + } + } + else + { + const a64::Gp shift = bitp.select_register(TEMP_REG2, inst.size()); + + mov_reg_param(a, inst.size(), src, srcp); + mov_reg_param(a, inst.size(), shift, bitp); + + a.and_(shift, shift, 
inst.size() * 8 - 1);
+
+		// move carry bit to lsb
+		a.lsr(scratch, src, shift);
+		store_carry_reg(a, scratch);
+	}
+}
+
+void drcbe_arm64::op_set(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_any_condition(inst);
+	assert_no_flags(inst);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
+
+	if (inst.condition() == uml::COND_ALWAYS)
+	{
+		mov_param_imm(a, inst.size(), dstp, 1);
+	}
+	else
+	{
+		const a64::Gp dst = dstp.select_register(TEMP_REG1, inst.size());
+
+		if (inst.condition() == COND_C || inst.condition() == COND_NC || inst.condition() == COND_A || inst.condition() == COND_BE)
+			load_carry(a, true);
+
+		if (inst.condition() == uml::COND_U || inst.condition() == uml::COND_NU)
+		{
+			get_unordered(a, dst);
+
+			if (inst.condition() == uml::COND_NU)
+				a.eor(dst, dst, 1);
+		}
+		else
+			a.cset(dst, ARM_CONDITION(a, inst.condition()));
+
+		mov_param_reg(a, inst.size(), dstp, dst);
+	}
+}
+
+void drcbe_arm64::op_mov(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_any_condition(inst);
+	assert_no_flags(inst);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
+	be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
+
+	// a conditional mov is implemented as a branch around an unconditional mov
+	Label skip;
+
+	if (inst.condition() != uml::COND_ALWAYS)
+	{
+		skip = a.newLabel();
+
+		if (inst.condition() == COND_C || inst.condition() == COND_NC || inst.condition() == COND_A || inst.condition() == COND_BE)
+			load_carry(a, true);
+
+		if (inst.condition() == uml::COND_U || inst.condition() == uml::COND_NU)
+			check_unordered_condition(a, inst.condition(), skip, true);
+		else
+			a.b(ARM_NOT_CONDITION(a, inst.condition()), skip);
+	}
+
+	mov_param_param(a, inst.size(), dstp, srcp);
+
+	if (inst.condition() != uml::COND_ALWAYS)
+		a.bind(skip);
+}
+
+void drcbe_arm64::op_sext(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_flags(inst, FLAG_S | FLAG_Z);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
+	be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
+	const parameter &sizep = inst.param(2);
+	assert(sizep.is_size());
+	const auto size = sizep.size();
+
+	const a64::Gp dstreg = dstp.select_register(TEMP_REG2, inst.size());
+
+	if ((1 << size) >= inst.size())
+	{
+		// the source is at least as wide as the result, so this is a plain copy;
+		// route it through dstreg when flags are requested so the final TST sees the real value
+		if (inst.flags())
+		{
+			mov_reg_param(a, inst.size(), dstreg, srcp);
+			mov_param_reg(a, inst.size(), dstp, dstreg);
+		}
+		else
+		{
+			mov_param_param(a, inst.size(), dstp, srcp);
+		}
+	}
+	else
+	{
+		if (srcp.is_memory())
+		{
+			if (size == SIZE_BYTE)
+				emit_ldrsb_mem(a, dstreg.x(), srcp.memory());
+			else if (size == SIZE_WORD)
+				emit_ldrsh_mem(a, dstreg.x(), srcp.memory());
+			else if (size == SIZE_DWORD)
+				emit_ldrsw_mem(a, dstreg.x(), srcp.memory());
+			else if (size == SIZE_QWORD)
+				emit_ldr_mem(a, dstreg.x(), srcp.memory());
+		}
+		else
+		{
+			const a64::Gp tempreg = srcp.select_register(dstreg, 8);
+			mov_reg_param(a, inst.size(), tempreg, srcp);
+
+			if (size == SIZE_BYTE)
+				a.sxtb(dstreg.x(), tempreg.w());
+			else if (size == SIZE_WORD)
+				a.sxth(dstreg.x(), tempreg.w());
+			else if (size == SIZE_DWORD)
+				a.sxtw(dstreg.x(), tempreg.w());
+		}
+
+		mov_param_reg(a, inst.size(), dstp, dstreg);
+	}
+
+	if (inst.flags())
+		a.tst(dstreg, dstreg);
+}
+
+void drcbe_arm64::op_roland(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_flags(inst, FLAG_S | FLAG_Z);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
+	be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
+	be_parameter shiftp(*this, inst.param(2), PTYPE_MRI);
+	be_parameter maskp(*this, inst.param(3), PTYPE_MRI);
+
+	const a64::Gp output = dstp.select_register(TEMP_REG1, inst.size());
+	const a64::Gp shift = shiftp.select_register(TEMP_REG2, inst.size());
+	const a64::Gp scratch = select_register(FUNC_SCRATCH_REG, inst.size()); // must be a true scratch register - selecting through shiftp could alias the live shift register
+	const uint64_t instbits = inst.size() * 8;
+
+	if (maskp.is_immediate() && maskp.is_immediate_value(0))
+	{
+		// A zero mask will always result in zero so optimize it out
+		const a64::Gp zero = select_register(a64::xzr, inst.size());
+
+		mov_param_reg(a, inst.size(), dstp, zero);
+
+		if (inst.flags())
+			a.tst(zero, zero);
+
+		return;
+	}
+
+	bool optimized = false;
+	if (srcp.is_immediate() && shiftp.is_immediate() && maskp.is_immediate())
+	{
+		// Optimize all constant inputs into a single mov
+		uint64_t result = srcp.immediate();
+
+		if (shiftp.immediate() != 0)
+		{
+			if (inst.size() == 4)
+				result = rotl_32(result, shiftp.immediate());
+			else
+				result = rotl_64(result, shiftp.immediate());
+		}
+
+		a.mov(output, result & maskp.immediate());
+
+		optimized = true;
+	}
+	else if (maskp.is_immediate() && shiftp.is_immediate() && !maskp.is_immediate_value(util::make_bitmask<uint64_t>(instbits)))
+	{
+		// A mask of all 1s will be handled efficiently in the unoptimized path, so only optimize for the other cases if possible
+		const auto pop = population_count_64(maskp.immediate());
+		const auto lz = count_leading_zeros_64(maskp.immediate()) & (instbits - 1);
+		const auto invlamask = ~(maskp.immediate() << lz) & util::make_bitmask<uint64_t>(instbits);
+		const bool is_right_aligned = (maskp.immediate() & (maskp.immediate() + 1)) == 0;
+		const bool is_contiguous = (invlamask & (invlamask + 1)) == 0;
+		const auto s = shiftp.immediate() & (instbits - 1);
+
+		if (is_right_aligned || is_contiguous)
+		{
+			mov_reg_param(a, inst.size(), output, srcp);
+			optimized = true;
+		}
+
+		if (is_right_aligned)
+		{
+			// Optimize a contiguous right-aligned mask
+			const auto s2 = (instbits - s) & (instbits - 1);
+
+			if (s >= pop)
+			{
+				a.ubfx(output, output, s2, pop);
+			}
+			else
+			{
+				if (s2 > 0)
+					a.ror(output, output, s2);
+
+				a.bfc(output, pop, instbits - pop);
+			}
+		}
+		else if (is_contiguous)
+		{
+			// Optimize a contiguous mask
+			auto const rot = ((instbits * 2) - s - pop - lz) & (instbits - 1);
+
+			if (rot > 0)
+				a.ror(output, output, rot);
+
+			a.ubfiz(output, output, instbits - pop - lz, pop);
+		}
+	}
+
+	if (!optimized)
+	{
+		mov_reg_param(a, inst.size(), output, srcp);
+
+		if (shiftp.is_immediate())
+		{
+			const auto s = -int64_t(shiftp.immediate()) & (instbits - 1);
+
+			if (s != 0)
+				a.ror(output, output, s);
+		}
+		else
+		{
+			const a64::Gp scratch2 = select_register(SCRATCH_REG2, inst.size());
+
+			mov_reg_param(a, inst.size(), shift, shiftp);
+
+			a.and_(scratch, shift, inst.size() * 8 - 1);
+			a.mov(scratch2, instbits);
+			a.sub(scratch, scratch2, scratch);
+			a.ror(output, output, scratch);
+		}
+
+		// srcp (rotated as needed) is already in the output register, so an all-1s mask needs no further work
+		if (maskp.is_immediate() && is_valid_immediate_mask(maskp.immediate(), inst.size()))
+		{
+			a.ands(output, output, maskp.immediate());
+		}
+		else if (!maskp.is_immediate() || maskp.immediate() != util::make_bitmask<uint64_t>(instbits))
+		{
+			const a64::Gp mask = maskp.select_register(TEMP_REG2, inst.size());
+			mov_reg_param(a, inst.size(), mask, maskp);
+
+			a.ands(output, output, mask);
+		}
+	}
+
+	mov_param_reg(a, inst.size(), dstp, output);
+
+	if (inst.flags())
+		a.tst(output, output);
+}
+
+void drcbe_arm64::op_rolins(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_flags(inst, FLAG_S | FLAG_Z);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
+	be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
+	be_parameter shiftp(*this, inst.param(2), PTYPE_MRI);
+	be_parameter maskp(*this, inst.param(3), PTYPE_MRI);
+	const uint64_t instbits = inst.size() * 8;
+
+	a64::Gp dst;
+
+	if (maskp.is_immediate() && maskp.is_immediate_value(0))
+	{
+		// A zero mask means no bits will be inserted so it can be optimized out
+		if (inst.flags())
+		{
+			dst = dstp.select_register(TEMP_REG2, inst.size());
+			mov_reg_param(a, inst.size(), dst, dstp);
+			a.tst(dst, dst);
+		}
+
+		return;
+	}
+
+	bool can_use_dst_reg = dstp.is_int_register();
+	if (can_use_dst_reg && srcp.is_int_register())
+		can_use_dst_reg = srcp.ireg() != dstp.ireg();
+	if (can_use_dst_reg && maskp.is_int_register())
+		can_use_dst_reg = maskp.ireg() != dstp.ireg();
+	if (can_use_dst_reg && shiftp.is_int_register())
+		can_use_dst_reg = shiftp.ireg() != dstp.ireg();
+
+	bool optimized = false;
+	if (srcp.is_immediate() && maskp.is_immediate() && shiftp.is_immediate() && maskp.is_immediate_value(util::make_bitmask<uint64_t>(instbits)))
+	{
+		dst = dstp.select_register(TEMP_REG2, inst.size());
+
+		uint64_t result = 0;
+		if (inst.size() == 4)
+			result = rotl_32(srcp.immediate(), shiftp.immediate());
+		else
+			result = rotl_64(srcp.immediate(), shiftp.immediate());
+
+		a.mov(dst, result);
+
+		optimized = true;
+	}
+	else if (maskp.is_immediate() && shiftp.is_immediate() && maskp.is_immediate_value(util::make_bitmask<uint64_t>(instbits)))
+	{
+		// a mask of all 1s means that the result of the rol will completely overwrite
+		// the output value, so just load the source value into the output register and rol on that
+		dst = dstp.select_register(TEMP_REG2, inst.size());
+		mov_reg_param(a, inst.size(), dst, srcp);
+
+		const auto shift = -int64_t(shiftp.immediate()) & (instbits - 1);
+
+		if (shift != 0)
+			a.ror(dst, dst, shift);
+
+		optimized = true;
+	}
+	else if (maskp.is_immediate() && shiftp.is_immediate())
+	{
+		const auto pop = population_count_64(maskp.immediate());
+		const auto lz = count_leading_zeros_64(maskp.immediate()) & (instbits - 1);
+		const auto invlamask = ~(maskp.immediate() << lz) & util::make_bitmask<uint64_t>(instbits);
+		const bool is_right_aligned = (maskp.immediate() & (maskp.immediate() + 1)) == 0;
+		const bool is_contiguous = (invlamask & (invlamask + 1)) == 0;
+		const auto s = shiftp.immediate() & (instbits - 1);
+
+		const a64::Gp src = select_register(SCRATCH_REG2, inst.size());
+
+		if (is_right_aligned || is_contiguous)
+		{
+			uint32_t rot = 0;
+			uint32_t lsb = 0;
+
+			dst = can_use_dst_reg ? dstp.select_register(SCRATCH_REG1, inst.size()) : select_register(SCRATCH_REG1, inst.size());
+			mov_reg_param(a, inst.size(), dst, dstp);
+
+			if (is_right_aligned)
+			{
+				// Optimize a contiguous right-aligned mask
+				rot = instbits - s;
+			}
+			else if (is_contiguous)
+			{
+				// Optimize a contiguous mask
+				rot = (instbits * 2) - s - pop - lz;
+				lsb = instbits - pop - lz;
+			}
+
+			rot &= instbits - 1;
+
+			if (srcp.is_immediate() && rot > 0)
+			{
+				// save an instruction by computing the ror of the immediate up front and loading it into src directly
+				uint64_t result = 0;
+
+				if (inst.size() == 4)
+					result = rotr_32(srcp.immediate(), rot);
+				else
+					result = rotr_64(srcp.immediate(), rot);
+
+				a.mov(src, result);
+
+				a.bfi(dst, src, lsb, pop);
+
+				optimized = true;
+			}
+			else
+			{
+				mov_reg_param(a, inst.size(), src, srcp);
+
+				if (rot > 0)
+					a.ror(src, src, rot);
+
+				a.bfi(dst, src, lsb, pop);
+
+				optimized = true;
+			}
+		}
+		else if (srcp.is_immediate())
+		{
+			const a64::Gp scratch = select_register(SCRATCH_REG1, inst.size());
+
+			dst = dstp.select_register(TEMP_REG2, inst.size());
+			mov_reg_param(a, inst.size(), dst, dstp); // the masking below modifies the current destination value, so it must be loaded first
+
+			// val1 = src & ~PARAM3
+			if (is_valid_immediate_mask(maskp.immediate(), inst.size()))
+			{
+				a.and_(dst, dst, ~maskp.immediate());
+			}
+			else
+			{
+				a.mov(scratch, ~maskp.immediate());
+				a.and_(dst, dst, scratch);
+			}
+
+			uint64_t result = 0;
+			if (inst.size() == 4)
+				result = rotl_32(srcp.immediate(), s) & maskp.immediate();
+			else
+				result = rotl_64(srcp.immediate(), s) & maskp.immediate();
+
+			if (result != 0)
+			{
+				// ORR takes a bitmask immediate rather than a 12-bit immediate
+				if (is_valid_immediate_mask(result, inst.size()))
+				{
+					a.orr(dst, dst, result);
+				}
+				else
+				{
+					a.mov(scratch, result);
+					a.orr(dst, dst, select_register(scratch, inst.size()));
+				}
+			}
+
+			optimized = true;
+		}
+	}
+
+	if (!optimized)
+	{
+		dst = can_use_dst_reg ? dstp.select_register(TEMP_REG2, inst.size()) : select_register(TEMP_REG2, inst.size());
+		mov_reg_param(a, inst.size(), dst, dstp);
+
+		const a64::Gp src = srcp.select_register(TEMP_REG1, inst.size());
+		const a64::Gp scratch = select_register(SCRATCH_REG1, inst.size());
+
+		mov_reg_param(a, inst.size(), src, srcp);
+
+		if (shiftp.is_immediate())
+		{
+			const auto shift = -int64_t(shiftp.immediate()) & ((inst.size() * 8) - 1);
+
+			if (shift != 0)
+				a.ror(scratch, src, shift);
+			else
+				a.mov(scratch, src);
+		}
+		else
+		{
+			const a64::Gp shift = shiftp.select_register(SCRATCH_REG2, inst.size());
+			const a64::Gp scratch2 = select_register(FUNC_SCRATCH_REG, inst.size()); // true scratch - selecting through shiftp could alias the live shift register
+			mov_reg_param(a, inst.size(), shift, shiftp);
+
+			a.mov(scratch, inst.size() * 8);
+			a.and_(scratch2, shift, inst.size() * 8 - 1);
+			a.sub(scratch2, scratch, scratch2);
+			a.ror(scratch, src, scratch2);
+		}
+
+		const a64::Gp mask = maskp.select_register(SCRATCH_REG2, inst.size());
+		mov_reg_param(a, inst.size(), mask, maskp);
+
+		a.bic(dst, dst, mask);          // val1 = src & ~PARAM3
+		a.and_(scratch, scratch, mask); // val2 = val2 & PARAM3
+		a.orr(dst, dst, scratch);       // val1 | val2
+	}
+
+	mov_param_reg(a, inst.size(), dstp, dst);
+
+	if (inst.flags())
+		a.tst(dst, dst);
+}
+
+template <a64::Inst::Id Opcode> void drcbe_arm64::op_add(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_flags(inst, FLAG_C | FLAG_V | FLAG_Z | FLAG_S);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
+	be_parameter src1p(*this, inst.param(1), PTYPE_MRI);
+	be_parameter src2p(*this, inst.param(2), PTYPE_MRI);
+
+	const a64::Gp output = dstp.select_register(TEMP_REG3, inst.size());
+
+	if (Opcode == a64::Inst::kIdAdcs)
+		load_carry(a);
+
+	if
(src1p.is_immediate() && is_valid_immediate(src1p.immediate(), 11)) + { + const a64::Gp src = src2p.select_register(TEMP_REG2, inst.size()); + + mov_reg_param(a, inst.size(), src, src2p); + if (src1p.immediate() == 0) + a.emit(Opcode, output, src, select_register(a64::xzr, inst.size())); + else + a.emit(Opcode, output, src, src1p.immediate()); + mov_param_reg(a, inst.size(), dstp, output); + } + else if (src2p.is_immediate() && is_valid_immediate(src2p.immediate(), 11)) + { + const a64::Gp src = src1p.select_register(TEMP_REG1, inst.size()); + + mov_reg_param(a, inst.size(), src, src1p); + if (src2p.is_immediate_value(0)) + a.emit(Opcode, output, src, select_register(a64::xzr, inst.size())); + else + a.emit(Opcode, output, src, src2p.immediate()); + mov_param_reg(a, inst.size(), dstp, output); + } + else + { + const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size()); + const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size()); + + mov_reg_param(a, inst.size(), src1, src1p); + mov_reg_param(a, inst.size(), src2, src2p); + a.emit(Opcode, output, src1, src2); + mov_param_reg(a, inst.size(), dstp, output); + } + + store_carry(a); +} + +template void drcbe_arm64::op_sub(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_C | FLAG_V | FLAG_Z | FLAG_S); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter src1p(*this, inst.param(1), PTYPE_MRI); + be_parameter src2p(*this, inst.param(2), PTYPE_MRI); + + if (Opcode == a64::Inst::kIdSbcs) + load_carry(a, true); + + const a64::Gp output = dstp.select_register(TEMP_REG3, inst.size()); + + if (src2p.is_immediate() && is_valid_immediate(src2p.immediate(), 11)) + { + const a64::Gp src = select_register(TEMP_REG1, inst.size()); + + mov_reg_param(a, inst.size(), src, src1p); + if (src2p.is_immediate_value(0)) + a.emit(Opcode, output, src, select_register(a64::xzr, inst.size())); + else + a.emit(Opcode, output, src, src2p.immediate()); + mov_param_reg(a, inst.size(), dstp, output); + } + else + { + const a64::Gp src1 = select_register(TEMP_REG1, inst.size()); + const a64::Gp src2 = select_register(TEMP_REG2, inst.size()); + + mov_reg_param(a, inst.size(), src1, src1p); + mov_reg_param(a, inst.size(), src2, src2p); + a.emit(Opcode, output, src1, src2); + mov_param_reg(a, inst.size(), dstp, output); + } + + store_carry(a, true); +} + +void drcbe_arm64::op_cmp(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_C | FLAG_V | FLAG_Z | FLAG_S); + + be_parameter src1p(*this, inst.param(0), PTYPE_MRI); + be_parameter src2p(*this, inst.param(1), PTYPE_MRI); + + const a64::Gp temp = select_register(TEMP_REG1, inst.size()); + const a64::Gp temp2 = select_register(TEMP_REG2, inst.size()); + + mov_reg_param(a, inst.size(), temp, src1p); + + if (src2p.is_immediate() && is_valid_immediate(src2p.immediate(), 11)) + { + if (src2p.is_immediate_value(0)) + a.cmp(temp, select_register(a64::xzr, inst.size())); + else + a.cmp(temp, src2p.immediate()); + } + else + { + mov_reg_param(a, inst.size(), temp2, src2p); + a.cmp(temp, temp2); + } + + store_carry(a, true); +} + +void drcbe_arm64::op_mulu(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_V | FLAG_Z | FLAG_S); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter 
edstp(*this, inst.param(1), PTYPE_MR); + be_parameter src1p(*this, inst.param(2), PTYPE_MRI); + be_parameter src2p(*this, inst.param(3), PTYPE_MRI); + const bool compute_hi = (dstp != edstp); + + const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size()); + const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size()); + const a64::Gp lo = TEMP_REG3; + const a64::Gp hi = TEMP_REG2; + + if ((src1p.is_immediate() && src1p.is_immediate_value(0)) || (src2p.is_immediate() && src2p.is_immediate_value(0))) + { + a.mov(lo, a64::xzr); + a.mov(hi, a64::xzr); + } + else + { + mov_reg_param(a, inst.size(), src1, src1p); + mov_reg_param(a, inst.size(), src2, src2p); + + if (inst.size() == 8) + { + a.mul(lo, src1, src2); + a.umulh(hi, src1, src2); + } + else + { + a.umull(lo, src1, src2); + a.lsr(hi, lo, 32); + } + } + + mov_param_reg(a, inst.size(), dstp, lo); + if (compute_hi) + mov_param_reg(a, inst.size(), edstp, hi); + + if (inst.flags()) + { + a.mrs(SCRATCH_REG1, a64::Predicate::SysReg::kNZCV); + + a.tst(lo, lo); + a.cset(TEMP_REG1, a64::CondCode::kEQ); + a.tst(hi, hi); + a.cset(TEMP_REG3, a64::CondCode::kEQ); + a.and_(TEMP_REG1, TEMP_REG1, TEMP_REG3); + a.bfi(SCRATCH_REG1, TEMP_REG1, 30, 1); // zero flag + + a.tst(hi, hi); // overflow check + a.cset(TEMP_REG3, a64::CondCode::kNE); + a.bfi(SCRATCH_REG1, TEMP_REG3, 28, 1); // overflow flag + + a.lsr(TEMP_REG3, hi, inst.size() * 8 - 1); // take top bit of result as sign flag + a.bfi(SCRATCH_REG1, TEMP_REG3, 31, 1); // sign flag + + a.msr(a64::Predicate::SysReg::kNZCV, SCRATCH_REG1); + } +} + +void drcbe_arm64::op_mululw(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_V | FLAG_Z | FLAG_S); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter src1p(*this, inst.param(1), PTYPE_MRI); + be_parameter src2p(*this, inst.param(2), PTYPE_MRI); + + const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size()); + const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size()); + const a64::Gp lo = TEMP_REG3; + const a64::Gp hi = TEMP_REG2; + + if ((src1p.is_immediate() && src1p.is_immediate_value(0)) || (src2p.is_immediate() && src2p.is_immediate_value(0))) + { + a.mov(lo, a64::xzr); + a.mov(hi, a64::xzr); + } + else + { + mov_reg_param(a, inst.size(), src1, src1p); + mov_reg_param(a, inst.size(), src2, src2p); + + if (inst.size() == 8) + { + a.mul(lo, src1, src2); + a.umulh(hi, src1, src2); + } + else + { + a.umull(lo, src1, src2); + a.lsr(hi, lo, 32); + } + } + + mov_param_reg(a, inst.size(), dstp, lo); + + if (inst.flags()) + { + a.mrs(TEMP_REG1, a64::Predicate::SysReg::kNZCV); + + a.tst(select_register(lo, inst.size()), select_register(lo, inst.size())); + a.cset(SCRATCH_REG1, a64::CondCode::kEQ); + a.bfi(TEMP_REG1, SCRATCH_REG1, 30, 1); // zero flag + + a.cmp(hi, 0); + a.cset(SCRATCH_REG1, a64::CondCode::kNE); + a.bfi(TEMP_REG1, SCRATCH_REG1, 28, 1); // overflow flag + + a.lsr(SCRATCH_REG1, lo, inst.size() * 8 - 1); // take top bit of result as sign flag + a.bfi(TEMP_REG1, SCRATCH_REG1, 31, 1); // sign flag + + a.msr(a64::Predicate::SysReg::kNZCV, TEMP_REG1); + } +} + +void drcbe_arm64::op_muls(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_V | FLAG_Z | FLAG_S); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter edstp(*this, inst.param(1), PTYPE_MR); + be_parameter src1p(*this, 
inst.param(2), PTYPE_MRI); + be_parameter src2p(*this, inst.param(3), PTYPE_MRI); + const bool compute_hi = (dstp != edstp); + + const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size()); + const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size()); + const a64::Gp lo = TEMP_REG3; + const a64::Gp hi = TEMP_REG2; + + if ((src1p.is_immediate() && src1p.is_immediate_value(0)) || (src2p.is_immediate() && src2p.is_immediate_value(0))) + { + a.mov(lo, a64::xzr); + a.mov(hi, a64::xzr); + } + else + { + mov_reg_param(a, inst.size(), src1, src1p); + mov_reg_param(a, inst.size(), src2, src2p); + + if (inst.size() == 8) + { + a.mul(lo, src1, src2); + a.smulh(hi, src1, src2); + } + else + { + a.smull(lo, src1, src2); + a.lsr(hi, lo, 32); + } + } + + mov_param_reg(a, inst.size(), dstp, lo); + if (compute_hi) + mov_param_reg(a, inst.size(), edstp, hi); + + if (inst.flags()) + { + a.mrs(SCRATCH_REG1, a64::Predicate::SysReg::kNZCV); + + a.tst(lo, lo); + a.cset(TEMP_REG1, a64::CondCode::kEQ); + a.tst(hi, hi); + a.cset(SCRATCH_REG2, a64::CondCode::kEQ); + a.and_(TEMP_REG1, TEMP_REG1, SCRATCH_REG2); + a.bfi(SCRATCH_REG1, TEMP_REG1, 30, 1); // zero flag + + if (inst.size() == 4) + { + a.sxtw(TEMP_REG1, lo.w()); + a.cmp(TEMP_REG1, lo); + } + else + { + a.asr(TEMP_REG1, lo, 63); + a.cmp(TEMP_REG1, hi); + } + + a.cset(TEMP_REG1, a64::CondCode::kNE); + a.bfi(SCRATCH_REG1, TEMP_REG1, 28, 1); // overflow flag + + a.lsr(TEMP_REG1, hi, inst.size() * 8 - 1); // take top bit of result as sign flag + a.bfi(SCRATCH_REG1, TEMP_REG1, 31, 1); // sign flag + + a.msr(a64::Predicate::SysReg::kNZCV, SCRATCH_REG1); + } +} + +void drcbe_arm64::op_mulslw(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_V | FLAG_Z | FLAG_S); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter src1p(*this, inst.param(1), PTYPE_MRI); + be_parameter src2p(*this, inst.param(2), PTYPE_MRI); + + const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size()); + const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size()); + const a64::Gp lo = TEMP_REG3; + const a64::Gp hi = TEMP_REG2; + + if ((src1p.is_immediate() && src1p.is_immediate_value(0)) || (src2p.is_immediate() && src2p.is_immediate_value(0))) + { + a.mov(lo, a64::xzr); + + if (inst.flags() && inst.size() == 8) + a.mov(hi, a64::xzr); + } + else + { + mov_reg_param(a, inst.size(), src1, src1p); + mov_reg_param(a, inst.size(), src2, src2p); + + if (inst.size() == 8) + { + a.mul(lo, src1, src2); + + if (inst.flags()) + a.smulh(hi, src1, src2); + } + else + { + a.smull(lo, src1, src2); + } + } + + mov_param_reg(a, inst.size(), dstp, lo); + + if (inst.flags()) + { + a.mrs(SCRATCH_REG1, a64::Predicate::SysReg::kNZCV); + + a.tst(select_register(lo, inst.size()), select_register(lo, inst.size())); + a.cset(TEMP_REG1, a64::CondCode::kEQ); + a.bfi(SCRATCH_REG1, TEMP_REG1, 30, 1); // zero flag + + if (inst.size() == 4) + { + a.sxtw(TEMP_REG1, lo.w()); + a.cmp(TEMP_REG1, lo); + } + else + { + a.asr(TEMP_REG1, lo, 63); + a.cmp(TEMP_REG1, hi); + } + + a.cset(TEMP_REG1, a64::CondCode::kNE); + a.bfi(SCRATCH_REG1, TEMP_REG1, 28, 1); // overflow flag + + a.lsr(TEMP_REG1, lo, inst.size() * 8 - 1); // take top bit of result as sign flag + a.bfi(SCRATCH_REG1, TEMP_REG1, 31, 1); // sign flag + + a.msr(a64::Predicate::SysReg::kNZCV, SCRATCH_REG1); + } +} + +template void drcbe_arm64::op_div(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || 
inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_V | FLAG_Z | FLAG_S); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter edstp(*this, inst.param(1), PTYPE_MR); + be_parameter src1p(*this, inst.param(2), PTYPE_MRI); + be_parameter src2p(*this, inst.param(3), PTYPE_MRI); + const bool compute_rem = (dstp != edstp); + + if (!src2p.is_immediate() || (src2p.is_immediate() && !src2p.is_immediate_value(0))) + { + Label skip_zero = a.newLabel(); + Label skip = a.newLabel(); + + const a64::Gp temp = select_register(TEMP_REG1, inst.size()); + const a64::Gp temp2 = select_register(TEMP_REG2, inst.size()); + const a64::Gp temp3 = select_register(TEMP_REG3, inst.size()); + + mov_reg_param(a, inst.size(), temp2, src2p); + a.cbz(temp2, skip_zero); + + mov_reg_param(a, inst.size(), temp, src1p); + + a.emit(Opcode, temp3, temp, temp2); + + mov_param_reg(a, inst.size(), dstp, temp3); + + if (compute_rem) + { + a.msub(temp2, temp3, temp2, temp); + mov_param_reg(a, inst.size(), edstp, temp2); + } + + if (inst.flags()) + a.tst(temp3, temp3); + + a.b(skip); + + a.bind(skip_zero); + a.mov(SCRATCH_REG1, 1 << 28); // set overflow flag + a.msr(a64::Predicate::SysReg::kNZCV, SCRATCH_REG1); + + a.bind(skip); + } + else + { + a.mov(SCRATCH_REG1, 1 << 28); // set overflow flag + a.msr(a64::Predicate::SysReg::kNZCV, SCRATCH_REG1); + } +} + +void drcbe_arm64::op_and(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_Z | FLAG_S); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter src1p(*this, inst.param(1), PTYPE_MRI); + be_parameter src2p(*this, inst.param(2), PTYPE_MRI); + + const a64::Gp dst = dstp.select_register(TEMP_REG3, inst.size()); + + if (src1p.is_immediate() && src2p.is_immediate()) + { + get_imm_relative(a, dst, src1p.immediate() & src2p.immediate()); + + if (inst.flags()) + a.tst(dst, dst); + } + else if (src2p.is_immediate() && is_valid_immediate_mask(src2p.immediate(), inst.size())) + { + const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size()); + mov_reg_param(a, inst.size(), src1, src1p); + + if (src2p.is_immediate_value(0)) + a.ands(dst, src1, select_register(a64::xzr, inst.size())); + else + a.ands(dst, src1, src2p.immediate()); + } + else + { + const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size()); + const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size()); + mov_reg_param(a, inst.size(), src1, src1p); + mov_reg_param(a, inst.size(), src2, src2p); + + a.ands(dst, src1, src2); + } + + mov_param_reg(a, inst.size(), dstp, dst); +} + +void drcbe_arm64::op_test(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_Z | FLAG_S); + + be_parameter src1p(*this, inst.param(0), PTYPE_MRI); + be_parameter src2p(*this, inst.param(1), PTYPE_MRI); + + const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size()); + const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size()); + + mov_reg_param(a, inst.size(), src1, src1p); + + if (src2p.is_immediate() && is_valid_immediate_mask(src2p.immediate(), inst.size())) + { + if (src2p.is_immediate_value(0)) + a.tst(src1, select_register(a64::xzr, inst.size())); + else + a.tst(src1, src2p.immediate()); + } + else + { + mov_reg_param(a, inst.size(), src2, src2p); + a.tst(src1, src2); + } +} + +void drcbe_arm64::op_or(a64::Assembler &a, const uml::instruction &inst) +{ + 
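// bitwise OR of two operands; the Z and S flags are derived from the result via TST when requested +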
assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_Z | FLAG_S); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter src1p(*this, inst.param(1), PTYPE_MRI); + be_parameter src2p(*this, inst.param(2), PTYPE_MRI); + + const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size()); + const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size()); + const a64::Gp dst = dstp.select_register(TEMP_REG3, inst.size()); + + if (src1p.is_immediate() && src2p.is_immediate()) + { + get_imm_relative(a, dst, src1p.immediate() | src2p.immediate()); + } + else if (src2p.is_immediate() && is_valid_immediate_mask(src2p.immediate(), inst.size())) + { + mov_reg_param(a, inst.size(), src1, src1p); + + if (src2p.is_immediate_value(0)) + { + if (dst.id() != src1.id()) + a.mov(dst, src1); + } + else if (is_valid_immediate(src2p.immediate(), 12)) + { + a.orr(dst, src1, src2p.immediate()); + } + else + { + a.mov(SCRATCH_REG1, src2p.immediate()); + a.orr(dst, src1, select_register(SCRATCH_REG1, inst.size())); + } + } + else + { + mov_reg_param(a, inst.size(), src1, src1p); + mov_reg_param(a, inst.size(), src2, src2p); + + a.orr(dst, src1, src2); + } + + mov_param_reg(a, inst.size(), dstp, dst); + + if (inst.flags()) + a.tst(dst, dst); +} + +void drcbe_arm64::op_xor(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_Z | FLAG_S); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter src1p(*this, inst.param(1), PTYPE_MRI); + be_parameter src2p(*this, inst.param(2), PTYPE_MRI); + + const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size()); + const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size()); + const a64::Gp dst = dstp.select_register(TEMP_REG3, inst.size()); + + if (src1p.is_immediate() && src2p.is_immediate()) + { + get_imm_relative(a, dst, src1p.immediate() ^ src2p.immediate()); + } + else if (src2p.is_immediate() && is_valid_immediate_mask(src2p.immediate(), inst.size())) + { + mov_reg_param(a, inst.size(), src1, src1p); + + if (src2p.is_immediate_value(0)) + { + if (dst.id() != src1.id()) + a.mov(dst, src1); + } + else + { + a.eor(dst, src1, src2p.immediate()); + } + } + else + { + mov_reg_param(a, inst.size(), src1, src1p); + mov_reg_param(a, inst.size(), src2, src2p); + + a.eor(dst, src1, src2); + } + + mov_param_reg(a, inst.size(), dstp, dst); + + if (inst.flags()) + a.tst(dst, dst); +} + +void drcbe_arm64::op_lzcnt(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_Z | FLAG_S); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + + const a64::Gp src = srcp.select_register(TEMP_REG1, inst.size()); + const a64::Gp dst = dstp.select_register(TEMP_REG2, inst.size()); + + mov_reg_param(a, inst.size(), src, srcp); + + a.clz(dst, src); + + mov_param_reg(a, inst.size(), dstp, dst); + + if (inst.flags()) + a.tst(dst, dst); +} + +void drcbe_arm64::op_tzcnt(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_Z | FLAG_S); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + + const a64::Gp src = srcp.select_register(TEMP_REG1, inst.size()); + const a64::Gp dst = 
dstp.select_register(TEMP_REG2, inst.size()); + const a64::Gp temp = select_register(TEMP_REG3, inst.size()); + + mov_reg_param(a, inst.size(), src, srcp); + + a.rbit(dst, src); // reverse bits to count the tail bits from the head + a.clz(dst, dst); + + mov_param_reg(a, inst.size(), dstp, dst); + + if (inst.flags()) + { + a.eor(temp, dst, inst.size() * 8); + a.tst(temp, temp); + } +} + +void drcbe_arm64::op_bswap(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_Z | FLAG_S); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + + const a64::Gp src = srcp.select_register(TEMP_REG1, inst.size()); + const a64::Gp dst = dstp.select_register(TEMP_REG1, inst.size()); + + mov_reg_param(a, inst.size(), src, srcp); + + if (inst.size() == 8) + a.rev64(dst, src); + else + a.rev32(dst, src); + + mov_param_reg(a, inst.size(), dstp, dst); + + if (inst.flags()) + a.tst(dst, dst); +} + + +template void drcbe_arm64::op_shift(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_C | FLAG_Z | FLAG_S); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter src1p(*this, inst.param(1), PTYPE_MRI); + be_parameter src2p(*this, inst.param(2), PTYPE_MRI); + + size_t const maxBits = inst.size() * 8 - 1; + + // If possible it's more optimal to write directly to the dst register, + // but be careful to not overwrite one of the source values since they're needed for later calculations + bool can_use_dst_reg = dstp.is_int_register(); + if (can_use_dst_reg && src1p.is_int_register()) + can_use_dst_reg = src1p.ireg() != dstp.ireg(); + if (can_use_dst_reg && src2p.is_int_register()) + can_use_dst_reg = src2p.ireg() != dstp.ireg(); + + const a64::Gp src = src1p.select_register(TEMP_REG1, inst.size()); + const a64::Gp shift = src2p.select_register(TEMP_REG2, inst.size()); + const a64::Gp dst = can_use_dst_reg ? dstp.select_register(TEMP_REG3, inst.size()) : select_register(TEMP_REG3, inst.size()); + const a64::Gp scratch = select_register(FUNC_SCRATCH_REG, inst.size()); + + mov_reg_param(a, inst.size(), src, src1p); + + if (src2p.is_immediate() && is_valid_immediate(src2p.immediate(), (inst.size() == 8) ? 
5 : 4)) + { + const auto shift = src2p.immediate() % (inst.size() * 8); + + a.emit(Opcode, dst, src, shift); + + if (shift != 0) + { + if (Opcode == a64::Inst::kIdRor || Opcode == a64::Inst::kIdLsr || Opcode == a64::Inst::kIdAsr) + calculate_carry_shift_right_imm(a, src, shift); + else if (Opcode == a64::Inst::kIdLsl) + calculate_carry_shift_left_imm(a, src, shift, maxBits); + + if (inst.flags()) + a.tst(dst, dst); + } + } + else + { + mov_reg_param(a, inst.size(), shift, src2p); + + a.and_(scratch, shift, inst.size() * 8 - 1); + + a.emit(Opcode, dst, src, scratch); + + Label skip = a.newLabel(); + a.cbz(scratch, skip); + + if (Opcode == a64::Inst::kIdRor || Opcode == a64::Inst::kIdLsr || Opcode == a64::Inst::kIdAsr) + calculate_carry_shift_right(a, src, scratch); + else if (Opcode == a64::Inst::kIdLsl) + calculate_carry_shift_left(a, src, scratch, maxBits); + + if (inst.flags()) + a.tst(dst, dst); + + a.bind(skip); + } + + // save dst after using inputs for calculations so the registers have no chance of being overwritten + mov_param_reg(a, inst.size(), dstp, dst); +} + +void drcbe_arm64::op_rol(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_C | FLAG_Z | FLAG_S); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter src1p(*this, inst.param(1), PTYPE_MRI); + be_parameter src2p(*this, inst.param(2), PTYPE_MRI); + + size_t const maxBits = inst.size() * 8 - 1; + + bool can_use_dst_reg = dstp.is_int_register(); + if (can_use_dst_reg && src1p.is_int_register()) + can_use_dst_reg = src1p.ireg() != dstp.ireg(); + if (can_use_dst_reg && src2p.is_int_register()) + can_use_dst_reg = src2p.ireg() != dstp.ireg(); + + const a64::Gp param = src1p.select_register(TEMP_REG1, inst.size()); + const a64::Gp shift = src2p.select_register(TEMP_REG2, inst.size()); + const a64::Gp output = can_use_dst_reg ? 
dstp.select_register(TEMP_REG3, inst.size()) : select_register(TEMP_REG3, inst.size()); + const a64::Gp scratch2 = select_register(FUNC_SCRATCH_REG, inst.size()); + + mov_reg_param(a, inst.size(), param, src1p); + + if (src2p.is_immediate()) + { + const auto s = src2p.immediate() % (inst.size() * 8); + const auto s2 = ((inst.size() * 8) - s) % (inst.size() * 8); + + if (s2 == 0) + { + if (output.id() != param.id()) + a.mov(output, param); + } + else + { + a.ror(output, param, s2); + } + + if (s != 0) + { + calculate_carry_shift_left_imm(a, param, s, maxBits); + + if (inst.flags()) + a.tst(output, output); + } + } + else + { + Label skip = a.newLabel(); + + mov_reg_param(a, inst.size(), shift, src2p); + + const a64::Gp scratch = select_register(SCRATCH_REG1, inst.size()); + a.mov(scratch, inst.size() * 8); + a.and_(scratch2, shift, maxBits); + a.sub(scratch, scratch, scratch2); + a.ror(output, param, scratch); + + a.cbz(scratch2, skip); + + calculate_carry_shift_left(a, param, scratch2, maxBits); + + if (inst.flags()) + a.tst(output, output); + + a.bind(skip); + } + + mov_param_reg(a, inst.size(), dstp, output); +} + +void drcbe_arm64::op_rolc(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_C | FLAG_Z | FLAG_S); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter src1p(*this, inst.param(1), PTYPE_MRI); + be_parameter src2p(*this, inst.param(2), PTYPE_MRI); + + size_t const maxBits = inst.size() * 8 - 1; + + bool can_use_dst_reg = dstp.is_int_register(); + if (can_use_dst_reg && src1p.is_int_register()) + can_use_dst_reg = src1p.ireg() != dstp.ireg(); + if (can_use_dst_reg && src2p.is_int_register()) + can_use_dst_reg = src2p.ireg() != dstp.ireg(); + + const a64::Gp param1 = src1p.select_register(TEMP_REG3, inst.size()); + const a64::Gp output = can_use_dst_reg ? 
dstp.select_register(TEMP_REG1, inst.size()) : select_register(TEMP_REG1, inst.size()); + const a64::Gp carry = select_register(SCRATCH_REG2, inst.size()); + + mov_reg_param(a, inst.size(), param1, src1p); + + // shift > 1: src = (PARAM1 << shift) | (carry << (shift - 1)) | (PARAM1 >> (33 - shift)) + // shift = 1: src = (PARAM1 << shift) | carry + + if (src2p.is_immediate()) + { + const auto shift = src2p.immediate() % (inst.size() * 8); + + if (shift != 0) + { + a.ubfx(carry, param1, (inst.size() * 8) - shift, 1); + if (shift > 1) + a.ubfx(output, param1, (inst.size() * 8) - shift + 1, shift - 1); + a.bfi(output.x(), FLAGS_REG, shift - 1, 1); + a.bfi(output, param1, shift, (inst.size() * 8) - shift); + a.bfi(FLAGS_REG, carry.x(), 0, 1); + + if (inst.flags()) + a.tst(output, output); + } + else + { + a.mov(output, param1); + } + } + else + { + const a64::Gp shift = src2p.select_register(TEMP_REG2, inst.size()); + const a64::Gp scratch = select_register(SCRATCH_REG1, inst.size()); + const a64::Gp scratch2 = select_register(FUNC_SCRATCH_REG, inst.size()); + + mov_reg_param(a, inst.size(), shift, src2p); + + a.and_(scratch2, shift, maxBits); + + a.lsl(output, param1, scratch2); // PARAM1 << shift + + Label skip = a.newLabel(); + Label skip3 = a.newLabel(); + a.cbz(scratch2, skip3); + + get_carry(a, carry); + + a.sub(scratch, scratch2, 1); + a.cbz(scratch, skip); + + // add carry flag to output + a.lsl(carry, carry, scratch); + + a.mov(scratch, maxBits + 2); // PARAM1 >> (33 - shift) + a.sub(scratch, scratch, scratch2); + a.lsr(scratch, param1, scratch); + a.orr(output, output, scratch); + + a.bind(skip); + + a.orr(output, output, carry); + + calculate_carry_shift_left(a, param1, scratch2, maxBits); + + if (inst.flags()) + a.tst(output, output); + + a.bind(skip3); + } + + mov_param_reg(a, inst.size(), dstp, output); +} + +void drcbe_arm64::op_rorc(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_C | FLAG_Z | FLAG_S); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter src1p(*this, inst.param(1), PTYPE_MRI); + be_parameter src2p(*this, inst.param(2), PTYPE_MRI); + + size_t const maxBits = inst.size() * 8 - 1; + + bool can_use_dst_reg = dstp.is_int_register(); + if (can_use_dst_reg && src1p.is_int_register()) + can_use_dst_reg = src1p.ireg() != dstp.ireg(); + if (can_use_dst_reg && src2p.is_int_register()) + can_use_dst_reg = src2p.ireg() != dstp.ireg(); + + const a64::Gp param1 = src1p.select_register(TEMP_REG3, inst.size()); + const a64::Gp output = can_use_dst_reg ? 
dstp.select_register(TEMP_REG1, inst.size()) : select_register(TEMP_REG1, inst.size());
+	const a64::Gp carry = select_register(SCRATCH_REG2, inst.size());
+
+	mov_reg_param(a, inst.size(), param1, src1p);
+
+	// if (shift > 1)
+	//  src = (PARAM1 >> shift) | (((flags & FLAG_C) << 31) >> (shift - 1)) | (PARAM1 << (33 - shift));
+	// else if (shift == 1)
+	//  src = (PARAM1 >> shift) | ((flags & FLAG_C) << 31);
+
+	if (src2p.is_immediate())
+	{
+		const auto shift = src2p.immediate() % (inst.size() * 8);
+
+		if (shift != 0)
+		{
+			a.ubfx(carry, param1, shift - 1, 1);
+			a.ubfx(output, param1, shift, (inst.size() * 8) - shift);
+			a.bfi(output.x(), FLAGS_REG, (inst.size() * 8) - shift, 1);
+			if (shift > 1)
+				a.bfi(output, param1, (inst.size() * 8) - shift + 1, shift - 1);
+			a.bfi(FLAGS_REG, carry.x(), 0, 1);
+
+			if (inst.flags())
+				a.tst(output, output);
+		}
+		else
+		{
+			a.mov(output, param1);
+		}
+	}
+	else
+	{
+		const a64::Gp shift = src2p.select_register(TEMP_REG2, inst.size());
+		const a64::Gp scratch = select_register(SCRATCH_REG1, inst.size());
+		const a64::Gp scratch2 = select_register(FUNC_SCRATCH_REG, inst.size());
+
+		mov_reg_param(a, inst.size(), shift, src2p);
+
+		a.and_(scratch2, shift, maxBits);
+
+		a.lsr(output, param1, shift); // PARAM1 >> shift
+
+		Label skip = a.newLabel();
+		Label skip3 = a.newLabel();
+		a.cbz(scratch2, skip3);
+
+		get_carry(a, carry);
+		a.lsl(carry, carry, maxBits); // (flags & FLAG_C) << 31
+
+		a.sub(scratch, scratch2, 1); // carry >> (shift - 1)
+		a.cbz(scratch, skip);
+
+		// add carry flag to output
+		a.lsr(carry, carry, scratch);
+
+		a.mov(scratch, maxBits + 2); // PARAM1 << (33 - shift)
+		a.sub(scratch, scratch, scratch2);
+		a.lsl(scratch, param1, scratch);
+		a.orr(output, output, scratch);
+
+		a.bind(skip);
+
+		a.orr(output, output, carry);
+
+		calculate_carry_shift_right(a, param1, scratch2);
+
+		if (inst.flags())
+			a.tst(output, output);
+
+		a.bind(skip3);
+	}
+
+	mov_param_reg(a, inst.size(), dstp, output);
+}
+
+void drcbe_arm64::op_fload(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_no_flags(inst);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MF);
+	be_parameter basep(*this, inst.param(1), PTYPE_M);
+	be_parameter indp(*this, inst.param(2), PTYPE_MRI);
+
+	const a64::Vec dstreg = dstp.select_register(TEMPF_REG1, inst.size());
+	const a64::Gp basereg = TEMP_REG1;
+
+	get_imm_relative(a, basereg, uint64_t(basep.memory()));
+
+	if (indp.is_immediate())
+	{
+		a.ldr(dstreg, arm::Mem(basereg, indp.immediate() * inst.size()));
+	}
+	else
+	{
+		// the index must not share TEMP_REG1 with basereg, which still holds the base pointer
+		const a64::Gp indreg = indp.select_register(TEMP_REG2, 4);
+
+		mov_reg_param(a, 4, indreg, indp);
+
+		a.ldr(dstreg, arm::Mem(basereg, indreg, arm::Shift(arm::ShiftOp::kLSL, (inst.size() == 4) ? 2 : 3)));
+	}
+
+	mov_float_param_reg(a, inst.size(), dstp, dstreg);
+}
+
+void drcbe_arm64::op_fstore(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_no_flags(inst);
+
+	be_parameter basep(*this, inst.param(0), PTYPE_M);
+	be_parameter indp(*this, inst.param(1), PTYPE_MRI);
+	be_parameter srcp(*this, inst.param(2), PTYPE_MF);
+
+	const a64::Vec srcreg = srcp.select_register(TEMPF_REG1, inst.size());
+	const a64::Gp basereg = TEMP_REG1;
+
+	get_imm_relative(a, basereg, uint64_t(basep.memory()));
+
+	mov_float_reg_param(a, inst.size(), srcreg, srcp);
+
+	if (indp.is_immediate())
+	{
+		a.str(srcreg, arm::Mem(basereg, indp.immediate() * inst.size()));
+	}
+	else
+	{
+		// as in op_fload, keep the index out of basereg
+		const a64::Gp indreg = indp.select_register(TEMP_REG2, 4);
+
+		mov_reg_param(a, 4, indreg, indp);
+
+		a.str(srcreg, arm::Mem(basereg, indreg, arm::Shift(arm::ShiftOp::kLSL, (inst.size() == 4) ? 2 : 3)));
+	}
+}
+
+void drcbe_arm64::op_fread(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_no_flags(inst);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
+	be_parameter addrp(*this, inst.param(1), PTYPE_MRI);
+	const parameter &spacesizep = inst.param(2);
+	assert(spacesizep.is_size_space());
+	assert((1 << spacesizep.size()) == inst.size());
+
+	const auto &trampolines = m_accessors[spacesizep.space()];
+	const auto &resolved = m_resolved_accessors[spacesizep.space()];
+
+	mov_reg_param(a, 4, REG_PARAM2, addrp);
+
+	if (inst.size() == 4)
+	{
+		if (resolved.read_dword.func)
+		{
+			get_imm_relative(a, REG_PARAM1, resolved.read_dword.obj);
+			call_arm_addr(a, resolved.read_dword.func);
+		}
+		else
+		{
+			get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]);
+			emit_ldr_mem(a, TEMP_REG1, &trampolines.read_dword);
+			a.blr(TEMP_REG1);
+		}
+
+		mov_float_param_int_reg(a, inst.size(), dstp, REG_PARAM1.w());
+	}
+	else if (inst.size() == 8)
+	{
+		if (resolved.read_qword.func)
+		{
+			get_imm_relative(a, REG_PARAM1, resolved.read_qword.obj);
+			call_arm_addr(a, resolved.read_qword.func);
+		}
+		else
+		{
+			get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]);
+			emit_ldr_mem(a, TEMP_REG1, &trampolines.read_qword);
+			a.blr(TEMP_REG1);
+		}
+
+		mov_float_param_int_reg(a, inst.size(), dstp, REG_PARAM1);
+	}
+}
+
+void drcbe_arm64::op_fwrite(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_no_flags(inst);
+
+	be_parameter addrp(*this, inst.param(0), PTYPE_MRI);
+	be_parameter srcp(*this, inst.param(1), PTYPE_MF);
+	const parameter &spacesizep = inst.param(2);
+	assert(spacesizep.is_size_space());
+	assert((1 << spacesizep.size()) == inst.size());
+
+	const auto &trampolines = m_accessors[spacesizep.space()];
+	const auto &resolved = m_resolved_accessors[spacesizep.space()];
+
+	mov_reg_param(a, 4, REG_PARAM2, addrp);
+	mov_float_reg_param(a, inst.size(), TEMPF_REG1, srcp);
+
+	a.fmov(select_register(REG_PARAM3, inst.size()), select_register(TEMPF_REG1, inst.size()));
+
+	if (inst.size() == 4)
+	{
+		if (resolved.write_dword.func)
+		{
+			get_imm_relative(a, REG_PARAM1, resolved.write_dword.obj);
+			call_arm_addr(a, resolved.write_dword.func);
+		}
+		else
+		{
+			get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]);
+			emit_ldr_mem(a, TEMP_REG1, &trampolines.write_dword);
+			a.blr(TEMP_REG1);
+		}
+	}
+	else if (inst.size() == 8)
+	{
+		if
(resolved.write_qword.func) + { + get_imm_relative(a, REG_PARAM1, resolved.write_qword.obj); + call_arm_addr(a, resolved.write_qword.func); + } + else + { + get_imm_relative(a, REG_PARAM1, (uintptr_t)m_space[spacesizep.space()]); + emit_ldr_mem(a, TEMP_REG1, &trampolines.write_qword); + a.blr(TEMP_REG1); + } + } +} + +void drcbe_arm64::op_fmov(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_any_condition(inst); + assert_no_flags(inst); + + be_parameter dstp(*this, inst.param(0), PTYPE_MF); + be_parameter srcp(*this, inst.param(1), PTYPE_MF); + + Label skip; + if (inst.condition() != uml::COND_ALWAYS) + { + skip = a.newLabel(); + + if (inst.condition() == COND_C || inst.condition() == COND_NC || inst.condition() == COND_A || inst.condition() == COND_BE) + load_carry(a, true); + + if (inst.condition() == uml::COND_U || inst.condition() == uml::COND_NU) + check_unordered_condition(a, inst.condition(), skip, true); + else + a.b(ARM_NOT_CONDITION(a, inst.condition()), skip); + } + + mov_float_param_param(a, inst.size(), dstp, srcp); + + if (inst.condition() != uml::COND_ALWAYS) + a.bind(skip); +} + +void drcbe_arm64::op_ftoint(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter srcp(*this, inst.param(1), PTYPE_MF); + const parameter &sizep = inst.param(2); + assert(sizep.is_size()); + const parameter &roundp = inst.param(3); + assert(roundp.is_rounding()); + + const a64::Gp dstreg = dstp.select_register(TEMP_REG1, 1 << sizep.size()); + const a64::Vec srcreg = srcp.select_register(TEMPF_REG1, inst.size()); + + if (!srcp.is_float_register()) + mov_float_reg_param(a, inst.size(), srcreg, srcp); + + switch (roundp.rounding()) + { + case ROUND_ROUND: + a.fcvtns(dstreg, srcreg); + break; + + case ROUND_CEIL: + a.fcvtps(dstreg, srcreg); + break; + + case ROUND_FLOOR: + a.fcvtms(dstreg, srcreg); + break; + + case ROUND_TRUNC: + case ROUND_DEFAULT: + default: + a.fcvtzs(dstreg, srcreg); + break; + } + + mov_param_reg(a, 1 << sizep.size(), dstp, dstreg); +} + +void drcbe_arm64::op_ffrint(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter dstp(*this, inst.param(0), PTYPE_MF); + be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + const parameter &sizep = inst.param(2); + assert(sizep.is_size()); + + const a64::Vec dstreg = dstp.select_register(TEMPF_REG1, inst.size()); + const a64::Gp srcreg = srcp.select_register(TEMP_REG1, 1 << sizep.size()); + + if (!srcp.is_int_register()) + mov_reg_param(a, 1 << sizep.size(), srcreg, srcp); + + a.scvtf(dstreg, srcreg); + + if (!dstp.is_float_register()) + mov_float_param_reg(a, inst.size(), dstp, dstreg); +} + +void drcbe_arm64::op_ffrflt(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter dstp(*this, inst.param(0), PTYPE_MF); + be_parameter srcp(*this, inst.param(1), PTYPE_MF); + const parameter &sizep = inst.param(2); + assert(sizep.is_size()); + + const a64::Vec dstreg = dstp.select_register(TEMPF_REG1, inst.size()); + const a64::Vec srcreg = srcp.select_register(TEMPF_REG2, 1 << sizep.size()); + + if (!srcp.is_float_register()) + mov_float_reg_param(a, 1 << sizep.size(), srcreg, srcp); + + // double 
to float, or float to double
+	a.fcvt(dstreg, srcreg);
+
+	if (!dstp.is_float_register())
+		mov_float_param_reg(a, inst.size(), dstp, dstreg);
+}
+
+void drcbe_arm64::op_frnds(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 8);
+	assert_no_condition(inst);
+	assert_no_flags(inst);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MF);
+	be_parameter srcp(*this, inst.param(1), PTYPE_MF);
+
+	const a64::Vec dstreg = dstp.select_register(TEMPF_REG2, inst.size());
+	const a64::Vec srcreg = srcp.select_register(TEMPF_REG1, inst.size());
+
+	if (!srcp.is_float_register())
+		mov_float_reg_param(a, inst.size(), srcreg, srcp);
+
+	a.fcvt(dstreg.s(), srcreg.d()); // convert double to single precision, rounding the value
+	a.fcvt(dstreg.d(), dstreg.s()); // convert the rounded single back to double
+
+	if (!dstp.is_float_register())
+		mov_float_param_reg(a, inst.size(), dstp, dstreg);
+}
+
+void drcbe_arm64::op_fcmp(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_flags(inst, FLAG_C | FLAG_Z | FLAG_U);
+
+	be_parameter src1p(*this, inst.param(0), PTYPE_MF);
+	be_parameter src2p(*this, inst.param(1), PTYPE_MF);
+
+	const a64::Vec srcreg1 = src1p.select_register(TEMPF_REG1, inst.size());
+	const a64::Vec srcreg2 = src2p.select_register(TEMPF_REG2, inst.size());
+
+	if (!src1p.is_float_register())
+		mov_float_reg_param(a, inst.size(), srcreg1, src1p);
+	if (!src2p.is_float_register())
+		mov_float_reg_param(a, inst.size(), srcreg2, src2p);
+
+	a.fcmp(srcreg1, srcreg2);
+
+	store_carry(a, true);
+	store_unordered(a);
+}
+
+template <a64::Inst::Id Opcode> void drcbe_arm64::op_float_alu(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_no_flags(inst);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MF);
+	be_parameter src1p(*this, inst.param(1), PTYPE_MF);
+	be_parameter src2p(*this, inst.param(2), PTYPE_MF);
+
+	// pick a target register for the general case
+	const a64::Vec dstreg = dstp.select_register(TEMPF_REG3, inst.size());
+	const a64::Vec srcreg1 = src1p.select_register(TEMPF_REG1, inst.size());
+	const a64::Vec srcreg2 = src2p.select_register(TEMPF_REG2, inst.size());
+
+	if (!src1p.is_float_register())
+		mov_float_reg_param(a, inst.size(), srcreg1, src1p);
+	if (!src2p.is_float_register())
+		mov_float_reg_param(a, inst.size(), srcreg2, src2p);
+
+	a.emit(Opcode, dstreg, srcreg1, srcreg2);
+
+	if (!dstp.is_float_register())
+		mov_float_param_reg(a, inst.size(), dstp, dstreg);
+}
+
+template <a64::Inst::Id Opcode> void drcbe_arm64::op_float_alu2(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_no_flags(inst);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MF);
+	be_parameter srcp(*this, inst.param(1), PTYPE_MF);
+
+	// pick a target register for the general case
+	const a64::Vec dstreg = dstp.select_register(TEMPF_REG2, inst.size());
+	const a64::Vec srcreg = srcp.select_register(TEMPF_REG1, inst.size());
+
+	if (!srcp.is_float_register())
+		mov_float_reg_param(a, inst.size(), srcreg, srcp);
+
+	a.emit(Opcode, dstreg, srcreg);
+
+	if (!dstp.is_float_register())
+		mov_float_param_reg(a, inst.size(), dstp, dstreg);
+}
+
+void drcbe_arm64::op_fcopyi(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_no_flags(inst);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MF);
+	be_parameter srcp(*this, inst.param(1),
PTYPE_MR); + + const a64::Vec dstreg = dstp.select_register(TEMPF_REG1, inst.size()); + const a64::Gp srcreg = srcp.select_register(TEMP_REG1, inst.size()); + + mov_reg_param(a, inst.size(), srcreg, srcp); + a.fmov(dstreg, srcreg); + mov_float_param_reg(a, inst.size(), dstp, dstreg); +} + +void drcbe_arm64::op_icopyf(a64::Assembler &a, const uml::instruction &inst) +{ + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter srcp(*this, inst.param(1), PTYPE_MF); + + const a64::Gp dstreg = dstp.select_register(TEMP_REG1, inst.size()); + const a64::Vec srcreg = srcp.select_register(TEMPF_REG1, inst.size()); + + mov_float_reg_param(a, inst.size(), srcreg, srcp); + a.fmov(dstreg, srcreg); + mov_param_reg(a, inst.size(), dstp, dstreg); +} + +} diff --git a/src/devices/cpu/drcbearm64.h b/src/devices/cpu/drcbearm64.h new file mode 100644 index 00000000000..6acf3aa01f0 --- /dev/null +++ b/src/devices/cpu/drcbearm64.h @@ -0,0 +1,287 @@ +// license:BSD-3-Clause +// copyright-holders:windyfairy +#ifndef MAME_CPU_DRCBEARM64_H +#define MAME_CPU_DRCBEARM64_H + +#pragma once + +#include "drcuml.h" +#include "drcbeut.h" + +#include "asmjit/src/asmjit/asmjit.h" +#include "asmjit/src/asmjit/a64.h" + +#include + + +namespace drc { + +class drcbe_arm64 : public drcbe_interface +{ + using arm64_entry_point_func = uint32_t (*)(void *entry); + +public: + drcbe_arm64(drcuml_state &drcuml, device_t &device, drc_cache &cache, uint32_t flags, int modes, int addrbits, int ignorebits); + virtual ~drcbe_arm64(); + + virtual void reset() override; + virtual int execute(uml::code_handle &entry) override; + virtual void generate(drcuml_block &block, const uml::instruction *instlist, uint32_t numinst) override; + virtual bool hash_exists(uint32_t mode, uint32_t pc) override; + virtual void get_info(drcbe_info &info) override; + virtual bool logging() const override { return false; } + +private: + class be_parameter + { + static inline constexpr int REG_MAX = 30; + + public: + // parameter types + enum be_parameter_type + { + PTYPE_NONE = 0, // invalid + PTYPE_IMMEDIATE, // immediate; value = sign-extended to 64 bits + PTYPE_INT_REGISTER, // integer register; value = 0-REG_MAX + PTYPE_FLOAT_REGISTER, // floating point register; value = 0-REG_MAX + PTYPE_MEMORY, // memory; value = pointer to memory + PTYPE_MAX + }; + + typedef uint64_t be_parameter_value; + + be_parameter() : m_type(PTYPE_NONE), m_value(0) { } + be_parameter(uint64_t val) : m_type(PTYPE_IMMEDIATE), m_value(val) { } + be_parameter(drcbe_arm64 &drcbe, const uml::parameter ¶m, uint32_t allowed); + be_parameter(const be_parameter ¶m) = default; + + static be_parameter make_ireg(int regnum) { assert(regnum >= 0 && regnum < REG_MAX); return be_parameter(PTYPE_INT_REGISTER, regnum); } + static be_parameter make_freg(int regnum) { assert(regnum >= 0 && regnum < REG_MAX); return be_parameter(PTYPE_FLOAT_REGISTER, regnum); } + static be_parameter make_memory(void *base) { return be_parameter(PTYPE_MEMORY, reinterpret_cast(base)); } + static be_parameter make_memory(const void *base) { return be_parameter(PTYPE_MEMORY, reinterpret_cast(const_cast(base))); } + + bool operator==(const be_parameter &rhs) const { return (m_type == rhs.m_type && m_value == rhs.m_value); } + bool operator!=(const be_parameter &rhs) const { return (m_type != rhs.m_type || m_value != rhs.m_value); } + + be_parameter_type type() const { return m_type; } + uint64_t immediate() const 
diff --git a/src/devices/cpu/drcbearm64.h b/src/devices/cpu/drcbearm64.h
new file mode 100644
index 00000000000..6acf3aa01f0
--- /dev/null
+++ b/src/devices/cpu/drcbearm64.h
@@ -0,0 +1,287 @@
+// license:BSD-3-Clause
+// copyright-holders:windyfairy
+#ifndef MAME_CPU_DRCBEARM64_H
+#define MAME_CPU_DRCBEARM64_H
+
+#pragma once
+
+#include "drcuml.h"
+#include "drcbeut.h"
+
+#include "asmjit/src/asmjit/asmjit.h"
+#include "asmjit/src/asmjit/a64.h"
+
+#include <vector>
+
+
+namespace drc {
+
+class drcbe_arm64 : public drcbe_interface
+{
+	using arm64_entry_point_func = uint32_t (*)(void *entry);
+
+public:
+	drcbe_arm64(drcuml_state &drcuml, device_t &device, drc_cache &cache, uint32_t flags, int modes, int addrbits, int ignorebits);
+	virtual ~drcbe_arm64();
+
+	virtual void reset() override;
+	virtual int execute(uml::code_handle &entry) override;
+	virtual void generate(drcuml_block &block, const uml::instruction *instlist, uint32_t numinst) override;
+	virtual bool hash_exists(uint32_t mode, uint32_t pc) override;
+	virtual void get_info(drcbe_info &info) override;
+	virtual bool logging() const override { return false; }
+
+private:
+	class be_parameter
+	{
+		static inline constexpr int REG_MAX = 30;
+
+	public:
+		// parameter types
+		enum be_parameter_type
+		{
+			PTYPE_NONE = 0,                     // invalid
+			PTYPE_IMMEDIATE,                    // immediate; value = sign-extended to 64 bits
+			PTYPE_INT_REGISTER,                 // integer register; value = 0-REG_MAX
+			PTYPE_FLOAT_REGISTER,               // floating point register; value = 0-REG_MAX
+			PTYPE_MEMORY,                       // memory; value = pointer to memory
+			PTYPE_MAX
+		};
+
+		typedef uint64_t be_parameter_value;
+
+		be_parameter() : m_type(PTYPE_NONE), m_value(0) { }
+		be_parameter(uint64_t val) : m_type(PTYPE_IMMEDIATE), m_value(val) { }
+		be_parameter(drcbe_arm64 &drcbe, const uml::parameter &param, uint32_t allowed);
+		be_parameter(const be_parameter &param) = default;
+
+		static be_parameter make_ireg(int regnum) { assert(regnum >= 0 && regnum < REG_MAX); return be_parameter(PTYPE_INT_REGISTER, regnum); }
+		static be_parameter make_freg(int regnum) { assert(regnum >= 0 && regnum < REG_MAX); return be_parameter(PTYPE_FLOAT_REGISTER, regnum); }
+		static be_parameter make_memory(void *base) { return be_parameter(PTYPE_MEMORY, reinterpret_cast<be_parameter_value>(base)); }
+		static be_parameter make_memory(const void *base) { return be_parameter(PTYPE_MEMORY, reinterpret_cast<be_parameter_value>(const_cast<void *>(base))); }
+
+		bool operator==(const be_parameter &rhs) const { return (m_type == rhs.m_type && m_value == rhs.m_value); }
+		bool operator!=(const be_parameter &rhs) const { return (m_type != rhs.m_type || m_value != rhs.m_value); }
+
+		be_parameter_type type() const { return m_type; }
+		uint64_t immediate() const { assert(m_type == PTYPE_IMMEDIATE); return m_value; }
+		uint32_t ireg() const { assert(m_type == PTYPE_INT_REGISTER); assert(m_value < REG_MAX); return m_value; }
+		uint32_t freg() const { assert(m_type == PTYPE_FLOAT_REGISTER); assert(m_value < REG_MAX); return m_value; }
+		void *memory() const { assert(m_type == PTYPE_MEMORY); return reinterpret_cast<void *>(m_value); }
+
+		bool is_immediate() const { return (m_type == PTYPE_IMMEDIATE); }
+		bool is_int_register() const { return (m_type == PTYPE_INT_REGISTER); }
+		bool is_float_register() const { return (m_type == PTYPE_FLOAT_REGISTER); }
+		bool is_memory() const { return (m_type == PTYPE_MEMORY); }
+
+		bool is_immediate_value(uint64_t value) const { return (m_type == PTYPE_IMMEDIATE && m_value == value); }
+
+		asmjit::a64::Vec get_register_float(uint32_t regsize) const;
+		asmjit::a64::Gp get_register_int(uint32_t regsize) const;
+		asmjit::a64::Vec select_register(asmjit::a64::Vec const &reg, uint32_t regsize) const;
+		asmjit::a64::Gp select_register(asmjit::a64::Gp const &reg, uint32_t regsize) const;
+
+	private:
+		be_parameter(be_parameter_type type, be_parameter_value value) : m_type(type), m_value(value) { }
+
+		be_parameter_type m_type;
+		be_parameter_value m_value;
+	};
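be_parameter collapses every UML operand the back-end can see into one tagged 64-bit value, so each opcode handler branches on a single discriminator. A minimal standalone analogue of the idea (hypothetical names, not the MAME class):

```cpp
#include <cassert>
#include <cstdint>

struct param
{
    enum type_t { immediate, int_register, memory } type;
    uint64_t value; // immediate bits, register index, or host pointer

    static param make_ireg(int n) { assert(n >= 0 && n < 30); return { int_register, uint64_t(n) }; }
    static param make_memory(void *p) { return { memory, reinterpret_cast<uint64_t>(p) }; }

    bool is_immediate_value(uint64_t v) const { return type == immediate && value == v; }
};

int main()
{
    uint64_t slot = 0;
    const param imm{ param::immediate, 42 };
    const param mem = param::make_memory(&slot);
    assert(imm.is_immediate_value(42));   // handlers test for cheap special cases
    assert(mem.type == param::memory);    // or fall back on the memory path
    return 0;
}
```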
+
+	void op_handle(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_hash(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_label(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_comment(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_mapvar(asmjit::a64::Assembler &a, const uml::instruction &inst);
+
+	void op_nop(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_break(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_debug(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_exit(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_hashjmp(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_jmp(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_exh(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_callh(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_ret(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_callc(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_recover(asmjit::a64::Assembler &a, const uml::instruction &inst);
+
+	void op_setfmod(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_getfmod(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_getexp(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_getflgs(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_setflgs(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_save(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_restore(asmjit::a64::Assembler &a, const uml::instruction &inst);
+
+	void op_load(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_loads(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_store(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_read(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_readm(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_write(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_writem(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_carry(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_set(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_mov(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_sext(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_roland(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_rolins(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	template void op_add(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	template void op_sub(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_cmp(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_mulu(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_mululw(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_muls(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_mulslw(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	template void op_div(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_and(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_test(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_or(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_xor(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_lzcnt(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_tzcnt(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_bswap(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	template void op_shift(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_rol(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_rolc(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_rorc(asmjit::a64::Assembler &a, const uml::instruction &inst);
+
+	void op_fload(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_fstore(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_fread(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_fwrite(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_fmov(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_ftoint(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_ffrint(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_ffrflt(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_frnds(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_fcmp(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_fcopyi(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	void op_icopyf(asmjit::a64::Assembler &a, const uml::instruction &inst);
+
+	template void op_float_alu(asmjit::a64::Assembler &a, const uml::instruction &inst);
+	template void op_float_alu2(asmjit::a64::Assembler &a, const uml::instruction &inst);
+
+	size_t emit(asmjit::CodeHolder &ch);
+
+
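Each of these generators becomes one entry in a table of pointer-to-member functions (s_opcode_table, declared further down), so code generation is a single indexed indirect call per UML instruction. A self-contained sketch of that dispatch pattern (hypothetical types, not the MAME ones):

```cpp
#include <cstdio>

struct backend
{
    using generate_func = void (backend::*)(int);

    void op_add(int n) { std::printf("emit add %d\n", n); }
    void op_sub(int n) { std::printf("emit sub %d\n", n); }

    // table indexed by opcode, filled once at startup
    static generate_func s_table[2];

    void generate(int opcode, int n) { (this->*s_table[opcode])(n); }
};

backend::generate_func backend::s_table[2] = { &backend::op_add, &backend::op_sub };

int main()
{
    backend be;
    be.generate(0, 1); // dispatches to op_add
    be.generate(1, 2); // dispatches to op_sub
    return 0;
}
```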
+	// helper functions
+	asmjit::a64::Vec select_register(asmjit::a64::Vec const &reg, uint32_t regsize) const;
+	asmjit::a64::Gp select_register(asmjit::a64::Gp const &reg, uint32_t regsize) const;
+
+	static bool is_valid_immediate(uint64_t val, size_t bits);
+	static bool is_valid_immediate_signed(int64_t val, size_t bits);
+	static bool is_valid_immediate_mask(uint64_t val, size_t bytes);
+
+	asmjit::arm::Mem get_mem_absolute(asmjit::a64::Assembler &a, const void *ptr) const;
+	void get_imm_relative(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const uint64_t ptr) const;
+
+	bool emit_add_optimized(asmjit::a64::Assembler &a, const asmjit::a64::Gp &dst, const asmjit::a64::Gp &src, int64_t val) const;
+	bool emit_sub_optimized(asmjit::a64::Assembler &a, const asmjit::a64::Gp &dst, const asmjit::a64::Gp &src, int64_t val) const;
+
+	void emit_ldr_str_base_mem(asmjit::a64::Assembler &a, asmjit::a64::Inst::Id opcode, const asmjit::a64::Reg &reg, const void *ptr) const;
+	void emit_ldr_mem(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const void *ptr) const;
+	void emit_ldrb_mem(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const void *ptr) const;
+	void emit_ldrh_mem(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const void *ptr) const;
+	void emit_ldrsb_mem(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const void *ptr) const;
+	void emit_ldrsh_mem(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const void *ptr) const;
+	void emit_ldrsw_mem(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const void *ptr) const;
+	void emit_str_mem(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const void *ptr) const;
+	void emit_strb_mem(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const void *ptr) const;
+	void emit_strh_mem(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const void *ptr) const;
+
+	void emit_float_ldr_mem(asmjit::a64::Assembler &a, const asmjit::a64::Vec &reg, const void *ptr) const;
+	void emit_float_str_mem(asmjit::a64::Assembler &a, const asmjit::a64::Vec &reg, const void *ptr) const;
+
+	void get_carry(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, bool inverted = false) const;
+	void load_carry(asmjit::a64::Assembler &a, bool inverted = false) const;
+	void store_carry(asmjit::a64::Assembler &a, bool inverted = false) const;
+	void store_carry_reg(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg) const;
+
+	void store_unordered(asmjit::a64::Assembler &a) const;
+	void get_unordered(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg) const;
+	void check_unordered_condition(asmjit::a64::Assembler &a, uml::condition_t cond, asmjit::Label condition_met, bool not_equal) const;
+
+	void get_shifted_bit(asmjit::a64::Assembler &a, const asmjit::a64::Gp &dst, const asmjit::a64::Gp &src, uint32_t bits, uint32_t shift) const;
+
+	void calculate_carry_shift_left(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const asmjit::a64::Gp &shift, int maxBits) const;
+	void calculate_carry_shift_left_imm(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const int shift, int maxBits) const;
+
+	void calculate_carry_shift_right(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const asmjit::a64::Gp &shift) const;
+	void calculate_carry_shift_right_imm(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const int shift) const;
+
+	void mov_float_reg_param(asmjit::a64::Assembler &a, uint32_t regsize, asmjit::a64::Vec const &dst, const be_parameter &src) const;
+	void mov_float_param_param(asmjit::a64::Assembler &a, uint32_t regsize, const be_parameter &dst, const be_parameter &src) const;
+	void mov_float_param_reg(asmjit::a64::Assembler &a, uint32_t regsize, const be_parameter &dst, asmjit::a64::Vec const &src) const;
+	void mov_float_param_int_reg(asmjit::a64::Assembler &a, uint32_t regsize, const be_parameter &dst, asmjit::a64::Gp const &src) const;
+
+	void mov_reg_param(asmjit::a64::Assembler &a, uint32_t regsize, const asmjit::a64::Gp &dst, const be_parameter &src) const;
+	void mov_param_reg(asmjit::a64::Assembler &a, uint32_t regsize, const be_parameter &dst, const asmjit::a64::Gp &src) const;
+	void mov_param_imm(asmjit::a64::Assembler &a, uint32_t regsize, const be_parameter &dst, uint64_t src) const;
+	void mov_param_param(asmjit::a64::Assembler &a, uint32_t regsize, const be_parameter &dst, const be_parameter &src) const;
+	void mov_mem_param(asmjit::a64::Assembler &a, uint32_t regsize, void *dst, const be_parameter &src) const;
+	void mov_signed_reg64_param32(asmjit::a64::Assembler &a, const asmjit::a64::Gp &dst, const be_parameter &src) const;
+	void mov_r64_imm(asmjit::a64::Assembler &a, const asmjit::a64::Gp &dst, uint64_t const src) const;
+
+	void call_arm_addr(asmjit::a64::Assembler &a, const void *offs) const;
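The is_valid_immediate* predicates gate whether a value can be encoded directly into an instruction or must first be materialized in a register. Their bodies are not part of this diff; a plausible standalone reading of the unsigned and signed range checks (an assumption, not the actual implementation):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>

// fits unsigned in `bits` bits (illustrative)
static bool is_valid_immediate(uint64_t val, size_t bits)
{
    assert(bits > 0 && bits < 64);
    return val < (uint64_t(1) << bits);
}

// survives truncation to `bits` bits followed by sign-extension (illustrative)
static bool is_valid_immediate_signed(int64_t val, size_t bits)
{
    assert(bits > 0 && bits < 64);
    const uint64_t mask = (uint64_t(1) << bits) - 1;
    const uint64_t sign = uint64_t(1) << (bits - 1);
    const int64_t extended = int64_t(((uint64_t(val) & mask) ^ sign) - sign);
    return extended == val;
}

int main()
{
    assert(is_valid_immediate(255, 8) && !is_valid_immediate(256, 8));
    assert(is_valid_immediate_signed(-128, 8) && !is_valid_immediate_signed(128, 8));
    return 0;
}
```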
+
+	drc_hash_table m_hash;
+	drc_map_variables m_map;
+	FILE * m_log_asmjit;
+
+	arm64_entry_point_func m_entry;
+	drccodeptr m_exit;
+	drccodeptr m_nocode;
+
+	uint8_t *m_baseptr;
+
+	struct near_state
+	{
+		void *debug_cpu_instruction_hook;
+		void *drcmap_get_value;
+		void *hashstacksave;
+
+		uint32_t emulated_flags;
+		uint32_t calldepth;
+	};
+	near_state &m_near;
+
+	using opcode_generate_func = void (drcbe_arm64::*)(asmjit::a64::Assembler &, const uml::instruction &);
+	struct opcode_table_entry
+	{
+		uml::opcode_t opcode;
+		opcode_generate_func func;
+	};
+	static const opcode_table_entry s_opcode_table_source[];
+	static opcode_generate_func s_opcode_table[uml::OP_MAX];
+
+	struct resolved_handler { uintptr_t obj = 0; void *func = nullptr; };
+	struct resolved_accessors
+	{
+
+		resolved_handler read_byte;
+		resolved_handler read_word;
+		resolved_handler read_word_masked;
+		resolved_handler read_dword;
+		resolved_handler read_dword_masked;
+		resolved_handler read_qword;
+		resolved_handler read_qword_masked;
+
+		resolved_handler write_byte;
+		resolved_handler write_word;
+		resolved_handler write_word_masked;
+		resolved_handler write_dword;
+		resolved_handler write_dword_masked;
+		resolved_handler write_qword;
+		resolved_handler write_qword_masked;
+	};
+	using resolved_accessors_vector = std::vector<resolved_accessors>;
+	resolved_accessors_vector m_resolved_accessors;
+};
+
+}
+
+using drc::drcbe_arm64;
+
+#endif // MAME_CPU_DRCBEARM64_H
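resolved_accessors caches each address space's handlers as resolved_handler pairs: a plain object pointer plus function pointer that generated code can call without C++ virtual dispatch. A standalone illustration of the flattened-delegate idea (hypothetical memory_bank type; round-tripping a function pointer through void * is only conditionally supported by the standard, but is the usual JIT trick):

```cpp
#include <cassert>
#include <cstdint>

struct memory_bank
{
    uint8_t data[16] = {};
    uint8_t read_byte(uint32_t address) { return data[address & 15]; }
};

struct resolved_handler { uintptr_t obj = 0; void *func = nullptr; };

// C-callable thunk: the JIT passes `obj` back as the first argument
static uint8_t read_byte_thunk(uintptr_t obj, uint32_t address)
{
    return reinterpret_cast<memory_bank *>(obj)->read_byte(address);
}

int main()
{
    memory_bank bank;
    bank.data[3] = 0x5a;

    const resolved_handler h{ reinterpret_cast<uintptr_t>(&bank),
                              reinterpret_cast<void *>(&read_byte_thunk) };

    const auto fn = reinterpret_cast<uint8_t (*)(uintptr_t, uint32_t)>(h.func);
    assert(fn(h.obj, 3) == 0x5a); // direct call, no virtual dispatch
    return 0;
}
```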
diff --git a/src/devices/cpu/drcbex64.cpp b/src/devices/cpu/drcbex64.cpp
index 891b8c69dd5..c7ddf18b535 100644
--- a/src/devices/cpu/drcbex64.cpp
+++ b/src/devices/cpu/drcbex64.cpp
@@ -1713,12 +1713,14 @@ void drcbe_x64::op_hashjmp(Assembler &a, const instruction &inst)
 		}
 	}
 
+	// fix stack alignment if "no code" landing returned from abuse of call with misaligned stack
+	a.sub(rsp, 8);                                                          // sub   rsp,8
+
 	// in all cases, if there is no code, we return here to generate the exception
 	if (LOG_HASHJMPS)
 		smart_call_m64(a, &m_near.debug_log_hashjmp_fail);
 
 	mov_mem_param(a, MABS(&m_state.exp, 4), pcp);                           // mov   [exp],param
-	a.sub(rsp, 8);                                                          // sub   rsp,8
 	a.call(MABS(exp.handle().codeptr_addr()));                              // call  [exp]
 }
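The reordered sub rsp,8 matters because the x86-64 ABIs require a 16-byte-aligned stack at every call: entering code via call leaves rsp ≡ 8 (mod 16), so one 8-byte adjustment is needed before the next call, and it must now happen before the "no code" landing point rather than after it. The arithmetic as a compile-time check (the address is an arbitrary example value):

```cpp
#include <cstdint>

// Immediately after a call, rsp ends in 8 (the pushed return address).
constexpr uint64_t rsp_after_call = 0x7fffffffe008; // arbitrary example
static_assert(rsp_after_call % 16 == 8, "call pushes an 8-byte return address");

// One 8-byte adjustment restores the 16-byte alignment the next call needs.
static_assert((rsp_after_call - 8) % 16 == 0, "sub rsp,8 re-aligns the stack");

int main() { return 0; }
```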
diff --git a/src/devices/cpu/drcbex64.h b/src/devices/cpu/drcbex64.h
index db8c1543aa5..826b56d8b32 100644
--- a/src/devices/cpu/drcbex64.h
+++ b/src/devices/cpu/drcbex64.h
@@ -29,7 +29,7 @@ namespace drc {
 class drcbe_x64 : public drcbe_interface
 {
-	typedef uint32_t (*x86_entry_point_func)(uint8_t *rbpvalue, x86code *entry);
+	using x86_entry_point_func = uint32_t (*)(uint8_t *rbpvalue, x86code *entry);
 
 public:
 	// construction/destruction
@@ -50,7 +50,7 @@ private:
 	{
 	public:
 		// HACK: leftover from x86emit
-		static int const REG_MAX = 16;
+		static inline constexpr int REG_MAX = 16;
 
 		// parameter types
 		enum be_parameter_type
@@ -59,7 +59,6 @@
 			PTYPE_IMMEDIATE,                    // immediate; value = sign-extended to 64 bits
 			PTYPE_INT_REGISTER,                 // integer register; value = 0-REG_MAX
 			PTYPE_FLOAT_REGISTER,               // floating point register; value = 0-REG_MAX
-			PTYPE_VECTOR_REGISTER,              // vector register; value = 0-REG_MAX
 			PTYPE_MEMORY,                       // memory; value = pointer to memory
 			PTYPE_MAX
 		};
@@ -69,15 +68,15 @@
 		// construction
 		be_parameter() : m_type(PTYPE_NONE), m_value(0) { }
-		be_parameter(be_parameter const &param) : m_type(param.m_type), m_value(param.m_value) { }
 		be_parameter(uint64_t val) : m_type(PTYPE_IMMEDIATE), m_value(val) { }
 		be_parameter(drcbe_x64 &drcbe, const uml::parameter &param, uint32_t allowed);
+		be_parameter(const be_parameter &param) = default;
 
 		// creators for types that don't safely default
-		static inline be_parameter make_ireg(int regnum) { assert(regnum >= 0 && regnum < REG_MAX); return be_parameter(PTYPE_INT_REGISTER, regnum); }
-		static inline be_parameter make_freg(int regnum) { assert(regnum >= 0 && regnum < REG_MAX); return be_parameter(PTYPE_FLOAT_REGISTER, regnum); }
-		static inline be_parameter make_memory(void *base) { return be_parameter(PTYPE_MEMORY, reinterpret_cast<be_parameter_value>(base)); }
-		static inline be_parameter make_memory(const void *base) { return be_parameter(PTYPE_MEMORY, reinterpret_cast<be_parameter_value>(const_cast<void *>(base))); }
+		static be_parameter make_ireg(int regnum) { assert(regnum >= 0 && regnum < REG_MAX); return be_parameter(PTYPE_INT_REGISTER, regnum); }
+		static be_parameter make_freg(int regnum) { assert(regnum >= 0 && regnum < REG_MAX); return be_parameter(PTYPE_FLOAT_REGISTER, regnum); }
+		static be_parameter make_memory(void *base) { return be_parameter(PTYPE_MEMORY, reinterpret_cast<be_parameter_value>(base)); }
+		static be_parameter make_memory(const void *base) { return be_parameter(PTYPE_MEMORY, reinterpret_cast<be_parameter_value>(const_cast<void *>(base))); }
 
 		// operators
 		bool operator==(be_parameter const &rhs) const { return (m_type == rhs.m_type && m_value == rhs.m_value); }
diff --git a/src/devices/cpu/drcbex86.h b/src/devices/cpu/drcbex86.h
index b39093ddb82..264576bbc3d 100644
--- a/src/devices/cpu/drcbex86.h
+++ b/src/devices/cpu/drcbex86.h
@@ -28,7 +28,7 @@ namespace drc {
 class drcbe_x86 : public drcbe_interface
 {
-	typedef uint32_t (*x86_entry_point_func)(x86code *entry);
+	using x86_entry_point_func = uint32_t (*)(x86code *entry);
 
 public:
 	// construction/destruction
@@ -45,7 +45,7 @@ public:
 
 private:
 	// HACK: leftover from x86emit
-	static int const REG_MAX = 16;
+	static inline constexpr int REG_MAX = 16;
 
 	// a be_parameter is similar to a uml::parameter but maps to native registers/memory
 	class be_parameter
@@ -58,7 +58,6 @@
 			PTYPE_IMMEDIATE,                    // immediate; value = sign-extended to 64 bits
 			PTYPE_INT_REGISTER,                 // integer register; value = 0-REG_MAX
 			PTYPE_FLOAT_REGISTER,               // floating point register; value = 0-REG_MAX
-			PTYPE_VECTOR_REGISTER,              // vector register; value = 0-REG_MAX
 			PTYPE_MEMORY,                       // memory; value = pointer to memory
 			PTYPE_MAX
 		};
@@ -68,15 +67,15 @@
 		// construction
 		be_parameter() : m_type(PTYPE_NONE), m_value(0) { }
-		be_parameter(be_parameter const &param) : m_type(param.m_type), m_value(param.m_value) { }
 		be_parameter(uint64_t val) : m_type(PTYPE_IMMEDIATE), m_value(val) { }
 		be_parameter(drcbe_x86 &drcbe, const uml::parameter &param, uint32_t allowed);
+		be_parameter(const be_parameter &param) = default;
 
 		// creators for types that don't safely default
-		static inline be_parameter make_ireg(int regnum) { assert(regnum >= 0 && regnum < REG_MAX); return be_parameter(PTYPE_INT_REGISTER, regnum); }
-		static inline be_parameter make_freg(int regnum) { assert(regnum >= 0 && regnum < REG_MAX); return be_parameter(PTYPE_FLOAT_REGISTER, regnum); }
-		static inline be_parameter make_memory(void *base) { return be_parameter(PTYPE_MEMORY, reinterpret_cast<be_parameter_value>(base)); }
-		static inline be_parameter make_memory(const void *base) { return be_parameter(PTYPE_MEMORY, reinterpret_cast<be_parameter_value>(const_cast<void *>(base))); }
+		static be_parameter make_ireg(int regnum) { assert(regnum >= 0 && regnum < REG_MAX); return be_parameter(PTYPE_INT_REGISTER, regnum); }
+		static be_parameter make_freg(int regnum) { assert(regnum >= 0 && regnum < REG_MAX); return be_parameter(PTYPE_FLOAT_REGISTER, regnum); }
+		static be_parameter make_memory(void *base) { return be_parameter(PTYPE_MEMORY, reinterpret_cast<be_parameter_value>(base)); }
+		static be_parameter make_memory(const void *base) { return be_parameter(PTYPE_MEMORY, reinterpret_cast<be_parameter_value>(const_cast<void *>(base))); }
 
 		// operators
 		bool operator==(be_parameter const &rhs) const { return (m_type == rhs.m_type && m_value == rhs.m_value); }
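Both headers get the same two modernizations: static inline constexpr makes REG_MAX a C++17 inline variable, so no out-of-class definition is ever needed (the old static int const could require one if the constant was odr-used), and the using alias spells the function-pointer type left to right. Side by side, with generic stand-in types:

```cpp
#include <cstdint>

struct example
{
    // old: static int const REG_MAX = 16;   // may need a separate definition
    static inline constexpr int REG_MAX = 16; // C++17: defined right here

    // old: typedef uint32_t (*entry_point_func)(void *entry);
    using entry_point_func = uint32_t (*)(void *entry); // same type, clearer
};

int main()
{
    static_assert(example::REG_MAX == 16);
    example::entry_point_func fn = nullptr; // usable exactly like the typedef
    return fn ? fn(nullptr) : 0;
}
```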
diff --git a/src/devices/cpu/drcuml.cpp b/src/devices/cpu/drcuml.cpp
index 897e0dc498c..0b2c391e02e 100644
--- a/src/devices/cpu/drcuml.cpp
+++ b/src/devices/cpu/drcuml.cpp
@@ -37,9 +37,12 @@
 #include "emuopts.h"
 #include "drcbec.h"
 #ifdef NATIVE_DRC
+#ifndef ASMJIT_NO_X86
 #include "drcbex86.h"
 #include "drcbex64.h"
 #endif
+#include "drcbearm64.h"
+#endif
 
 #include 
diff --git a/src/devices/cpu/uml.cpp b/src/devices/cpu/uml.cpp
index 7133a5ca0ad..ea77a64afe9 100644
--- a/src/devices/cpu/uml.cpp
+++ b/src/devices/cpu/uml.cpp
@@ -86,7 +86,6 @@ using namespace uml;
 #define PTYPES_IMM      (1 << parameter::PTYPE_IMMEDIATE)
 #define PTYPES_IREG     (1 << parameter::PTYPE_INT_REGISTER)
 #define PTYPES_FREG     (1 << parameter::PTYPE_FLOAT_REGISTER)
-#define PTYPES_VREG     (1 << parameter::PTYPE_VECTOR_REGISTER)
 #define PTYPES_MVAR     (1 << parameter::PTYPE_MAPVAR)
 #define PTYPES_MEM      (1 << parameter::PTYPE_MEMORY)
 #define PTYPES_SIZE     (1 << parameter::PTYPE_SIZE)
@@ -429,7 +428,6 @@ void uml::instruction::simplify()
 				case SIZE_WORD:     convert_to_mov_immediate(s16(m_param[1].immediate()));  break;
 				case SIZE_DWORD:    convert_to_mov_immediate(s32(m_param[1].immediate()));  break;
 				case SIZE_QWORD:    convert_to_mov_immediate(s64(m_param[1].immediate()));  break;
-				case SIZE_DQWORD:   fatalerror("Invalid SEXT target size\n");
 			}
 			break;
diff --git a/src/devices/cpu/uml.h b/src/devices/cpu/uml.h
index 3149d97f2c0..9577b393fcd 100644
--- a/src/devices/cpu/uml.h
+++ b/src/devices/cpu/uml.h
@@ -39,12 +39,6 @@ namespace uml
 	constexpr int REG_F_COUNT = 10;
 	constexpr int REG_F_END = REG_F0 + REG_F_COUNT;
 
-	// vector registers
-	constexpr int REG_V0 = 0xc00;
-	constexpr int REG_V_COUNT = 10;
-	constexpr int REG_V_END = REG_V0 + REG_V_COUNT;
-
-
 	// map variables
 	constexpr int MAPVAR_M0 = 0x1000;
 	constexpr int MAPVAR_COUNT = 10;
 	constexpr int MAPVAR_END = MAPVAR_M0 + MAPVAR_COUNT;
@@ -105,7 +99,6 @@
 		SIZE_WORD,                          // 2-byte
 		SIZE_DWORD,                         // 4-byte
 		SIZE_QWORD,                         // 8-byte
-		SIZE_DQWORD,                        // 16-byte (vector)
 		SIZE_SHORT = SIZE_DWORD,            // 4-byte (float)
 		SIZE_DOUBLE = SIZE_QWORD            // 8-byte (float)
 	};
@@ -284,7 +277,6 @@
 			PTYPE_IMMEDIATE,                    // immediate; value = sign-extended to 64 bits
 			PTYPE_INT_REGISTER,                 // integer register; value = REG_I0 - REG_I_END
 			PTYPE_FLOAT_REGISTER,               // floating point register; value = REG_F0 - REG_F_END
-			PTYPE_VECTOR_REGISTER,              // vector register; value = REG_V0 - REG_V_END
 			PTYPE_MAPVAR,                       // map variable; value = MAPVAR_M0 - MAPVAR_END
 			PTYPE_MEMORY,                       // memory; value = pointer to memory
 			PTYPE_SIZE,                         // size; value = operand_size
@@ -305,19 +297,18 @@
 		constexpr parameter() : m_type(PTYPE_NONE), m_value(0) { }
 		constexpr parameter(parameter const &param) : m_type(param.m_type), m_value(param.m_value) { }
 		constexpr parameter(u64 val) : m_type(PTYPE_IMMEDIATE), m_value(val) { }
-		parameter(operand_size size, memory_scale scale) : m_type(PTYPE_SIZE_SCALE), m_value((scale << 4) | size) { assert(size >= SIZE_BYTE && size <= SIZE_DQWORD); assert(scale >= SCALE_x1 && scale <= SCALE_x8); }
-		parameter(operand_size size, memory_space space) : m_type(PTYPE_SIZE_SPACE), m_value((space << 4) | size) { assert(size >= SIZE_BYTE && size <= SIZE_DQWORD); assert(space >= SPACE_PROGRAM && space <= SPACE_IO); }
+		parameter(operand_size size, memory_scale scale) : m_type(PTYPE_SIZE_SCALE), m_value((scale << 4) | size) { assert(size >= SIZE_BYTE && size <= SIZE_QWORD); assert(scale >= SCALE_x1 && scale <= SCALE_x8); }
+		parameter(operand_size size, memory_space space) : m_type(PTYPE_SIZE_SPACE), m_value((space << 4) | size) { assert(size >= SIZE_BYTE && size <= SIZE_QWORD); assert(space >= SPACE_PROGRAM && space <= SPACE_IO); }
 		parameter(code_handle &handle) : m_type(PTYPE_CODE_HANDLE), m_value(reinterpret_cast<parameter_value>(&handle)) { }
 		constexpr parameter(code_label const &label) : m_type(PTYPE_CODE_LABEL), m_value(label) { }
 
 		// creators for types that don't safely default
 		static parameter make_ireg(int regnum) { assert(regnum >= REG_I0 && regnum < REG_I_END); return parameter(PTYPE_INT_REGISTER, regnum); }
 		static parameter make_freg(int regnum) { assert(regnum >= REG_F0 && regnum < REG_F_END); return parameter(PTYPE_FLOAT_REGISTER, regnum); }
-		static parameter make_vreg(int regnum) { assert(regnum >= REG_V0 && regnum < REG_V_END); return parameter(PTYPE_VECTOR_REGISTER, regnum); }
 		static parameter make_mapvar(int mvnum) { assert(mvnum >= MAPVAR_M0 && mvnum < MAPVAR_END); return parameter(PTYPE_MAPVAR, mvnum); }
 		static parameter make_memory(void *base) { return parameter(PTYPE_MEMORY, reinterpret_cast<parameter_value>(base)); }
 		static parameter make_memory(void const *base) { return parameter(PTYPE_MEMORY, reinterpret_cast<parameter_value>(const_cast<void *>(base))); }
-		static parameter make_size(operand_size size) { assert(size >= SIZE_BYTE && size <= SIZE_DQWORD); return parameter(PTYPE_SIZE, size); }
+		static parameter make_size(operand_size size) { assert(size >= SIZE_BYTE && size <= SIZE_QWORD); return parameter(PTYPE_SIZE, size); }
 		static parameter make_string(char const *string) { return parameter(PTYPE_STRING, reinterpret_cast<parameter_value>(const_cast<char *>(string))); }
 		static parameter make_cfunc(c_function func) { return parameter(PTYPE_C_FUNCTION, reinterpret_cast<parameter_value>(func)); }
 		static parameter make_rounding(float_rounding_mode mode) { assert(mode >= ROUND_TRUNC && mode <= ROUND_DEFAULT); return parameter(PTYPE_ROUNDING, mode); }
@@ -331,7 +322,6 @@
 		u64 immediate() const { assert(m_type == PTYPE_IMMEDIATE); return m_value; }
 		int ireg() const { assert(m_type == PTYPE_INT_REGISTER); assert(m_value >= REG_I0 && m_value < REG_I_END); return m_value; }
 		int freg() const { assert(m_type == PTYPE_FLOAT_REGISTER); assert(m_value >= REG_F0 && m_value < REG_F_END); return m_value; }
-		int vreg() const { assert(m_type == PTYPE_VECTOR_REGISTER); assert(m_value >= REG_V0 && m_value < REG_V_END); return m_value; }
 		int mapvar() const { assert(m_type == PTYPE_MAPVAR); assert(m_value >= MAPVAR_M0 && m_value < MAPVAR_END); return m_value; }
 		void *memory() const { assert(m_type == PTYPE_MEMORY); return reinterpret_cast<void *>(m_value); }
 		operand_size size() const { assert(m_type == PTYPE_SIZE || m_type == PTYPE_SIZE_SCALE || m_type == PTYPE_SIZE_SPACE); return operand_size(m_value & 15); }
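The PTYPE_SIZE_SCALE and PTYPE_SIZE_SPACE constructors pack two enums into one value: the operand size in the low nibble and the scale or address space in the next one, which is what the size() accessor's m_value & 15 undoes. Restated as a standalone check (mirrors the expressions above, not the actual class):

```cpp
#include <cassert>

enum operand_size { SIZE_BYTE, SIZE_WORD, SIZE_DWORD, SIZE_QWORD };
enum memory_scale { SCALE_x1, SCALE_x2, SCALE_x4, SCALE_x8 };

int main()
{
    // pack as in parameter(operand_size, memory_scale): (scale << 4) | size
    const unsigned packed = (SCALE_x4 << 4) | SIZE_DWORD;

    assert(operand_size(packed & 15) == SIZE_DWORD);      // size() does m_value & 15
    assert(memory_scale((packed >> 4) & 15) == SCALE_x4); // scale lives one nibble up
    return 0;
}
```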
@@ -347,7 +337,6 @@
 		constexpr bool is_immediate() const { return m_type == PTYPE_IMMEDIATE; }
 		constexpr bool is_int_register() const { return m_type == PTYPE_INT_REGISTER; }
 		constexpr bool is_float_register() const { return m_type == PTYPE_FLOAT_REGISTER; }
-		constexpr bool is_vector_register() const { return m_type == PTYPE_VECTOR_REGISTER; }
 		constexpr bool is_mapvar() const { return m_type == PTYPE_MAPVAR; }
 		constexpr bool is_memory() const { return m_type == PTYPE_MEMORY; }
 		constexpr bool is_size() const { return m_type == PTYPE_SIZE; }
@@ -621,7 +610,6 @@
 	// global inline functions to specify a register parameter by index
 	inline parameter ireg(int n) { return parameter::make_ireg(REG_I0 + n); }
 	inline parameter freg(int n) { return parameter::make_freg(REG_F0 + n); }
-	inline parameter vreg(int n) { return parameter::make_vreg(REG_V0 + n); }
 	inline parameter mapvar(int n) { return parameter::make_mapvar(MAPVAR_M0 + n); }
 
 	// global inline functions to define memory parameters
@@ -650,17 +638,6 @@
 	const parameter F8(parameter::make_freg(REG_F0 + 8));
 	const parameter F9(parameter::make_freg(REG_F0 + 9));
 
-	const parameter V0(parameter::make_vreg(REG_V0 + 0));
-	const parameter V1(parameter::make_vreg(REG_V0 + 1));
-	const parameter V2(parameter::make_vreg(REG_V0 + 2));
-	const parameter V3(parameter::make_vreg(REG_V0 + 3));
-	const parameter V4(parameter::make_vreg(REG_V0 + 4));
-	const parameter V5(parameter::make_vreg(REG_V0 + 5));
-	const parameter V6(parameter::make_vreg(REG_V0 + 6));
-	const parameter V7(parameter::make_vreg(REG_V0 + 7));
-	const parameter V8(parameter::make_vreg(REG_V0 + 8));
-	const parameter V9(parameter::make_vreg(REG_V0 + 9));
-
 	const parameter M0(parameter::make_mapvar(MAPVAR_M0 + 0));
 	const parameter M1(parameter::make_mapvar(MAPVAR_M0 + 1));
 	const parameter M2(parameter::make_mapvar(MAPVAR_M0 + 2));