feat(gx): add directxmath for MinGW

This commit is contained in:
superp00t 2024-09-07 13:54:54 -04:00
parent 0d09dee4b3
commit 3e77eb935a
51 changed files with 49251 additions and 12 deletions

View File

@ -6,24 +6,24 @@ file(GLOB GX_SOURCES
"texture/*.cpp"
)
if(WHOA_SYSTEM_WIN)
if (WHOA_SYSTEM_WIN)
file(GLOB D3D_SOURCES "d3d/*.cpp")
list(APPEND GX_SOURCES ${D3D_SOURCES})
endif()
endif ()
if(WHOA_SYSTEM_MAC)
if (WHOA_SYSTEM_MAC)
file(GLOB GLL_SOURCES "gll/*.cpp" "gll/*.mm")
set_source_files_properties(${GLL_SOURCES}
PROPERTIES COMPILE_FLAGS "-x objective-c++"
)
list(APPEND GX_SOURCES ${GLL_SOURCES})
endif()
endif ()
# Build OpenGL/SDL graphics device if enabled
if(WHOA_BUILD_GLSDL)
if (WHOA_BUILD_GLSDL)
file(GLOB GLSDL_SOURCES "glsdl/*.cpp")
list(APPEND GX_SOURCES ${GLSDL_SOURCES})
endif()
endif ()
add_library(gx STATIC ${GX_SOURCES})
@ -46,12 +46,20 @@ target_link_libraries(gx
tempest
)
if(WHOA_SYSTEM_WIN)
if (WHOA_SYSTEM_WIN)
target_link_libraries(gx
PRIVATE
d3d9.lib
)
endif()
# MSVC includes DirectXMath by default, so only non-MSVC toolchains
# (e.g. MinGW) need to link the DirectXMath target explicitly.
# NOTE(review): presumably `DirectXMath` is the INTERFACE target created by
# vendor/directxmath-3.19.0/CMakeLists.txt; confirm that directory is added
# (add_subdirectory) before this file is processed.
if (NOT MSVC)
target_link_libraries(gx
PRIVATE
DirectXMath
)
endif ()
endif ()
# Link SDL2 and GLEW for GLSDL
if (WHOA_BUILD_GLSDL)
@ -60,12 +68,12 @@ if (WHOA_BUILD_GLSDL)
SDL2::SDL2-static
libglew_static
)
endif()
endif ()
if(WHOA_SYSTEM_MAC)
if (WHOA_SYSTEM_MAC)
target_link_libraries(gx
PRIVATE
"-framework AppKit"
"-framework OpenGL"
)
endif()
endif ()

View File

@ -4,7 +4,7 @@
#include "gx/texture/CGxTex.hpp"
#include "math/Utils.hpp"
#include <algorithm>
#include <directxmath.h>
#include <DirectXMath.h>
int32_t CGxDeviceD3d::s_clientAdjustWidth;
int32_t CGxDeviceD3d::s_clientAdjustHeight;

View File

@ -0,0 +1,8 @@
# Auto detect text files and perform LF normalization
* text=auto
# Explicitly declare code/VS files as CRLF
*.cpp eol=crlf
*.cmd eol=crlf
*.h eol=crlf
*.inl eol=crlf

24
vendor/directxmath-3.19.0/.gitignore vendored Normal file
View File

@ -0,0 +1,24 @@
*.psess
*.vsp
*.log
*.err
*.wrn
*.suo
*.sdf
*.user
*.i
*.vspscc
*.opensdf
*.opendb
*.ipch
*.cache
*.tlog
*.lastbuildstate
*.ilk
*.VC.db
*.nupkg
.vs
/Tests
/wiki
/out
/CMakeUserPresets.json

View File

@ -0,0 +1,33 @@
<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
<metadata minClientVersion="2.8.6">
<id>directxmath</id>
<version>0.0.0-SpecifyVersionOnCommandline</version>
<title>DirectXMath</title>
<authors>Microsoft</authors>
<owners>microsoft,directxtk</owners>
<summary>DirectXMath is an all inline SIMD C++ linear algebra library for use in games and graphics apps.</summary>
<description>The DirectXMath API provides SIMD-friendly C++ types and functions for common linear algebra and graphics math operations common to DirectX applications. The library provides optimized versions for Windows 32-bit (x86), Windows 64-bit (x64), and Windows on ARM through SSE2 and ARM-NEON intrinsics support in the Visual Studio compiler.</description>
<releaseNotes>Matches the February 2024 release.</releaseNotes>
<projectUrl>http://go.microsoft.com/fwlink/?LinkID=615560</projectUrl>
<repository type="git" url="https://github.com/microsoft/DirectXMath.git" />
<icon>images\icon.jpg</icon>
<readme>docs\README.md</readme>
<license type="expression">MIT</license>
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<copyright>&#169; Microsoft Corporation. All rights reserved.</copyright>
<tags>C++ native DirectX math nativepackage</tags>
</metadata>
<files>
<file target="docs" src="*.md" />
<file target="include" src="Inc\*" />
<file src=".nuget/directxmath.targets" target="build\native" />
<file src=".nuget/icon.jpg" target="images\" />
</files>
</package>

View File

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="utf-8"?>
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemDefinitionGroup>
<ClCompile>
<PreprocessorDefinitions>HAS_DIRECTXMATH;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(MSBuildThisFileDirectory)..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
</Project>

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 KiB

View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8" ?>
<SignConfigXML>
<job dest="__OUTPATHROOT__" certSubject="NuGet" jobname="NugetSigningTest">
<file src="__INPATHROOT__\directxmath*.nupkg" signType="CP-401405" dest="__OUTPATHROOT__\directxmath*.nupkg" />
</job>
</SignConfigXML>

113
vendor/directxmath-3.19.0/CMakeLists.txt vendored Normal file
View File

@ -0,0 +1,113 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Minimum CMake version required by this build script.
cmake_minimum_required (VERSION 3.20)
# Version string shared by project() and the generated package version file.
set(DIRECTXMATH_VERSION 3.1.9)
# Declares the project; PROJECT_NAME ("DirectXMath") is reused as the library
# target name and as the basis for the package file names.
project(DirectXMath
VERSION ${DIRECTXMATH_VERSION}
DESCRIPTION "DirectXMath SIMD C++ math library"
HOMEPAGE_URL "https://go.microsoft.com/fwlink/?LinkID=615560"
LANGUAGES CXX)
# Supplies the CMAKE_INSTALL_* directory variables used by the install rules.
include(GNUInstallDirs)
#--- Library
# Public headers of the library; this list feeds the install(FILES ...) rule.
set(LIBRARY_HEADERS
    Inc/DirectXCollision.h
    Inc/DirectXCollision.inl
    Inc/DirectXColors.h
    Inc/DirectXMath.h
    Inc/DirectXMathConvert.inl
    Inc/DirectXMathMatrix.inl
    Inc/DirectXMathMisc.inl
    Inc/DirectXMathVector.inl
    Inc/DirectXPackedVector.h
    Inc/DirectXPackedVector.inl)

# Header-only library: the INTERFACE target carries usage requirements only.
add_library(${PROJECT_NAME} INTERFACE)

# Namespaced alias matching the name produced by
# install(EXPORT ... NAMESPACE Microsoft::), so projects that vendor this
# directory with add_subdirectory() can link Microsoft::DirectXMath exactly as
# find_package() consumers do. The plain ${PROJECT_NAME} target stays available.
if(NOT TARGET Microsoft::${PROJECT_NAME})
    add_library(Microsoft::${PROJECT_NAME} ALIAS ${PROJECT_NAME})
endif()

# Build tree consumers include straight from Inc/; installed consumers use the
# install-prefix-relative include directory.
target_include_directories(${PROJECT_NAME} INTERFACE
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/Inc>
    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/directxmath>)

# Consumers must compile as C++11 or newer.
target_compile_features(${PROJECT_NAME} INTERFACE cxx_std_11)
#--- Package
include(CMakePackageConfigHelpers)
# Package files are named after the lower-cased project name.
string(TOLOWER ${PROJECT_NAME} PACKAGE_NAME)
# Version file for find_package(); ARCH_INDEPENDENT because the library is header-only.
write_basic_package_version_file(
${PACKAGE_NAME}-config-version.cmake
VERSION ${DIRECTXMATH_VERSION}
COMPATIBILITY AnyNewerVersion
ARCH_INDEPENDENT)
# Register the INTERFACE target in the export set written by install(EXPORT) below.
install(TARGETS ${PROJECT_NAME}
EXPORT ${PROJECT_NAME}-targets
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
# Generate the package config file from the template in build/.
configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/build/${PROJECT_NAME}-config.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}-config.cmake
INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PACKAGE_NAME})
# Installed consumers see the target under the Microsoft:: namespace.
install(EXPORT ${PROJECT_NAME}-targets
FILE ${PROJECT_NAME}-targets.cmake
NAMESPACE Microsoft::
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PACKAGE_NAME})
# Headers are installed into a directxmath/ subdirectory of the include dir.
install(FILES ${LIBRARY_HEADERS}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/directxmath)
# Install the generated config and version files next to the exported targets.
install(FILES
${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}-config.cmake
${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}-config-version.cmake
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PACKAGE_NAME})
# Create pkg-config file
include(build/JoinPaths.cmake)
# from: https://github.com/jtojnar/cmake-snips#concatenating-paths-when-building-pkg-config-files
# The escaped "\${prefix}" is kept literal so pkg-config expands it, not CMake.
join_paths(DIRECTXMATH_INCLUDEDIR_FOR_PKG_CONFIG "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}")
join_paths(DIRECTXMATH_LIBDIR_FOR_PKG_CONFIG "\${prefix}" "${CMAKE_INSTALL_LIBDIR}")
# @ONLY: only @VAR@ references in the template are substituted.
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/build/DirectXMath.pc.in"
"${CMAKE_CURRENT_BINARY_DIR}/DirectXMath.pc" @ONLY)
# Install the pkg-config file
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/DirectXMath.pc"
DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
#--- Test suite
# Target architecture detection. DXMATH_ARCHITECTURE is not read anywhere else
# in this file; presumably it is consumed by Tests/CMakeLists.txt (confirm).
# Precedence: vcpkg triplet, then the generator platform (-A), then the Visual
# Studio default platform, then a pre-set DXMATH_ARCHITECTURE, else "x64".
if(DEFINED VCPKG_TARGET_ARCHITECTURE)
set(DXMATH_ARCHITECTURE ${VCPKG_TARGET_ARCHITECTURE})
elseif(CMAKE_GENERATOR_PLATFORM MATCHES "^[Ww][Ii][Nn]32$")
set(DXMATH_ARCHITECTURE x86)
elseif(CMAKE_GENERATOR_PLATFORM MATCHES "^[Xx]64$")
set(DXMATH_ARCHITECTURE x64)
elseif(CMAKE_GENERATOR_PLATFORM MATCHES "^[Aa][Rr][Mm]$")
set(DXMATH_ARCHITECTURE arm)
elseif(CMAKE_GENERATOR_PLATFORM MATCHES "^[Aa][Rr][Mm]64$")
set(DXMATH_ARCHITECTURE arm64)
elseif(CMAKE_VS_PLATFORM_NAME_DEFAULT MATCHES "^[Ww][Ii][Nn]32$")
set(DXMATH_ARCHITECTURE x86)
elseif(CMAKE_VS_PLATFORM_NAME_DEFAULT MATCHES "^[Xx]64$")
set(DXMATH_ARCHITECTURE x64)
elseif(CMAKE_VS_PLATFORM_NAME_DEFAULT MATCHES "^[Aa][Rr][Mm]$")
set(DXMATH_ARCHITECTURE arm)
elseif(CMAKE_VS_PLATFORM_NAME_DEFAULT MATCHES "^[Aa][Rr][Mm]64$")
set(DXMATH_ARCHITECTURE arm64)
elseif(NOT (DEFINED DXMATH_ARCHITECTURE))
# Nothing identified the platform and the user did not preset a value.
set(DXMATH_ARCHITECTURE "x64")
endif()
# Test registration: the tests are added only on desktop Windows and only when
# the optional Tests/ directory is present next to this file.
include(CTest)
if(BUILD_TESTING AND WIN32 AND (NOT WINDOWS_STORE) AND (EXISTS "${CMAKE_CURRENT_LIST_DIR}/Tests/CMakeLists.txt"))
enable_testing()
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/Tests)
endif()

View File

@ -0,0 +1,175 @@
{
"version": 2,
"configurePresets": [
{
"name": "base",
"displayName": "Basic Config",
"description": "Basic build using Ninja generator",
"generator": "Ninja",
"hidden": true,
"binaryDir": "${sourceDir}/out/build/${presetName}",
"cacheVariables": { "CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}" }
},
{
"name": "x64",
"architecture": {
"value": "x64",
"strategy": "external"
},
"cacheVariables": { "DXMATH_ARCHITECTURE": "x64" },
"hidden": true
},
{
"name": "x86",
"architecture": {
"value": "x86",
"strategy": "external"
},
"cacheVariables": { "DXMATH_ARCHITECTURE": "x86" },
"hidden": true
},
{
"name": "ARM",
"architecture": {
"value": "arm",
"strategy": "external"
},
"cacheVariables": { "DXMATH_ARCHITECTURE": "arm" },
"hidden": true
},
{
"name": "ARM64",
"architecture": {
"value": "arm64",
"strategy": "external"
},
"cacheVariables": { "DXMATH_ARCHITECTURE": "arm64" },
"hidden": true
},
{
"name": "Debug",
"cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" },
"hidden": true
},
{
"name": "Release",
"cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" },
"hidden": true
},
{
"name": "OneCore",
"cacheVariables": { "BUILD_FOR_ONECORE": true },
"hidden": true
},
{
"name": "AVX",
"cacheVariables": { "BUILD_AVX_TEST": true },
"hidden": true
},
{
"name": "AVX2",
"cacheVariables": { "BUILD_AVX2_TEST": true },
"hidden": true
},
{
"name": "F16C",
"cacheVariables": { "BUILD_F16C_TEST": true },
"hidden": true
},
{
"name": "NI",
"cacheVariables": { "BUILD_NO_INTRINSICS": true },
"hidden": true
},
{
"name": "MSVC",
"hidden": true,
"cacheVariables": {
"CMAKE_CXX_COMPILER": "cl.exe"
},
"toolset": {
"value": "host=x64",
"strategy": "external"
}
},
{
"name": "Clang",
"hidden": true,
"cacheVariables": {
"CMAKE_CXX_COMPILER": "clang-cl.exe"
},
"toolset": {
"value": "host=x64",
"strategy": "external"
}
},
{
"name": "GNUC",
"hidden": true,
"cacheVariables": {
"CMAKE_CXX_COMPILER": "g++.exe"
},
"toolset": {
"value": "host=x64",
"strategy": "external"
}
},
{
"name": "Intel",
"hidden": true,
"cacheVariables": {
"CMAKE_CXX_COMPILER": "icl.exe"
},
"toolset": {
"value": "host=x64",
"strategy": "external"
}
},
{
"name": "IntelLLVM",
"hidden": true,
"cacheVariables": {
"CMAKE_CXX_COMPILER": "icx.exe"
},
"toolset": {
"value": "host=x64",
"strategy": "external"
}
},
{ "name": "x64-Debug" , "description": "MSVC for x64 (Debug) - SSE/SSE2", "inherits": [ "base", "x64", "Debug", "MSVC" ] },
{ "name": "x64-Release" , "description": "MSVC for x64 (Release) - SSE/SSE2", "inherits": [ "base", "x64", "Release", "MSVC" ] },
{ "name": "x86-Debug" , "description": "MSVC for x86 (Debug) - SSE/SSE2", "inherits": [ "base", "x86", "Debug", "MSVC" ] },
{ "name": "x86-Release" , "description": "MSVC for x86 (Release) - SSE/SSE2", "inherits": [ "base", "x86", "Release", "MSVC" ] },
{ "name": "arm-Debug" , "description": "MSVC for ARM (Debug) - ARM-NEON", "inherits": [ "base", "ARM", "Debug", "MSVC" ] },
{ "name": "arm-Release" , "description": "MSVC for ARM (Release) - ARM-NEON", "inherits": [ "base", "ARM", "Release", "MSVC" ] },
{ "name": "arm64-Debug" , "description": "MSVC for ARM64 (Debug) - ARM-NEON", "inherits": [ "base", "ARM64", "Debug", "MSVC" ] },
{ "name": "arm64-Release", "description": "MSVC for ARM64 (Release) - ARM-NEON", "inherits": [ "base", "ARM64", "Release", "MSVC" ] },
{ "name": "x64-Debug-Clang" , "description": "Clang/LLVM for x64 (Debug) - SSE/SSE2", "inherits": [ "base", "x64", "Debug", "Clang" ] },
{ "name": "x64-Release-Clang" , "description": "Clang/LLVM for x64 (Release) - SSE/SSE2", "inherits": [ "base", "x64", "Release", "Clang" ] },
{ "name": "x86-Debug-Clang" , "description": "Clang/LLVM for x86 (Debug) - SSE/SSE2", "inherits": [ "base", "x86", "Debug", "Clang" ], "environment": { "CXXFLAGS": "-m32" } },
{ "name": "x86-Release-Clang" , "description": "Clang/LLVM for x86 (Release) - SSE/SSE2", "inherits": [ "base", "x86", "Release", "Clang" ], "environment": { "CXXFLAGS": "-m32" } },
{ "name": "arm64-Debug-Clang" , "description": "Clang/LLVM for AArch64 (Debug) - ARM-NEON", "inherits": [ "base", "ARM64", "Debug", "Clang" ], "environment": { "CXXFLAGS": "--target=arm64-pc-windows-msvc" } },
{ "name": "arm64-Release-Clang", "description": "Clang/LLVM for AArch64 (Release) - ARM-NEON", "inherits": [ "base", "ARM64", "Release", "Clang" ], "environment": { "CXXFLAGS": "--target=arm64-pc-windows-msvc" } }
],
"testPresets": [
{ "name": "x64-Debug" , "configurePreset": "x64-Debug" },
{ "name": "x64-Release" , "configurePreset": "x64-Release" },
{ "name": "x86-Debug" , "configurePreset": "x86-Debug" },
{ "name": "x86-Release" , "configurePreset": "x86-Release" },
{ "name": "arm64-Debug" , "configurePreset": "arm64-Debug" },
{ "name": "arm64-Release", "configurePreset": "arm64-Release" },
{ "name": "x64-Debug-Clang" , "configurePreset": "x64-Debug-Clang" },
{ "name": "x64-Release-Clang" , "configurePreset": "x64-Release-Clang" },
{ "name": "x86-Debug-Clang" , "configurePreset": "x86-Debug-Clang" },
{ "name": "x86-Release-Clang" , "configurePreset": "x86-Release-Clang" },
{ "name": "arm64-Debug-Clang" , "configurePreset": "arm64-Debug-Clang" },
{ "name": "arm64-Release-Clang", "configurePreset": "arm64-Release-Clang" }
]
}

View File

@ -0,0 +1,275 @@
//-------------------------------------------------------------------------------------
// DirectXMathAVX.h -- AVX (version 1) extensions for SIMD C++ Math library
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#pragma once
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
#error AVX not supported on ARM platform
#endif
#include <DirectXMath.h>
namespace DirectX
{
namespace AVX
{
// Reports whether the CPUID feature flags required by the AVX:: functions are set.
// Returns false when CPUID leaf 1 is unavailable or any required bit is clear.
// Note: this tests the OSXSAVE flag only; it does not query XCR0 via XGETBV.
inline bool XMVerifyAVXSupport()
{
// Should return true for AMD Bulldozer, Intel "Sandy Bridge", and Intel "Ivy Bridge" or later processors
// with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = {-1};
// Leaf 0: CPUInfo[0] receives the highest supported standard leaf.
// Two call shapes: the GCC/Clang <cpuid.h> macro form, or the MSVC-style intrinsic.
#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
__cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
#else
__cpuid( CPUInfo, 0 );
#endif
if ( CPUInfo[0] < 1 )
return false;
// Leaf 1: feature flags; CPUInfo[2] holds ECX.
#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
__cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
#else
__cpuid(CPUInfo, 1 );
#endif
// We check for AVX, OSXSAVE, SSE4.1, and SSE3
// (ECX bits 28, 27, 19 and 0 respectively => mask 0x18080001)
return ( (CPUInfo[2] & 0x18080001) == 0x18080001 );
}
//-------------------------------------------------------------------------------------
// Vector
//-------------------------------------------------------------------------------------
// Loads the float at pValue and replicates it into all four lanes.
inline XMVECTOR XM_CALLCONV XMVectorReplicatePtr( _In_ const float *pValue )
{
return _mm_broadcast_ss( pValue );
}
// Replicates lane 0 (x) of V into all four lanes.
inline XMVECTOR XM_CALLCONV XMVectorSplatX( FXMVECTOR V )
{
return _mm_permute_ps( V, _MM_SHUFFLE(0, 0, 0, 0) );
}
// Replicates lane 1 (y) of V into all four lanes.
inline XMVECTOR XM_CALLCONV XMVectorSplatY( FXMVECTOR V )
{
return _mm_permute_ps( V, _MM_SHUFFLE(1, 1, 1, 1) );
}
// Replicates lane 2 (z) of V into all four lanes.
inline XMVECTOR XM_CALLCONV XMVectorSplatZ( FXMVECTOR V )
{
return _mm_permute_ps( V, _MM_SHUFFLE(2, 2, 2, 2) );
}
// Replicates lane 3 (w) of V into all four lanes.
inline XMVECTOR XM_CALLCONV XMVectorSplatW( FXMVECTOR V )
{
return _mm_permute_ps( V, _MM_SHUFFLE(3, 3, 3, 3) );
}
// Runtime swizzle: result lane i takes the lane of V selected by Ei (each must be 0..3).
inline XMVECTOR XM_CALLCONV XMVectorSwizzle( FXMVECTOR V, uint32_t E0, uint32_t E1, uint32_t E2, uint32_t E3 )
{
    assert( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) );
    _Analysis_assume_( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) );

    // Gather the four selectors into memory, then load them (unaligned) as the
    // per-lane control vector for the variable permute.
    unsigned int selectors[4] = { E0, E1, E2, E3 };
    const __m128i* pSelectors = reinterpret_cast<const __m128i *>(&selectors[0]);
    return _mm_permutevar_ps( V, _mm_loadu_si128( pSelectors ) );
}
// Runtime permute across two vectors. Each Permute* index must be 0..7:
// 0..3 selects that lane from V1, 4..7 selects lane (index - 4) from V2.
inline XMVECTOR XM_CALLCONV XMVectorPermute( FXMVECTOR V1, FXMVECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW )
{
assert( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 );
_Analysis_assume_( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 );
static const XMVECTORU32 three = { { { 3, 3, 3, 3 } } };
// 16-byte aligned so the aligned load below is valid.
XM_ALIGNED_DATA(16) unsigned int elem[4] = { PermuteX, PermuteY, PermuteZ, PermuteW };
__m128i vControl = _mm_load_si128( reinterpret_cast<const __m128i *>(&elem[0]) );
// Lanes whose index is > 3 become all-ones: those lanes come from V2.
__m128i vSelect = _mm_cmpgt_epi32( vControl, three );
// Reduce every index to 0..3 so it addresses a lane within one source vector.
vControl = _mm_castps_si128( _mm_and_ps( _mm_castsi128_ps( vControl ), three ) );
// Permute both sources with the same control, then blend per lane with the mask.
__m128 shuffled1 = _mm_permutevar_ps( V1, vControl );
__m128 shuffled2 = _mm_permutevar_ps( V2, vControl );
__m128 masked1 = _mm_andnot_ps( _mm_castsi128_ps( vSelect ), shuffled1 );
__m128 masked2 = _mm_and_ps( _mm_castsi128_ps( vSelect ), shuffled2 );
return _mm_or_ps( masked1, masked2 );
}
// Returns four consecutive lanes, starting at lane `Elements` (0..3), of the
// eight-lane sequence formed by V1 followed by V2.
inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2, uint32_t Elements)
{
    assert( Elements < 4 );
    _Analysis_assume_( Elements < 4 );

    const uint32_t first = Elements;
    return AVX::XMVectorPermute(V1, V2, first, first + 1, first + 2, first + 3);
}
// Rotates the lanes of V left by `Elements` (0..3): result lane i is V lane (i + Elements) mod 4.
inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V, uint32_t Elements)
{
    assert( Elements < 4 );
    _Analysis_assume_( Elements < 4 );

    const uint32_t laneX = Elements & 3;
    const uint32_t laneY = (Elements + 1) & 3;
    const uint32_t laneZ = (Elements + 2) & 3;
    const uint32_t laneW = (Elements + 3) & 3;
    return AVX::XMVectorSwizzle( V, laneX, laneY, laneZ, laneW );
}
// Rotates the lanes of V right by `Elements` (0..3): result lane i is V lane (i - Elements) mod 4.
inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V, uint32_t Elements)
{
    assert( Elements < 4 );
    _Analysis_assume_( Elements < 4 );

    const uint32_t laneX = (4 - Elements) & 3;
    const uint32_t laneY = (5 - Elements) & 3;
    const uint32_t laneZ = (6 - Elements) & 3;
    const uint32_t laneW = (7 - Elements) & 3;
    return AVX::XMVectorSwizzle( V, laneX, laneY, laneZ, laneW );
}
//-------------------------------------------------------------------------------------
// Permute Templates
//-------------------------------------------------------------------------------------
namespace Internal
{
    // Compile-time permute helper.
    //   Shuffle - 8-bit shuffle immediate giving the source lane (0..3) for each result lane.
    //   WhichX/Y/Z/W - false: the result lane comes from v1; true: it comes from v2.
    // The primary template is the slow path for permutes that do not map to a single opcode:
    // both inputs are shuffled and then blended lane-by-lane with a constant mask.
    template<uint32_t Shuffle, bool WhichX, bool WhichY, bool WhichZ, bool WhichW> struct PermuteHelper
    {
        static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2)
        {
            // All-ones in the lanes that must be taken from v2.
            static const XMVECTORU32 selectMask =
            { { {
                    WhichX ? 0xFFFFFFFF : 0,
                    WhichY ? 0xFFFFFFFF : 0,
                    WhichZ ? 0xFFFFFFFF : 0,
                    WhichW ? 0xFFFFFFFF : 0,
            } } };

            XMVECTOR shuffled1 = _mm_permute_ps(v1, Shuffle);
            XMVECTOR shuffled2 = _mm_permute_ps(v2, Shuffle);

            XMVECTOR masked1 = _mm_andnot_ps(selectMask, shuffled1);
            XMVECTOR masked2 = _mm_and_ps(selectMask, shuffled2);

            return _mm_or_ps(masked1, masked2);
        }
    };

    // Fast path for permutes that only read from the first vector.
    // The second parameter is left unnamed: it is intentionally unused, and an
    // unnamed parameter avoids the no-effect statement `(v2);` that GCC/Clang warn about.
    template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, false, false>
    {
        static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR) { return _mm_permute_ps(v1, Shuffle); }
    };

    // Fast path for permutes that only read from the second vector.
    // The first parameter is intentionally unused and therefore unnamed.
    template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, true, true>
    {
        static XMVECTOR XM_CALLCONV Permute(FXMVECTOR, FXMVECTOR v2) { return _mm_permute_ps(v2, Shuffle); }
    };

    // Fast path for permutes that read XY from the first vector, ZW from the second.
    template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, true, true>
    {
        static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v1, v2, Shuffle); }
    };

    // Fast path for permutes that read XY from the second vector, ZW from the first.
    template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, false, false>
    {
        static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v2, v1, Shuffle); }
    };
} // namespace Internal
// General permute template
// Compile-time permute across two vectors. Each index must be 0..7:
// 0..3 selects a lane of V1, 4..7 selects lane (index - 4) of V2.
template<uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW>
inline XMVECTOR XM_CALLCONV XMVectorPermute(FXMVECTOR V1, FXMVECTOR V2)
{
    static_assert(PermuteX <= 7, "PermuteX template parameter out of range");
    static_assert(PermuteY <= 7, "PermuteY template parameter out of range");
    static_assert(PermuteZ <= 7, "PermuteZ template parameter out of range");
    static_assert(PermuteW <= 7, "PermuteW template parameter out of range");

    // The low two bits of each index pick the lane inside its source vector (the
    // shuffle immediate); an index above 3 means the lane is read from V2.
    return AVX::Internal::PermuteHelper<
        _MM_SHUFFLE(PermuteW & 3, PermuteZ & 3, PermuteY & 3, PermuteX & 3),
        (PermuteX > 3),
        (PermuteY > 3),
        (PermuteZ > 3),
        (PermuteW > 3)>::Permute(V1, V2);
}
// Special-case permute templates
// Identity cases: every lane comes, in order, from a single input, so that input is returned as-is.
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,3>(FXMVECTOR V1, FXMVECTOR) { return V1; }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,7>(FXMVECTOR, FXMVECTOR V2) { return V2; }
// In-place lane selection: lane i is either V1[i] (index i) or V2[i] (index i + 4), so a
// single blend suffices. Bit i of the blend immediate is set when lane i is taken from V2.
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x1); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x2); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x3); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x4); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x5); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x6); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x7); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x8); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x9); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xA); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xB); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xC); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xD); }
template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xE); }
//-------------------------------------------------------------------------------------
// Swizzle Templates
//-------------------------------------------------------------------------------------
// General swizzle template
// Compile-time swizzle: result lane i takes the lane of V named by the i-th
// template argument (each must be 0..3).
template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t SwizzleW>
inline XMVECTOR XM_CALLCONV XMVectorSwizzle(FXMVECTOR V)
{
static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");
// _MM_SHUFFLE takes its arguments from the highest lane (W) down to the lowest (X).
return _mm_permute_ps( V, _MM_SHUFFLE( SwizzleW, SwizzleZ, SwizzleY, SwizzleX ) );
}
// Specialized swizzles
// <0,1,2,3> is the identity; <0,0,2,2> and <1,1,3,3> duplicate the even / odd
// lanes and map to dedicated move-duplicate intrinsics.
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,1,2,3>(FXMVECTOR V) { return V; }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,0,2,2>(FXMVECTOR V) { return _mm_moveldup_ps(V); }
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1,1,3,3>(FXMVECTOR V) { return _mm_movehdup_ps(V); }
//-------------------------------------------------------------------------------------
// Other Templates
//-------------------------------------------------------------------------------------
// Compile-time shift: returns four consecutive lanes, starting at lane
// `Elements` (0..3), of the eight-lane sequence formed by V1 followed by V2.
template<uint32_t Elements>
inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2)
{
static_assert( Elements < 4, "Elements template parameter out of range" );
return AVX::XMVectorPermute<Elements, (Elements + 1), (Elements + 2), (Elements + 3)>(V1, V2);
}
// Compile-time rotate left: result lane i is V lane (i + Elements) mod 4.
template<uint32_t Elements>
inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V)
{
static_assert( Elements < 4, "Elements template parameter out of range" );
return AVX::XMVectorSwizzle<Elements & 3, (Elements + 1) & 3, (Elements + 2) & 3, (Elements + 3) & 3>(V);
}
// Compile-time rotate right: result lane i is V lane (i - Elements) mod 4.
template<uint32_t Elements>
inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V)
{
static_assert( Elements < 4, "Elements template parameter out of range" );
return AVX::XMVectorSwizzle<(4 - Elements) & 3, (5 - Elements) & 3, (6 - Elements) & 3, (7 - Elements) & 3>(V);
}
} // namespace AVX
} // namespace DirectX;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,95 @@
//-------------------------------------------------------------------------------------
// DirectXMathBE.h -- Big-endian swap extensions for SIMD C++ Math library
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#pragma once
#if (defined(_M_IX86) || defined(_M_X64) || __i386__ || __x86_64__) && !defined(_M_HYBRID_X86_ARM64)
#include <tmmintrin.h>
#endif
#include <DirectXMath.h>
namespace DirectX
{
// Reverses the byte order of each of the four 32-bit lanes of V.
inline XMVECTOR XM_CALLCONV XMVectorEndian(FXMVECTOR V)
{
#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    // Table lookup whose indices reverse the bytes inside every 32-bit lane.
    static const XMVECTORU32 idx = { { { 0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu } } };

    uint8x8x2_t bytes;
    bytes.val[0] = vreinterpret_u8_f32(vget_low_f32(V));
    bytes.val[1] = vreinterpret_u8_f32(vget_high_f32(V));

    const uint8x8_t swappedLow = vtbl2_u8(bytes, vget_low_u32(idx));
    const uint8x8_t swappedHigh = vtbl2_u8(bytes, vget_high_u32(idx));
    return vcombine_f32(vreinterpret_f32_u8(swappedLow), vreinterpret_f32_u8(swappedHigh));
#else
    // Portable path: swap the bytes of each lane with shifts and masks.
    XMVECTORU32 swapped;
    swapped.v = V;
    for (uint32_t lane = 0; lane < 4; ++lane)
    {
        const uint32_t word = swapped.u[lane];
        swapped.u[lane] = ( (word << 24) | ((word & 0xFF00) << 8) | ((word & 0xFF0000) >> 8) | (word >> 24) );
    }
    return swapped.v;
#endif
}
#if (defined(_M_IX86) || defined(_M_X64) || __i386__ || __x86_64__) && !defined(_M_HYBRID_X86_ARM64)
// SSSE3-accelerated variants; only compiled for x86/x64 targets (see the enclosing #if).
namespace SSSE3
{
// Reports whether CPUID advertises SSSE3.
// Returns false when CPUID leaf 1 is unavailable or the SSSE3 bit is clear.
inline bool XMVerifySSSE3Support()
{
// Should return true on AMD Bulldozer, Intel Core i7/i5/i3, Intel Atom, or later processors
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = { -1 };
// Leaf 0: CPUInfo[0] receives the highest supported standard leaf.
#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
__cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
#else
__cpuid(CPUInfo, 0);
#endif
if ( CPUInfo[0] < 1 )
return false;
// Leaf 1: feature flags; CPUInfo[2] holds ECX.
#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
__cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
#else
__cpuid(CPUInfo, 1);
#endif
// Check for SSSE3 instruction set.
// (ECX bit 9 => mask 0x200)
return ( (CPUInfo[2] & 0x200) != 0 );
}
// Reverses the byte order of each 32-bit lane of V with a single byte shuffle.
inline XMVECTOR XM_CALLCONV XMVectorEndian
(
FXMVECTOR V
)
{
// Shuffle control: for every 32-bit lane, pick its bytes in reverse order.
static const XMVECTORU32 idx = { { { 0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu } } };
__m128i Result = _mm_shuffle_epi8( _mm_castps_si128(V), idx );
return _mm_castsi128_ps( Result );
}
} // namespace SSSE3
#endif // X86 || X64
} // namespace DirectX

View File

@ -0,0 +1,471 @@
//-------------------------------------------------------------------------------------
// DirectXMathF16C.h -- F16C/CVT16 extensions for SIMD C++ Math library
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#pragma once
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
#error F16C not supported on ARM platform
#endif
#include <DirectXMath.h>
#include <DirectXPackedVector.h>
namespace DirectX
{
namespace F16C
{
// Reports whether the CPUID feature flags required by the F16C:: functions are set.
// Returns false when CPUID leaf 1 is unavailable or any required bit is clear.
// Note: this tests the OSXSAVE flag only; it does not query XCR0 via XGETBV.
inline bool XMVerifyF16CSupport()
{
// Should return true for AMD "Piledriver" and Intel "Ivy Bridge" processors
// with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
int CPUInfo[4] = { -1 };
// Leaf 0: CPUInfo[0] receives the highest supported standard leaf.
#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
__cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
#else
__cpuid(CPUInfo, 0);
#endif
if ( CPUInfo[0] < 1 )
return false;
// Leaf 1: feature flags; CPUInfo[2] holds ECX.
#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
__cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
#else
__cpuid(CPUInfo, 1);
#endif
// We check for F16C, AVX, OSXSAVE, and SSE4.1
// (ECX bits 29, 28, 27 and 19 respectively => mask 0x38080000)
return ( (CPUInfo[2] & 0x38080000 ) == 0x38080000 );
}
//-------------------------------------------------------------------------------------
// Data conversion
//-------------------------------------------------------------------------------------
// Converts one 16-bit half-precision value to a 32-bit float using the F16C conversion intrinsic.
inline float XMConvertHalfToFloat( PackedVector::HALF Value )
{
    // Place the half in the low 16 bits of a vector, widen it, then read back lane 0.
    const __m128i packedHalf = _mm_cvtsi32_si128( static_cast<int>(Value) );
    return _mm_cvtss_f32( _mm_cvtph_ps( packedHalf ) );
}
// Converts one 32-bit float to a 16-bit half-precision value using the F16C conversion intrinsic.
inline PackedVector::HALF XMConvertFloatToHalf( float Value )
{
    // The conversion is performed with rounding-control immediate 0; the half is
    // returned from the low bits of lane 0.
    const __m128i packedHalf = _mm_cvtps_ph( _mm_set_ss( Value ), 0 );
    return static_cast<PackedVector::HALF>( _mm_cvtsi128_si32( packedHalf ) );
}
// Converts a strided stream of HalfCount half-precision values to floats.
//   pOutputStream - destination of the first float
//   OutputStride  - distance in bytes between consecutive outputs (>= sizeof(float))
//   pInputStream  - address of the first half
//   InputStride   - distance in bytes between consecutive inputs (>= sizeof(HALF))
//   HalfCount     - number of elements to convert
// Returns pOutputStream.
// Elements are converted four at a time; one of six loop variants is chosen by
// whether input and output are packed (stride == element size) and whether a
// packed output starts 16-byte aligned. Leftover elements are converted one by one.
// NOTE(review): the aligned variants use non-temporal stores and no store fence
// is issued in this function; confirm callers do not rely on store ordering.
inline float* XMConvertHalfToFloatStream
(
_Out_writes_bytes_(sizeof(float) + OutputStride * (HalfCount - 1)) float* pOutputStream,
_In_ size_t OutputStride,
_In_reads_bytes_(2 + InputStride * (HalfCount - 1)) const PackedVector::HALF* pInputStream,
_In_ size_t InputStride,
_In_ size_t HalfCount
)
{
using namespace PackedVector;
assert(pOutputStream);
assert(pInputStream);
assert(InputStride >= sizeof(HALF));
assert(OutputStride >= sizeof(float));
// Byte pointers so the strides can be applied directly.
auto pHalf = reinterpret_cast<const uint8_t*>(pInputStream);
auto pFloat = reinterpret_cast<uint8_t*>(pOutputStream);
// i counts the elements converted so far; four is the number of complete groups of 4.
size_t i = 0;
size_t four = HalfCount >> 2;
if (four > 0)
{
if (InputStride == sizeof(HALF))
{
if (OutputStride == sizeof(float))
{
if ((reinterpret_cast<uintptr_t>(pFloat) & 0xF) == 0)
{
// Packed input, aligned & packed output
// Each step advances the output by 16 bytes, so it stays 16-byte aligned.
for (size_t j = 0; j < four; ++j)
{
// One 64-bit load fetches four contiguous halfs.
__m128i HV = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(pHalf));
pHalf += InputStride * 4;
__m128 FV = _mm_cvtph_ps(HV);
_mm_stream_ps(reinterpret_cast<float*>(pFloat), FV);
pFloat += OutputStride * 4;
i += 4;
}
}
else
{
// Packed input, packed output
for (size_t j = 0; j < four; ++j)
{
__m128i HV = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(pHalf));
pHalf += InputStride * 4;
__m128 FV = _mm_cvtph_ps(HV);
_mm_storeu_ps(reinterpret_cast<float*>(pFloat), FV);
pFloat += OutputStride * 4;
i += 4;
}
}
}
else
{
// Packed input, scattered output
for (size_t j = 0; j < four; ++j)
{
__m128i HV = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(pHalf));
pHalf += InputStride * 4;
__m128 FV = _mm_cvtph_ps(HV);
// Lane 0 is stored as a float; lanes 1..3 are extracted as raw 32-bit
// patterns and written through an int pointer.
_mm_store_ss(reinterpret_cast<float*>(pFloat), FV);
pFloat += OutputStride;
*reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 1);
pFloat += OutputStride;
*reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 2);
pFloat += OutputStride;
*reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 3);
pFloat += OutputStride;
i += 4;
}
}
}
else if (OutputStride == sizeof(float))
{
if ((reinterpret_cast<uintptr_t>(pFloat) & 0xF) == 0)
{
// Scattered input, aligned & packed output
for (size_t j = 0; j < four; ++j)
{
// Gather four halfs one at a time, stepping by the input stride.
uint16_t H1 = *reinterpret_cast<const HALF*>(pHalf);
pHalf += InputStride;
uint16_t H2 = *reinterpret_cast<const HALF*>(pHalf);
pHalf += InputStride;
uint16_t H3 = *reinterpret_cast<const HALF*>(pHalf);
pHalf += InputStride;
uint16_t H4 = *reinterpret_cast<const HALF*>(pHalf);
pHalf += InputStride;
// Pack them into the low four 16-bit lanes of a vector.
__m128i HV = _mm_setzero_si128();
HV = _mm_insert_epi16(HV, H1, 0);
HV = _mm_insert_epi16(HV, H2, 1);
HV = _mm_insert_epi16(HV, H3, 2);
HV = _mm_insert_epi16(HV, H4, 3);
__m128 FV = _mm_cvtph_ps(HV);
_mm_stream_ps(reinterpret_cast<float*>(pFloat), FV);
pFloat += OutputStride * 4;
i += 4;
}
}
else
{
// Scattered input, packed output
for (size_t j = 0; j < four; ++j)
{
uint16_t H1 = *reinterpret_cast<const HALF*>(pHalf);
pHalf += InputStride;
uint16_t H2 = *reinterpret_cast<const HALF*>(pHalf);
pHalf += InputStride;
uint16_t H3 = *reinterpret_cast<const HALF*>(pHalf);
pHalf += InputStride;
uint16_t H4 = *reinterpret_cast<const HALF*>(pHalf);
pHalf += InputStride;
__m128i HV = _mm_setzero_si128();
HV = _mm_insert_epi16(HV, H1, 0);
HV = _mm_insert_epi16(HV, H2, 1);
HV = _mm_insert_epi16(HV, H3, 2);
HV = _mm_insert_epi16(HV, H4, 3);
__m128 FV = _mm_cvtph_ps(HV);
_mm_storeu_ps(reinterpret_cast<float*>(pFloat), FV);
pFloat += OutputStride * 4;
i += 4;
}
}
}
else
{
// Scattered input, scattered output
for (size_t j = 0; j < four; ++j)
{
uint16_t H1 = *reinterpret_cast<const HALF*>(pHalf);
pHalf += InputStride;
uint16_t H2 = *reinterpret_cast<const HALF*>(pHalf);
pHalf += InputStride;
uint16_t H3 = *reinterpret_cast<const HALF*>(pHalf);
pHalf += InputStride;
uint16_t H4 = *reinterpret_cast<const HALF*>(pHalf);
pHalf += InputStride;
__m128i HV = _mm_setzero_si128();
HV = _mm_insert_epi16(HV, H1, 0);
HV = _mm_insert_epi16(HV, H2, 1);
HV = _mm_insert_epi16(HV, H3, 2);
HV = _mm_insert_epi16(HV, H4, 3);
__m128 FV = _mm_cvtph_ps(HV);
_mm_store_ss(reinterpret_cast<float*>(pFloat), FV);
pFloat += OutputStride;
*reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 1);
pFloat += OutputStride;
*reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 2);
pFloat += OutputStride;
*reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 3);
pFloat += OutputStride;
i += 4;
}
}
}
// Remainder (fewer than four elements left, or HalfCount < 4): scalar conversion.
for (; i < HalfCount; ++i)
{
*reinterpret_cast<float*>(pFloat) = XMConvertHalfToFloat(reinterpret_cast<const HALF*>(pHalf)[0]);
pHalf += InputStride;
pFloat += OutputStride;
}
return pOutputStream;
}
// Converts FloatCount 32-bit floats, read InputStride bytes apart starting at
// pInputStream, into 16-bit HALF values written OutputStride bytes apart
// starting at pOutputStream. Groups of four are converted with _mm_cvtps_ph
// (rounding immediate 0); any remaining elements go through
// XMConvertFloatToHalf. Returns pOutputStream.
inline PackedVector::HALF* XMConvertFloatToHalfStream
(
    _Out_writes_bytes_(2 + OutputStride * (FloatCount - 1)) PackedVector::HALF* pOutputStream,
    _In_ size_t OutputStride,
    _In_reads_bytes_(sizeof(float) + InputStride * (FloatCount - 1)) const float* pInputStream,
    _In_ size_t InputStride,
    _In_ size_t FloatCount
)
{
    using namespace PackedVector;

    assert(pOutputStream);
    assert(pInputStream);
    assert(InputStride >= sizeof(float));
    assert(OutputStride >= sizeof(HALF));

    auto pSrc = reinterpret_cast<const uint8_t*>(pInputStream);
    auto pDst = reinterpret_cast<uint8_t*>(pOutputStream);

    // Reads four floats spaced InputStride bytes apart into lanes 0..3 and
    // advances the source cursor past them.
    auto gatherFour = [&pSrc, InputStride]() -> __m128
    {
        __m128 lane0 = _mm_load_ss(reinterpret_cast<const float*>(pSrc));
        pSrc += InputStride;
        __m128 lane1 = _mm_broadcast_ss(reinterpret_cast<const float*>(pSrc));
        pSrc += InputStride;
        __m128 lane2 = _mm_broadcast_ss(reinterpret_cast<const float*>(pSrc));
        pSrc += InputStride;
        __m128 lane3 = _mm_broadcast_ss(reinterpret_cast<const float*>(pSrc));
        pSrc += InputStride;
        __m128 low = _mm_blend_ps(lane0, lane1, 0x2);   // lane 1 taken from lane1
        __m128 high = _mm_blend_ps(lane2, lane3, 0x8);  // lane 3 taken from lane3
        return _mm_blend_ps(low, high, 0xC);            // lanes 2,3 taken from high
    };

    // Writes the four converted halves as one contiguous 64-bit store.
    auto storeFourPacked = [&pDst, OutputStride](__m128i halves)
    {
        _mm_storel_epi64(reinterpret_cast<__m128i*>(pDst), halves);
        pDst += OutputStride * 4;
    };

    // Writes the four converted halves one at a time, OutputStride bytes apart.
    auto scatterFour = [&pDst, OutputStride](__m128i halves)
    {
        *reinterpret_cast<HALF*>(pDst) = static_cast<HALF>(_mm_extract_epi16(halves, 0));
        pDst += OutputStride;
        *reinterpret_cast<HALF*>(pDst) = static_cast<HALF>(_mm_extract_epi16(halves, 1));
        pDst += OutputStride;
        *reinterpret_cast<HALF*>(pDst) = static_cast<HALF>(_mm_extract_epi16(halves, 2));
        pDst += OutputStride;
        *reinterpret_cast<HALF*>(pDst) = static_cast<HALF>(_mm_extract_epi16(halves, 3));
        pDst += OutputStride;
    };

    const size_t quadCount = FloatCount >> 2;
    const bool packedInput = (InputStride == sizeof(float));
    const bool packedOutput = (OutputStride == sizeof(HALF));

    // The layout decisions are made once, outside the per-quad loops.
    if (packedInput)
    {
        const bool alignedInput = ((reinterpret_cast<uintptr_t>(pSrc) & 0xF) == 0);
        if (alignedInput)
        {
            if (packedOutput)
            {
                // Aligned and packed input, packed output
                for (size_t q = 0; q < quadCount; ++q)
                {
                    __m128 floats = _mm_load_ps(reinterpret_cast<const float*>(pSrc));
                    pSrc += InputStride * 4;
                    storeFourPacked(_mm_cvtps_ph(floats, 0));
                }
            }
            else
            {
                // Aligned and packed input, scattered output
                for (size_t q = 0; q < quadCount; ++q)
                {
                    __m128 floats = _mm_load_ps(reinterpret_cast<const float*>(pSrc));
                    pSrc += InputStride * 4;
                    scatterFour(_mm_cvtps_ph(floats, 0));
                }
            }
        }
        else if (packedOutput)
        {
            // Packed (unaligned) input, packed output
            for (size_t q = 0; q < quadCount; ++q)
            {
                __m128 floats = _mm_loadu_ps(reinterpret_cast<const float*>(pSrc));
                pSrc += InputStride * 4;
                storeFourPacked(_mm_cvtps_ph(floats, 0));
            }
        }
        else
        {
            // Packed (unaligned) input, scattered output
            for (size_t q = 0; q < quadCount; ++q)
            {
                __m128 floats = _mm_loadu_ps(reinterpret_cast<const float*>(pSrc));
                pSrc += InputStride * 4;
                scatterFour(_mm_cvtps_ph(floats, 0));
            }
        }
    }
    else if (packedOutput)
    {
        // Scattered input, packed output
        for (size_t q = 0; q < quadCount; ++q)
        {
            __m128 floats = gatherFour();
            storeFourPacked(_mm_cvtps_ph(floats, 0));
        }
    }
    else
    {
        // Scattered input, scattered output
        for (size_t q = 0; q < quadCount; ++q)
        {
            __m128 floats = gatherFour();
            scatterFour(_mm_cvtps_ph(floats, 0));
        }
    }

    // Scalar tail for the 0..3 elements left after the four-wide passes.
    for (size_t i = quadCount * 4; i < FloatCount; ++i)
    {
        *reinterpret_cast<HALF*>(pDst) = XMConvertFloatToHalf(*reinterpret_cast<const float*>(pSrc));
        pSrc += InputStride;
        pDst += OutputStride;
    }

    return pOutputStream;
}
//-------------------------------------------------------------------------------------
// Half2
//-------------------------------------------------------------------------------------
// Loads the 32 bits at pSource (two packed halves) and widens the low four
// half lanes of the register to floats with _mm_cvtph_ps.
inline XMVECTOR XM_CALLCONV XMLoadHalf2( _In_ const PackedVector::XMHALF2* pSource )
{
    assert(pSource);
    // _mm_load_ss reads exactly 32 bits and zeroes the upper lanes.
    const float* pPacked = reinterpret_cast<const float*>(pSource);
    const __m128i halves = _mm_castps_si128( _mm_load_ss( pPacked ) );
    return _mm_cvtph_ps( halves );
}
// Converts V to halves with _mm_cvtps_ph (rounding immediate 0) and writes the
// low 32 bits of the result (the first two halves) to pDestination.
inline void XM_CALLCONV XMStoreHalf2( _Out_ PackedVector::XMHALF2* pDestination, _In_ FXMVECTOR V )
{
    assert(pDestination);
    const __m128i halves = _mm_cvtps_ph( V, 0 );
    float* pPacked = reinterpret_cast<float*>(pDestination);
    // _mm_store_ss writes only the lowest 32 bits.
    _mm_store_ss( pPacked, _mm_castsi128_ps( halves ) );
}
//-------------------------------------------------------------------------------------
// Half4
//-------------------------------------------------------------------------------------
// Loads the 64 bits at pSource (four packed halves) and widens them to four
// floats with _mm_cvtph_ps.
inline XMVECTOR XM_CALLCONV XMLoadHalf4( _In_ const PackedVector::XMHALF4* pSource )
{
    assert(pSource);
    const __m128i* pPacked = reinterpret_cast<const __m128i*>(pSource);
    const __m128i halves = _mm_loadl_epi64( pPacked );
    return _mm_cvtph_ps( halves );
}
// Converts the four floats of V to halves with _mm_cvtps_ph (rounding
// immediate 0) and writes the resulting 64 bits to pDestination.
inline void XM_CALLCONV XMStoreHalf4( _Out_ PackedVector::XMHALF4* pDestination, _In_ FXMVECTOR V )
{
    assert(pDestination);
    const __m128i halves = _mm_cvtps_ph( V, 0 );
    __m128i* pPacked = reinterpret_cast<__m128i*>(pDestination);
    _mm_storel_epi64( pPacked, halves );
}
} // namespace F16C
} // namespace DirectX

View File

@ -0,0 +1,391 @@
//-------------------------------------------------------------------------------------
// DirectXMathFMA3.h -- FMA3 extensions for SIMD C++ Math library
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#pragma once
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
#error FMA3 not supported on ARM platform
#endif
#include <DirectXMath.h>
namespace DirectX
{
namespace FMA3
{
// Returns true when CPUID reports FMA3, AVX and OSXSAVE.
inline bool XMVerifyFMA3Support()
{
    // Should return true for AMD "Piledriver" and Intel "Haswell" processors
    // with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)

    // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
    int regs[4] = {-1};

    // Runs CPUID for the given leaf; regs[0..3] receive EAX, EBX, ECX, EDX.
    auto queryLeaf = [&regs](unsigned int leaf)
    {
#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
        __cpuid(leaf, regs[0], regs[1], regs[2], regs[3]);
#else
        __cpuid(regs, static_cast<int>(leaf));
#endif
    };

    // Leaf 0: regs[0] must show that leaf 1 is available.
    queryLeaf(0);
    if ( regs[0] >= 1 )
    {
        queryLeaf(1);

        // We check for FMA3, AVX, OSXSAVE (leaf 1 ECX bits 12, 28 and 27)
        const int requiredBits = 0x18001000;
        return ( (regs[2] & requiredBits) == requiredBits );
    }

    return false;
}
//-------------------------------------------------------------------------------------
// Vector
//-------------------------------------------------------------------------------------
// Per-component fused multiply-add: (V1 * V2) + V3.
inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR V3
)
{
    const XMVECTOR fused = _mm_fmadd_ps( V1, V2, V3 );
    return fused;
}
// Per-component fused negated multiply-add: V3 - (V1 * V2).
inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR V3
)
{
    const XMVECTOR fused = _mm_fnmadd_ps( V1, V2, V3 );
    return fused;
}
//-------------------------------------------------------------------------------------
// Vector2
//-------------------------------------------------------------------------------------
// Computes V.x * M.r[0] + V.y * M.r[1] + M.r[3], i.e. V is treated as
// (x, y, 0, 1). No divide by w is performed.
inline XMVECTOR XM_CALLCONV XMVector2Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    const XMVECTOR splatY = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    const XMVECTOR splatX = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );

    // Y term is seeded with row 3, then the X term is accumulated on top.
    const XMVECTOR partial = _mm_fmadd_ps( splatY, M.r[1], M.r[3] );
    return _mm_fmadd_ps( splatX, M.r[0], partial );
}
// Computes V.x * M.r[0] + V.y * M.r[1] + M.r[3] and then divides every
// component of the result by its own w component.
inline XMVECTOR XM_CALLCONV XMVector2TransformCoord
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    const XMVECTOR splatY = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    const XMVECTOR splatX = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );

    const XMVECTOR partial = _mm_fmadd_ps( splatY, M.r[1], M.r[3] );
    const XMVECTOR transformed = _mm_fmadd_ps( splatX, M.r[0], partial );

    // Divide by the splatted w component.
    const XMVECTOR splatW = _mm_permute_ps( transformed, _MM_SHUFFLE(3,3,3,3) );
    return _mm_div_ps( transformed, splatW );
}
// Computes V.x * M.r[0] + V.y * M.r[1]; rows 2 and 3 of M are not used.
inline XMVECTOR XM_CALLCONV XMVector2TransformNormal
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    const XMVECTOR splatY = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    const XMVECTOR splatX = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );

    const XMVECTOR yTerm = _mm_mul_ps( splatY, M.r[1] );
    return _mm_fmadd_ps( splatX, M.r[0], yTerm );
}
//-------------------------------------------------------------------------------------
// Vector3
//-------------------------------------------------------------------------------------
// Computes V.x * M.r[0] + V.y * M.r[1] + V.z * M.r[2] + M.r[3], i.e. V is
// treated as (x, y, z, 1). No divide by w is performed.
inline XMVECTOR XM_CALLCONV XMVector3Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    const XMVECTOR splatZ = _mm_permute_ps( V, _MM_SHUFFLE(2,2,2,2) );
    const XMVECTOR splatY = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    const XMVECTOR splatX = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );

    // Accumulate in Z, Y, X order, seeded with row 3.
    XMVECTOR acc = _mm_fmadd_ps( splatZ, M.r[2], M.r[3] );
    acc = _mm_fmadd_ps( splatY, M.r[1], acc );
    return _mm_fmadd_ps( splatX, M.r[0], acc );
}
// Computes V.x * M.r[0] + V.y * M.r[1] + V.z * M.r[2] + M.r[3] and then
// divides every component of the result by its own w component.
inline XMVECTOR XM_CALLCONV XMVector3TransformCoord
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    const XMVECTOR splatZ = _mm_permute_ps( V, _MM_SHUFFLE(2,2,2,2) );
    const XMVECTOR splatY = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    const XMVECTOR splatX = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );

    XMVECTOR acc = _mm_fmadd_ps( splatZ, M.r[2], M.r[3] );
    acc = _mm_fmadd_ps( splatY, M.r[1], acc );
    const XMVECTOR transformed = _mm_fmadd_ps( splatX, M.r[0], acc );

    // Divide by the splatted w component.
    const XMVECTOR splatW = _mm_permute_ps( transformed, _MM_SHUFFLE(3,3,3,3) );
    return _mm_div_ps( transformed, splatW );
}
// Computes V.x * M.r[0] + V.y * M.r[1] + V.z * M.r[2]; row 3 of M is not used.
inline XMVECTOR XM_CALLCONV XMVector3TransformNormal
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    const XMVECTOR splatZ = _mm_permute_ps( V, _MM_SHUFFLE(2,2,2,2) );
    const XMVECTOR splatY = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    const XMVECTOR splatX = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );

    XMVECTOR acc = _mm_mul_ps( splatZ, M.r[2] );
    acc = _mm_fmadd_ps( splatY, M.r[1], acc );
    return _mm_fmadd_ps( splatX, M.r[0], acc );
}
// Forward declaration: XMVector3Project and XMVector3Unproject call
// FMA3::XMMatrixMultiply, whose definition appears later in this header.
XMMATRIX XM_CALLCONV XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2);
// Transforms V by World * View * Projection (with divide by w), then applies
// the viewport mapping result * Scale + Offset, where
//   Scale  = (Width/2, -Height/2, MaxZ - MinZ, 0)
//   Offset = (X + Width/2, Y + Height/2, MinZ, 0)
inline XMVECTOR XM_CALLCONV XMVector3Project
(
    FXMVECTOR V,
    float ViewportX,
    float ViewportY,
    float ViewportWidth,
    float ViewportHeight,
    float ViewportMinZ,
    float ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
    const float halfWidth = ViewportWidth * 0.5f;
    const float halfHeight = ViewportHeight * 0.5f;

    const XMVECTOR viewportScale = XMVectorSet(halfWidth, -halfHeight, ViewportMaxZ - ViewportMinZ, 0.0f);
    const XMVECTOR viewportOffset = XMVectorSet(ViewportX + halfWidth, ViewportY + halfHeight, ViewportMinZ, 0.0f);

    const XMMATRIX worldView = FMA3::XMMatrixMultiply(World, View);
    const XMMATRIX worldViewProj = FMA3::XMMatrixMultiply(worldView, Projection);

    const XMVECTOR projected = FMA3::XMVector3TransformCoord(V, worldViewProj);
    return FMA3::XMVectorMultiplyAdd(projected, viewportScale, viewportOffset);
}
// Inverse of the viewport mapping followed by the inverse of
// World * View * Projection: V is first mapped with V * Scale + Offset, where
//   Scale  = 1 / (Width/2, -Height/2, MaxZ - MinZ, 1)
//   Offset = Scale * (-X, -Y, -MinZ, 0) + (-1, 1, 0, 0)
// and the result is transformed (with divide by w) by the inverted matrix.
inline XMVECTOR XM_CALLCONV XMVector3Unproject
(
    FXMVECTOR V,
    float ViewportX,
    float ViewportY,
    float ViewportWidth,
    float ViewportHeight,
    float ViewportMinZ,
    float ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
    static const XMVECTORF32 c_Bias = { { { -1.0f, 1.0f, 0.0f, 0.0f } } };

    const XMVECTOR inverseScale = XMVectorReciprocal(
        XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f));

    const XMVECTOR negatedOrigin = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f);
    const XMVECTOR offset = FMA3::XMVectorMultiplyAdd(inverseScale, negatedOrigin, c_Bias.v);

    const XMMATRIX worldView = FMA3::XMMatrixMultiply(World, View);
    const XMMATRIX worldViewProj = FMA3::XMMatrixMultiply(worldView, Projection);
    const XMMATRIX inverseTransform = XMMatrixInverse(nullptr, worldViewProj);

    const XMVECTOR remapped = FMA3::XMVectorMultiplyAdd(V, inverseScale, offset);
    return FMA3::XMVector3TransformCoord(remapped, inverseTransform);
}
//-------------------------------------------------------------------------------------
// Vector4
//-------------------------------------------------------------------------------------
// Computes V.x * M.r[0] + V.y * M.r[1] + V.z * M.r[2] + V.w * M.r[3].
inline XMVECTOR XM_CALLCONV XMVector4Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    const XMVECTOR splatW = _mm_permute_ps( V, _MM_SHUFFLE(3,3,3,3) );
    const XMVECTOR splatZ = _mm_permute_ps( V, _MM_SHUFFLE(2,2,2,2) );
    const XMVECTOR splatY = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    const XMVECTOR splatX = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );

    // Accumulate in W, Z, Y, X order.
    XMVECTOR acc = _mm_mul_ps( splatW, M.r[3] );
    acc = _mm_fmadd_ps( splatZ, M.r[2], acc );
    acc = _mm_fmadd_ps( splatY, M.r[1], acc );
    return _mm_fmadd_ps( splatX, M.r[0], acc );
}
//-------------------------------------------------------------------------------------
// Matrix
//-------------------------------------------------------------------------------------
// Returns M1 * M2. Each result row i is
//   M1[i].x * M2.r[0] + M1[i].y * M2.r[1] + M1[i].z * M2.r[2] + M1[i].w * M2.r[3]
// accumulated in that order with fused multiply-adds.
inline XMMATRIX XM_CALLCONV XMMatrixMultiply
(
    CXMMATRIX M1,
    CXMMATRIX M2
)
{
    XMMATRIX product;
    for (int row = 0; row < 4; ++row)
    {
        const XMVECTOR source = M1.r[row];

        // Splat each component of the M1 row across a full vector.
        const XMVECTOR splatX = _mm_permute_ps(source, _MM_SHUFFLE(0,0,0,0));
        const XMVECTOR splatY = _mm_permute_ps(source, _MM_SHUFFLE(1,1,1,1));
        const XMVECTOR splatZ = _mm_permute_ps(source, _MM_SHUFFLE(2,2,2,2));
        const XMVECTOR splatW = _mm_permute_ps(source, _MM_SHUFFLE(3,3,3,3));

        XMVECTOR acc = _mm_mul_ps(splatX, M2.r[0]);
        acc = _mm_fmadd_ps(splatY, M2.r[1], acc);
        acc = _mm_fmadd_ps(splatZ, M2.r[2], acc);
        acc = _mm_fmadd_ps(splatW, M2.r[3], acc);

        product.r[row] = acc;
    }
    return product;
}
// Returns the transpose of M1 * M2. The four product rows are computed with
// fused multiply-adds and then transposed with two rounds of shuffles.
inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
(
    FXMMATRIX M1,
    CXMMATRIX M2
)
{
    __m128 productRow[4];
    for (int row = 0; row < 4; ++row)
    {
        const XMVECTOR source = M1.r[row];

        // Splat each component of the M1 row across a full vector.
        const XMVECTOR splatX = _mm_permute_ps(source, _MM_SHUFFLE(0,0,0,0));
        const XMVECTOR splatY = _mm_permute_ps(source, _MM_SHUFFLE(1,1,1,1));
        const XMVECTOR splatZ = _mm_permute_ps(source, _MM_SHUFFLE(2,2,2,2));
        const XMVECTOR splatW = _mm_permute_ps(source, _MM_SHUFFLE(3,3,3,3));

        XMVECTOR acc = _mm_mul_ps(splatX, M2.r[0]);
        acc = _mm_fmadd_ps(splatY, M2.r[1], acc);
        acc = _mm_fmadd_ps(splatZ, M2.r[2], acc);
        acc = _mm_fmadd_ps(splatW, M2.r[3], acc);

        productRow[row] = acc;
    }

    // First round: pair up rows (0,1) and (2,3).
    // x.x,x.y,y.x,y.y
    const XMVECTOR xy01 = _mm_shuffle_ps(productRow[0], productRow[1], _MM_SHUFFLE(1,0,1,0));
    // x.z,x.w,y.z,y.w
    const XMVECTOR zw01 = _mm_shuffle_ps(productRow[0], productRow[1], _MM_SHUFFLE(3,2,3,2));
    // z.x,z.y,w.x,w.y
    const XMVECTOR xy23 = _mm_shuffle_ps(productRow[2], productRow[3], _MM_SHUFFLE(1,0,1,0));
    // z.z,z.w,w.z,w.w
    const XMVECTOR zw23 = _mm_shuffle_ps(productRow[2], productRow[3], _MM_SHUFFLE(3,2,3,2));

    // Second round: pick matching components to form the transposed rows.
    XMMATRIX transposed;
    // x.x,y.x,z.x,w.x
    transposed.r[0] = _mm_shuffle_ps(xy01, xy23, _MM_SHUFFLE(2,0,2,0));
    // x.y,y.y,z.y,w.y
    transposed.r[1] = _mm_shuffle_ps(xy01, xy23, _MM_SHUFFLE(3,1,3,1));
    // x.z,y.z,z.z,w.z
    transposed.r[2] = _mm_shuffle_ps(zw01, zw23, _MM_SHUFFLE(2,0,2,0));
    // x.w,y.w,z.w,w.w
    transposed.r[3] = _mm_shuffle_ps(zw01, zw23, _MM_SHUFFLE(3,1,3,1));
    return transposed;
}
} // namespace FMA3
} // namespace DirectX;

View File

@ -0,0 +1,415 @@
//-------------------------------------------------------------------------------------
// DirectXMathFMA4.h -- FMA4 extensions for SIMD C++ Math library
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#pragma once
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
#error FMA4 not supported on ARM platform
#endif
#include <DirectXMath.h>
#include <ammintrin.h>
#ifdef __GNUC__
#include <x86intrin.h>
#endif
namespace DirectX
{
namespace FMA4
{
// Returns true when CPUID reports AVX and OSXSAVE in leaf 1 and FMA4 in
// extended leaf 0x80000001.
inline bool XMVerifyFMA4Support()
{
    // Should return true for AMD Bulldozer processors
    // with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)

    // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
    int regs[4] = {-1};

    // Runs CPUID for the given leaf; regs[0..3] receive EAX, EBX, ECX, EDX.
    auto queryLeaf = [&regs](unsigned int leaf)
    {
#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
        __cpuid(leaf, regs[0], regs[1], regs[2], regs[3]);
#else
        __cpuid(regs, static_cast<int>(leaf));
#endif
    };

    // Leaf 0: regs[0] must show that leaf 1 is available.
    queryLeaf(0);
    if ( regs[0] < 1 )
        return false;

    // We check for AVX, OSXSAVE (required to access FMA4)
    const int avxAndOsxsave = 0x18000000;
    queryLeaf(1);
    if ( (regs[2] & avxAndOsxsave) != avxAndOsxsave )
        return false;

    // Leaf 0x80000000: regs[0] must show that extended leaf 0x80000001 is available.
    queryLeaf(0x80000000u);
    if ( static_cast<uint32_t>(regs[0]) < 0x80000001u )
        return false;

    // We check for FMA4 (extended leaf 0x80000001, ECX bit 16)
    queryLeaf(0x80000001u);
    return ( (regs[2] & 0x10000) != 0 );
}
//-------------------------------------------------------------------------------------
// Vector
//-------------------------------------------------------------------------------------
// Per-component fused multiply-add: (V1 * V2) + V3.
inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR V3
)
{
    const XMVECTOR fused = _mm_macc_ps( V1, V2, V3 );
    return fused;
}
// Per-component fused negated multiply-add: V3 - (V1 * V2).
inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR V3
)
{
    const XMVECTOR fused = _mm_nmacc_ps( V1, V2, V3 );
    return fused;
}
//-------------------------------------------------------------------------------------
// Vector2
//-------------------------------------------------------------------------------------
// Computes V.x * M.r[0] + V.y * M.r[1] + M.r[3], i.e. V is treated as
// (x, y, 0, 1). No divide by w is performed.
inline XMVECTOR XM_CALLCONV XMVector2Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    const XMVECTOR splatY = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    const XMVECTOR splatX = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );

    // Y term is seeded with row 3, then the X term is accumulated on top.
    const XMVECTOR partial = _mm_macc_ps( splatY, M.r[1], M.r[3] );
    return _mm_macc_ps( splatX, M.r[0], partial );
}
// Computes V.x * M.r[0] + V.y * M.r[1] + M.r[3] and then divides every
// component of the result by its own w component.
inline XMVECTOR XM_CALLCONV XMVector2TransformCoord
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    const XMVECTOR splatY = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    const XMVECTOR splatX = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );

    const XMVECTOR partial = _mm_macc_ps( splatY, M.r[1], M.r[3] );
    const XMVECTOR transformed = _mm_macc_ps( splatX, M.r[0], partial );

    // Divide by the splatted w component.
    const XMVECTOR splatW = _mm_permute_ps( transformed, _MM_SHUFFLE(3,3,3,3) );
    return _mm_div_ps( transformed, splatW );
}
// Computes V.x * M.r[0] + V.y * M.r[1]; rows 2 and 3 of M are not used.
inline XMVECTOR XM_CALLCONV XMVector2TransformNormal
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    const XMVECTOR splatY = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    const XMVECTOR splatX = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );

    const XMVECTOR yTerm = _mm_mul_ps( splatY, M.r[1] );
    return _mm_macc_ps( splatX, M.r[0], yTerm );
}
//-------------------------------------------------------------------------------------
// Vector3
//-------------------------------------------------------------------------------------
// Computes V.x * M.r[0] + V.y * M.r[1] + V.z * M.r[2] + M.r[3], i.e. V is
// treated as (x, y, z, 1). No divide by w is performed.
inline XMVECTOR XM_CALLCONV XMVector3Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    const XMVECTOR splatZ = _mm_permute_ps( V, _MM_SHUFFLE(2,2,2,2) );
    const XMVECTOR splatY = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    const XMVECTOR splatX = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );

    // Accumulate in Z, Y, X order, seeded with row 3.
    XMVECTOR acc = _mm_macc_ps( splatZ, M.r[2], M.r[3] );
    acc = _mm_macc_ps( splatY, M.r[1], acc );
    return _mm_macc_ps( splatX, M.r[0], acc );
}
// Computes V.x * M.r[0] + V.y * M.r[1] + V.z * M.r[2] + M.r[3] and then
// divides every component of the result by its own w component.
inline XMVECTOR XM_CALLCONV XMVector3TransformCoord
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    const XMVECTOR splatZ = _mm_permute_ps( V, _MM_SHUFFLE(2,2,2,2) );
    const XMVECTOR splatY = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    const XMVECTOR splatX = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );

    XMVECTOR acc = _mm_macc_ps( splatZ, M.r[2], M.r[3] );
    acc = _mm_macc_ps( splatY, M.r[1], acc );
    const XMVECTOR transformed = _mm_macc_ps( splatX, M.r[0], acc );

    // Divide by the splatted w component.
    const XMVECTOR splatW = _mm_permute_ps( transformed, _MM_SHUFFLE(3,3,3,3) );
    return _mm_div_ps( transformed, splatW );
}
// Computes V.x * M.r[0] + V.y * M.r[1] + V.z * M.r[2]; row 3 of M is not used.
inline XMVECTOR XM_CALLCONV XMVector3TransformNormal
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    const XMVECTOR splatZ = _mm_permute_ps( V, _MM_SHUFFLE(2,2,2,2) );
    const XMVECTOR splatY = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    const XMVECTOR splatX = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );

    XMVECTOR acc = _mm_mul_ps( splatZ, M.r[2] );
    acc = _mm_macc_ps( splatY, M.r[1], acc );
    return _mm_macc_ps( splatX, M.r[0], acc );
}
// Forward declaration: XMVector3Project and XMVector3Unproject call
// FMA4::XMMatrixMultiply, whose definition appears later in this header.
XMMATRIX XM_CALLCONV XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2);
// Transforms V by World * View * Projection (with divide by w), then applies
// the viewport mapping result * Scale + Offset, where
//   Scale  = (Width/2, -Height/2, MaxZ - MinZ, 0)
//   Offset = (X + Width/2, Y + Height/2, MinZ, 0)
inline XMVECTOR XM_CALLCONV XMVector3Project
(
    FXMVECTOR V,
    float ViewportX,
    float ViewportY,
    float ViewportWidth,
    float ViewportHeight,
    float ViewportMinZ,
    float ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
    const float halfWidth = ViewportWidth * 0.5f;
    const float halfHeight = ViewportHeight * 0.5f;

    const XMVECTOR viewportScale = XMVectorSet(halfWidth, -halfHeight, ViewportMaxZ - ViewportMinZ, 0.0f);
    const XMVECTOR viewportOffset = XMVectorSet(ViewportX + halfWidth, ViewportY + halfHeight, ViewportMinZ, 0.0f);

    const XMMATRIX worldView = FMA4::XMMatrixMultiply(World, View);
    const XMMATRIX worldViewProj = FMA4::XMMatrixMultiply(worldView, Projection);

    const XMVECTOR projected = FMA4::XMVector3TransformCoord(V, worldViewProj);
    return FMA4::XMVectorMultiplyAdd(projected, viewportScale, viewportOffset);
}
// Inverse of the viewport mapping followed by the inverse of
// World * View * Projection: V is first mapped with V * Scale + Offset, where
//   Scale  = 1 / (Width/2, -Height/2, MaxZ - MinZ, 1)
//   Offset = Scale * (-X, -Y, -MinZ, 0) + (-1, 1, 0, 0)
// and the result is transformed (with divide by w) by the inverted matrix.
inline XMVECTOR XM_CALLCONV XMVector3Unproject
(
    FXMVECTOR V,
    float ViewportX,
    float ViewportY,
    float ViewportWidth,
    float ViewportHeight,
    float ViewportMinZ,
    float ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
    static const XMVECTORF32 c_Bias = { { { -1.0f, 1.0f, 0.0f, 0.0f } } };

    const XMVECTOR inverseScale = XMVectorReciprocal(
        XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f));

    const XMVECTOR negatedOrigin = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f);
    const XMVECTOR offset = FMA4::XMVectorMultiplyAdd(inverseScale, negatedOrigin, c_Bias.v);

    const XMMATRIX worldView = FMA4::XMMatrixMultiply(World, View);
    const XMMATRIX worldViewProj = FMA4::XMMatrixMultiply(worldView, Projection);
    const XMMATRIX inverseTransform = XMMatrixInverse(nullptr, worldViewProj);

    const XMVECTOR remapped = FMA4::XMVectorMultiplyAdd(V, inverseScale, offset);
    return FMA4::XMVector3TransformCoord(remapped, inverseTransform);
}
//-------------------------------------------------------------------------------------
// Vector4
//-------------------------------------------------------------------------------------
// Computes V.x * M.r[0] + V.y * M.r[1] + V.z * M.r[2] + V.w * M.r[3].
inline XMVECTOR XM_CALLCONV XMVector4Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
    const XMVECTOR splatW = _mm_permute_ps( V, _MM_SHUFFLE(3,3,3,3) );
    const XMVECTOR splatZ = _mm_permute_ps( V, _MM_SHUFFLE(2,2,2,2) );
    const XMVECTOR splatY = _mm_permute_ps( V, _MM_SHUFFLE(1,1,1,1) );
    const XMVECTOR splatX = _mm_permute_ps( V, _MM_SHUFFLE(0,0,0,0) );

    // Accumulate in W, Z, Y, X order.
    XMVECTOR acc = _mm_mul_ps( splatW, M.r[3] );
    acc = _mm_macc_ps( splatZ, M.r[2], acc );
    acc = _mm_macc_ps( splatY, M.r[1], acc );
    return _mm_macc_ps( splatX, M.r[0], acc );
}
//-------------------------------------------------------------------------------------
// Matrix
//-------------------------------------------------------------------------------------
// Returns M1 * M2. Each result row i is
//   M1[i].x * M2.r[0] + M1[i].y * M2.r[1] + M1[i].z * M2.r[2] + M1[i].w * M2.r[3]
// accumulated in that order with fused multiply-adds.
inline XMMATRIX XM_CALLCONV XMMatrixMultiply
(
    CXMMATRIX M1,
    CXMMATRIX M2
)
{
    XMMATRIX product;
    for (int row = 0; row < 4; ++row)
    {
        const XMVECTOR source = M1.r[row];

        // Splat each component of the M1 row across a full vector.
        const XMVECTOR splatX = _mm_permute_ps(source, _MM_SHUFFLE(0,0,0,0));
        const XMVECTOR splatY = _mm_permute_ps(source, _MM_SHUFFLE(1,1,1,1));
        const XMVECTOR splatZ = _mm_permute_ps(source, _MM_SHUFFLE(2,2,2,2));
        const XMVECTOR splatW = _mm_permute_ps(source, _MM_SHUFFLE(3,3,3,3));

        XMVECTOR acc = _mm_mul_ps(splatX, M2.r[0]);
        acc = _mm_macc_ps(splatY, M2.r[1], acc);
        acc = _mm_macc_ps(splatZ, M2.r[2], acc);
        acc = _mm_macc_ps(splatW, M2.r[3], acc);

        product.r[row] = acc;
    }
    return product;
}
// Returns the transpose of M1 * M2. The four product rows are computed with
// fused multiply-adds and then transposed with two rounds of shuffles.
inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
(
    FXMMATRIX M1,
    CXMMATRIX M2
)
{
    __m128 productRow[4];
    for (int row = 0; row < 4; ++row)
    {
        const XMVECTOR source = M1.r[row];

        // Splat each component of the M1 row across a full vector.
        const XMVECTOR splatX = _mm_permute_ps(source, _MM_SHUFFLE(0,0,0,0));
        const XMVECTOR splatY = _mm_permute_ps(source, _MM_SHUFFLE(1,1,1,1));
        const XMVECTOR splatZ = _mm_permute_ps(source, _MM_SHUFFLE(2,2,2,2));
        const XMVECTOR splatW = _mm_permute_ps(source, _MM_SHUFFLE(3,3,3,3));

        XMVECTOR acc = _mm_mul_ps(splatX, M2.r[0]);
        acc = _mm_macc_ps(splatY, M2.r[1], acc);
        acc = _mm_macc_ps(splatZ, M2.r[2], acc);
        acc = _mm_macc_ps(splatW, M2.r[3], acc);

        productRow[row] = acc;
    }

    // First round: pair up rows (0,1) and (2,3).
    // x.x,x.y,y.x,y.y
    const XMVECTOR xy01 = _mm_shuffle_ps(productRow[0], productRow[1], _MM_SHUFFLE(1,0,1,0));
    // x.z,x.w,y.z,y.w
    const XMVECTOR zw01 = _mm_shuffle_ps(productRow[0], productRow[1], _MM_SHUFFLE(3,2,3,2));
    // z.x,z.y,w.x,w.y
    const XMVECTOR xy23 = _mm_shuffle_ps(productRow[2], productRow[3], _MM_SHUFFLE(1,0,1,0));
    // z.z,z.w,w.z,w.w
    const XMVECTOR zw23 = _mm_shuffle_ps(productRow[2], productRow[3], _MM_SHUFFLE(3,2,3,2));

    // Second round: pick matching components to form the transposed rows.
    XMMATRIX transposed;
    // x.x,y.x,z.x,w.x
    transposed.r[0] = _mm_shuffle_ps(xy01, xy23, _MM_SHUFFLE(2,0,2,0));
    // x.y,y.y,z.y,w.y
    transposed.r[1] = _mm_shuffle_ps(xy01, xy23, _MM_SHUFFLE(3,1,3,1));
    // x.z,y.z,z.z,w.z
    transposed.r[2] = _mm_shuffle_ps(zw01, zw23, _MM_SHUFFLE(2,0,2,0));
    // x.w,y.w,z.w,w.w
    transposed.r[3] = _mm_shuffle_ps(zw01, zw23, _MM_SHUFFLE(3,1,3,1));
    return transposed;
}
} // namespace FMA4
} // namespace DirectX;

View File

@ -0,0 +1,111 @@
//-------------------------------------------------------------------------------------
// DirectXMathSSE3.h -- SSE3 extensions for SIMD C++ Math library
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#pragma once
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
#error SSE3 not supported on ARM platform
#endif
#include <pmmintrin.h>
#include <DirectXMath.h>
namespace DirectX
{
namespace SSE3
{
// Returns true when CPUID leaf 1 reports SSE3 (ECX bit 0).
inline bool XMVerifySSE3Support()
{
    // Should return true on AMD Athlon 64, AMD Phenom, and Intel Pentium 4 or later processors

    // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
    int regs[4] = { -1 };

    // Runs CPUID for the given leaf; regs[0..3] receive EAX, EBX, ECX, EDX.
    auto queryLeaf = [&regs](unsigned int leaf)
    {
#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
        __cpuid(leaf, regs[0], regs[1], regs[2], regs[3]);
#else
        __cpuid(regs, static_cast<int>(leaf));
#endif
    };

    // Leaf 0: regs[0] must show that leaf 1 is available.
    queryLeaf(0);
    if ( regs[0] >= 1 )
    {
        queryLeaf(1);

        // We only check for SSE3 instruction set. SSSE3 instructions are not used.
        return ( (regs[2] & 0x1) != 0 );
    }

    return false;
}
// SSE3 2D dot product: x1*x2 + y1*y2, replicated into all four lanes of the result.
inline XMVECTOR XM_CALLCONV XMVector2Dot( FXMVECTOR V1, FXMVECTOR V2 )
{
    const XMVECTOR products = _mm_mul_ps(V1, V2);
    // After the horizontal add, lane 0 holds products.x + products.y.
    const XMVECTOR pairSums = _mm_hadd_ps(products, products);
    // Broadcast lane 0 so the z/w products never reach the caller.
    return _mm_shuffle_ps(pairSums, pairSums, _MM_SHUFFLE(0,0,0,0));
}

// Squared 2D length: the SSE3 2D dot product of V with itself.
inline XMVECTOR XM_CALLCONV XMVector2LengthSq( FXMVECTOR V )
{
    const XMVECTOR lengthSq = SSE3::XMVector2Dot(V, V);
    return lengthSq;
}
// SSE3 3D dot product; the sum ends up in every lane of the result.
inline XMVECTOR XM_CALLCONV XMVector3Dot( FXMVECTOR V1, FXMVECTOR V2 )
{
    // Mask the per-lane products with g_XMMask3 before summing
    // (presumably this clears the w lane -- confirm against the g_XMMask3 definition).
    const XMVECTOR maskedProducts = _mm_and_ps( _mm_mul_ps(V1, V2), g_XMMask3 );
    // Two horizontal adds fold all four lanes into a single sum held by every lane.
    const XMVECTOR pairSums = _mm_hadd_ps(maskedProducts, maskedProducts);
    return _mm_hadd_ps(pairSums, pairSums);
}

// Squared 3D length: the SSE3 3D dot product of V with itself.
inline XMVECTOR XM_CALLCONV XMVector3LengthSq( FXMVECTOR V )
{
    const XMVECTOR lengthSq = SSE3::XMVector3Dot(V, V);
    return lengthSq;
}
// SSE3 4D dot product; the sum ends up in every lane of the result.
inline XMVECTOR XM_CALLCONV XMVector4Dot( FXMVECTOR V1, FXMVECTOR V2 )
{
    const XMVECTOR products = _mm_mul_ps(V1, V2);
    // Two horizontal adds fold all four lanes into a single sum held by every lane.
    const XMVECTOR pairSums = _mm_hadd_ps( products, products );
    return _mm_hadd_ps( pairSums, pairSums );
}

// Squared 4D length: the SSE3 4D dot product of V with itself.
inline XMVECTOR XM_CALLCONV XMVector4LengthSq( FXMVECTOR V )
{
    const XMVECTOR lengthSq = SSE3::XMVector4Dot(V, V);
    return lengthSq;
}
// Swizzle (0,0,2,2): duplicates the even-indexed lanes via _mm_moveldup_ps.
inline XMVECTOR XM_CALLCONV XMVectorSwizzle_0022( FXMVECTOR V )
{
    const XMVECTOR evenLanes = _mm_moveldup_ps(V);
    return evenLanes;
}

// Swizzle (1,1,3,3): duplicates the odd-indexed lanes via _mm_movehdup_ps.
inline XMVECTOR XM_CALLCONV XMVectorSwizzle_1133( FXMVECTOR V )
{
    const XMVECTOR oddLanes = _mm_movehdup_ps(V);
    return oddLanes;
}
} // namespace SSE3
} // namespace DirectX

View File

@ -0,0 +1,417 @@
//-------------------------------------------------------------------------------------
// DirectXMathSSE4.h -- SSE4.1 extensions for SIMD C++ Math library
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#pragma once
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
#error SSE4 not supported on ARM platform
#endif
#include <smmintrin.h>
#include <DirectXMath.h>
namespace DirectX
{
namespace SSE4
{
// Runtime check for SSE4.1 support using CPUID.
// Returns true when CPUID leaf 1 reports bit 0x80000 of the third register (index 2 of the
// register array); returns false when leaf 0 reports that leaf 1 is unavailable.
// Should return true on AMD Bulldozer, Intel Core 2 ("Penryn"), and Intel Core i7 ("Nehalem") or later processors
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
inline bool XMVerifySSE4Support()
{
    int regs[4] = { -1 };

    // Query leaf 0; regs[0] is then compared against 1 to see whether leaf 1 can be queried.
#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
    // Macro form: (leaf, out0, out1, out2, out3)
    __cpuid(0, regs[0], regs[1], regs[2], regs[3]);
#else
    // Intrinsic form: (int[4], leaf)
    __cpuid(regs, 0);
#endif

    if ( regs[0] >= 1 )
    {
#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
        __cpuid(1, regs[0], regs[1], regs[2], regs[3]);
#else
        __cpuid(regs, 1);
#endif
        // We only check for SSE4.1 instruction set. SSE4.2 instructions are not used.
        const int sse41Bit = 0x80000;
        return ( regs[2] & sse41Bit ) == sse41Bit;
    }

    return false;
}
//-------------------------------------------------------------------------------------
// Vector
//-------------------------------------------------------------------------------------
#ifdef __clang__
#pragma clang diagnostic ignored "-Wundefined-reinterpret-cast"
#endif
// Stores the y component (lane 1) of V to *y. y must not be null (asserted).
//
// The lane is copied with float-typed operations (shuffle + _mm_store_ss) instead of
// writing _mm_extract_ps()'s int result through reinterpret_cast<int*>(y): storing an
// int into a float object violates the strict-aliasing rule, which optimizers that
// enable type-based alias analysis (e.g. GCC/MinGW at -O2) are free to exploit.
// Both operations are bitwise moves, so the stored bit pattern is unchanged.
inline void XM_CALLCONV XMVectorGetYPtr(_Out_ float *y, _In_ FXMVECTOR V)
{
    assert( y != nullptr );
    _mm_store_ss( y, _mm_shuffle_ps( V, V, _MM_SHUFFLE(1,1,1,1) ) );
}

// Stores the z component (lane 2) of V to *z. z must not be null (asserted).
// Uses a float-typed store for the same strict-aliasing reason as XMVectorGetYPtr.
inline void XM_CALLCONV XMVectorGetZPtr(_Out_ float *z, _In_ FXMVECTOR V)
{
    assert( z != nullptr );
    _mm_store_ss( z, _mm_shuffle_ps( V, V, _MM_SHUFFLE(2,2,2,2) ) );
}

// Stores the w component (lane 3) of V to *w. w must not be null (asserted).
// Uses a float-typed store for the same strict-aliasing reason as XMVectorGetYPtr.
inline void XM_CALLCONV XMVectorGetWPtr(_Out_ float *w, _In_ FXMVECTOR V)
{
    assert( w != nullptr );
    _mm_store_ss( w, _mm_shuffle_ps( V, V, _MM_SHUFFLE(3,3,3,3) ) );
}
// Returns the raw 32-bit pattern of lane 1 (y) of V as an unsigned integer.
inline uint32_t XM_CALLCONV XMVectorGetIntY(FXMVECTOR V)
{
    const int lane = _mm_extract_epi32( _mm_castps_si128( V ), 1 );
    return static_cast<uint32_t>( lane );
}

// Returns the raw 32-bit pattern of lane 2 (z) of V as an unsigned integer.
inline uint32_t XM_CALLCONV XMVectorGetIntZ(FXMVECTOR V)
{
    const int lane = _mm_extract_epi32( _mm_castps_si128( V ), 2 );
    return static_cast<uint32_t>( lane );
}

// Returns the raw 32-bit pattern of lane 3 (w) of V as an unsigned integer.
inline uint32_t XM_CALLCONV XMVectorGetIntW(FXMVECTOR V)
{
    const int lane = _mm_extract_epi32( _mm_castps_si128( V ), 3 );
    return static_cast<uint32_t>( lane );
}
// Writes the raw 32-bit pattern of lane 1 (y) of V to *y. y must not be null (asserted).
inline void XM_CALLCONV XMVectorGetIntYPtr(_Out_ uint32_t *y, _In_ FXMVECTOR V)
{
    assert( y != nullptr );
    const int lane = _mm_extract_epi32( _mm_castps_si128( V ), 1 );
    *y = static_cast<uint32_t>( lane );
}

// Writes the raw 32-bit pattern of lane 2 (z) of V to *z. z must not be null (asserted).
inline void XM_CALLCONV XMVectorGetIntZPtr(_Out_ uint32_t *z, _In_ FXMVECTOR V)
{
    assert( z != nullptr );
    const int lane = _mm_extract_epi32( _mm_castps_si128( V ), 2 );
    *z = static_cast<uint32_t>( lane );
}

// Writes the raw 32-bit pattern of lane 3 (w) of V to *w. w must not be null (asserted).
inline void XM_CALLCONV XMVectorGetIntWPtr(_Out_ uint32_t *w, _In_ FXMVECTOR V)
{
    assert( w != nullptr );
    const int lane = _mm_extract_epi32( _mm_castps_si128( V ), 3 );
    *w = static_cast<uint32_t>( lane );
}
// Returns a copy of V with lane 1 (y) replaced by the given float.
inline XMVECTOR XM_CALLCONV XMVectorSetY(FXMVECTOR V, float y)
{
    const XMVECTOR scalar = _mm_set_ss(y);
    // 0x10: copy source lane 0 into destination lane 1, zero no lanes.
    return _mm_insert_ps( V, scalar, 0x10 );
}

// Returns a copy of V with lane 2 (z) replaced by the given float.
inline XMVECTOR XM_CALLCONV XMVectorSetZ(FXMVECTOR V, float z)
{
    const XMVECTOR scalar = _mm_set_ss(z);
    // 0x20: copy source lane 0 into destination lane 2, zero no lanes.
    return _mm_insert_ps( V, scalar, 0x20 );
}

// Returns a copy of V with lane 3 (w) replaced by the given float.
inline XMVECTOR XM_CALLCONV XMVectorSetW(FXMVECTOR V, float w)
{
    const XMVECTOR scalar = _mm_set_ss(w);
    // 0x30: copy source lane 0 into destination lane 3, zero no lanes.
    return _mm_insert_ps( V, scalar, 0x30 );
}
// Returns a copy of V with the raw 32 bits of lane 1 (y) replaced by the given integer.
inline XMVECTOR XM_CALLCONV XMVectorSetIntY(FXMVECTOR V, uint32_t y)
{
    const __m128i lanes = _mm_castps_si128( V );
    return _mm_castsi128_ps( _mm_insert_epi32( lanes, static_cast<int>(y), 1 ) );
}

// Returns a copy of V with the raw 32 bits of lane 2 (z) replaced by the given integer.
inline XMVECTOR XM_CALLCONV XMVectorSetIntZ(FXMVECTOR V, uint32_t z)
{
    const __m128i lanes = _mm_castps_si128( V );
    return _mm_castsi128_ps( _mm_insert_epi32( lanes, static_cast<int>(z), 2 ) );
}

// Returns a copy of V with the raw 32 bits of lane 3 (w) replaced by the given integer.
inline XMVECTOR XM_CALLCONV XMVectorSetIntW(FXMVECTOR V, uint32_t w)
{
    const __m128i lanes = _mm_castps_si128( V );
    return _mm_castsi128_ps( _mm_insert_epi32( lanes, static_cast<int>(w), 3 ) );
}
// Rounds each lane to the nearest integer value (_MM_FROUND_TO_NEAREST_INT),
// with _MM_FROUND_NO_EXC set.
inline XMVECTOR XM_CALLCONV XMVectorRound( FXMVECTOR V )
{
    const XMVECTOR rounded = _mm_round_ps( V, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC );
    return rounded;
}

// Rounds each lane toward zero (_MM_FROUND_TO_ZERO), with _MM_FROUND_NO_EXC set.
inline XMVECTOR XM_CALLCONV XMVectorTruncate( FXMVECTOR V )
{
    const XMVECTOR truncated = _mm_round_ps( V, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC );
    return truncated;
}

// Per-lane floor via _mm_floor_ps.
inline XMVECTOR XM_CALLCONV XMVectorFloor( FXMVECTOR V )
{
    const XMVECTOR floored = _mm_floor_ps( V );
    return floored;
}

// Per-lane ceiling via _mm_ceil_ps.
inline XMVECTOR XM_CALLCONV XMVectorCeiling( FXMVECTOR V )
{
    const XMVECTOR ceiled = _mm_ceil_ps( V );
    return ceiled;
}
//-------------------------------------------------------------------------------------
// Vector2
//-------------------------------------------------------------------------------------
// SSE4.1 2D dot product. Mask 0x3f: multiply lanes 0-1 only (high nibble 0x3) and
// write the sum to all four lanes (low nibble 0xf).
inline XMVECTOR XM_CALLCONV XMVector2Dot( FXMVECTOR V1, FXMVECTOR V2 )
{
    const XMVECTOR dot = _mm_dp_ps( V1, V2, 0x3f );
    return dot;
}

// Squared 2D length: the SSE4.1 2D dot product of V with itself.
inline XMVECTOR XM_CALLCONV XMVector2LengthSq( FXMVECTOR V )
{
    const XMVECTOR lengthSq = SSE4::XMVector2Dot(V, V);
    return lengthSq;
}

// Estimated 1/length: _mm_rsqrt_ps applied to the squared 2D length.
inline XMVECTOR XM_CALLCONV XMVector2ReciprocalLengthEst( FXMVECTOR V )
{
    return _mm_rsqrt_ps( _mm_dp_ps( V, V, 0x3f ) );
}

// 1/length computed with _mm_sqrt_ps followed by a divide of g_XMOne by the length.
inline XMVECTOR XM_CALLCONV XMVector2ReciprocalLength( FXMVECTOR V )
{
    const XMVECTOR length = _mm_sqrt_ps( _mm_dp_ps( V, V, 0x3f ) );
    return _mm_div_ps( g_XMOne, length );
}

// 2D length "estimate"; this implementation uses the same _mm_sqrt_ps as XMVector2Length.
inline XMVECTOR XM_CALLCONV XMVector2LengthEst( FXMVECTOR V )
{
    return _mm_sqrt_ps( _mm_dp_ps( V, V, 0x3f ) );
}

// 2D length: square root of the squared 2D length, in every lane.
inline XMVECTOR XM_CALLCONV XMVector2Length( FXMVECTOR V )
{
    return _mm_sqrt_ps( _mm_dp_ps( V, V, 0x3f ) );
}

// Estimated normalization: V multiplied by the _mm_rsqrt_ps estimate of 1/length.
// No zero-length or infinite-length handling is performed here.
inline XMVECTOR XM_CALLCONV XMVector2NormalizeEst( FXMVECTOR V )
{
    const XMVECTOR invLength = _mm_rsqrt_ps( _mm_dp_ps( V, V, 0x3f ) );
    return _mm_mul_ps(invLength, V);
}
// Normalizes V by its 2D (x, y) length using a true divide.
// Special cases, as implemented below:
//   - length compares equal to zero (including -0.0): every lane of the result is zero;
//   - squared length compares equal to g_XMInfinity: every lane of the result is g_XMQNaN.
inline XMVECTOR XM_CALLCONV XMVector2Normalize( FXMVECTOR V )
{
    const XMVECTOR lengthSq = _mm_dp_ps( V, V, 0x3f );
    const XMVECTOR length = _mm_sqrt_ps(lengthSq);

    // All-ones where length != 0. The comparison must be floating-point so -0.0 is detected.
    const XMVECTOR nonZeroMask = _mm_cmpneq_ps(_mm_setzero_ps(), length);
    // All-ones where the squared length is not g_XMInfinity.
    const XMVECTOR notInfiniteMask = _mm_cmpneq_ps(lengthSq, g_XMInfinity);

    // Divide to perform the normalization, then zero the result for zero-length input.
    XMVECTOR normalized = _mm_div_ps(V, length);
    normalized = _mm_and_ps(normalized, nonZeroMask);

    // Select QNaN or the normalized value based on the infinite-length mask.
    const XMVECTOR nanLanes = _mm_andnot_ps(notInfiniteMask, g_XMQNaN);
    const XMVECTOR valueLanes = _mm_and_ps(normalized, notInfiniteMask);
    return _mm_or_ps(nanLanes, valueLanes);
}
//-------------------------------------------------------------------------------------
// Vector3
//-------------------------------------------------------------------------------------
// SSE4.1 3D dot product. Mask 0x7f: multiply lanes 0-2 only (high nibble 0x7) and
// write the sum to all four lanes (low nibble 0xf).
inline XMVECTOR XM_CALLCONV XMVector3Dot( FXMVECTOR V1, FXMVECTOR V2 )
{
    const XMVECTOR dot = _mm_dp_ps( V1, V2, 0x7f );
    return dot;
}

// Squared 3D length: the SSE4.1 3D dot product of V with itself.
inline XMVECTOR XM_CALLCONV XMVector3LengthSq( FXMVECTOR V )
{
    const XMVECTOR lengthSq = SSE4::XMVector3Dot(V, V);
    return lengthSq;
}

// Estimated 1/length: _mm_rsqrt_ps applied to the squared 3D length.
inline XMVECTOR XM_CALLCONV XMVector3ReciprocalLengthEst( FXMVECTOR V )
{
    return _mm_rsqrt_ps( _mm_dp_ps( V, V, 0x7f ) );
}

// 1/length computed with _mm_sqrt_ps followed by a divide of g_XMOne by the length.
inline XMVECTOR XM_CALLCONV XMVector3ReciprocalLength( FXMVECTOR V )
{
    const XMVECTOR length = _mm_sqrt_ps( _mm_dp_ps( V, V, 0x7f ) );
    return _mm_div_ps( g_XMOne, length );
}

// 3D length "estimate"; this implementation uses the same _mm_sqrt_ps as XMVector3Length.
inline XMVECTOR XM_CALLCONV XMVector3LengthEst( FXMVECTOR V )
{
    return _mm_sqrt_ps( _mm_dp_ps( V, V, 0x7f ) );
}

// 3D length: square root of the squared 3D length, in every lane.
inline XMVECTOR XM_CALLCONV XMVector3Length( FXMVECTOR V )
{
    return _mm_sqrt_ps( _mm_dp_ps( V, V, 0x7f ) );
}

// Estimated normalization: V multiplied by the _mm_rsqrt_ps estimate of 1/length.
// No zero-length or infinite-length handling is performed here.
inline XMVECTOR XM_CALLCONV XMVector3NormalizeEst( FXMVECTOR V )
{
    const XMVECTOR invLength = _mm_rsqrt_ps( _mm_dp_ps( V, V, 0x7f ) );
    return _mm_mul_ps(invLength, V);
}
// Normalizes V by its 3D (x, y, z) length using a true divide.
// Special cases, as implemented below:
//   - length compares equal to zero (including -0.0): every lane of the result is zero;
//   - squared length compares equal to g_XMInfinity: every lane of the result is g_XMQNaN.
inline XMVECTOR XM_CALLCONV XMVector3Normalize( FXMVECTOR V )
{
    const XMVECTOR lengthSq = _mm_dp_ps( V, V, 0x7f );
    const XMVECTOR length = _mm_sqrt_ps(lengthSq);

    // All-ones where length != 0. The comparison must be floating-point so -0.0 is detected.
    const XMVECTOR nonZeroMask = _mm_cmpneq_ps(_mm_setzero_ps(), length);
    // All-ones where the squared length is not g_XMInfinity.
    const XMVECTOR notInfiniteMask = _mm_cmpneq_ps(lengthSq, g_XMInfinity);

    // Divide to perform the normalization, then zero the result for zero-length input.
    XMVECTOR normalized = _mm_div_ps(V, length);
    normalized = _mm_and_ps(normalized, nonZeroMask);

    // Select QNaN or the normalized value based on the infinite-length mask.
    const XMVECTOR nanLanes = _mm_andnot_ps(notInfiniteMask, g_XMQNaN);
    const XMVECTOR valueLanes = _mm_and_ps(normalized, notInfiniteMask);
    return _mm_or_ps(nanLanes, valueLanes);
}
//-------------------------------------------------------------------------------------
// Vector4
//-------------------------------------------------------------------------------------
// SSE4.1 4D dot product. Mask 0xff: multiply all four lanes (high nibble 0xf) and
// write the sum to all four lanes (low nibble 0xf).
inline XMVECTOR XM_CALLCONV XMVector4Dot( FXMVECTOR V1, FXMVECTOR V2 )
{
    const XMVECTOR dot = _mm_dp_ps( V1, V2, 0xff );
    return dot;
}

// Squared 4D length: the SSE4.1 4D dot product of V with itself.
inline XMVECTOR XM_CALLCONV XMVector4LengthSq( FXMVECTOR V )
{
    const XMVECTOR lengthSq = SSE4::XMVector4Dot(V, V);
    return lengthSq;
}

// Estimated 1/length: _mm_rsqrt_ps applied to the squared 4D length.
inline XMVECTOR XM_CALLCONV XMVector4ReciprocalLengthEst( FXMVECTOR V )
{
    return _mm_rsqrt_ps( _mm_dp_ps( V, V, 0xff ) );
}

// 1/length computed with _mm_sqrt_ps followed by a divide of g_XMOne by the length.
inline XMVECTOR XM_CALLCONV XMVector4ReciprocalLength( FXMVECTOR V )
{
    const XMVECTOR length = _mm_sqrt_ps( _mm_dp_ps( V, V, 0xff ) );
    return _mm_div_ps( g_XMOne, length );
}

// 4D length "estimate"; this implementation uses the same _mm_sqrt_ps as XMVector4Length.
inline XMVECTOR XM_CALLCONV XMVector4LengthEst( FXMVECTOR V )
{
    return _mm_sqrt_ps( _mm_dp_ps( V, V, 0xff ) );
}

// 4D length: square root of the squared 4D length, in every lane.
inline XMVECTOR XM_CALLCONV XMVector4Length( FXMVECTOR V )
{
    return _mm_sqrt_ps( _mm_dp_ps( V, V, 0xff ) );
}

// Estimated normalization: V multiplied by the _mm_rsqrt_ps estimate of 1/length.
// No zero-length or infinite-length handling is performed here.
inline XMVECTOR XM_CALLCONV XMVector4NormalizeEst( FXMVECTOR V )
{
    const XMVECTOR invLength = _mm_rsqrt_ps( _mm_dp_ps( V, V, 0xff ) );
    return _mm_mul_ps(invLength, V);
}
// Normalizes V by its 4D length using a true divide.
// Special cases, as implemented below:
//   - length compares equal to zero (including -0.0): every lane of the result is zero;
//   - squared length compares equal to g_XMInfinity: every lane of the result is g_XMQNaN.
inline XMVECTOR XM_CALLCONV XMVector4Normalize( FXMVECTOR V )
{
    const XMVECTOR lengthSq = _mm_dp_ps( V, V, 0xff );
    const XMVECTOR length = _mm_sqrt_ps(lengthSq);

    // All-ones where length != 0. The comparison must be floating-point so -0.0 is detected.
    const XMVECTOR nonZeroMask = _mm_cmpneq_ps(_mm_setzero_ps(), length);
    // All-ones where the squared length is not g_XMInfinity.
    const XMVECTOR notInfiniteMask = _mm_cmpneq_ps(lengthSq, g_XMInfinity);

    // Divide to perform the normalization, then zero the result for zero-length input.
    XMVECTOR normalized = _mm_div_ps(V, length);
    normalized = _mm_and_ps(normalized, nonZeroMask);

    // Select QNaN or the normalized value based on the infinite-length mask.
    const XMVECTOR nanLanes = _mm_andnot_ps(notInfiniteMask, g_XMQNaN);
    const XMVECTOR valueLanes = _mm_and_ps(normalized, notInfiniteMask);
    return _mm_or_ps(nanLanes, valueLanes);
}
//-------------------------------------------------------------------------------------
// Plane
//-------------------------------------------------------------------------------------
// Estimated plane normalization: all four components of P are multiplied by the
// _mm_rsqrt_ps estimate of 1/length, where the length is taken over lanes 0-2 only
// (dot-product mask 0x7f).
inline XMVECTOR XM_CALLCONV XMPlaneNormalizeEst( FXMVECTOR P )
{
    const XMVECTOR invNormalLength = _mm_rsqrt_ps( _mm_dp_ps( P, P, 0x7f ) );
    return _mm_mul_ps(invNormalLength, P);
}
// Plane normalization: all four components of P are divided by the length taken over
// lanes 0-2 only (dot-product mask 0x7f).
// If the squared length compares equal to g_XMInfinity, every lane of the result is zero.
// Unlike the XMVector*Normalize functions, there is no zero-length guard here.
inline XMVECTOR XM_CALLCONV XMPlaneNormalize( FXMVECTOR P )
{
    const XMVECTOR normalLengthSq = _mm_dp_ps( P, P, 0x7f );
    const XMVECTOR normalLength = _mm_sqrt_ps(normalLengthSq);

    // All-ones where the squared length is not g_XMInfinity.
    const XMVECTOR notInfiniteMask = _mm_cmpneq_ps(normalLengthSq, g_XMInfinity);

    // Divide to perform the normalization, then zero the result for infinite-length input.
    const XMVECTOR scaled = _mm_div_ps(P, normalLength);
    return _mm_and_ps(scaled, notInfiniteMask);
}
} // namespace SSE4
} // namespace DirectX

208
vendor/directxmath-3.19.0/HISTORY.md vendored Normal file
View File

@ -0,0 +1,208 @@
# DirectXMath
https://github.com/Microsoft/DirectXMath
Release available for download on [GitHub](https://github.com/microsoft/DirectXMath/releases)
## Release History
### February 2024 (3.19)
* Fix to address MinGW issue with ``__cpuid`` in cpuid.h vs. intrin.h
* Additional updates for clang/LLVM and GNUC
* Minor comment updates
### December 2023 (3.18b)
* Hot-fix to address ``-Wunsafe-buffer-usage`` warnings from clang v16
* Hot-fix to address MinGW issue with ``__cpuid`` in cpuid.h vs. intrin.h
* CMake project updates including pkg-config file generation
### December 2022 (3.18)
* C++20 spaceship operators for XMFLOAT2, XMFLOAT3, etc. when building with ``/std:c++20 /Zc:_cplusplus``
* Improved conformance for ARM64 when using `/Zc:arm64-aliased-neon-types-`
* Minor code review
* CMake project updated to require 3.20 or later
* Added Azure Dev Ops Pipeline YAML files
### May 2022 (3.17b)
* Hot-fix to address ``-Wreserved-identifier`` warnings with clang v13
* C++20 spaceship operators for XMFLOAT2, XMFLOAT3, etc. when building with ``/std:c++20 /Zc:_cplusplus``
* Minor CMake project update
### January 2022 (3.17)
* Added ColorsLinear namespace to DirectXColors.h with linear versions of .NET colors
* Optimized the ``XMMatrixRotationRollPitchYaw(FromVector)`` functions
* Fixed overread problem for 16bpp GPU types Load functions:
* ``XMUNIBBLE4``, ``XMU555``, ``XMU565``, ``XMBYTEN2``, ``XMBYTE2``, ``XMUBYTEN2``, ``XMUBYTE2``
* ``XM_CACHE_LINE_SIZE`` updated for ARM/ARM64 targets to 128 bytes
* A few comments added to improve IntelliSense experience
* Conformance improvements for GNU compiler
* Minor code cleanup
### January 2021 (3.16b)
* Hot-fixes to resolve build breaks for clang/LLVM and GCC on ARM64
* ``XM_ALIGNED_DATA`` and ``XM_ALIGNED_STRUCT`` macros updated to use C++17 ``alignas`` when available
### December 2020 (3.16)
* Added ``XMVectorLog10`` / ``XMVectorExp10``
* Added ``XMColorRGBToYUV_UHD`` / ``XMColorYUVToRGB_UHD`` for Rec. 2020 YUV
* Added optional ``rhcoords`` parameter for BoundingFrustum ``CreateFromMatrix``
* Added use of Intel&reg; Short Vector Math Library (SVML) supported by VS 2019
* Opt-in with ``_XM_SVML_INTRINSICS_``; opt-out with ``_XM_DISABLE_INTEL_SVML_``
* Fixed denorm handling for ``XMConvertFloatToHalf``
* Fixed flush (too small for denorm) handling for ``XMStoreFloat3PK``
* Fixed clamping bug in ``XMStoreByteN4``
* Cleaned up ARM-NEON intrinsics type issues for improved portability on GNUC
* Fixed ``GXMVECTOR`` for x86 ``__vectorcall``
* Code review
### April 2020 (3.15)
* Added ``XMMatrixVectorTensorProduct`` for creating a matrix from two vectors
* Use of m256 registers and FMA3 with ``/arch:AVX2`` for stream and some matrix functions
* Optimized load/stores for SSE2 float2 & float3 functions
* Optimized some instruction choices for better AMD CPU support
* Improved conformance for clang/LLVM, GCC, and MinGW compilers
* Code review (``constexpr`` / ``noexcept`` usage)
* Retired VS 2015 support
### August 2019 (3.14)
* Added float control around IsNan functions to resolve issue with VS 2019 with ``/fp:fast``
* XMVerifyCPUSupport updated for clang/LLVM cpuid implementation on x86/x64
* Added support for clang/LLVM built-in platform defines as well as the MSVC ones
* Cleaned up ARM-NEON intrinsics type issues for improved portability
* Removed unneeded malloc.h include in DirectXMath.h
* Whitespace cleanup
### July 2018 (3.13)
* ``XMFLOAT3X4``, ``XMFLOAT3X4A``, and associated Load/Store functions
* Move/copy constructors and assignment operators for C++ types
* Minor fix for XMVectorClamp behavior with NaN
* Fixed compilation warnings with VS 2017 (15.7 update), Intel C++ 18.0 compiler, and clang 6
* Retired VS 2013 support
* Minor code cleanup
### February 2018 (3.12)
* ARM64 use of fused multiply-accumulate intrinsics
* Conformance fix for XMConvertFloatToHalf
* Minor code cleanup
### June 2017 (3.11)
* AVX optimization of XMMatrixMultiply and XMMatrixMultiplyTranspose
* AVX2 optimization for XMVectorSplatX
* FMA3 optimization of XMVectorMultiplyAdd and XMVectorNegativeMultiplySubtract (implied by /arch:AVX2)
* Conformance fixes to support compilation with Clang 3.7
### January 2017 (3.10)
* Added XMVectorSum for horizontal adds
* ARMv8 intrinsics use for ARM64 platform (division, rounding, half-precision conversion)
* Added SSE3 codepaths using opt-in ``_XM_SSE3_INTRINSICS_``
* XMVectorRound fix for no-intrinsics to match round to nearest (even)
* XMStoreFloat3SE fix when max channel isn't a perfect power of 2
* constexpr conformance fix and workaround for compiler bug in VS 2015 RTM
* Remove support for VS 2012 compilers
* Remove ``__vector4i`` deprecated type
### June 2016 (3.09)
* Includes support for additional optimizations when built with /arch:AVX or /arch:AVX2
* Added use of constexpr for type constructors, XMConvertToRadians, and XMConvertToDegrees
* Marked ``__vector4i``, ``XMXDEC4``, ``XMDECN4``, ``XMDEC4``, and associated Load & Store functions as deprecated.
+ These are vestiges of Xbox 360 support and will be removed in a future release
* Renamed parameter in XMMatrixPerspectiveFov* to reduce user confusion when relying on IntelliSense
* XMU565, XMUNIBBLE4 constructors take uint8_t instead of int8_t
### May 2016
* DirectXMath 3.08 released under the MIT license
### November 2015 (3.08)
* Added use of ``_mm_sfence`` for Stream methods
* Fixed bug with non-uniform scaling transforms for BoundingOrientedBox
* Added asserts for Near/FarZ in XMMatrix* methods
* Added use of ``=default`` for PODs with VS 2013/2015
* Additional SSE and ARM-NEON optimizations for PackedVector functions
### April 2015 (3.07)
* Fix customer reported bugs in BoundingBox methods
* Fix customer reported bug in XMStoreFloat3SE
* Fix customer reported bug in XMVectorATan2, XMVectorATan2Est
* Fix customer reported bug in XMVectorRound
### October 2013 (3.06)
* Fixed load/store of XMFLOAT3SE to properly match the ``DXGI_FORMAT_R9G9B9E5_SHAREDEXP``
* Added ``XMLoadUDecN4_XR`` and ``XMStoreUDecN4_XR`` to match ``DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM``
* Added ``XMColorRGBToSRGB`` and ``XMColorSRGBToRGB`` to convert linear RGB <-> sRGB
### July 2013 (3.05)
* Use x86/x64 ``__vectorcall`` calling-convention when available (``XM_CALLCONV``, ``HXMVECTOR``, ``FXMMATRIX`` introduced)
* Fixed bug with XMVectorFloor and XMVectorCeiling when given whole odd numbers (i.e. 105.0)
* Improved XMVectorRound algorithm
* ARM-NEON optimizations for XMVectorExp2, XMVectorLog2, XMVectorExpE, and XMVectorLogE
* ARM-NEON code paths use multiply-by-scalar intrinsics when supported
* Additional optimizations for ARM-NEON Stream functions
* Fixed potential warning C4723 using ``operator/`` or ``operator/=``
### March 2013 (3.04)
* ``XMVectorExp2``, ``XMVectorLog2``, ``XMVectorExpE``, and ``XMVectorLogE`` functions added to provide base-e support in addition to the existing base-2 support
* ``XMVectorExp`` and ``XMVectorLog`` are now aliases for XMVectorExp2 and XMVectorLog2
* Additional optimizations for Stream functions
* XMVector3Cross now ensures w component is zero on ARM
* XMConvertHalfToFloat and XMConvertFloatToHalf now use IEEE 754 standard float16 behavior for INF/QNAN
* Updated matrix version Transform for BoundingOrientedBox and BoundingFrustum to handle scaling
### March 2012 (3.03)
* *breaking change* Removed union members from XMMATRIX type to make it a fully 'opaque' type
* Marked single-parameter C++ constructors for XMFLOAT2, XMFLOAT2A, XMFLOAT3, XMFLOAT3A, XMFLOAT4, and XMFLOAT4A explicit
### February 2012 (3.02)
* ARM-NEON intrinsics (selected by default for the ARM platform)
* Reworked XMVectorPermute, change of ``XM_PERMUTE_`` defines, removal of XMVectorPermuteControl
* Addition of ``XM_SWIZZLE_`` defines
* Optimizations for transcendental functions
* Template forms for permute, swizzle, shift-left, rotate-left, rotate-right, and insert
* Removal of deprecated types and functions
+ ``XM_CACHE_LINE_SIZE`` define, XMVectorExpEst, XMVectorLogEst, XMVectorPowEst, XMVectorSinHEst, XMVectorCosHEst, XMVectorTanHEst, XMVector2InBoundsR, XMVector3InBoundsR, XMVector4InBoundsR
* Removed ``XM_STRICT_VECTOR4``; XMVECTOR in NO-INTRINSICS always defined without .x, .y, .z, .w, .v, or .u
* Additional bounding types
* SAL fixes and improvements
### September 2011 (3.00)
* Renamed and reorganized the headers
* Introduced C++ namespaces
* Removed the Xbox 360-specific GPU types
+ HENDN3, XMHEND3, XMUHENDN3, XMUHEND3, XMDHENN3, XMDHEN3, XMUDHENN3, XMUDHEN3, XMXICON4, XMXICO4, XMICON4, XMICO4, XMUICON4, XMUICO4
### July 2012 (XNAMath 2.05)
* Template forms have been added for `XMVectorPermute`, `XMVectorSwizzle`, `XMVectorShiftLeft`, `XMVectorRotateLeft`, `XMVectorRotateRight`, and `XMVectorInsert`
* The `XM_STRICT_XMMATRIX` compilation define has been added for opaque `XMMATRIX`.
* Stream stride and count arguments have been changed to `size_t`
* The ``pDeterminant`` parameter of `XMMatrixInverse` is now optional
* Additional operator= overloads for `XMBYTEN4`, `XMBYTE4`, `XMUBYTEN4`, and `XMUBYTE4` types are now available
### February 2011 (XNAMath 2.04)
* Addition of new data types and associated load-store functions:
+ `XMBYTEN2, XMBYTE2, XMUBYTEN2, XMUBYTE2`
+ `XMLoadByteN2, XMLoadByte2, XMLoadUByteN2, XMLoadUByte2`
+ `XMStoreByteN2, XMStoreByte2, XMStoreUByteN2, XMStoreUByte2`
+ `XMINT2, XMUINT2, XMINT3, XMUINT3, XMINT4, XMUINT4`
+ `XMLoadSInt2, XMLoadUInt2, XMLoadSInt3, XMLoadUInt3, XMLoadSInt4, XMLoadUInt4`
+ `XMStoreSInt2, XMStoreUInt2, XMStoreSInt3, XMStoreUInt3, XMStoreSInt4, XMStoreUInt4`
* Marked most single-parameter C++ constructors with `explicit` keyword
* Corrected range issues with SSE implementations of `XMVectorFloor` and `XMVectorCeiling`
### June 2010 (XNAMath 2.03)
* Addition of ``XMVectorDivide`` to optimize SSE2 vector division operations
* Unified handling of floating-point specials between the Windows SSE2 and no-intrinsics implementations
* Use of Visual Studio style SAL annotations
* Modifications to the C++ declarations for `XMFLOAT2A/3A/4A/4X3A/4X4A` to better support these types in C++ templates
### February 2010 (XNAMath 2.02)
* Fixes to `XMStoreColor`, `XMQuaternionRotationMatrix`, `XMVectorATan2`, and `XMVectorATan2Est`
### August 2009 (XNAMath 2.01)
* Adds ``XM_STRICT_VECTOR4``. This opt-in directive disallows the usage of XboxMath-like member accessors such as .x, .y, and .z. This makes it easier to write portable XNA Math code.
* Added conversion support for the following Windows graphics formats:
+ 16-bit color formats (565, 555X, 5551)
+ 4-bits per channel color formats (4444)
+ Unique Direct3D 10/11 formats (``DXGI_FORMAT_R9G9B9E5_SHAREDEXP`` and ``DXGI_FORMAT_R11G11B10_FLOAT``)
### March 2009 (XNAMath 2.00)
* Initial release (based on the Xbox 360 Xbox math library)

View File

@ -0,0 +1,370 @@
//-------------------------------------------------------------------------------------
// DirectXCollision.h -- C++ Collision Math library
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#pragma once
#include "DirectXMath.h"
namespace DirectX
{
// Result code returned by the Contains / ContainedBy methods of the bounding volumes
// declared in this header.
// NOTE(review): the precise meaning of each value is defined by the method implementations,
// which are not part of this header section; the names suggest "no overlap", "partial
// overlap" and "fully contained" respectively -- confirm against the implementation.
enum ContainmentType
{
DISJOINT = 0,
INTERSECTS = 1,
CONTAINS = 2
};
// Result code returned by the plane overloads of Intersects (those taking a single
// FXMVECTOR Plane argument) on the bounding volumes declared in this header.
// NOTE(review): the names suggest the volume lies in front of, straddles, or lies behind
// the plane respectively -- confirm against the implementation.
enum PlaneIntersectionType
{
FRONT = 0,
INTERSECTING = 1,
BACK = 2
};
struct BoundingBox;
struct BoundingOrientedBox;
struct BoundingFrustum;
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4324 4820)
// C4324: alignment padding warnings
// C4820: Off by default noise
#endif
//-------------------------------------------------------------------------------------
// Bounding sphere
//-------------------------------------------------------------------------------------
// Sphere bounding volume described by a center point and a radius.
// Only declarations appear here; the comments describe the declared interface.
struct BoundingSphere
{
XMFLOAT3 Center; // Center of the sphere.
float Radius; // Radius of the sphere.
// Creators
// The default constructor yields a sphere centered at (0, 0, 0) with radius 1.
BoundingSphere() noexcept : Center(0, 0, 0), Radius(1.f) {}
BoundingSphere(const BoundingSphere&) = default;
BoundingSphere& operator=(const BoundingSphere&) = default;
BoundingSphere(BoundingSphere&&) = default;
BoundingSphere& operator=(BoundingSphere&&) = default;
// Constructs a sphere from an explicit center and radius.
constexpr BoundingSphere(_In_ const XMFLOAT3& center, _In_ float radius) noexcept
: Center(center), Radius(radius) {}
// Methods
// Transform the sphere, either by a matrix M or by a Scale / Rotation / Translation
// triple; the result is written to Out.
void XM_CALLCONV Transform(_Out_ BoundingSphere& Out, _In_ FXMMATRIX M) const noexcept;
void XM_CALLCONV Transform(_Out_ BoundingSphere& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const noexcept;
// Containment queries against a point, a triangle (V0, V1, V2) and the other bounding volumes.
ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const noexcept;
ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
ContainmentType Contains(_In_ const BoundingSphere& sh) const noexcept;
ContainmentType Contains(_In_ const BoundingBox& box) const noexcept;
ContainmentType Contains(_In_ const BoundingOrientedBox& box) const noexcept;
ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept;
// Intersection tests against the other bounding volumes.
bool Intersects(_In_ const BoundingSphere& sh) const noexcept;
bool Intersects(_In_ const BoundingBox& box) const noexcept;
bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept;
bool Intersects(_In_ const BoundingFrustum& fr) const noexcept;
// Triangle-sphere test
bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
// Plane-sphere test
PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR Plane) const noexcept;
// Ray-sphere test; Dist is an output parameter
bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const noexcept;
// Test sphere against six planes (see BoundingFrustum::GetPlanes)
ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
_In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept;
// Static methods
// Each Create* function writes the sphere it builds to Out.
static void CreateMerged(_Out_ BoundingSphere& Out, _In_ const BoundingSphere& S1, _In_ const BoundingSphere& S2) noexcept;
static void CreateFromBoundingBox(_Out_ BoundingSphere& Out, _In_ const BoundingBox& box) noexcept;
static void CreateFromBoundingBox(_Out_ BoundingSphere& Out, _In_ const BoundingOrientedBox& box) noexcept;
// pPoints addresses Count points spaced Stride bytes apart (see the SAL byte-count annotation).
static void CreateFromPoints(_Out_ BoundingSphere& Out, _In_ size_t Count,
_In_reads_bytes_(sizeof(XMFLOAT3) + Stride * (Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride) noexcept;
static void CreateFromFrustum(_Out_ BoundingSphere& Out, _In_ const BoundingFrustum& fr) noexcept;
};
//-------------------------------------------------------------------------------------
// Axis-aligned bounding box
//-------------------------------------------------------------------------------------
// Axis-aligned box bounding volume described by a center point and per-axis extents.
// Only declarations appear here; the comments describe the declared interface.
struct BoundingBox
{
// Number of corners written by GetCorners.
static constexpr size_t CORNER_COUNT = 8;
XMFLOAT3 Center; // Center of the box.
XMFLOAT3 Extents; // Distance from the center to each side.
// Creators
// The default constructor yields a box centered at (0, 0, 0) with extents (1, 1, 1).
BoundingBox() noexcept : Center(0, 0, 0), Extents(1.f, 1.f, 1.f) {}
BoundingBox(const BoundingBox&) = default;
BoundingBox& operator=(const BoundingBox&) = default;
BoundingBox(BoundingBox&&) = default;
BoundingBox& operator=(BoundingBox&&) = default;
// Constructs a box from an explicit center and extents.
constexpr BoundingBox(_In_ const XMFLOAT3& center, _In_ const XMFLOAT3& extents) noexcept
: Center(center), Extents(extents) {}
// Methods
// Transform the box, either by a matrix M or by a Scale / Rotation / Translation
// triple; the result is written to Out.
void XM_CALLCONV Transform(_Out_ BoundingBox& Out, _In_ FXMMATRIX M) const noexcept;
void XM_CALLCONV Transform(_Out_ BoundingBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const noexcept;
// Gets the 8 corners of the box; Corners must have room for 8 elements.
void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const noexcept;
// Containment queries against a point, a triangle (V0, V1, V2) and the other bounding volumes.
ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const noexcept;
ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
ContainmentType Contains(_In_ const BoundingSphere& sh) const noexcept;
ContainmentType Contains(_In_ const BoundingBox& box) const noexcept;
ContainmentType Contains(_In_ const BoundingOrientedBox& box) const noexcept;
ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept;
// Intersection tests against the other bounding volumes.
bool Intersects(_In_ const BoundingSphere& sh) const noexcept;
bool Intersects(_In_ const BoundingBox& box) const noexcept;
bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept;
bool Intersects(_In_ const BoundingFrustum& fr) const noexcept;
// Triangle-Box test
bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
// Plane-box test
PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR Plane) const noexcept;
// Ray-Box test; Dist is an output parameter
bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const noexcept;
// Test box against six planes (see BoundingFrustum::GetPlanes)
ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
_In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept;
// Static methods
// Each Create* function writes the box it builds to Out.
static void CreateMerged(_Out_ BoundingBox& Out, _In_ const BoundingBox& b1, _In_ const BoundingBox& b2) noexcept;
static void CreateFromSphere(_Out_ BoundingBox& Out, _In_ const BoundingSphere& sh) noexcept;
static void XM_CALLCONV CreateFromPoints(_Out_ BoundingBox& Out, _In_ FXMVECTOR pt1, _In_ FXMVECTOR pt2) noexcept;
// pPoints addresses Count points spaced Stride bytes apart (see the SAL byte-count annotation).
static void CreateFromPoints(_Out_ BoundingBox& Out, _In_ size_t Count,
_In_reads_bytes_(sizeof(XMFLOAT3) + Stride * (Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride) noexcept;
};
//-------------------------------------------------------------------------------------
// Oriented bounding box
//
// A box described by a center point, the distance from the center to each side, and
// an orientation quaternion (see the data-member comments below). Only the default
// constructor and the member-wise constructor are defined in this declaration; every
// other member function is only declared here. Their definitions are presumably in
// DirectXCollision.inl, which this header includes further down -- confirm there
// before relying on exact semantics.
//
// Comment convention used in this header: a comment line that describes a member
// function FOLLOWS the declaration (or group of overloads) it documents.
//
// Parameter typing: functions marked XM_CALLCONV take their vector arguments as
// FXMVECTOR for the first three, GXMVECTOR for the fourth and HXMVECTOR for the fifth
// and sixth (see ContainedBy below). Keep that positional pattern when editing a
// signature; see the DirectXMath calling-convention documentation for the rationale.
//-------------------------------------------------------------------------------------
struct BoundingOrientedBox
{
static constexpr size_t CORNER_COUNT = 8;
// Corner count of the box; GetCorners' output array is annotated _Out_writes_(8).
XMFLOAT3 Center; // Center of the box.
XMFLOAT3 Extents; // Distance from the center to each side.
XMFLOAT4 Orientation; // Unit quaternion representing rotation (box -> world).
// Creators
BoundingOrientedBox() noexcept : Center(0, 0, 0), Extents(1.f, 1.f, 1.f), Orientation(0, 0, 0, 1.f) {}
// Default box: centered at (0, 0, 0), extents of 1 on every axis, orientation
// (0, 0, 0, 1) -- the identity rotation assuming XMFLOAT4 is ordered (x, y, z, w);
// TODO confirm against the XMFLOAT4 declaration.
BoundingOrientedBox(const BoundingOrientedBox&) = default;
BoundingOrientedBox& operator=(const BoundingOrientedBox&) = default;
BoundingOrientedBox(BoundingOrientedBox&&) = default;
BoundingOrientedBox& operator=(BoundingOrientedBox&&) = default;
// Copy and move construction/assignment are explicitly defaulted.
constexpr BoundingOrientedBox(_In_ const XMFLOAT3& center, _In_ const XMFLOAT3& extents, _In_ const XMFLOAT4& orientation) noexcept
: Center(center), Extents(extents), Orientation(orientation) {}
// Member-wise constructor: stores the three arguments exactly as given. Nothing here
// normalizes or validates 'orientation', so the caller is responsible for passing the
// unit quaternion the Orientation member is documented to hold.
// Methods
void XM_CALLCONV Transform(_Out_ BoundingOrientedBox& Out, _In_ FXMMATRIX M) const noexcept;
void XM_CALLCONV Transform(_Out_ BoundingOrientedBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const noexcept;
// Both Transform overloads are const and deliver their result through the _Out_
// parameter 'Out'; the transform is given either as a matrix M or as separate
// Scale / Rotation / Translation values (Rotation is presumably a quaternion, to
// match the Orientation member -- confirm in the implementation).
void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const noexcept;
// Gets the 8 corners of the box
ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const noexcept;
ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
ContainmentType Contains(_In_ const BoundingSphere& sh) const noexcept;
ContainmentType Contains(_In_ const BoundingBox& box) const noexcept;
ContainmentType Contains(_In_ const BoundingOrientedBox& box) const noexcept;
ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept;
// Containment queries returning a ContainmentType, in declaration order: a point,
// three points V0/V1/V2 (presumably a triangle, as in the Intersects overload below),
// a BoundingSphere, a BoundingBox, another BoundingOrientedBox and a BoundingFrustum.
bool Intersects(_In_ const BoundingSphere& sh) const noexcept;
bool Intersects(_In_ const BoundingBox& box) const noexcept;
bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept;
bool Intersects(_In_ const BoundingFrustum& fr) const noexcept;
// Boolean intersection queries against the other bounding-volume types.
bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
// Triangle-OrientedBox test
PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR Plane) const noexcept;
// Plane-OrientedBox test
bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const noexcept;
// Ray-OrientedBox test
// Dist is an _Out_ parameter; presumably the distance along Direction from Origin to
// the hit -- confirm in the implementation, including what it holds on a miss.
ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
_In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept;
// Test OrientedBox against six planes (see BoundingFrustum::GetPlanes)
// Static methods
static void CreateFromBoundingBox(_Out_ BoundingOrientedBox& Out, _In_ const BoundingBox& box) noexcept;
// Writes an oriented box derived from 'box' into Out.
static void CreateFromPoints(_Out_ BoundingOrientedBox& Out, _In_ size_t Count,
_In_reads_bytes_(sizeof(XMFLOAT3) + Stride * (Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride) noexcept;
// Writes into Out a box computed from Count points starting at pPoints; per the SAL
// annotation, consecutive points are Stride bytes apart. The annotated byte count
// uses (Count - 1) on a size_t, so Count is presumably required to be at least 1 --
// confirm in the implementation.
};
//-------------------------------------------------------------------------------------
// Bounding frustum
//
// A view frustum described by an origin, an orientation quaternion, four side-plane
// slopes and the near/far plane distances (see the data-member comments below). Only
// the default constructor and the member-wise constructor are defined in this
// declaration; every other member function is only declared here. Their definitions
// are presumably in DirectXCollision.inl, which this header includes further down --
// confirm there before relying on exact semantics.
//
// Comment convention used in this header: a comment line that describes a member
// function FOLLOWS the declaration (or group of overloads) it documents.
//-------------------------------------------------------------------------------------
struct BoundingFrustum
{
static constexpr size_t CORNER_COUNT = 8;
// Corner count of the frustum; GetCorners' output array is annotated _Out_writes_(8).
XMFLOAT3 Origin; // Origin of the frustum (and projection).
XMFLOAT4 Orientation; // Quaternion representing rotation.
float RightSlope; // Positive X (X/Z)
float LeftSlope; // Negative X
float TopSlope; // Positive Y (Y/Z)
float BottomSlope; // Negative Y
float Near, Far; // Z of the near plane and far plane.
// Creators
BoundingFrustum() noexcept :
Origin(0, 0, 0), Orientation(0, 0, 0, 1.f), RightSlope(1.f), LeftSlope(-1.f),
TopSlope(1.f), BottomSlope(-1.f), Near(0), Far(1.f) {}
// Default frustum: origin (0, 0, 0), orientation (0, 0, 0, 1), right/top slopes of +1,
// left/bottom slopes of -1, near plane at Z = 0 and far plane at Z = 1.
BoundingFrustum(const BoundingFrustum&) = default;
BoundingFrustum& operator=(const BoundingFrustum&) = default;
BoundingFrustum(BoundingFrustum&&) = default;
BoundingFrustum& operator=(BoundingFrustum&&) = default;
// Copy and move construction/assignment are explicitly defaulted.
constexpr BoundingFrustum(_In_ const XMFLOAT3& origin, _In_ const XMFLOAT4& orientation,
_In_ float rightSlope, _In_ float leftSlope, _In_ float topSlope, _In_ float bottomSlope,
_In_ float nearPlane, _In_ float farPlane) noexcept
: Origin(origin), Orientation(orientation),
RightSlope(rightSlope), LeftSlope(leftSlope), TopSlope(topSlope), BottomSlope(bottomSlope),
Near(nearPlane), Far(farPlane) {}
// Member-wise constructor: stores every argument exactly as given, with no validation
// or normalization. The last two parameters are named nearPlane/farPlane and
// initialize the Near/Far members.
BoundingFrustum(_In_ CXMMATRIX Projection, bool rhcoords = false) noexcept;
// Builds the frustum from a projection matrix (defined out of line). 'rhcoords'
// presumably selects a right-handed projection -- confirm in the implementation.
// NOTE(review): this constructor is not marked explicit, so with the defaulted second
// argument it also acts as an implicit conversion from the matrix argument type.
// Methods
void XM_CALLCONV Transform(_Out_ BoundingFrustum& Out, _In_ FXMMATRIX M) const noexcept;
void XM_CALLCONV Transform(_Out_ BoundingFrustum& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const noexcept;
// Both Transform overloads are const and deliver their result through the _Out_
// parameter 'Out'; the transform is given either as a matrix M or as separate
// Scale / Rotation / Translation values.
void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const noexcept;
// Gets the 8 corners of the frustum
ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const noexcept;
ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
ContainmentType Contains(_In_ const BoundingSphere& sp) const noexcept;
ContainmentType Contains(_In_ const BoundingBox& box) const noexcept;
ContainmentType Contains(_In_ const BoundingOrientedBox& box) const noexcept;
ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept;
// Frustum-Frustum test
// The Contains overloads above it take, in declaration order: a point, three points
// V0/V1/V2 (presumably a triangle, as in the Intersects overload below), a
// BoundingSphere, a BoundingBox and a BoundingOrientedBox.
bool Intersects(_In_ const BoundingSphere& sh) const noexcept;
bool Intersects(_In_ const BoundingBox& box) const noexcept;
bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept;
bool Intersects(_In_ const BoundingFrustum& fr) const noexcept;
// Boolean intersection queries against the other bounding-volume types.
bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
// Triangle-Frustum test
PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR Plane) const noexcept;
// Plane-Frustum test
bool XM_CALLCONV Intersects(_In_ FXMVECTOR rayOrigin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const noexcept;
// Ray-Frustum test
// The ray start is named rayOrigin here, which keeps it distinct from the Origin data
// member. Dist is an _Out_ parameter; presumably the distance along Direction to the
// hit -- confirm in the implementation.
ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
_In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept;
// Test frustum against six planes (see BoundingFrustum::GetPlanes)
void GetPlanes(_Out_opt_ XMVECTOR* NearPlane, _Out_opt_ XMVECTOR* FarPlane, _Out_opt_ XMVECTOR* RightPlane,
_Out_opt_ XMVECTOR* LeftPlane, _Out_opt_ XMVECTOR* TopPlane, _Out_opt_ XMVECTOR* BottomPlane) const noexcept;
// Create 6 Planes representation of Frustum
// Every output pointer is annotated _Out_opt_, i.e. optional; presumably a null
// pointer may be passed for any plane the caller does not need -- confirm in the
// implementation.
// Static methods
static void XM_CALLCONV CreateFromMatrix(_Out_ BoundingFrustum& Out, _In_ FXMMATRIX Projection, bool rhcoords = false) noexcept;
// Static counterpart of the projection-matrix constructor: takes the same Projection /
// rhcoords inputs but writes the resulting frustum into Out.
};
//-----------------------------------------------------------------------------
// Triangle intersection testing routines.
//
// Free functions (no state) that take triangles as three vertex vectors. They are
// only declared here; the definitions are presumably in DirectXCollision.inl, which
// this header includes further down.
//
// Comment convention used in this header: the comment line describing a function
// FOLLOWS its declaration.
//
// Vector parameters follow a positional typing pattern visible in the signatures
// below: FXMVECTOR for the first three, GXMVECTOR for the fourth, HXMVECTOR for the
// fifth and sixth, CXMVECTOR for any further ones. Keep that pattern when editing a
// signature; see the DirectXMath calling-convention documentation for the rationale.
//-----------------------------------------------------------------------------
namespace TriangleTests
{
bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _In_ FXMVECTOR V0, _In_ GXMVECTOR V1, _In_ HXMVECTOR V2, _Out_ float& Dist) noexcept;
// Ray-Triangle
// Ray given by Origin/Direction against triangle V0, V1, V2. Dist is an _Out_
// parameter; presumably the distance along the ray to the hit -- confirm in the
// implementation.
bool XM_CALLCONV Intersects(_In_ FXMVECTOR A0, _In_ FXMVECTOR A1, _In_ FXMVECTOR A2, _In_ GXMVECTOR B0, _In_ HXMVECTOR B1, _In_ HXMVECTOR B2) noexcept;
// Triangle-Triangle
// First triangle is A0, A1, A2; second triangle is B0, B1, B2.
PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2, _In_ GXMVECTOR Plane) noexcept;
// Plane-Triangle
ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2,
_In_ GXMVECTOR Plane0, _In_ HXMVECTOR Plane1, _In_ HXMVECTOR Plane2,
_In_ CXMVECTOR Plane3, _In_ CXMVECTOR Plane4, _In_ CXMVECTOR Plane5) noexcept;
// Test a triangle against six planes at once (see BoundingFrustum::GetPlanes)
}
// End of the MSVC warning scope covering the declarations above. This pop presumably
// pairs with a #pragma warning(push) earlier in this header -- confirm when editing
// either side so the push/pop nesting stays balanced.
#ifdef _MSC_VER
#pragma warning(pop)
#endif
/****************************************************************************
*
* Implementation
*
****************************************************************************/
// The inline implementation file is included below inside three compiler-specific
// diagnostic scopes (MSVC warnings, PREfast / analyze, clang). Each scope is pushed
// here and popped after the #include, in the reverse order.
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4068 4365 4616 6001)
// C4068/4616: ignore unknown pragmas
// C4365: Off by default noise
// C6001: False positives
#endif
// Static-analysis suppressions; the reason for each is the string in the pragma.
#ifdef _PREFAST_
#pragma prefast(push)
#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes")
#pragma prefast(disable : 26495, "Union initialization confuses /analyze")
#endif
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wfloat-equal"
#pragma clang diagnostic ignored "-Wunknown-warning-option"
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
// -Wunknown-warning-option is silenced too, presumably so that clang versions which
// do not recognize one of the options named here stay quiet about the pragma itself.
#endif
// Textual include of the inline definitions. It sits before the closing brace of
// namespace DirectX, so everything in the .inl is compiled inside that namespace.
#include "DirectXCollision.inl"
// Restore diagnostics in the reverse order of the pushes above.
#ifdef __clang__
#pragma clang diagnostic pop
#endif
#ifdef _PREFAST_
#pragma prefast(pop)
#endif
#ifdef _MSC_VER
#pragma warning(pop)
#endif
} // namespace DirectX

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,312 @@
//-------------------------------------------------------------------------------------
// DirectXColors.h -- C++ Color Math library
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#pragma once
#include "DirectXMath.h"
namespace DirectX
{
namespace Colors
{
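// Standard named colors as (Red, Green, Blue, Alpha) float vectors. The color channels
// are sRGB-encoded; alpha is 1 for every entry except Transparent, which is all zeros.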
XMGLOBALCONST XMVECTORF32 AliceBlue = { { { 0.941176534f, 0.972549081f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 AntiqueWhite = { { { 0.980392218f, 0.921568692f, 0.843137324f, 1.f } } };
// Same RGBA value as Cyan.
XMGLOBALCONST XMVECTORF32 Aqua = { { { 0.f, 1.f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Aquamarine = { { { 0.498039246f, 1.f, 0.831372619f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Azure = { { { 0.941176534f, 1.f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Beige = { { { 0.960784376f, 0.960784376f, 0.862745166f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Bisque = { { { 1.f, 0.894117713f, 0.768627524f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Black = { { { 0.f, 0.f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 BlanchedAlmond = { { { 1.f, 0.921568692f, 0.803921640f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Blue = { { { 0.f, 0.f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 BlueViolet = { { { 0.541176498f, 0.168627456f, 0.886274576f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Brown = { { { 0.647058845f, 0.164705887f, 0.164705887f, 1.f } } };
XMGLOBALCONST XMVECTORF32 BurlyWood = { { { 0.870588303f, 0.721568644f, 0.529411793f, 1.f } } };
XMGLOBALCONST XMVECTORF32 CadetBlue = { { { 0.372549027f, 0.619607866f, 0.627451003f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Chartreuse = { { { 0.498039246f, 1.f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Chocolate = { { { 0.823529482f, 0.411764741f, 0.117647067f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Coral = { { { 1.f, 0.498039246f, 0.313725501f, 1.f } } };
XMGLOBALCONST XMVECTORF32 CornflowerBlue = { { { 0.392156899f, 0.584313750f, 0.929411829f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Cornsilk = { { { 1.f, 0.972549081f, 0.862745166f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Crimson = { { { 0.862745166f, 0.078431375f, 0.235294133f, 1.f } } };
// Same RGBA value as Aqua.
XMGLOBALCONST XMVECTORF32 Cyan = { { { 0.f, 1.f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkBlue = { { { 0.f, 0.f, 0.545098066f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkCyan = { { { 0.f, 0.545098066f, 0.545098066f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkGoldenrod = { { { 0.721568644f, 0.525490224f, 0.043137256f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkGray = { { { 0.662745118f, 0.662745118f, 0.662745118f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkGreen = { { { 0.f, 0.392156899f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkKhaki = { { { 0.741176486f, 0.717647076f, 0.419607878f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkMagenta = { { { 0.545098066f, 0.f, 0.545098066f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkOliveGreen = { { { 0.333333343f, 0.419607878f, 0.184313729f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkOrange = { { { 1.f, 0.549019635f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkOrchid = { { { 0.600000024f, 0.196078449f, 0.800000072f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkRed = { { { 0.545098066f, 0.f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkSalmon = { { { 0.913725555f, 0.588235319f, 0.478431404f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkSeaGreen = { { { 0.560784340f, 0.737254918f, 0.545098066f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkSlateBlue = { { { 0.282352954f, 0.239215702f, 0.545098066f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkSlateGray = { { { 0.184313729f, 0.309803933f, 0.309803933f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkTurquoise = { { { 0.f, 0.807843208f, 0.819607913f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkViolet = { { { 0.580392182f, 0.f, 0.827451050f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DeepPink = { { { 1.f, 0.078431375f, 0.576470613f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DeepSkyBlue = { { { 0.f, 0.749019623f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DimGray = { { { 0.411764741f, 0.411764741f, 0.411764741f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DodgerBlue = { { { 0.117647067f, 0.564705908f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Firebrick = { { { 0.698039234f, 0.133333340f, 0.133333340f, 1.f } } };
XMGLOBALCONST XMVECTORF32 FloralWhite = { { { 1.f, 0.980392218f, 0.941176534f, 1.f } } };
XMGLOBALCONST XMVECTORF32 ForestGreen = { { { 0.133333340f, 0.545098066f, 0.133333340f, 1.f } } };
// Same RGBA value as Magenta.
XMGLOBALCONST XMVECTORF32 Fuchsia = { { { 1.f, 0.f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Gainsboro = { { { 0.862745166f, 0.862745166f, 0.862745166f, 1.f } } };
XMGLOBALCONST XMVECTORF32 GhostWhite = { { { 0.972549081f, 0.972549081f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Gold = { { { 1.f, 0.843137324f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Goldenrod = { { { 0.854902029f, 0.647058845f, 0.125490203f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Gray = { { { 0.501960814f, 0.501960814f, 0.501960814f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Green = { { { 0.f, 0.501960814f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 GreenYellow = { { { 0.678431392f, 1.f, 0.184313729f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Honeydew = { { { 0.941176534f, 1.f, 0.941176534f, 1.f } } };
XMGLOBALCONST XMVECTORF32 HotPink = { { { 1.f, 0.411764741f, 0.705882370f, 1.f } } };
XMGLOBALCONST XMVECTORF32 IndianRed = { { { 0.803921640f, 0.360784322f, 0.360784322f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Indigo = { { { 0.294117659f, 0.f, 0.509803951f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Ivory = { { { 1.f, 1.f, 0.941176534f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Khaki = { { { 0.941176534f, 0.901960850f, 0.549019635f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Lavender = { { { 0.901960850f, 0.901960850f, 0.980392218f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LavenderBlush = { { { 1.f, 0.941176534f, 0.960784376f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LawnGreen = { { { 0.486274540f, 0.988235354f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LemonChiffon = { { { 1.f, 0.980392218f, 0.803921640f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightBlue = { { { 0.678431392f, 0.847058892f, 0.901960850f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightCoral = { { { 0.941176534f, 0.501960814f, 0.501960814f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightCyan = { { { 0.878431439f, 1.f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightGoldenrodYellow = { { { 0.980392218f, 0.980392218f, 0.823529482f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightGray = { { { 0.827451050f, 0.827451050f, 0.827451050f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightGreen = { { { 0.564705908f, 0.933333397f, 0.564705908f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightPink = { { { 1.f, 0.713725507f, 0.756862819f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightSalmon = { { { 1.f, 0.627451003f, 0.478431404f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightSeaGreen = { { { 0.125490203f, 0.698039234f, 0.666666687f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightSkyBlue = { { { 0.529411793f, 0.807843208f, 0.980392218f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightSlateGray = { { { 0.466666698f, 0.533333361f, 0.600000024f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightSteelBlue = { { { 0.690196097f, 0.768627524f, 0.870588303f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightYellow = { { { 1.f, 1.f, 0.878431439f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Lime = { { { 0.f, 1.f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LimeGreen = { { { 0.196078449f, 0.803921640f, 0.196078449f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Linen = { { { 0.980392218f, 0.941176534f, 0.901960850f, 1.f } } };
// Same RGBA value as Fuchsia.
XMGLOBALCONST XMVECTORF32 Magenta = { { { 1.f, 0.f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Maroon = { { { 0.501960814f, 0.f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumAquamarine = { { { 0.400000036f, 0.803921640f, 0.666666687f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumBlue = { { { 0.f, 0.f, 0.803921640f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumOrchid = { { { 0.729411781f, 0.333333343f, 0.827451050f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumPurple = { { { 0.576470613f, 0.439215720f, 0.858823597f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumSeaGreen = { { { 0.235294133f, 0.701960802f, 0.443137288f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumSlateBlue = { { { 0.482352972f, 0.407843173f, 0.933333397f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumSpringGreen = { { { 0.f, 0.980392218f, 0.603921592f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumTurquoise = { { { 0.282352954f, 0.819607913f, 0.800000072f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumVioletRed = { { { 0.780392230f, 0.082352944f, 0.521568656f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MidnightBlue = { { { 0.098039225f, 0.098039225f, 0.439215720f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MintCream = { { { 0.960784376f, 1.f, 0.980392218f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MistyRose = { { { 1.f, 0.894117713f, 0.882353008f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Moccasin = { { { 1.f, 0.894117713f, 0.709803939f, 1.f } } };
XMGLOBALCONST XMVECTORF32 NavajoWhite = { { { 1.f, 0.870588303f, 0.678431392f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Navy = { { { 0.f, 0.f, 0.501960814f, 1.f } } };
XMGLOBALCONST XMVECTORF32 OldLace = { { { 0.992156923f, 0.960784376f, 0.901960850f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Olive = { { { 0.501960814f, 0.501960814f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 OliveDrab = { { { 0.419607878f, 0.556862772f, 0.137254909f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Orange = { { { 1.f, 0.647058845f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 OrangeRed = { { { 1.f, 0.270588249f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Orchid = { { { 0.854902029f, 0.439215720f, 0.839215755f, 1.f } } };
XMGLOBALCONST XMVECTORF32 PaleGoldenrod = { { { 0.933333397f, 0.909803987f, 0.666666687f, 1.f } } };
XMGLOBALCONST XMVECTORF32 PaleGreen = { { { 0.596078455f, 0.984313786f, 0.596078455f, 1.f } } };
XMGLOBALCONST XMVECTORF32 PaleTurquoise = { { { 0.686274529f, 0.933333397f, 0.933333397f, 1.f } } };
XMGLOBALCONST XMVECTORF32 PaleVioletRed = { { { 0.858823597f, 0.439215720f, 0.576470613f, 1.f } } };
XMGLOBALCONST XMVECTORF32 PapayaWhip = { { { 1.f, 0.937254965f, 0.835294187f, 1.f } } };
XMGLOBALCONST XMVECTORF32 PeachPuff = { { { 1.f, 0.854902029f, 0.725490212f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Peru = { { { 0.803921640f, 0.521568656f, 0.247058839f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Pink = { { { 1.f, 0.752941251f, 0.796078503f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Plum = { { { 0.866666734f, 0.627451003f, 0.866666734f, 1.f } } };
XMGLOBALCONST XMVECTORF32 PowderBlue = { { { 0.690196097f, 0.878431439f, 0.901960850f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Purple = { { { 0.501960814f, 0.f, 0.501960814f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Red = { { { 1.f, 0.f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 RosyBrown = { { { 0.737254918f, 0.560784340f, 0.560784340f, 1.f } } };
XMGLOBALCONST XMVECTORF32 RoyalBlue = { { { 0.254901975f, 0.411764741f, 0.882353008f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SaddleBrown = { { { 0.545098066f, 0.270588249f, 0.074509807f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Salmon = { { { 0.980392218f, 0.501960814f, 0.447058856f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SandyBrown = { { { 0.956862807f, 0.643137276f, 0.376470625f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SeaGreen = { { { 0.180392161f, 0.545098066f, 0.341176480f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SeaShell = { { { 1.f, 0.960784376f, 0.933333397f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Sienna = { { { 0.627451003f, 0.321568638f, 0.176470593f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Silver = { { { 0.752941251f, 0.752941251f, 0.752941251f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SkyBlue = { { { 0.529411793f, 0.807843208f, 0.921568692f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SlateBlue = { { { 0.415686309f, 0.352941185f, 0.803921640f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SlateGray = { { { 0.439215720f, 0.501960814f, 0.564705908f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Snow = { { { 1.f, 0.980392218f, 0.980392218f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SpringGreen = { { { 0.f, 1.f, 0.498039246f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SteelBlue = { { { 0.274509817f, 0.509803951f, 0.705882370f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Tan = { { { 0.823529482f, 0.705882370f, 0.549019635f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Teal = { { { 0.f, 0.501960814f, 0.501960814f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Thistle = { { { 0.847058892f, 0.749019623f, 0.847058892f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Tomato = { { { 1.f, 0.388235331f, 0.278431386f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Transparent = { { { 0.f, 0.f, 0.f, 0.f } } };
XMGLOBALCONST XMVECTORF32 Turquoise = { { { 0.250980407f, 0.878431439f, 0.815686345f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Violet = { { { 0.933333397f, 0.509803951f, 0.933333397f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Wheat = { { { 0.960784376f, 0.870588303f, 0.701960802f, 1.f } } };
XMGLOBALCONST XMVECTORF32 White = { { { 1.f, 1.f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 WhiteSmoke = { { { 0.960784376f, 0.960784376f, 0.960784376f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Yellow = { { { 1.f, 1.f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 YellowGreen = { { { 0.603921592f, 0.803921640f, 0.196078449f, 1.f } } };
} // namespace Colors
namespace ColorsLinear
{
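// The same named colors as DirectX::Colors, as (Red, Green, Blue, Alpha) float vectors
// with the color channels expressed in linear colorspace instead of sRGB; alpha values
// are the same as in DirectX::Colors.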
XMGLOBALCONST XMVECTORF32 AliceBlue = { { { 0.871367335f, 0.938685894f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 AntiqueWhite = { { { 0.955973506f, 0.830770075f, 0.679542601f, 1.f } } };
// Same RGBA value as Cyan.
XMGLOBALCONST XMVECTORF32 Aqua = { { { 0.f, 1.f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Aquamarine = { { { 0.212230787f, 1.f, 0.658374965f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Azure = { { { 0.871367335f, 1.f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Beige = { { { 0.913098991f, 0.913098991f, 0.715693772f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Bisque = { { { 1.f, 0.775822461f, 0.552011609f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Black = { { { 0.f, 0.f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 BlanchedAlmond = { { { 1.f, 0.830770075f, 0.610495746f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Blue = { { { 0.f, 0.f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 BlueViolet = { { { 0.254152179f, 0.024157630f, 0.760524750f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Brown = { { { 0.376262218f, 0.023153365f, 0.023153365f, 1.f } } };
XMGLOBALCONST XMVECTORF32 BurlyWood = { { { 0.730461001f, 0.479320228f, 0.242281199f, 1.f } } };
XMGLOBALCONST XMVECTORF32 CadetBlue = { { { 0.114435382f, 0.341914445f, 0.351532698f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Chartreuse = { { { 0.212230787f, 1.f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Chocolate = { { { 0.644479871f, 0.141263321f, 0.012983031f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Coral = { { { 1.f, 0.212230787f, 0.080219828f, 1.f } } };
XMGLOBALCONST XMVECTORF32 CornflowerBlue = { { { 0.127437726f, 0.300543845f, 0.846873462f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Cornsilk = { { { 1.f, 0.938685894f, 0.715693772f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Crimson = { { { 0.715693772f, 0.006995410f, 0.045186214f, 1.f } } };
// Same RGBA value as Aqua.
XMGLOBALCONST XMVECTORF32 Cyan = { { { 0.f, 1.f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkBlue = { { { 0.f, 0.f, 0.258182913f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkCyan = { { { 0.f, 0.258182913f, 0.258182913f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkGoldenrod = { { { 0.479320228f, 0.238397658f, 0.003346536f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkGray = { { { 0.396755308f, 0.396755308f, 0.396755308f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkGreen = { { { 0.f, 0.127437726f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkKhaki = { { { 0.508881450f, 0.473531544f, 0.147027299f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkMagenta = { { { 0.258182913f, 0.f, 0.258182913f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkOliveGreen = { { { 0.090841733f, 0.147027299f, 0.028426038f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkOrange = { { { 1.f, 0.262250721f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkOrchid = { { { 0.318546832f, 0.031896040f, 0.603827536f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkRed = { { { 0.258182913f, 0.f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkSalmon = { { { 0.814846814f, 0.304987371f, 0.194617867f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkSeaGreen = { { { 0.274677366f, 0.502886593f, 0.258182913f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkSlateBlue = { { { 0.064803280f, 0.046665095f, 0.258182913f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkSlateGray = { { { 0.028426038f, 0.078187428f, 0.078187428f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkTurquoise = { { { 0.f, 0.617206752f, 0.637597024f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DarkViolet = { { { 0.296138316f, 0.f, 0.651405811f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DeepPink = { { { 1.f, 0.006995410f, 0.291770697f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DeepSkyBlue = { { { 0.f, 0.520995677f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DimGray = { { { 0.141263321f, 0.141263321f, 0.141263321f, 1.f } } };
XMGLOBALCONST XMVECTORF32 DodgerBlue = { { { 0.012983031f, 0.278894335f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Firebrick = { { { 0.445201248f, 0.015996292f, 0.015996292f, 1.f } } };
XMGLOBALCONST XMVECTORF32 FloralWhite = { { { 1.f, 0.955973506f, 0.871367335f, 1.f } } };
XMGLOBALCONST XMVECTORF32 ForestGreen = { { { 0.015996292f, 0.258182913f, 0.015996292f, 1.f } } };
// Same RGBA value as Magenta.
XMGLOBALCONST XMVECTORF32 Fuchsia = { { { 1.f, 0.f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Gainsboro = { { { 0.715693772f, 0.715693772f, 0.715693772f, 1.f } } };
XMGLOBALCONST XMVECTORF32 GhostWhite = { { { 0.938685894f, 0.938685894f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Gold = { { { 1.f, 0.679542601f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Goldenrod = { { { 0.701102138f, 0.376262218f, 0.014443844f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Gray = { { { 0.215860531f, 0.215860531f, 0.215860531f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Green = { { { 0.f, 0.215860531f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 GreenYellow = { { { 0.417885154f, 1.f, 0.028426038f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Honeydew = { { { 0.871367335f, 1.f, 0.871367335f, 1.f } } };
XMGLOBALCONST XMVECTORF32 HotPink = { { { 1.f, 0.141263321f, 0.456411064f, 1.f } } };
XMGLOBALCONST XMVECTORF32 IndianRed = { { { 0.610495746f, 0.107023112f, 0.107023112f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Indigo = { { { 0.070360109f, 0.f, 0.223227978f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Ivory = { { { 1.f, 1.f, 0.871367335f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Khaki = { { { 0.871367335f, 0.791298151f, 0.262250721f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Lavender = { { { 0.791298151f, 0.791298151f, 0.955973506f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LavenderBlush = { { { 1.f, 0.871367335f, 0.913098991f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LawnGreen = { { { 0.201556295f, 0.973445475f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LemonChiffon = { { { 1.f, 0.955973506f, 0.610495746f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightBlue = { { { 0.417885154f, 0.686685443f, 0.791298151f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightCoral = { { { 0.871367335f, 0.215860531f, 0.215860531f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightCyan = { { { 0.745404482f, 1.f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightGoldenrodYellow = { { { 0.955973506f, 0.955973506f, 0.644479871f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightGray = { { { 0.651405811f, 0.651405811f, 0.651405811f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightGreen = { { { 0.278894335f, 0.854992807f, 0.278894335f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightPink = { { { 1.f, 0.467783839f, 0.533276618f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightSalmon = { { { 1.f, 0.351532698f, 0.194617867f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightSeaGreen = { { { 0.014443844f, 0.445201248f, 0.401977867f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightSkyBlue = { { { 0.242281199f, 0.617206752f, 0.955973506f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightSlateGray = { { { 0.184475034f, 0.246201396f, 0.318546832f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightSteelBlue = { { { 0.434153706f, 0.552011609f, 0.730461001f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LightYellow = { { { 1.f, 1.f, 0.745404482f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Lime = { { { 0.f, 1.f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 LimeGreen = { { { 0.031896040f, 0.610495746f, 0.031896040f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Linen = { { { 0.955973506f, 0.871367335f, 0.791298151f, 1.f } } };
// Same RGBA value as Fuchsia.
XMGLOBALCONST XMVECTORF32 Magenta = { { { 1.f, 0.f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Maroon = { { { 0.215860531f, 0.f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumAquamarine = { { { 0.132868364f, 0.610495746f, 0.401977867f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumBlue = { { { 0.f, 0.f, 0.610495746f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumOrchid = { { { 0.491020888f, 0.090841733f, 0.651405811f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumPurple = { { { 0.291770697f, 0.162029430f, 0.708376050f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumSeaGreen = { { { 0.045186214f, 0.450785846f, 0.165132239f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumSlateBlue = { { { 0.198069349f, 0.138431653f, 0.854992807f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumSpringGreen = { { { 0.f, 0.955973506f, 0.323143244f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumTurquoise = { { { 0.064803280f, 0.637597024f, 0.603827536f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MediumVioletRed = { { { 0.571125031f, 0.007499032f, 0.234550655f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MidnightBlue = { { { 0.009721218f, 0.009721218f, 0.162029430f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MintCream = { { { 0.913098991f, 1.f, 0.955973506f, 1.f } } };
XMGLOBALCONST XMVECTORF32 MistyRose = { { { 1.f, 0.775822461f, 0.752942443f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Moccasin = { { { 1.f, 0.775822461f, 0.462077051f, 1.f } } };
XMGLOBALCONST XMVECTORF32 NavajoWhite = { { { 1.f, 0.730461001f, 0.417885154f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Navy = { { { 0.f, 0.f, 0.215860531f, 1.f } } };
XMGLOBALCONST XMVECTORF32 OldLace = { { { 0.982250869f, 0.913098991f, 0.791298151f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Olive = { { { 0.215860531f, 0.215860531f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 OliveDrab = { { { 0.147027299f, 0.270497859f, 0.016807375f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Orange = { { { 1.f, 0.376262218f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 OrangeRed = { { { 1.f, 0.059511241f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Orchid = { { { 0.701102138f, 0.162029430f, 0.672443330f, 1.f } } };
XMGLOBALCONST XMVECTORF32 PaleGoldenrod = { { { 0.854992807f, 0.806952477f, 0.401977867f, 1.f } } };
XMGLOBALCONST XMVECTORF32 PaleGreen = { { { 0.313988745f, 0.964686573f, 0.313988745f, 1.f } } };
XMGLOBALCONST XMVECTORF32 PaleTurquoise = { { { 0.428690553f, 0.854992807f, 0.854992807f, 1.f } } };
XMGLOBALCONST XMVECTORF32 PaleVioletRed = { { { 0.708376050f, 0.162029430f, 0.291770697f, 1.f } } };
XMGLOBALCONST XMVECTORF32 PapayaWhip = { { { 1.f, 0.863157392f, 0.665387452f, 1.f } } };
XMGLOBALCONST XMVECTORF32 PeachPuff = { { { 1.f, 0.701102138f, 0.485149980f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Peru = { { { 0.610495746f, 0.234550655f, 0.049706575f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Pink = { { { 1.f, 0.527115345f, 0.597202003f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Plum = { { { 0.723055363f, 0.351532698f, 0.723055363f, 1.f } } };
XMGLOBALCONST XMVECTORF32 PowderBlue = { { { 0.434153706f, 0.745404482f, 0.791298151f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Purple = { { { 0.215860531f, 0.f, 0.215860531f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Red = { { { 1.f, 0.f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 RosyBrown = { { { 0.502886593f, 0.274677366f, 0.274677366f, 1.f } } };
XMGLOBALCONST XMVECTORF32 RoyalBlue = { { { 0.052860655f, 0.141263321f, 0.752942443f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SaddleBrown = { { { 0.258182913f, 0.059511241f, 0.006512091f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Salmon = { { { 0.955973506f, 0.215860531f, 0.168269455f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SandyBrown = { { { 0.904661357f, 0.371237785f, 0.116970696f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SeaGreen = { { { 0.027320892f, 0.258182913f, 0.095307484f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SeaShell = { { { 1.f, 0.913098991f, 0.854992807f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Sienna = { { { 0.351532698f, 0.084376216f, 0.026241222f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Silver = { { { 0.527115345f, 0.527115345f, 0.527115345f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SkyBlue = { { { 0.242281199f, 0.617206752f, 0.830770075f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SlateBlue = { { { 0.144128501f, 0.102241747f, 0.610495746f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SlateGray = { { { 0.162029430f, 0.215860531f, 0.278894335f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Snow = { { { 1.f, 0.955973506f, 0.955973506f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SpringGreen = { { { 0.f, 1.f, 0.212230787f, 1.f } } };
XMGLOBALCONST XMVECTORF32 SteelBlue = { { { 0.061246071f, 0.223227978f, 0.456411064f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Tan = { { { 0.644479871f, 0.456411064f, 0.262250721f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Teal = { { { 0.f, 0.215860531f, 0.215860531f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Thistle = { { { 0.686685443f, 0.520995677f, 0.686685443f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Tomato = { { { 1.f, 0.124771863f, 0.063010029f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Transparent = { { { 0.f, 0.f, 0.f, 0.f } } };
XMGLOBALCONST XMVECTORF32 Turquoise = { { { 0.051269468f, 0.745404482f, 0.630757332f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Violet = { { { 0.854992807f, 0.223227978f, 0.854992807f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Wheat = { { { 0.913098991f, 0.730461001f, 0.450785846f, 1.f } } };
XMGLOBALCONST XMVECTORF32 White = { { { 1.f, 1.f, 1.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 WhiteSmoke = { { { 0.913098991f, 0.913098991f, 0.913098991f, 1.f } } };
XMGLOBALCONST XMVECTORF32 Yellow = { { { 1.f, 1.f, 0.f, 1.f } } };
XMGLOBALCONST XMVECTORF32 YellowGreen = { { { 0.323143244f, 0.610495746f, 0.031896040f, 1.f } } };
} // namespace ColorsLinear
} // namespace DirectX

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

21
vendor/directxmath-3.19.0/LICENSE vendored Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) Microsoft Corporation.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE

View File

@ -0,0 +1,241 @@
//-------------------------------------------------------------------------------------
// DirectXMatrixStack.h -- DirectXMath C++ Matrix Stack
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#pragma once
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <new>
#ifdef _WIN32
#include <malloc.h>
#endif
#include <DirectXMath.h>
namespace DirectX
{
// A growable stack of 4x4 transform matrices kept in 16-byte aligned storage.
//
// The stack always holds at least one matrix. All transform helpers operate on
// the top entry: the plain variants post-multiply (top = top * M) and the
// "Local" variants pre-multiply (top = M * top).
class MatrixStack
{
public:
    // Creates a stack with room for 'startSize' matrices whose single initial
    // entry is the identity matrix.
    // Throws std::bad_alloc if the backing storage cannot be allocated.
    MatrixStack(size_t startSize = 16) noexcept(false) :
        m_stackSize(0),
        m_current(0),
        m_stack(nullptr)
    {
        assert(startSize > 0);

        // A zero capacity would make LoadIdentity() below write past a
        // zero-length allocation and could never grow (0 * 2 == 0), so clamp
        // it for builds where the assert is compiled out.
        Allocate(startSize > 0 ? startSize : 1);
        LoadIdentity();
    }

    // Movable but not copyable. A moved-from stack no longer owns any storage
    // and must be assigned a new value before it is used again.
    MatrixStack(MatrixStack&&) = default;
    MatrixStack& operator= (MatrixStack&&) = default;

    MatrixStack(MatrixStack const&) = delete;
    MatrixStack& operator= (MatrixStack const&) = delete;

    // Returns a copy of the matrix at the top of the stack.
    const XMMATRIX XM_CALLCONV Top() const noexcept { return m_stack[m_current]; }

    // Returns a pointer to the top matrix; it is invalidated by any Push()
    // that has to grow the storage.
    const XMMATRIX* GetTop() const noexcept { return &m_stack[m_current]; }

    // Number of matrices currently on the stack (always >= 1).
    size_t Size() const noexcept { return (m_current + 1); }

    // Discards the top matrix. The bottom entry is never removed, so calling
    // this on a stack of size 1 is a no-op.
    void Pop()
    {
        if (m_current > 0)
        {
            --m_current;
        }
    }

    // Duplicates the top matrix onto a new top entry, doubling the capacity
    // when full. Throws std::bad_alloc if growing fails, in which case the
    // stack is left exactly as it was.
    void Push()
    {
        // Grow BEFORE advancing the index: if Allocate() throws, m_current
        // must still refer to a valid element.
        if (m_current + 1 >= m_stackSize)
        {
            Allocate(m_stackSize * 2);
        }

        ++m_current;

        // Replicate the original top of the matrix stack.
        m_stack[m_current] = m_stack[m_current - 1];
    }

    // Loads identity into the top of the matrix stack.
    void LoadIdentity() noexcept
    {
        m_stack[m_current] = XMMatrixIdentity();
    }

    // Load a matrix into the top of the matrix stack.
    void XM_CALLCONV LoadMatrix(FXMMATRIX matrix) noexcept
    {
        m_stack[m_current] = matrix;
    }

    // Multiply a matrix by the top of the stack, store result in top.
    void XM_CALLCONV MultiplyMatrix(FXMMATRIX matrix) noexcept
    {
        m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], matrix);
    }

    // Pre-multiplies a matrix by the top of the stack, store result in top.
    void XM_CALLCONV MultiplyMatrixLocal(FXMMATRIX matrix) noexcept
    {
        m_stack[m_current] = XMMatrixMultiply(matrix, m_stack[m_current]);
    }

    // Add a rotation about X to stack top.
    void XM_CALLCONV RotateX(float angle) noexcept
    {
        XMMATRIX mat = XMMatrixRotationX(angle);
        m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], mat);
    }

    // Pre-multiplies a rotation about X into the stack top.
    void XM_CALLCONV RotateXLocal(float angle) noexcept
    {
        XMMATRIX mat = XMMatrixRotationX(angle);
        m_stack[m_current] = XMMatrixMultiply(mat, m_stack[m_current]);
    }

    // Add a rotation about Y to stack top.
    void XM_CALLCONV RotateY(float angle) noexcept
    {
        XMMATRIX mat = XMMatrixRotationY(angle);
        m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], mat);
    }

    // Pre-multiplies a rotation about Y into the stack top.
    void XM_CALLCONV RotateYLocal(float angle) noexcept
    {
        XMMATRIX mat = XMMatrixRotationY(angle);
        m_stack[m_current] = XMMatrixMultiply(mat, m_stack[m_current]);
    }

    // Add a rotation about Z to stack top.
    void XM_CALLCONV RotateZ(float angle) noexcept
    {
        XMMATRIX mat = XMMatrixRotationZ(angle);
        m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], mat);
    }

    // Pre-multiplies a rotation about Z into the stack top.
    void XM_CALLCONV RotateZLocal(float angle) noexcept
    {
        XMMATRIX mat = XMMatrixRotationZ(angle);
        m_stack[m_current] = XMMatrixMultiply(mat, m_stack[m_current]);
    }

    // Add a rotation around an axis to stack top.
    void XM_CALLCONV RotateAxis(FXMVECTOR axis, float angle) noexcept
    {
        XMMATRIX mat = XMMatrixRotationAxis(axis, angle);
        m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], mat);
    }

    // Pre-multiplies a rotation around an axis into the stack top.
    void XM_CALLCONV RotateAxisLocal(FXMVECTOR axis, float angle) noexcept
    {
        XMMATRIX mat = XMMatrixRotationAxis(axis, angle);
        m_stack[m_current] = XMMatrixMultiply(mat, m_stack[m_current]);
    }

    // Add a rotation by roll/pitch/yaw to the stack top.
    // Arguments are forwarded unchanged to XMMatrixRotationRollPitchYaw in
    // (pitch, yaw, roll) order.
    void RotateRollPitchYaw(float pitch, float yaw, float roll) noexcept
    {
        XMMATRIX mat = XMMatrixRotationRollPitchYaw(pitch, yaw, roll);
        m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], mat);
    }

    // Pre-multiplies a roll/pitch/yaw rotation into the stack top.
    void RotateRollPitchYawLocal(float pitch, float yaw, float roll) noexcept
    {
        XMMATRIX mat = XMMatrixRotationRollPitchYaw(pitch, yaw, roll);
        m_stack[m_current] = XMMatrixMultiply(mat, m_stack[m_current]);
    }

    // Add a rotation by a quaternion to the stack top.
    void XM_CALLCONV RotateByQuaternion(FXMVECTOR quat) noexcept
    {
        XMMATRIX mat = XMMatrixRotationQuaternion(quat);
        m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], mat);
    }

    // Pre-multiplies a quaternion rotation into the stack top.
    void XM_CALLCONV RotateByQuaternionLocal(FXMVECTOR quat) noexcept
    {
        XMMATRIX mat = XMMatrixRotationQuaternion(quat);
        m_stack[m_current] = XMMatrixMultiply(mat, m_stack[m_current]);
    }

    // Add a scale to the stack top.
    void Scale(float x, float y, float z) noexcept
    {
        XMMATRIX mat = XMMatrixScaling(x, y, z);
        m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], mat);
    }

    // Pre-multiplies a scale into the stack top.
    void ScaleLocal(float x, float y, float z) noexcept
    {
        XMMATRIX mat = XMMatrixScaling(x, y, z);
        m_stack[m_current] = XMMatrixMultiply(mat, m_stack[m_current]);
    }

    // Add a translation to the stack top.
    void Translate(float x, float y, float z) noexcept
    {
        XMMATRIX mat = XMMatrixTranslation(x, y, z);
        m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], mat);
    }

    // Pre-multiplies a translation into the stack top.
    void TranslateLocal(float x, float y, float z) noexcept
    {
        XMMATRIX mat = XMMatrixTranslation(x, y, z);
        m_stack[m_current] = XMMatrixMultiply(mat, m_stack[m_current]);
    }

private:
    // Releases storage with the routine matching the allocator used in
    // Allocate() for the current platform.
    struct matrix_deleter
    {
        void operator()(void* p) noexcept
        {
#ifdef _WIN32
            _aligned_free(p);
#else
            free(p);
#endif
        }
    };

    // Replaces the backing storage with a 16-byte aligned buffer holding
    // 'newSize' matrices and copies the existing entries into it.
    // Throws std::bad_alloc on failure, leaving the current storage untouched.
    void Allocate(size_t newSize)
    {
        // Refuse sizes whose byte count would wrap around size_t. This also
        // guarantees that the 'm_stackSize * 2' computed by Push() cannot
        // overflow.
        if (newSize > static_cast<size_t>(-1) / sizeof(XMMATRIX))
            throw std::bad_alloc();

#ifdef _WIN32
        void* ptr = _aligned_malloc(newSize * sizeof(XMMATRIX), 16);
#else
        // This C++17 Standard Library function is currently NOT
        // implemented for the Microsoft Standard C++ Library.
        void* ptr = aligned_alloc(16, newSize * sizeof(XMMATRIX));
#endif
        if (!ptr)
            throw std::bad_alloc();

        if (m_stack)
        {
            assert(newSize >= m_stackSize);
            memcpy(ptr, m_stack.get(), sizeof(XMMATRIX) * m_stackSize);
        }

        m_stack.reset(reinterpret_cast<XMMATRIX*>(ptr));
        m_stackSize = newSize;
    }

    size_t m_stackSize;     // capacity of m_stack, in matrices
    size_t m_current;       // index of the top-of-stack entry
    std::unique_ptr<XMMATRIX[], matrix_deleter> m_stack;
};
} // namespace DirectX

121
vendor/directxmath-3.19.0/README.md vendored Normal file
View File

@ -0,0 +1,121 @@
![DirectX Logo](https://raw.githubusercontent.com/wiki/Microsoft/DirectXMath/X_jpg.jpg)
# DirectXMath
https://github.com/Microsoft/DirectXMath
Copyright (c) Microsoft Corporation.
**February 2024**
This package contains the DirectXMath library, an all inline SIMD C++ linear algebra library for use in games and graphics apps.
This code is designed to build with Visual Studio 2019 (16.11), Visual Studio 2022, or clang/LLVM for Windows. It is recommended that you make use of the latest updates.
These components are designed to work without requiring any content from the legacy DirectX SDK. For details, see [Where is the DirectX SDK?](https://aka.ms/dxsdk).
## Directory Layout
* ``Inc\``
+ DirectXMath Files (in the DirectX C++ namespace)
* DirectXMath.h - Core library
* DirectXPackedVector.h - Load/Store functions and types for working with various compressed GPU formats
* DirectXColors.h - .NET-style Color defines in sRGB and linear color space
* DirectXCollision.h - Bounding volume collision library
* ``Extensions\``
+ Advanced instruction set variants for guarded codepaths
* DirectXMathSSE3.h - SSE3
* DirectXMathBE.h - Supplemental SSE3 (SSSE3)
* DirectXMathSSE4.h - SSE4.1
* DirectXMathAVX.h - Advanced Vector Extensions (AVX)
* DirectXMathAVX2.h - Advanced Vector Extensions 2 (AVX2)
* DirectXMathF16C.h - Half-precision conversions (F16C)
* DirectXMathFMA3.h - Fused multiply-accumulate (FMA3)
* DirectXMathFMA4.h - Fused multiply-accumulate (FMA4)
* ``SHMath\``
+ Spherical Harmonics math functions
* DirectXSH.h - Header for SHMath functions
* DirectXSH.cpp, DirectXSHD3D11.cpp, DirectXSHD3D12.cpp - Implementation
* ``XDSP\``
+ XDSP.h - Digital Signal Processing helper functions
* ``build\``
+ Contains YAML files for the build pipelines along with some miscellaneous build files and scripts.
## Documentation
Documentation is available on the [Microsoft Docs](https://docs.microsoft.com/en-us/windows/desktop/dxmath/directxmath-portal). Additional information can be found on the [project wiki](https://github.com/microsoft/DirectXMath/wiki).
## Compiler support
Officially the library is supported with Microsoft Visual C++ 2019 or later, clang/LLVM v12 or later, and GCC 9 or later. It should also compile with the Intel C++ and MinGW compilers.
When building with clang/LLVM or other GNU C compilers, the ``_XM_NO_XMVECTOR_OVERLOADS_`` control define is set because these compilers do not support creating operator overloads for the ``XMVECTOR`` type. You can choose to enable this preprocessor define explicitly to do the same thing with Visual C++ for improved portability.
To build for non-Windows platforms, you need to provide a ``sal.h`` header in your include path. You can obtain an open source version from [GitHub](https://raw.githubusercontent.com/dotnet/runtime/main/src/coreclr/pal/inc/rt/sal.h).
With GCC, the SAL annotation preprocessor symbols can conflict with the GNU implementation of the Standard C++ Library. The workaround is to include the system headers before including DirectXMath:
```
#include <algorithm>
#include <utility>
#include <DirectXMath.h>
```
## Notices
All content and source code for this package are subject to the terms of the [MIT License](https://github.com/microsoft/DirectXMath/blob/main/LICENSE).
For the latest version of DirectXMath, bug reports, etc. please visit the project site on [GitHub](https://github.com/microsoft/DirectXMath).
## Release Notes
* The clang/LLVM toolset currently does not respect the ``float_control`` pragma for SSE intrinsics. Therefore, the use of ``/fp:fast`` is not recommended on clang/LLVM until this issue is fixed. See [55713](https://github.com/llvm/llvm-project/issues/55713).
## Support
For questions, consider using [Stack Overflow](https://stackoverflow.com/questions/tagged/directxmath) with the *directxmath* tag, or the [DirectX Discord Server](https://discord.gg/directx) in the *dx12-developers* or *dx9-dx11-developers* channel.
For bug reports and feature requests, please use GitHub [issues](https://github.com/microsoft/DirectXMath/issues) for this project.
## Contributing
This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
When you submit a pull request, a CLA bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA.
## Code of Conduct
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
## Trademarks
This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party's policies.
## Credits
The xboxmath library was originated by Matt Bronder with contributions from Sakphong Chanbai and David Hefner for the Xbox 360.
The xnamath library for the DirectX SDK and Xbox XDK was the work of Chuck Walbourn and Becky Heineman based on xboxmath, with contributions from Jeremy Gup, Dan Haffner, Matt Lee, Casey Meekhof, Rich Sauer, Jason Strayer, and Xiaoyue Zheng.
The DirectXMath library for the Windows SDK and Xbox One XDK is the work of Chuck Walbourn based on xnamath, with contributions from Darren Anderson, Matt Lee, Aaron Rodriguez Hernandez, Yuichi Ito, Reza Nourai, Rich Sauer, and Jason Strayer.
Thanks to Dave Eberly for his contributions particularly in improving the transcendental functions.
Thanks to Bruce Dawson for his help with the rounding functions.
Thanks to Andrew Farrier for the fixes to ``XMVerifyCPUSupport`` to properly support clang.
Thanks to Scott Matloff for his help in getting the library updated to use Intel SVML for VS 2019.

41
vendor/directxmath-3.19.0/SECURITY.md vendored Normal file
View File

@ -0,0 +1,41 @@
<!-- BEGIN MICROSOFT SECURITY.MD V0.0.7 BLOCK -->
## Security
Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below.
## Reporting Security Issues
**Please do not report security vulnerabilities through public GitHub issues.**
Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report).
If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).
You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc).
Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue
This information will help us triage your report more quickly.
If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs.
## Preferred Languages
We prefer all communications to be in English.
## Policy
Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd).
<!-- END MICROSOFT SECURITY.MD BLOCK -->

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,72 @@
//-------------------------------------------------------------------------------------
// DirectXSH.h -- C++ Spherical Harmonics Math Library
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/p/?LinkId=262885
//-------------------------------------------------------------------------------------
#pragma once
#define DIRECTX_SHMATH_VERSION 106
#include <DirectXMath.h>
// Public declarations of the spherical harmonics (SH) helpers.
//
// Buffer sizes below are taken from the SAL annotations on each parameter: a
// coefficient array for a given 'order' holds order*order floats.
namespace DirectX
{
// Bounds on the 'order' argument. SHProjectCubeMap (D3D11) returns
// E_INVALIDARG for orders outside [XM_SH_MINORDER, XM_SH_MAXORDER].
// NOTE(review): the other functions presumably enforce the same range --
// confirm against DirectXSH.cpp.
constexpr size_t XM_SH_MINORDER = 2;
constexpr size_t XM_SH_MAXORDER = 6;
// Writes order*order coefficients for direction 'dir' into 'result'.
float* XM_CALLCONV XMSHEvalDirection(_Out_writes_(order*order) float *result, _In_ size_t order, _In_ FXMVECTOR dir) noexcept;
// Reads order*order coefficients from 'input', writes order*order coefficients
// to 'result'; 'rotMatrix' supplies the rotation.
float* XM_CALLCONV XMSHRotate(_Out_writes_(order*order) float *result, _In_ size_t order, _In_ FXMMATRIX rotMatrix, _In_reads_(order*order) const float *input) noexcept;
// As XMSHRotate, but the rotation is given as a single 'angle'
// (about the Z axis, per the function name).
float* XMSHRotateZ(_Out_writes_(order*order) float *result, _In_ size_t order, _In_ float angle, _In_reads_(order*order) const float *input) noexcept;
// Combines 'inputA' and 'inputB' (order*order floats each) into 'result'.
// SHProjectCubeMap passes the same buffer as 'result' and 'inputA'.
float* XMSHAdd(_Out_writes_(order*order) float *result, _In_ size_t order, _In_reads_(order*order) const float *inputA, _In_reads_(order*order) const float *inputB) noexcept;
// Scales order*order coefficients from 'input' by 'scale' into 'result'.
// SHProjectCubeMap passes the same buffer as 'result' and 'input'.
float* XMSHScale(_Out_writes_(order*order) float *result, _In_ size_t order, _In_reads_(order*order) const float *input, _In_ float scale) noexcept;
// Returns a scalar computed from two order*order coefficient sets.
float XMSHDot(_In_ size_t order, _In_reads_(order*order) const float *inputA, _In_reads_(order*order) const float *inputB) noexcept;
// Product of two coefficient sets for a run-time 'order'.
float* XMSHMultiply(_Out_writes_(order*order) float *result, _In_ size_t order, _In_reads_(order*order) const float *inputF, _In_reads_(order*order) const float *inputG) noexcept;
// Fixed-order variants of XMSHMultiply; the digit is the order, so the buffers
// hold 4, 9, 16, 25 and 36 floats respectively.
float* XMSHMultiply2(_Out_writes_(4) float *result, _In_reads_(4) const float *inputF, _In_reads_(4) const float *inputG) noexcept;
float* XMSHMultiply3(_Out_writes_(9) float *result, _In_reads_(9) const float *inputF, _In_reads_(9) const float *inputG) noexcept;
float* XMSHMultiply4(_Out_writes_(16) float *result, _In_reads_(16) const float *inputF, _In_reads_(16) const float *inputG) noexcept;
float* XMSHMultiply5(_Out_writes_(25) float *result, _In_reads_(25) const float *inputF, _In_reads_(25) const float *inputG) noexcept;
float* XMSHMultiply6(_Out_writes_(36) float *result, _In_reads_(36) const float *inputF, _In_reads_(36) const float *inputG) noexcept;
// Light evaluators. Each writes one order*order coefficient set per colour
// channel; 'resultR' is required while 'resultG' and 'resultB' are annotated
// as optional (_Out_writes_opt_).
bool XM_CALLCONV XMSHEvalDirectionalLight(
_In_ size_t order, _In_ FXMVECTOR dir, _In_ FXMVECTOR color,
_Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept;
bool XM_CALLCONV XMSHEvalSphericalLight(
_In_ size_t order, _In_ FXMVECTOR pos, _In_ float radius, _In_ FXMVECTOR color,
_Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept;
bool XM_CALLCONV XMSHEvalConeLight(
_In_ size_t order, _In_ FXMVECTOR dir, _In_ float radius, _In_ FXMVECTOR color,
_Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept;
bool XM_CALLCONV XMSHEvalHemisphereLight(
_In_ size_t order, _In_ FXMVECTOR dir, _In_ FXMVECTOR topColor, _In_ FXMVECTOR bottomColor,
_Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept;
// Direct3D 11 cube-map projection. Only declared when <d3d11.h> (or the Xbox
// equivalent) was included before this header. All three outputs are optional.
#if defined(__d3d11_h__) || defined(__d3d11_x_h__)
HRESULT SHProjectCubeMap(
_In_ ID3D11DeviceContext *context, _In_ size_t order, _In_ ID3D11Texture2D *cubeMap,
_Out_writes_opt_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept;
#endif
// Direct3D 12 cube-map projection. Only declared when <d3d12.h> (or an Xbox
// equivalent) was included before this header. Takes the resource description
// plus six subresource entries instead of a live texture object.
#if defined(__d3d12_h__) || defined(__d3d12_x_h__) || defined(__XBOX_D3D12_X__)
HRESULT SHProjectCubeMap(
_In_ size_t order, _In_ const D3D12_RESOURCE_DESC& desc, _In_ const D3D12_SUBRESOURCE_DATA cubeMap[6],
_Out_writes_opt_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept;
#endif
} // namespace DirectX

View File

@ -0,0 +1,385 @@
//-------------------------------------------------------------------------------------
// DirectXSHD3D11.cpp -- C++ Spherical Harmonics Math Library
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/p/?LinkId=262885
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma warning( disable : 4616 4619 4061 4265 4626 5039 )
// C4616/C4619 #pragma warning warnings
// C4061 enumerator 'identifier' in switch of enum 'enumeration' is not explicitly handled by a case label
// C4265 class has virtual functions, but destructor is not virtual
// C4626 assignment operator was implicitly defined as deleted
// C5039 pointer or reference to potentially throwing function passed to extern C function under - EHc
#pragma warning(push)
#pragma warning(disable: 4365)
#endif
#include <d3d11_1.h>
#ifdef _MSC_VER
#pragma warning(pop)
#endif
#include "DirectXSH.h"
#include <DirectXPackedVector.h>
#include <cassert>
#include <memory>
#include <malloc.h>
#include <wrl/client.h>
#ifdef __clang__
#pragma clang diagnostic ignored "-Wcovered-switch-default"
#pragma clang diagnostic ignored "-Wswitch-enum"
#pragma clang diagnostic ignored "-Wunknown-warning-option"
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
#endif
using namespace DirectX;
using Microsoft::WRL::ComPtr;
namespace
{
// Deleter that hands a buffer back to the CRT through _aligned_free.
struct aligned_deleter
{
    void operator()(void* block) { _aligned_free(block); }
};

// Owning pointer to an XMVECTOR buffer that is released with _aligned_free
// when it goes out of scope.
typedef std::unique_ptr<DirectX::XMVECTOR, aligned_deleter> ScopedAlignedArrayXMVECTOR;
//-------------------------------------------------------------------------------------
// This code is lifted from DirectXTex http://go.microsoft.com/fwlink/?LinkId=248926
// If you need additional DXGI format support, see DirectXTexConvert.cpp
//-------------------------------------------------------------------------------------
// Helper macros for LoadScanline. Each one expands to a complete 'case' body
// that RETURNS from the enclosing function, and relies on these locals being
// in scope: pSource, size (source bytes), dPtr (write cursor), ePtr (one past
// the last destination element).
//
// The loop bound 'size - sizeof(type) + 1' stops before a trailing partial
// element, so the source is never read past 'size' bytes.

// Converts every whole 'type' element with 'func' and stores all four lanes.
#define LOAD_SCANLINE( type, func )\
if ( size >= sizeof(type) )\
{\
const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
{\
if ( dPtr >= ePtr ) break;\
*(dPtr++) = func( sPtr++ );\
}\
return true;\
}\
return false;
// Three-channel variant: keeps x/y/z from 'func' (mask g_XMSelect1110) and
// takes w from 'defvec'.
#define LOAD_SCANLINE3( type, func, defvec )\
if ( size >= sizeof(type) )\
{\
const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
{\
XMVECTOR v = func( sPtr++ );\
if ( dPtr >= ePtr ) break;\
*(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1110 );\
}\
return true;\
}\
return false;
// Two-channel variant: keeps x/y from 'func' (mask g_XMSelect1100) and takes
// z/w from 'defvec'.
#define LOAD_SCANLINE2( type, func, defvec )\
if ( size >= sizeof(type) )\
{\
const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
{\
XMVECTOR v = func( sPtr++ );\
if ( dPtr >= ePtr ) break;\
*(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1100 );\
}\
return true;\
}\
return false;
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 6101)
#endif
// Converts one row of pixels stored in 'format' into XMVECTORs.
//
//   pDestination  16-byte aligned output array with room for 'count' vectors.
//   count         capacity of pDestination, in vectors.
//   pSource       raw pixel data for the row.
//   size          number of readable bytes at pSource.
//   format        DXGI format of the source data; only the floating-point
//                 formats handled in the switch below are supported.
//
// Returns true when 'size' covers at least one whole source element of a
// supported format, false otherwise (including a null destination). At most
// 'count' vectors are written; fewer are written when the source runs out
// first, so trailing destination entries may be left untouched.
_Success_(return)
bool LoadScanline(
    _Out_writes_(count) DirectX::XMVECTOR* pDestination,
    size_t count,
    _In_reads_bytes_(size) LPCVOID pSource,
    size_t size,
    DXGI_FORMAT format)
{
    assert(pDestination && count > 0 && ((reinterpret_cast<uintptr_t>(pDestination) & 0xF) == 0));
    assert(pSource && size > 0);

    using namespace DirectX::PackedVector;

    XMVECTOR* __restrict dPtr = pDestination;
    if (!dPtr)
        return false;

    const XMVECTOR* ePtr = pDestination + count;

    switch (format)
    {
    case DXGI_FORMAT_R32G32B32A32_FLOAT:
        {
            // Source and destination share a layout: copy whichever is smaller.
            size_t msize = (size > (sizeof(XMVECTOR)*count)) ? (sizeof(XMVECTOR)*count) : size;
            memcpy_s(dPtr, sizeof(XMVECTOR)*count, pSource, msize);
        }
        return true;

    case DXGI_FORMAT_R32G32B32_FLOAT:
        LOAD_SCANLINE3(XMFLOAT3, XMLoadFloat3, g_XMIdentityR3)

    case DXGI_FORMAT_R16G16B16A16_FLOAT:
        LOAD_SCANLINE(XMHALF4, XMLoadHalf4)

    case DXGI_FORMAT_R32G32_FLOAT:
        LOAD_SCANLINE2(XMFLOAT2, XMLoadFloat2, g_XMIdentityR3)

    case DXGI_FORMAT_R11G11B10_FLOAT:
        LOAD_SCANLINE3(XMFLOAT3PK, XMLoadFloat3PK, g_XMIdentityR3)

    case DXGI_FORMAT_R16G16_FLOAT:
        LOAD_SCANLINE2(XMHALF2, XMLoadHalf2, g_XMIdentityR3)

    case DXGI_FORMAT_R32_FLOAT:
        if (size >= sizeof(float))
        {
            const float* __restrict sPtr = reinterpret_cast<const float*>(pSource);
            // Use the same bound as the LOAD_SCANLINE macros so that a trailing
            // partial element (size not a multiple of sizeof(float)) is never
            // read past the end of the source buffer.
            for (size_t icount = 0; icount < (size - sizeof(float) + 1); icount += sizeof(float))
            {
                if (dPtr >= ePtr) break;
                XMVECTOR v = XMLoadFloat(sPtr++);
                // Keep x from the source; y, z and w come from (0, 0, 0, 1).
                *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v, g_XMSelect1000);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R16_FLOAT:
        if (size >= sizeof(HALF))
        {
            const HALF * __restrict sPtr = reinterpret_cast<const HALF*>(pSource);
            // Whole elements only; see the R32_FLOAT case.
            for (size_t icount = 0; icount < (size - sizeof(HALF) + 1); icount += sizeof(HALF))
            {
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(XMConvertHalfToFloat(*sPtr++), 0.f, 0.f, 1.f);
            }
            return true;
        }
        return false;

    default:
        return false;
    }
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif
} // namespace anonymous
//-------------------------------------------------------------------------------------
// Projects a function represented in a cube map into spherical harmonics.
//
// context   device context used to copy and map the texture; must not be null.
// order     SH order; must lie in [XM_SH_MINORDER, XM_SH_MAXORDER].
// cubeMap   texture with exactly 6 array slices, square faces, no
//           multisampling, in one of the float formats listed below.
// resultR/G/B  optional outputs of order*order floats each; a null pointer
//           skips that channel.
//
// Returns E_INVALIDARG for a null context/cubeMap or an out-of-range order,
// E_FAIL for an unsupported texture layout or format (or a scanline that
// cannot be decoded), E_OUTOFMEMORY if the scratch scanline cannot be
// allocated, any failure HRESULT from CreateTexture2D or Map, and S_OK on
// success.
//
// http://msdn.microsoft.com/en-us/library/windows/desktop/ff476300.aspx
//-------------------------------------------------------------------------------------
_Use_decl_annotations_
HRESULT DirectX::SHProjectCubeMap(
ID3D11DeviceContext *context,
size_t order,
ID3D11Texture2D *cubeMap,
float *resultR,
float *resultG,
float* resultB) noexcept
{
if (!context || !cubeMap)
return E_INVALIDARG;
if (order < XM_SH_MINORDER || order > XM_SH_MAXORDER)
return E_INVALIDARG;
D3D11_TEXTURE2D_DESC desc;
cubeMap->GetDesc(&desc);
// Must look like a cube map: six slices, square faces, single-sampled.
if ((desc.ArraySize != 6)
|| (desc.Width != desc.Height)
|| (desc.SampleDesc.Count > 1))
return E_FAIL;
switch (desc.Format)
{
case DXGI_FORMAT_R32G32B32A32_FLOAT:
case DXGI_FORMAT_R32G32B32_FLOAT:
case DXGI_FORMAT_R16G16B16A16_FLOAT:
case DXGI_FORMAT_R32G32_FLOAT:
case DXGI_FORMAT_R11G11B10_FLOAT:
case DXGI_FORMAT_R16G16_FLOAT:
case DXGI_FORMAT_R32_FLOAT:
case DXGI_FORMAT_R16_FLOAT:
// See LoadScanline to support more pixel formats
break;
default:
return E_FAIL;
}
//--- Create a staging resource copy (if needed) to be able to read data
// 'texture' is a non-owning pointer: it refers either to the caller's cubeMap
// or to 'staging', which releases the copy when this function returns.
ID3D11Texture2D* texture = nullptr;
ComPtr<ID3D11Texture2D> staging;
if (!(desc.CPUAccessFlags & D3D11_CPU_ACCESS_READ))
{
D3D11_TEXTURE2D_DESC sdesc = desc;
sdesc.BindFlags = 0;
sdesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
sdesc.Usage = D3D11_USAGE_STAGING;
ComPtr<ID3D11Device> device;
context->GetDevice(&device);
HRESULT hr = device->CreateTexture2D(&sdesc, nullptr, &staging);
if (FAILED(hr))
return hr;
context->CopyResource(staging.Get(), cubeMap);
texture = staging.Get();
}
else
texture = cubeMap;
assert(texture != nullptr);
//--- Setup for SH projection
// Scratch buffer holding one decoded row (desc.Width vectors, 16-byte aligned).
ScopedAlignedArrayXMVECTOR scanline(reinterpret_cast<XMVECTOR*>(_aligned_malloc(sizeof(XMVECTOR)*desc.Width, 16)));
if (!scanline)
return E_OUTOFMEMORY;
assert(desc.Width > 0);
float fSize = static_cast<float>(desc.Width);
float fPicSize = 1.0f / fSize;
// index from [0,W-1], f(0) maps to -1 + 1/W, f(W-1) maps to 1 - 1/w
// linear function x*S +B, 1st constraint means B is (-1+1/W), plug into
// second and solve for S: S = 2*(1-1/W)/(W-1). The old code that did
// this was incorrect - but only for computing the differential solid
// angle, where the final value was 1.0 instead of 1-1/w...
float fB = -1.0f + 1.0f / fSize;
// A 1x1 face would divide by zero in the formula above, so use a zero slope.
float fS = (desc.Width > 1) ? (2.0f*(1.0f - 1.0f / fSize) / (fSize - 1.0f)) : 0.f;
// clear out accumulation variables
float fWt = 0.0f;
if (resultR)
memset(resultR, 0, sizeof(float)*order*order);
if (resultG)
memset(resultG, 0, sizeof(float)*order*order);
if (resultB)
memset(resultB, 0, sizeof(float)*order*order);
// shBuff receives the basis evaluated for the current texel direction;
// shBuffB is scratch for the colour-scaled copy that is added to each result.
float shBuff[XM_SH_MAXORDER*XM_SH_MAXORDER] = {};
float shBuffB[XM_SH_MAXORDER*XM_SH_MAXORDER] = {};
//--- Process each face of the cubemap
for (UINT face = 0; face < 6; ++face)
{
// Mip level 0 of this face; lower mips are never read.
UINT dindex = D3D11CalcSubresource(0, face, desc.MipLevels);
D3D11_MAPPED_SUBRESOURCE mapped;
HRESULT hr = context->Map(texture, dindex, D3D11_MAP_READ, 0, &mapped);
if (FAILED(hr))
return hr;
const uint8_t *pSrc = reinterpret_cast<const uint8_t*>(mapped.pData);
for (UINT y = 0; y < desc.Height; ++y)
{
XMVECTOR* ptr = scanline.get();
if (!LoadScanline(ptr, desc.Width, pSrc, mapped.RowPitch, desc.Format))
{
// Do not leave the subresource mapped on the error path.
context->Unmap(texture, dindex);
return E_FAIL;
}
const float v = float(y) * fS + fB;
XMVECTOR* pixel = ptr;
for (UINT x = 0; x < desc.Width; ++x, ++pixel)
{
const float u = float(x) * fS + fB;
// (ix, iy, iz) is the un-normalised direction through the centre of texel
// (x, y) on the current face; each component lies in [-1, 1].
float ix, iy, iz;
switch (face)
{
case 0: // Positive X
iz = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize;
iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
ix = 1.0f;
break;
case 1: // Negative X
iz = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
ix = -1;
break;
case 2: // Positive Y
iz = -1.0f + (2.0f * float(y) + 1.0f) * fPicSize;
iy = 1.0f;
ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
break;
case 3: // Negative Y
iz = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
iy = -1.0f;
ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
break;
case 4: // Positive Z
iz = 1.0f;
iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
break;
case 5: // Negative Z
iz = -1.0f;
iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
ix = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize;
break;
default:
ix = iy = iz = 0.f;
assert(false);
break;
}
XMVECTOR dir = XMVectorSet(ix, iy, iz, 0);
dir = XMVector3Normalize(dir);
// Per-texel weight 4 / (1 + u^2 + v^2)^(3/2) (the differential solid angle);
// the running total fWt is used for the final normalisation.
const float fDiffSolid = 4.0f / ((1.0f + u * u + v * v)*sqrtf(1.0f + u * u + v * v));
fWt += fDiffSolid;
XMSHEvalDirection(shBuff, order, dir);
XMFLOAT3A clr;
XMStoreFloat3A(&clr, *pixel);
// Accumulate basis * (channel value * weight) into each requested channel.
if (resultR) XMSHAdd(resultR, order, resultR, XMSHScale(shBuffB, order, shBuff, clr.x*fDiffSolid));
if (resultG) XMSHAdd(resultG, order, resultG, XMSHScale(shBuffB, order, shBuff, clr.y*fDiffSolid));
if (resultB) XMSHAdd(resultB, order, resultB, XMSHScale(shBuffB, order, shBuff, clr.z*fDiffSolid));
}
pSrc += mapped.RowPitch;
}
context->Unmap(texture, dindex);
}
// Rescale so that the accumulated weights sum to 4*pi.
const float fNormProj = (4.0f*XM_PI) / fWt;
if (resultR) XMSHScale(resultR, order, resultR, fNormProj);
if (resultG) XMSHScale(resultG, order, resultG, fNormProj);
if (resultB) XMSHScale(resultB, order, resultB, fNormProj);
return S_OK;
}

View File

@ -0,0 +1,341 @@
//-------------------------------------------------------------------------------------
// DirectXSHD3D12.cpp -- C++ Spherical Harmonics Math Library
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/p/?LinkId=262885
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma warning( disable : 4616 4619 4061 4265 4626 5039 )
// C4616/C4619 #pragma warning warnings
// C4061 enumerator 'identifier' in switch of enum 'enumeration' is not explicitly handled by a case label
// C4265 class has virtual functions, but destructor is not virtual
// C4626 assignment operator was implicitly defined as deleted
// C5039 pointer or reference to potentially throwing function passed to extern C function under - EHc
#endif
#include <d3d12.h>
#include "DirectXSH.h"
#include <DirectXPackedVector.h>
#include <cassert>
#include <memory>
#include <malloc.h>
#include <wrl/client.h>
#ifdef __clang__
#pragma clang diagnostic ignored "-Wcovered-switch-default"
#pragma clang diagnostic ignored "-Wswitch-enum"
#pragma clang diagnostic ignored "-Wunknown-warning-option"
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
#endif
using namespace DirectX;
using Microsoft::WRL::ComPtr;
namespace
{
// Deleter functor for std::unique_ptr: hands the pointer to _aligned_free.
struct aligned_deleter
{
    void operator()(void* block)
    {
        _aligned_free(block);
    }
};

// Owning smart pointer to XMVECTOR storage that is released through aligned_deleter.
using ScopedAlignedArrayXMVECTOR = std::unique_ptr<DirectX::XMVECTOR, aligned_deleter>;
//-------------------------------------------------------------------------------------
// This code is lifted from DirectXTex http://go.microsoft.com/fwlink/?LinkId=248926
// If you need additional DXGI format support, see DirectXTexConvert.cpp
//-------------------------------------------------------------------------------------
// LOAD_SCANLINE( type, func )
// Expands to the complete body of one `case` label. It is not self-contained: it
// uses the names `size`, `pSource`, `dPtr` and `ePtr` from the expansion site.
// When `size` holds at least one `type`, the source is walked in sizeof(type)
// steps; each element is converted with `func` and stored through `dPtr` until
// either the byte budget or the destination end (`ePtr`) is reached, and the
// expansion returns true. When `size` is smaller than one element it returns false.
#define LOAD_SCANLINE( type, func )\
if ( size >= sizeof(type) )\
{\
const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
{\
if ( dPtr >= ePtr ) break;\
*(dPtr++) = func( sPtr++ );\
}\
return true;\
}\
return false;
// LOAD_SCANLINE3( type, func, defvec )
// Same contract as LOAD_SCANLINE, but each loaded value is merged with `defvec`
// through XMVectorSelect( defvec, v, g_XMSelect1110 ) before being stored
// (presumably x/y/z come from the loaded value and w from defvec -- confirm
// against the XMVectorSelect documentation).
// Note that the element is loaded before the destination-end check.
#define LOAD_SCANLINE3( type, func, defvec )\
if ( size >= sizeof(type) )\
{\
const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
{\
XMVECTOR v = func( sPtr++ );\
if ( dPtr >= ePtr ) break;\
*(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1110 );\
}\
return true;\
}\
return false;
// LOAD_SCANLINE2( type, func, defvec )
// Same as LOAD_SCANLINE3 except that the merge uses g_XMSelect1100
// (presumably x/y from the loaded value and z/w from defvec -- confirm).
#define LOAD_SCANLINE2( type, func, defvec )\
if ( size >= sizeof(type) )\
{\
const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
{\
XMVECTOR v = func( sPtr++ );\
if ( dPtr >= ePtr ) break;\
*(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1100 );\
}\
return true;\
}\
return false;
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 6101)
#endif
// Converts one row of source pixels into XMVECTOR values.
//
//   pDestination - receives up to `count` vectors; asserted to be 16-byte aligned
//   count        - capacity of pDestination, in vectors
//   pSource      - first byte of the source row
//   size         - number of readable bytes at pSource
//   format       - pixel format of the source row
//
// Returns true when the format is handled and `size` holds at least one source
// element. Returns false for a null destination, an unsupported format, or a
// row shorter than one element. Conversion stops at whichever comes first: the
// end of the destination or the last *complete* source element.
_Success_(return)
bool LoadScanline(
    _Out_writes_(count) DirectX::XMVECTOR* pDestination,
    size_t count,
    _In_reads_bytes_(size) LPCVOID pSource,
    size_t size,
    DXGI_FORMAT format)
{
    assert(pDestination && count > 0 && ((reinterpret_cast<uintptr_t>(pDestination) & 0xF) == 0));
    assert(pSource && size > 0);

    using namespace DirectX::PackedVector;

    XMVECTOR* __restrict dPtr = pDestination;
    if (!dPtr)
        return false;

    const XMVECTOR* ePtr = pDestination + count;

    switch (format)
    {
    case DXGI_FORMAT_R32G32B32A32_FLOAT:
        {
            // Source layout already matches XMVECTOR: copy min(size, destination bytes).
            size_t msize = (size > (sizeof(XMVECTOR)*count)) ? (sizeof(XMVECTOR)*count) : size;
            memcpy_s(dPtr, sizeof(XMVECTOR)*count, pSource, msize);
        }
        return true;

    // Each LOAD_SCANLINE* expansion below returns from the function.
    case DXGI_FORMAT_R32G32B32_FLOAT:
        LOAD_SCANLINE3(XMFLOAT3, XMLoadFloat3, g_XMIdentityR3)

    case DXGI_FORMAT_R16G16B16A16_FLOAT:
        LOAD_SCANLINE(XMHALF4, XMLoadHalf4)

    case DXGI_FORMAT_R32G32_FLOAT:
        LOAD_SCANLINE2(XMFLOAT2, XMLoadFloat2, g_XMIdentityR3)

    case DXGI_FORMAT_R11G11B10_FLOAT:
        LOAD_SCANLINE3(XMFLOAT3PK, XMLoadFloat3PK, g_XMIdentityR3)

    case DXGI_FORMAT_R16G16_FLOAT:
        LOAD_SCANLINE2(XMHALF2, XMLoadHalf2, g_XMIdentityR3)

    case DXGI_FORMAT_R32_FLOAT:
        if (size >= sizeof(float))
        {
            const float* __restrict sPtr = reinterpret_cast<const float*>(pSource);
            // Bound matches the LOAD_SCANLINE* macros: only complete elements are
            // visited, so a trailing partial float is never read past `size`.
            for (size_t icount = 0; icount < (size - sizeof(float) + 1); icount += sizeof(float))
            {
                // Check the destination first so no element is loaded needlessly.
                if (dPtr >= ePtr) break;
                XMVECTOR v = XMLoadFloat(sPtr++);
                *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v, g_XMSelect1000);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R16_FLOAT:
        if (size >= sizeof(HALF))
        {
            const HALF * __restrict sPtr = reinterpret_cast<const HALF*>(pSource);
            // Same complete-element bound as above.
            for (size_t icount = 0; icount < (size - sizeof(HALF) + 1); icount += sizeof(HALF))
            {
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(XMConvertHalfToFloat(*sPtr++), 0.f, 0.f, 1.f);
            }
            return true;
        }
        return false;

    default:
        return false;
    }
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif
} // namespace anonymous
//-------------------------------------------------------------------------------------
// Projects a function represented in a cube map into spherical harmonics.
//
// http://msdn.microsoft.com/en-us/library/windows/desktop/ff476300.aspx
//-------------------------------------------------------------------------------------
// D3D12 variant: the cube map is supplied as a resource description plus six
// CPU-readable subresources, one per face.
//
//   order   - SH order, must lie in [XM_SH_MINORDER, XM_SH_MAXORDER]
//   desc    - must describe a square, non-multisampled 2D texture with six
//             array slices in one of the float formats listed below
//   cubeMap - six faces; each needs a non-null pData and a valid RowPitch
//   resultR/G/B - optional outputs of order*order floats each (null to skip)
//
// Returns E_INVALIDARG for an out-of-range order, E_FAIL for an unsupported or
// empty description or a scanline that cannot be decoded, E_OUTOFMEMORY when the
// scratch row cannot be allocated, E_POINTER for a face without data, else S_OK.
_Use_decl_annotations_
HRESULT DirectX::SHProjectCubeMap(
    size_t order,
    const D3D12_RESOURCE_DESC& desc,
    const D3D12_SUBRESOURCE_DATA cubeMap[6],
    float *resultR,
    float *resultG,
    float *resultB) noexcept
{
    if (order < XM_SH_MINORDER || order > XM_SH_MAXORDER)
        return E_INVALIDARG;

    // A zero-sized face would leave the accumulated weight at 0 and make the
    // final normalisation divide by zero, so it is rejected with the other
    // description checks instead of relying on a debug-only assert.
    if (desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE2D
        || (desc.DepthOrArraySize != 6)
        || (desc.Width == 0)
        || (desc.Width != desc.Height)
        || (desc.SampleDesc.Count > 1))
        return E_FAIL;

    switch (desc.Format)
    {
    case DXGI_FORMAT_R32G32B32A32_FLOAT:
    case DXGI_FORMAT_R32G32B32_FLOAT:
    case DXGI_FORMAT_R16G16B16A16_FLOAT:
    case DXGI_FORMAT_R32G32_FLOAT:
    case DXGI_FORMAT_R11G11B10_FLOAT:
    case DXGI_FORMAT_R16G16_FLOAT:
    case DXGI_FORMAT_R32_FLOAT:
    case DXGI_FORMAT_R16_FLOAT:
        // See LoadScanline to support more pixel formats
        break;

    default:
        return E_FAIL;
    }

    //--- Setup for SH projection
    // Scratch buffer holding one decoded row of the current face.
    ScopedAlignedArrayXMVECTOR scanline(reinterpret_cast<XMVECTOR*>(_aligned_malloc(static_cast<size_t>(sizeof(XMVECTOR)*desc.Width), 16)));
    if (!scanline)
        return E_OUTOFMEMORY;

    float fSize = static_cast<float>(desc.Width);
    float fPicSize = 1.0f / fSize;

    // index from [0,W-1], f(0) maps to -1 + 1/W, f(W-1) maps to 1 - 1/w
    // linear function x*S +B, 1st constraint means B is (-1+1/W), plug into
    // second and solve for S: S = 2*(1-1/W)/(W-1). The old code that did
    // this was incorrect - but only for computing the differential solid
    // angle, where the final value was 1.0 instead of 1-1/w...
    float fB = -1.0f + 1.0f / fSize;
    float fS = (desc.Width > 1) ? (2.0f*(1.0f - 1.0f / fSize) / (fSize - 1.0f)) : 0.f;

    // clear out accumulation variables
    float fWt = 0.0f;

    if (resultR)
        memset(resultR, 0, sizeof(float)*order*order);
    if (resultG)
        memset(resultG, 0, sizeof(float)*order*order);
    if (resultB)
        memset(resultB, 0, sizeof(float)*order*order);

    float shBuff[XM_SH_MAXORDER*XM_SH_MAXORDER] = {};
    float shBuffB[XM_SH_MAXORDER*XM_SH_MAXORDER] = {};

    //--- Process each face of the cubemap
    for (UINT face = 0; face < 6; ++face)
    {
        if (!cubeMap[face].pData)
            return E_POINTER;

        const uint8_t *pSrc = reinterpret_cast<const uint8_t*>(cubeMap[face].pData);
        for (UINT y = 0; y < desc.Height; ++y)
        {
            XMVECTOR* ptr = scanline.get();
            if (!LoadScanline(ptr, static_cast<size_t>(desc.Width), pSrc, static_cast<size_t>(cubeMap[face].RowPitch), desc.Format))
            {
                return E_FAIL;
            }

            const float v = float(y) * fS + fB;

            XMVECTOR* pixel = ptr;
            for (UINT x = 0; x < desc.Width; ++x, ++pixel)
            {
                const float u = float(x) * fS + fB;

                // Direction through the centre of texel (x, y) on this face.
                float ix, iy, iz;
                switch (face)
                {
                case 0: // Positive X
                    iz = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize;
                    iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
                    ix = 1.0f;
                    break;

                case 1: // Negative X
                    iz = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
                    iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
                    ix = -1;
                    break;

                case 2: // Positive Y
                    iz = -1.0f + (2.0f * float(y) + 1.0f) * fPicSize;
                    iy = 1.0f;
                    ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
                    break;

                case 3: // Negative Y
                    iz = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
                    iy = -1.0f;
                    ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
                    break;

                case 4: // Positive Z
                    iz = 1.0f;
                    iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
                    ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
                    break;

                case 5: // Negative Z
                    iz = -1.0f;
                    iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
                    ix = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize;
                    break;

                default:
                    ix = iy = iz = 0.f;
                    assert(false);
                    break;
                }

                XMVECTOR dir = XMVectorSet(ix, iy, iz, 0);
                dir = XMVector3Normalize(dir);

                // Per-texel weight (named "differential solid angle" above).
                const float fDiffSolid = 4.0f / ((1.0f + u * u + v * v)*sqrtf(1.0f + u * u + v * v));
                fWt += fDiffSolid;

                XMSHEvalDirection(shBuff, order, dir);

                XMFLOAT3A clr;
                XMStoreFloat3A(&clr, *pixel);

                // Accumulate colour-weighted basis values per requested channel.
                if (resultR) XMSHAdd(resultR, order, resultR, XMSHScale(shBuffB, order, shBuff, clr.x*fDiffSolid));
                if (resultG) XMSHAdd(resultG, order, resultG, XMSHScale(shBuffB, order, shBuff, clr.y*fDiffSolid));
                if (resultB) XMSHAdd(resultB, order, resultB, XMSHScale(shBuffB, order, shBuff, clr.z*fDiffSolid));
            }

            pSrc += cubeMap[face].RowPitch;
        }
    }

    // Rescale so the accumulated weights sum to 4*pi; fWt > 0 is guaranteed
    // because at least one texel was processed.
    const float fNormProj = (4.0f*XM_PI) / fWt;

    if (resultR) XMSHScale(resultR, order, resultR, fNormProj);
    if (resultG) XMSHScale(resultG, order, resultG, fNormProj);
    if (resultB) XMSHScale(resultB, order, resultB, fNormProj);

    return S_OK;
}

View File

@ -0,0 +1,257 @@
//-------------------------------------------------------------------------------------
// Stereo3DMatrixHelper.cpp -- SIMD C++ Math helper for Stereo 3D matrices
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//-------------------------------------------------------------------------------------
#include "Stereo3DMatrixHelper.h"
using namespace DirectX;
namespace
{
    // Derives the stereo offset and near-plane extents shared by the LH and RH
    // projection builders.
    //
    //   stereoParameters   - display / viewer calibration (sizes in inches)
    //   fVirtualProjection - [out] horizontal offset later applied per eye
    //   zNearWidth         - [out] zNearHeight divided by AspectRatio
    //   zNearHeight        - [out] cos(FovAngleY/2) / sin(FovAngleY/2)
    //   FovAngleY, AspectRatio, NearZ - same meaning as for the perspective matrix
    //
    // Returns false when the pixel resolution is too low for a comfortable
    // stereo experience, true otherwise. The outputs are written in both cases.
    inline bool StereoProjectionHelper(
        const STEREO_PARAMETERS& stereoParameters,
        _Out_ float* fVirtualProjection,
        _Out_ float* zNearWidth,
        _Out_ float* zNearHeight,
        float FovAngleY,
        float AspectRatio,
        float NearZ)
    {
        // Most people have difficulty fusing images into 3D if the separation
        // equals even just the human average. Halving the interocular distance
        // lets a larger subset of people see full 3D, so the conservative
        // settings are the ones used here. Their only drawback is a weaker 3D
        // effect on small screens; the liberal settings (debug only) simulate
        // a larger screen.
        //
        //   Conservative: max acuity angle 0.8 degrees, interocular distance 1.25 inches
        //   Liberal:      max acuity angle 1.6 degrees, interocular distance 2.5 inches
        //
        // The maximum visual acuity angle allowed is 3.2 degrees for a physical
        // scene and 1.6 degrees for a virtual one, so no object may appear
        // closer to the viewer than 1.6 degrees (halved for the half-angle
        // calculations below).
        static const float kMaxStereoDistance = 780; // inches (should be between 10 and 20m)
        static const float kMaxVisualAcuityAngle = 1.6f * (XM_PI / 180.0f); // radians
        static const float kInterocularDistance = 1.25f; // inches

        // Physical display extents derived from the diagonal size.
        const float displayHeight = stereoParameters.fDisplaySizeInches / sqrtf(AspectRatio * AspectRatio + 1.0f);
        const float displayWidth = displayHeight * AspectRatio;

        const float halfInterocular = 0.5f * kInterocularDistance * stereoParameters.fStereoExaggerationFactor;
        const float halfPixelWidth = displayWidth / stereoParameters.fPixelResolutionWidth * 0.5f;
        const float halfMaxAcuityAngle = kMaxVisualAcuityAngle * 0.5f * stereoParameters.fStereoExaggerationFactor;

        const float maxSeparationAcuityAngle = atanf(halfInterocular / kMaxStereoDistance);
        const float maxSeparationDistance = halfPixelWidth / tanf(maxSeparationAcuityAngle);
        const float refinedMaxStereoDistance = kMaxStereoDistance - maxSeparationDistance;

        const float fovHalfAngle = FovAngleY / 2.0f;

        // Either condition means the pixel resolution is too low to offer a
        // comfortable stereo experience.
        const bool comfortable =
            !(refinedMaxStereoDistance < 0.0f || maxSeparationDistance > 0.1f * kMaxStereoDistance);

        const float refinedMaxSeparationAcuityAngle = atanf(halfInterocular / (refinedMaxStereoDistance));
        const float physicalZNearDistance = halfInterocular / tanf(halfMaxAcuityAngle);
        const float nearZSeparation =
            tanf(refinedMaxSeparationAcuityAngle) * (refinedMaxStereoDistance - physicalZNearDistance);

        // The outputs are written in this order on purpose: each later value
        // is read back through the pointer written just before it.
        (*zNearHeight) = cosf(fovHalfAngle) / sinf(fovHalfAngle);
        (*zNearWidth) = (*zNearHeight) / AspectRatio;
        (*fVirtualProjection) = (nearZSeparation * NearZ * (*zNearWidth * 4.0f)) / (2.0f * NearZ);

        return comfortable;
    }
}
//------------------------------------------------------------------------------
// Fills `stereoParameters` with the library defaults: a 1920x1200 render
// resolution, a 22" LCD monitor, a 2' viewing distance, and both stereo
// factors set to 1.0.
void DirectX::StereoCreateDefaultParameters(STEREO_PARAMETERS& stereoParameters)
{
    // Physical setup (inches).
    stereoParameters.fViewerDistanceInches = 24.0f;
    stereoParameters.fDisplaySizeInches = 22.0f;

    // Render resolution (pixels).
    stereoParameters.fPixelResolutionWidth = 1920.0f;
    stereoParameters.fPixelResolutionHeight = 1200.0f;

    // Full separation, no exaggeration.
    stereoParameters.fStereoSeparationFactor = 1.0f;
    stereoParameters.fStereoExaggerationFactor = 1.0f;
}
//------------------------------------------------------------------------------
// Builds a left-handed perspective projection for one eye of a stereo pair.
//
//   pStereoParameters - calibration data; nullptr selects the defaults from
//                       StereoCreateDefaultParameters
//   Channel           - which eye the matrix is for
//   FovAngleY, AspectRatio, NearZ, FarZ - forwarded to XMMatrixPerspectiveFovLH
//   StereoMode        - STEREO_MODE_INVERTED additionally rotates about Y
//
// Returns (rotation *) translation * projection for the requested eye.
XMMATRIX DirectX::StereoProjectionFovLH(
    _In_opt_ const STEREO_PARAMETERS* pStereoParameters,
    STEREO_CHANNEL Channel,
    float FovAngleY,
    float AspectRatio,
    float NearZ,
    float FarZ,
    STEREO_MODE StereoMode)
{
    assert(Channel == STEREO_CHANNEL_LEFT || Channel == STEREO_CHANNEL_RIGHT);
    assert(StereoMode == STEREO_MODE_NORMAL || StereoMode == STEREO_MODE_INVERTED);
    assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
    assert(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

    // Fall back to the library defaults when the caller supplies no calibration.
    STEREO_PARAMETERS DefaultParameters = {};
    if (!pStereoParameters)
    {
        StereoCreateDefaultParameters(DefaultParameters);
        pStereoParameters = &DefaultParameters;
    }

    assert(pStereoParameters->fStereoSeparationFactor >= 0.0f && pStereoParameters->fStereoSeparationFactor <= 1.0f);
    assert(pStereoParameters->fStereoExaggerationFactor >= 1.0f && pStereoParameters->fStereoExaggerationFactor <= 2.0f);

    float fVirtualProjection = 0.0f;
    float zNearWidth = 0.0f;
    float zNearHeight = 0.0f;
    StereoProjectionHelper(*pStereoParameters, &fVirtualProjection, &zNearWidth, &zNearHeight, FovAngleY, AspectRatio, NearZ);

    // incorporate developer defined bias
    fVirtualProjection *= pStereoParameters->fStereoSeparationFactor;

    const float fInvertedAngle = atanf(fVirtualProjection / (2.0f * NearZ));
    const XMMATRIX proj = XMMatrixPerspectiveFovLH(FovAngleY, AspectRatio, NearZ, FarZ);

    // The two eyes differ only in the sign of the shift and of the rotation.
    const bool leftEye = (Channel == STEREO_CHANNEL_LEFT);

    // By applying a translation, we are forcing our cameras to be parallel
    const XMMATRIX trans = XMMatrixTranslation(leftEye ? -fVirtualProjection : fVirtualProjection, 0, 0);

    if (StereoMode > STEREO_MODE_NORMAL)
    {
        const XMMATRIX rots = XMMatrixRotationY(leftEye ? fInvertedAngle : -fInvertedAngle);
        return XMMatrixMultiply(XMMatrixMultiply(rots, trans), proj);
    }

    return XMMatrixMultiply(trans, proj);
}
//------------------------------------------------------------------------------
// Builds a right-handed perspective projection for one eye of a stereo pair.
//
//   pStereoParameters - calibration data; nullptr selects the defaults from
//                       StereoCreateDefaultParameters
//   Channel           - which eye the matrix is for
//   FovAngleY, AspectRatio, NearZ, FarZ - forwarded to XMMatrixPerspectiveFovRH
//   StereoMode        - STEREO_MODE_INVERTED additionally rotates about Y
//
// Returns (rotation *) translation * projection for the requested eye.
XMMATRIX DirectX::StereoProjectionFovRH(
    _In_opt_ const STEREO_PARAMETERS* pStereoParameters,
    STEREO_CHANNEL Channel,
    float FovAngleY,
    float AspectRatio,
    float NearZ,
    float FarZ,
    STEREO_MODE StereoMode)
{
    assert(Channel == STEREO_CHANNEL_LEFT || Channel == STEREO_CHANNEL_RIGHT);
    assert(StereoMode == STEREO_MODE_NORMAL || StereoMode == STEREO_MODE_INVERTED);
    assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
    assert(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

    // Fall back to the library defaults when the caller supplies no calibration.
    STEREO_PARAMETERS DefaultParameters = {};
    if (!pStereoParameters)
    {
        StereoCreateDefaultParameters(DefaultParameters);
        pStereoParameters = &DefaultParameters;
    }

    assert(pStereoParameters->fStereoSeparationFactor >= 0.0f && pStereoParameters->fStereoSeparationFactor <= 1.0f);
    assert(pStereoParameters->fStereoExaggerationFactor >= 1.0f && pStereoParameters->fStereoExaggerationFactor <= 2.0f);

    float fVirtualProjection = 0.0f;
    float zNearWidth = 0.0f;
    float zNearHeight = 0.0f;
    StereoProjectionHelper(*pStereoParameters, &fVirtualProjection, &zNearWidth, &zNearHeight, FovAngleY, AspectRatio, NearZ);

    // incorporate developer defined bias
    fVirtualProjection *= pStereoParameters->fStereoSeparationFactor;

    const float fInvertedAngle = atanf(fVirtualProjection / (2.0f * NearZ));
    const XMMATRIX proj = XMMatrixPerspectiveFovRH(FovAngleY, AspectRatio, NearZ, FarZ);

    // The two eyes differ only in the sign of the shift and of the rotation.
    const bool leftEye = (Channel == STEREO_CHANNEL_LEFT);

    // By applying a translation, we are forcing our cameras to be parallel
    const XMMATRIX trans = XMMatrixTranslation(leftEye ? -fVirtualProjection : fVirtualProjection, 0, 0);

    if (StereoMode > STEREO_MODE_NORMAL)
    {
        const XMMATRIX rots = XMMatrixRotationY(leftEye ? fInvertedAngle : -fInvertedAngle);
        return XMMatrixMultiply(XMMatrixMultiply(rots, trans), proj);
    }

    return XMMatrixMultiply(trans, proj);
}

View File

@ -0,0 +1,64 @@
//-------------------------------------------------------------------------------------
// Stereo3DMatrixHelper.h -- SIMD C++ Math helper for Stereo 3D matrices
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//-------------------------------------------------------------------------------------
#pragma once
#include "DirectXMath.h"
namespace DirectX
{
    // Selects the eye a stereo projection is built for.
    enum STEREO_CHANNEL
    {
        STEREO_CHANNEL_LEFT = 0,
        STEREO_CHANNEL_RIGHT
    };

    // Selects the normal or the inverted stereo projection.
    enum STEREO_MODE
    {
        STEREO_MODE_NORMAL = 0,
        STEREO_MODE_INVERTED
    };

    //------------------------------------------------------------------------------
    // Stereo calibration settings:
    //
    //   * viewer distance to the display
    //   * physical display size
    //   * render resolution
    //
    // fStereoSeparationFactor says how far apart the left and right eyes are:
    // 0 is no separation, 1 is full separation. It defaults to 1.0.
    //
    // fStereoExaggerationFactor is a debugging aid that widens the interocular
    // spacing and the maximum acuity angle beyond their comfortable defaults.
    // Retail builds should always use 1.0; during development on small screens
    // it may be raised up to 2.0 to exaggerate the 3D effect. Values above 1.0
    // may cause discomfort on normal sized displays. It defaults to 1.0.
    //------------------------------------------------------------------------------
    struct STEREO_PARAMETERS
    {
        float fViewerDistanceInches;
        float fDisplaySizeInches;
        float fPixelResolutionWidth;
        float fPixelResolutionHeight;
        float fStereoSeparationFactor;
        float fStereoExaggerationFactor;
    };

    // Writes the default calibration into `stereoParameters`.
    void StereoCreateDefaultParameters(STEREO_PARAMETERS& stereoParameters);

    // Left-handed stereo perspective projection for one eye. A null
    // pStereoParameters selects the default calibration.
    XMMATRIX StereoProjectionFovLH(
        _In_opt_ const STEREO_PARAMETERS* pStereoParameters,
        STEREO_CHANNEL Channel,
        float FovAngleY,
        float AspectRatio,
        float NearZ,
        float FarZ,
        STEREO_MODE StereoMode = STEREO_MODE_NORMAL);

    // Right-handed stereo perspective projection for one eye. A null
    // pStereoParameters selects the default calibration.
    XMMATRIX StereoProjectionFovRH(
        _In_opt_ const STEREO_PARAMETERS* pStereoParameters,
        STEREO_CHANNEL Channel,
        float FovAngleY,
        float AspectRatio,
        float NearZ,
        float FarZ,
        STEREO_MODE StereoMode = STEREO_MODE_NORMAL);
}

880
vendor/directxmath-3.19.0/XDSP/XDSP.h vendored Normal file
View File

@ -0,0 +1,880 @@
//--------------------------------------------------------------------------------------
// File: XDSP.h
//
// DirectXMath based Digital Signal Processing (DSP) functions for audio,
// primarily Fast Fourier Transform (FFT)
//
// All buffer parameters must be 16-byte aligned
//
// All FFT functions support only single-precision floating-point audio
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/?LinkID=615557
//--------------------------------------------------------------------------------------
#pragma once
#include <cassert>
#include <DirectXMath.h>
#include <cstdint>
#include <cstring>
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 6001 6262)
#endif
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunknown-warning-option"
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
#endif
namespace XDSP
{
// Bring the DirectXMath vector and parameter-passing types into XDSP so the
// declarations below can name them without a DirectX:: prefix.
using XMVECTOR = DirectX::XMVECTOR;
using FXMVECTOR = DirectX::FXMVECTOR;
using GXMVECTOR = DirectX::GXMVECTOR;
using CXMVECTOR = DirectX::CXMVECTOR;
using XMFLOAT4A = DirectX::XMFLOAT4A;
// Returns true when n is a non-zero power of two, i.e. exactly one bit is set.
inline bool ISPOWEROF2(size_t n)
{
    if (n == 0)
    {
        return false;
    }

    // Clearing the lowest set bit leaves zero only if it was the sole bit.
    return (n & (n - 1)) == 0;
}
// Multiplies four complex numbers lane by lane. Real and imaginary parts live
// in separate vectors:
//   (r1, i1) * (r2, i2) = (r1*r2 - i1*i2, r1*i2 + r2*i1)
// rResult receives the real parts and iResult the imaginary parts.
inline void XM_CALLCONV vmulComplex(
    _Out_ XMVECTOR& rResult, _Out_ XMVECTOR& iResult,
    _In_ FXMVECTOR r1, _In_ FXMVECTOR i1, _In_ FXMVECTOR r2, _In_ GXMVECTOR i2) noexcept
{
    // Both products that involve r1 are formed before any output is written.
    const XMVECTOR realTimesReal = DirectX::XMVectorMultiply(r1, r2);
    const XMVECTOR realTimesImag = DirectX::XMVectorMultiply(r1, i2);

    // real: (r1*r2 - i1*i2)
    rResult = DirectX::XMVectorNegativeMultiplySubtract(i1, i2, realTimesReal);
    // imaginary: (r1*i2 + r2*i1)
    iResult = DirectX::XMVectorMultiplyAdd(r2, i1, realTimesImag);
}
// In-place variant: replaces (r1, i1) with the lane-wise complex product
//   (r1, i1) * (r2, i2) = (r1*r2 - i1*i2, r1*i2 + r2*i1)
inline void XM_CALLCONV vmulComplex(
    _Inout_ XMVECTOR& r1, _Inout_ XMVECTOR& i1, _In_ FXMVECTOR r2, _In_ FXMVECTOR i2) noexcept
{
    // Everything that needs the original r1 is computed before r1 is overwritten.
    const XMVECTOR realTimesReal = DirectX::XMVectorMultiply(r1, r2);
    const XMVECTOR realTimesImag = DirectX::XMVectorMultiply(r1, i2);

    // real: (r1*r2 - i1*i2) -- i1 still holds its input value here
    r1 = DirectX::XMVectorNegativeMultiplySubtract(i1, i2, realTimesReal);
    // imaginary: (r1*i2 + r2*i1) -- uses the saved r1*i2, not the updated r1
    i1 = DirectX::XMVectorMultiplyAdd(r2, i1, realTimesImag);
}
//----------------------------------------------------------------------------------
// Radix-4 decimation-in-time FFT butterfly.
// This version assumes that all four elements of the butterfly are
// adjacent in a single vector.
//
// Compute the product of the complex input vector and the
// 4-element DFT matrix:
// | 1 1 1 1 | | (r1X,i1X) |
// | 1 -j -1 j | | (r1Y,i1Y) |
// | 1 -1 1 -1 | | (r1Z,i1Z) |
// | 1 j -1 -j | | (r1W,i1W) |
//
// This matrix can be decomposed into two simpler ones to reduce the
// number of additions needed. The decomposed matrices look like this:
// | 1 0 1 0 | | 1 0 1 0 |
// | 0 1 0 -j | | 1 0 -1 0 |
// | 1 0 -1 0 | | 0 1 0 1 |
// | 0 1 0 j | | 0 1 0 -1 |
//
// Combine as follows:
// | 1 0 1 0 | | (r1X,i1X) | | (r1X + r1Z, i1X + i1Z) |
// Temp = | 1 0 -1 0 | * | (r1Y,i1Y) | = | (r1X - r1Z, i1X - i1Z) |
// | 0 1 0 1 | | (r1Z,i1Z) | | (r1Y + r1W, i1Y + i1W) |
// | 0 1 0 -1 | | (r1W,i1W) | | (r1Y - r1W, i1Y - i1W) |
//
// | 1 0 1 0 | | (rTempX,iTempX) | | (rTempX + rTempZ, iTempX + iTempZ) |
// Result = | 0 1 0 -j | * | (rTempY,iTempY) | = | (rTempY + iTempW, iTempY - rTempW) |
// | 1 0 -1 0 | | (rTempZ,iTempZ) | | (rTempX - rTempZ, iTempX - iTempZ) |
// | 0 1 0 j | | (rTempW,iTempW) | | (rTempY - iTempW, iTempY + rTempW) |
//----------------------------------------------------------------------------------
inline void ButterflyDIT4_1 (_Inout_ XMVECTOR& r1, _Inout_ XMVECTOR& i1) noexcept
{
    // Per-lane +1/-1 multipliers that turn a multiply-add into the add/subtract
    // pattern each butterfly stage needs.
    static const DirectX::XMVECTORF32 signsPMPM = { { { 1.0f, -1.0f, 1.0f, -1.0f } } };
    static const DirectX::XMVECTORF32 signsPPMM = { { { 1.0f, 1.0f, -1.0f, -1.0f } } };
    static const DirectX::XMVECTORF32 signsPMMP = { { { 1.0f, -1.0f, -1.0f, 1.0f } } };

    // --- Stage 1: Temp ---
    //   real: [r1X| r1X|r1Y| r1Y] + [r1Z|-r1Z|r1W|-r1W]
    //   imag: [i1X| i1X|i1Y| i1Y] + [i1Z|-i1Z|i1W|-i1W]
    const XMVECTOR realLow = DirectX::XMVectorSwizzle<0, 0, 1, 1>(r1);
    const XMVECTOR realHigh = DirectX::XMVectorSwizzle<2, 2, 3, 3>(r1);
    const XMVECTOR imagLow = DirectX::XMVectorSwizzle<0, 0, 1, 1>(i1);
    const XMVECTOR imagHigh = DirectX::XMVectorSwizzle<2, 2, 3, 3>(i1);

    const XMVECTOR tempReal = DirectX::XMVectorMultiplyAdd(realHigh, signsPMPM, realLow);
    const XMVECTOR tempImag = DirectX::XMVectorMultiplyAdd(imagHigh, signsPMPM, imagLow);

    // --- Stage 2: Result ---
    // Gather the Z/W lanes of both temporaries, then arrange them as addends.
    const XMVECTOR tempHighLanes = DirectX::XMVectorPermute<2, 3, 6, 7>(tempReal, tempImag); // [rTempZ|rTempW|iTempZ|iTempW]
    const XMVECTOR realAddend = DirectX::XMVectorSwizzle<0, 3, 0, 3>(tempHighLanes);         // [rTempZ|iTempW|rTempZ|iTempW]
    const XMVECTOR imagAddend = DirectX::XMVectorSwizzle<2, 1, 2, 1>(tempHighLanes);         // [iTempZ|rTempW|iTempZ|rTempW]

    //   real: [rTempX| rTempY| rTempX| rTempY] + [rTempZ| iTempW|-rTempZ|-iTempW]
    //   imag: [iTempX| iTempY| iTempX| iTempY] + [iTempZ|-rTempW|-iTempZ| rTempW]
    const XMVECTOR tempRealXY = DirectX::XMVectorSwizzle<0, 1, 0, 1>(tempReal);
    const XMVECTOR tempImagXY = DirectX::XMVectorSwizzle<0, 1, 0, 1>(tempImag);

    r1 = DirectX::XMVectorMultiplyAdd(realAddend, signsPPMM, tempRealXY);
    i1 = DirectX::XMVectorMultiplyAdd(imagAddend, signsPMMP, tempImagXY);
}
//----------------------------------------------------------------------------------
// Radix-4 decimation-in-time FFT butterfly.
// This version assumes that elements of the butterfly are
// in different vectors, so that each vector in the input
// contains elements from four different butterflies.
// The four separate butterflies are processed in parallel.
//
// The calculations here are the same as the ones in the single-vector
// radix-4 DFT, but instead of being done on a single vector (X,Y,Z,W)
// they are done in parallel on sixteen independent complex values.
// There is no interdependence between the vector elements:
// | 1 0 1 0 | | (rIn0,iIn0) | | (rIn0 + rIn2, iIn0 + iIn2) |
// | 1 0 -1 0 | * | (rIn1,iIn1) | = Temp = | (rIn0 - rIn2, iIn0 - iIn2) |
// | 0 1 0 1 | | (rIn2,iIn2) | | (rIn1 + rIn3, iIn1 + iIn3) |
// | 0 1 0 -1 | | (rIn3,iIn3) | | (rIn1 - rIn3, iIn1 - iIn3) |
//
// | 1 0 1 0 | | (rTemp0,iTemp0) | | (rTemp0 + rTemp2, iTemp0 + iTemp2) |
// Result = | 0 1 0 -j | * | (rTemp1,iTemp1) | = | (rTemp1 + iTemp3, iTemp1 - rTemp3) |
// | 1 0 -1 0 | | (rTemp2,iTemp2) | | (rTemp0 - rTemp2, iTemp0 - iTemp2) |
// | 0 1 0 j | | (rTemp3,iTemp3) | | (rTemp1 - iTemp3, iTemp1 + rTemp3) |
//----------------------------------------------------------------------------------
inline void ButterflyDIT4_4(
    _Inout_ XMVECTOR& r0,
    _Inout_ XMVECTOR& r1,
    _Inout_ XMVECTOR& r2,
    _Inout_ XMVECTOR& r3,
    _Inout_ XMVECTOR& i0,
    _Inout_ XMVECTOR& i1,
    _Inout_ XMVECTOR& i2,
    _Inout_ XMVECTOR& i3,
    _In_reads_(uStride * 4) const XMVECTOR* __restrict pUnityTableReal,
    _In_reads_(uStride * 4) const XMVECTOR* __restrict pUnityTableImaginary,
    _In_ size_t uStride,
    _In_ const bool fLast) noexcept
{
    assert(pUnityTableReal);
    assert(pUnityTableImaginary);
    assert(reinterpret_cast<uintptr_t>(pUnityTableReal) % 16 == 0);
    assert(reinterpret_cast<uintptr_t>(pUnityTableImaginary) % 16 == 0);
    assert(ISPOWEROF2(uStride));

    // --- Stage 1: sums and differences of inputs (0,2) and (1,3) ---
    const XMVECTOR sumReal02 = DirectX::XMVectorAdd(r0, r2);
    const XMVECTOR sumImag02 = DirectX::XMVectorAdd(i0, i2);
    const XMVECTOR sumReal13 = DirectX::XMVectorAdd(r1, r3);
    const XMVECTOR sumImag13 = DirectX::XMVectorAdd(i1, i3);

    const XMVECTOR diffReal02 = DirectX::XMVectorSubtract(r0, r2);
    const XMVECTOR diffImag02 = DirectX::XMVectorSubtract(i0, i2);
    const XMVECTOR diffReal13 = DirectX::XMVectorSubtract(r1, r3);
    const XMVECTOR diffImag13 = DirectX::XMVectorSubtract(i1, i3);

    // --- Stage 2: combine into the four butterfly outputs ---
    XMVECTOR out0Real = DirectX::XMVectorAdd(sumReal02, sumReal13);
    XMVECTOR out0Imag = DirectX::XMVectorAdd(sumImag02, sumImag13);

    XMVECTOR out1Real = DirectX::XMVectorAdd(diffReal02, diffImag13);
    XMVECTOR out1Imag = DirectX::XMVectorSubtract(diffImag02, diffReal13);

    XMVECTOR out2Real = DirectX::XMVectorSubtract(sumReal02, sumReal13);
    XMVECTOR out2Imag = DirectX::XMVectorSubtract(sumImag02, sumImag13);

    XMVECTOR out3Real = DirectX::XMVectorSubtract(diffReal02, diffImag13);
    XMVECTOR out3Imag = DirectX::XMVectorAdd(diffImag02, diffReal13);

    // --- Apply the unity-table factors ---
    // Output 0 is left untouched: its factor is always trivial.
    vmulComplex(out1Real, out1Imag, pUnityTableReal[uStride], pUnityTableImaginary[uStride]);
    vmulComplex(out2Real, out2Imag, pUnityTableReal[uStride * 2], pUnityTableImaginary[uStride * 2]);
    vmulComplex(out3Real, out3Imag, pUnityTableReal[uStride * 3], pUnityTableImaginary[uStride * 3]);

    // On the last pass each output vector also gets the single-vector butterfly.
    if (fLast)
    {
        ButterflyDIT4_1(out0Real, out0Imag);
        ButterflyDIT4_1(out1Real, out1Imag);
        ButterflyDIT4_1(out2Real, out2Imag);
        ButterflyDIT4_1(out3Real, out3Imag);
    }

    // Write back only after every input has been consumed.
    r0 = out0Real;
    i0 = out0Imag;
    r1 = out1Real;
    i1 = out1Imag;
    r2 = out2Real;
    i2 = out2Imag;
    r3 = out3Real;
    i3 = out3Imag;
}
//==================================================================================
// F-U-N-C-T-I-O-N-S
//==================================================================================
//----------------------------------------------------------------------------------
// DESCRIPTION:
// 4-sample FFT.
//
// PARAMETERS:
// pReal - [inout] real components, must have at least uCount elements
// pImaginary - [inout] imaginary components, must have at least uCount elements
// uCount - [in] number of FFT iterations
//----------------------------------------------------------------------------------
inline void FFT4(
    _Inout_updates_(uCount) XMVECTOR* __restrict pReal,
    _Inout_updates_(uCount) XMVECTOR* __restrict pImaginary,
    const size_t uCount = 1) noexcept
{
    assert(pReal);
    assert(pImaginary);
    assert(reinterpret_cast<uintptr_t>(pReal) % 16 == 0);
    assert(reinterpret_cast<uintptr_t>(pImaginary) % 16 == 0);
    assert(ISPOWEROF2(uCount));

    // Each vector pair holds one complete 4-sample transform, so a single
    // in-vector butterfly per pair is the whole FFT.
    size_t fftIndex = 0;
    while (fftIndex < uCount)
    {
        XMVECTOR& realLanes = pReal[fftIndex];
        XMVECTOR& imagLanes = pImaginary[fftIndex];
        ButterflyDIT4_1(realLanes, imagLanes);
        ++fftIndex;
    }
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
// 8-sample FFT.
//
// PARAMETERS:
// pReal - [inout] real components, must have at least uCount*2 elements
// pImaginary - [inout] imaginary components, must have at least uCount*2 elements
// uCount - [in] number of FFT iterations
//----------------------------------------------------------------------------------
inline void FFT8(
    _Inout_updates_(uCount * 2) XMVECTOR* __restrict pReal,
    _Inout_updates_(uCount * 2) XMVECTOR* __restrict pImaginary,
    _In_ const size_t uCount = 1) noexcept
{
    assert(pReal);
    assert(pImaginary);
    assert(reinterpret_cast<uintptr_t>(pReal) % 16 == 0);
    assert(reinterpret_cast<uintptr_t>(pImaginary) % 16 == 0);
    assert(ISPOWEROF2(uCount));

    // Complex factors applied to the odd half: the first pair produces the
    // first output vector, the second pair (the negated first) the second one.
    static const DirectX::XMVECTORF32 factorRealFirst = { { { 1.0f, 0.70710677f, 0.0f, -0.70710677f } } };
    static const DirectX::XMVECTORF32 factorImagFirst = { { { 0.0f, -0.70710677f, -1.0f, -0.70710677f } } };
    static const DirectX::XMVECTORF32 factorRealSecond = { { { -1.0f, -0.70710677f, 0.0f, 0.70710677f } } };
    static const DirectX::XMVECTORF32 factorImagSecond = { { { 0.0f, 0.70710677f, 1.0f, 0.70710677f } } };

    for (size_t fftIndex = 0; fftIndex < uCount; ++fftIndex)
    {
        // Two vectors (eight samples) per transform.
        XMVECTOR* __restrict realPair = pReal + fftIndex * 2;
        XMVECTOR* __restrict imagPair = pImaginary + fftIndex * 2;

        // Split the eight samples into odd- and even-indexed groups of four.
        XMVECTOR oddReal = DirectX::XMVectorPermute<1, 3, 5, 7>(realPair[0], realPair[1]);
        XMVECTOR evenReal = DirectX::XMVectorPermute<0, 2, 4, 6>(realPair[0], realPair[1]);
        XMVECTOR oddImag = DirectX::XMVectorPermute<1, 3, 5, 7>(imagPair[0], imagPair[1]);
        XMVECTOR evenImag = DirectX::XMVectorPermute<0, 2, 4, 6>(imagPair[0], imagPair[1]);

        // 4-point transform of each group.
        ButterflyDIT4_1(oddReal, oddImag);
        ButterflyDIT4_1(evenReal, evenImag);

        // Combine: output = even + factor * odd, once per output vector.
        XMVECTOR scaledReal, scaledImag;

        vmulComplex(scaledReal, scaledImag, oddReal, oddImag, factorRealFirst, factorImagFirst);
        realPair[0] = DirectX::XMVectorAdd(evenReal, scaledReal);
        imagPair[0] = DirectX::XMVectorAdd(evenImag, scaledImag);

        vmulComplex(scaledReal, scaledImag, oddReal, oddImag, factorRealSecond, factorImagSecond);
        realPair[1] = DirectX::XMVectorAdd(evenReal, scaledReal);
        imagPair[1] = DirectX::XMVectorAdd(evenImag, scaledImag);
    }
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
// 16-sample FFT.
//
// PARAMETERS:
// pReal - [inout] real components, must have at least uCount*4 elements
// pImaginary - [inout] imaginary components, must have at least uCount*4 elements
// uCount - [in] number of FFT iterations
//----------------------------------------------------------------------------------
inline void FFT16(
    _Inout_updates_(uCount * 4) XMVECTOR* __restrict pReal,
    _Inout_updates_(uCount * 4) XMVECTOR* __restrict pImaginary,
    _In_ const size_t uCount = 1) noexcept
{
    assert(pReal);
    assert(pImaginary);
    assert(reinterpret_cast<uintptr_t>(pReal) % 16 == 0);
    assert(reinterpret_cast<uintptr_t>(pImaginary) % 16 == 0);
    assert(ISPOWEROF2(uCount));

    // Fixed unity tables for the 16-sample case, one row per butterfly output.
    static const DirectX::XMVECTORF32 aUnityTableReal[4] =
    {
        { { { 1.0f, 1.0f, 1.0f, 1.0f } } },
        { { { 1.0f, 0.92387950f, 0.70710677f, 0.38268343f } } },
        { { { 1.0f, 0.70710677f, -4.3711388e-008f, -0.70710677f } } },
        { { { 1.0f, 0.38268343f, -0.70710677f, -0.92387950f } } }
    };
    static const DirectX::XMVECTORF32 aUnityTableImaginary[4] =
    {
        { { { -0.0f, -0.0f, -0.0f, -0.0f } } },
        { { { -0.0f, -0.38268343f, -0.70710677f, -0.92387950f } } },
        { { { -0.0f, -0.70710677f, -1.0f, -0.70710677f } } },
        { { { -0.0f, -0.92387950f, -0.70710677f, 0.38268343f } } }
    };

    // The tables are handed to the butterfly as plain XMVECTOR arrays.
    const XMVECTOR* const unityReal = reinterpret_cast<const XMVECTOR*>(aUnityTableReal);
    const XMVECTOR* const unityImag = reinterpret_cast<const XMVECTOR*>(aUnityTableImaginary);

    for (size_t fftIndex = 0; fftIndex < uCount; ++fftIndex)
    {
        // Four vectors (sixteen samples) per transform.
        XMVECTOR* const realQuad = pReal + fftIndex * 4;
        XMVECTOR* const imagQuad = pImaginary + fftIndex * 4;

        // Stride 1 and fLast = true: this single pass is also the final one.
        ButterflyDIT4_4(
            realQuad[0], realQuad[1], realQuad[2], realQuad[3],
            imagQuad[0], imagQuad[1], imagQuad[2], imagQuad[3],
            unityReal,
            unityImag,
            1, true);
    }
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
//  2^N-sample FFT.
//
// REMARKS:
//  For FFTs length 16 and below, call FFT16(), FFT8(), or FFT4().
//
//  One ButterflyDIT4_4 pass is applied over the whole buffer, after which the
//  remaining work is handed to a quarter-length transform with four times as
//  many iterations: FFT() again, or FFT16()/FFT8()/FFT4() once the
//  quarter length is 16 or below.
//
// PARAMETERS:
//  pReal       - [inout] real components, must have at least (uLength*uCount)/4 elements
//  pImaginary  - [inout] imaginary components, must have at least (uLength*uCount)/4 elements
//  pUnityTable - [in]    unity table, must have at least uLength*uCount elements, see FFTInitializeUnityTable()
//  uLength     - [in]    FFT length in samples, must be a power of 2 > 16
//  uCount      - [in]    number of FFT iterations
//----------------------------------------------------------------------------------
inline void FFT (
    _Inout_updates_((uLength * uCount) / 4) XMVECTOR* __restrict pReal,
    _Inout_updates_((uLength * uCount) / 4) XMVECTOR* __restrict pImaginary,
    _In_reads_(uLength * uCount) const XMVECTOR* __restrict pUnityTable,
    _In_ const size_t uLength,
    _In_ const size_t uCount = 1) noexcept
{
    assert(pReal);
    assert(pImaginary);
    assert(pUnityTable);
    assert(reinterpret_cast<uintptr_t>(pReal) % 16 == 0);
    assert(reinterpret_cast<uintptr_t>(pImaginary) % 16 == 0);
    assert(reinterpret_cast<uintptr_t>(pUnityTable) % 16 == 0);
    assert(uLength > 16);
    _Analysis_assume_(uLength > 16);
    assert(ISPOWEROF2(uLength));
    assert(ISPOWEROF2(uCount));

    // Sizes expressed in XMVECTORs (four samples each).
    const size_t uVectorsPerFFT = uLength >> 2;
    const size_t uVectorsPerFFTMask = uVectorsPerFFT - 1;
    const size_t uVectorsTotal = (uCount * uLength) >> 2;

    // Distance, in vectors, between the four inputs of one butterfly.
    const size_t uStride = uLength >> 4;
    const size_t uStrideMask = uStride - 1;

    // The real part of this stage's table comes first, the imaginary part follows.
    const XMVECTOR* __restrict pUnityTableReal = pUnityTable;
    const XMVECTOR* __restrict pUnityTableImaginary = pUnityTable + uVectorsPerFFT;

    const size_t uButterflyCount = uVectorsTotal >> 2;
    for (size_t uButterfly = 0; uButterfly < uButterflyCount; ++uButterfly)
    {
        // Bits above the stride select the group of 4*uStride vectors,
        // bits inside the stride select the position within that group.
        const size_t uGroupBits = uButterfly & ~uStrideMask;
        const size_t uInnerBits = uButterfly & uStrideMask;
        const size_t uFirst = (uGroupBits << 2) + uInnerBits;
        const size_t uTableOffset = uFirst & uVectorsPerFFTMask;

        ButterflyDIT4_4(
            pReal[uFirst],
            pReal[uFirst + uStride],
            pReal[uFirst + uStride * 2],
            pReal[uFirst + uStride * 3],
            pImaginary[uFirst],
            pImaginary[uFirst + uStride],
            pImaginary[uFirst + uStride * 2],
            pImaginary[uFirst + uStride * 3],
            pUnityTableReal + uTableOffset,
            pUnityTableImaginary + uTableOffset,
            uStride, false);
    }

    // Finish with quarter-length transforms over four times as many iterations.
    switch (uLength)
    {
    case 16 * 4:
        FFT16(pReal, pImaginary, uCount * 4);
        break;

    case 8 * 4:
        FFT8(pReal, pImaginary, uCount * 4);
        break;

    case 4 * 4:
        FFT4(pReal, pImaginary, uCount * 4);
        break;

    default:
        if (uLength > 16 * 4)
        {
            // The next stage's table starts uLength/2 vectors further on.
            FFT(pReal, pImaginary, pUnityTable + (uLength >> 1), uLength >> 2, uCount * 4);
        }
        break;
    }
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
//  Initializes unity roots lookup table used by FFT functions.
//  Once initialized, the table need not be initialized again unless a
//  different FFT length is desired.
//
// REMARKS:
//  The unity tables of FFT length 16 and below are hard coded into the
//  respective FFT functions and so need not be initialized.
//
//  The table is written stage by stage for the recursive FFT lengths
//  uLength, uLength/4, uLength/16, ... > 16.  Within a stage the four real
//  rows come first and the four imaginary rows follow; row 0 is the constant
//  (1, 0) and rows 1..3 hold cos and -sin of 1x, 2x and 3x the stage's base angles.
//
// PARAMETERS:
//  pUnityTable - [out] unity table, receives unity roots lookup table, must have at least uLength elements
//  uLength     - [in]  FFT length in frames, must be a power of 2 > 16
//----------------------------------------------------------------------------------
inline void FFTInitializeUnityTable (_Out_writes_(uLength) XMVECTOR* __restrict pUnityTable, _In_ size_t uLength) noexcept
{
    using namespace DirectX;

    assert(pUnityTable);
    assert(uLength > 16);
    _Analysis_assume_(uLength > 16);
    assert(ISPOWEROF2(uLength));

    // Sample indices of the first vector in a row.
    static const XMVECTORF32 vXM0123 = { { { 0.0f, 1.0f, 2.0f, 3.0f } } };

    uLength >>= 2;
    XMVECTOR vAngleStep = XMVectorReplicate(XM_PIDIV2 / float(uLength));

    do
    {
        // From here on uLength is the number of vectors per table row for this stage.
        uLength >>= 2;

        XMVECTOR vSampleIndex = vXM0123;
        for (size_t j = 0; j < uLength; ++j)
        {
            // Row 0: multiplying by 1 + 0i.
            pUnityTable[j] = g_XMOne;
            pUnityTable[j + uLength * 4] = XMVectorZero();

            // Rows 1..3: the row's angle multiple is built by repeated addition of the sample index.
            XMVECTOR vMultiple = vSampleIndex;
            for (size_t uRow = 1; ; ++uRow)
            {
                XMVECTOR vSin, vCos;
                const XMVECTOR vAngle = XMVectorMultiply(vMultiple, vAngleStep);
                XMVectorSinCos(&vSin, &vCos, vAngle);

                pUnityTable[j + uLength * uRow] = vCos;
                pUnityTable[j + uLength * (uRow + 4)] = XMVectorMultiply(vSin, g_XMNegativeOne);

                if (uRow == 3)
                {
                    break;
                }
                vMultiple = XMVectorAdd(vMultiple, vSampleIndex);
            }

            vSampleIndex = XMVectorAdd(vSampleIndex, g_XMFour);
        }

        // The next stage is a quarter of the length, so its angle step is four times larger.
        vAngleStep = XMVectorMultiply(vAngleStep, g_XMFour);
        pUnityTable += uLength * 8;
    } while (uLength > 4);
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
//  The FFT functions generate output in bit reversed order.
//  Use this function to re-arrange them into order of increasing frequency.
//
// REMARKS:
//  Even and odd values of uLog2Length are handled by separate code paths.
//  Both build a 32-bit reversed index from cSwizzleTable and shift it down so
//  that only the bits belonging to the requested length remain.
//
// PARAMETERS:
//  pOutput     - [out] output buffer, receives samples in order of increasing frequency, cannot overlap pInput, must have at least (1<<uLog2Length)/4 elements
//  pInput      - [in]  input buffer, samples in bit reversed order as generated by FFT functions, cannot overlap pOutput, must have at least (1<<uLog2Length)/4 elements
//  uLog2Length - [in]  LOG (base 2) of FFT length in samples, must be >= 2
//----------------------------------------------------------------------------------
inline void FFTUnswizzle (
    _Out_writes_((1 << uLog2Length) / 4) XMVECTOR* __restrict pOutput,
    _In_reads_((1 << uLog2Length) / 4) const XMVECTOR* __restrict pInput,
    _In_ const size_t uLog2Length) noexcept
{
    assert(pOutput);
    assert(pInput);
    assert(uLog2Length >= 2);
    _Analysis_assume_(uLog2Length >= 2);

    float* __restrict pfOutput = reinterpret_cast<float*>(pOutput);

    // Number of input vectors (four samples each).
    const size_t uLength = size_t(1) << (uLog2Length - 2);

    // cSwizzleTable[b] is the byte b with its four 2-bit groups in reverse order.
    static const unsigned char cSwizzleTable[256] = {
        0x00, 0x40, 0x80, 0xC0, 0x10, 0x50, 0x90, 0xD0, 0x20, 0x60, 0xA0, 0xE0, 0x30, 0x70, 0xB0, 0xF0,
        0x04, 0x44, 0x84, 0xC4, 0x14, 0x54, 0x94, 0xD4, 0x24, 0x64, 0xA4, 0xE4, 0x34, 0x74, 0xB4, 0xF4,
        0x08, 0x48, 0x88, 0xC8, 0x18, 0x58, 0x98, 0xD8, 0x28, 0x68, 0xA8, 0xE8, 0x38, 0x78, 0xB8, 0xF8,
        0x0C, 0x4C, 0x8C, 0xCC, 0x1C, 0x5C, 0x9C, 0xDC, 0x2C, 0x6C, 0xAC, 0xEC, 0x3C, 0x7C, 0xBC, 0xFC,
        0x01, 0x41, 0x81, 0xC1, 0x11, 0x51, 0x91, 0xD1, 0x21, 0x61, 0xA1, 0xE1, 0x31, 0x71, 0xB1, 0xF1,
        0x05, 0x45, 0x85, 0xC5, 0x15, 0x55, 0x95, 0xD5, 0x25, 0x65, 0xA5, 0xE5, 0x35, 0x75, 0xB5, 0xF5,
        0x09, 0x49, 0x89, 0xC9, 0x19, 0x59, 0x99, 0xD9, 0x29, 0x69, 0xA9, 0xE9, 0x39, 0x79, 0xB9, 0xF9,
        0x0D, 0x4D, 0x8D, 0xCD, 0x1D, 0x5D, 0x9D, 0xDD, 0x2D, 0x6D, 0xAD, 0xED, 0x3D, 0x7D, 0xBD, 0xFD,
        0x02, 0x42, 0x82, 0xC2, 0x12, 0x52, 0x92, 0xD2, 0x22, 0x62, 0xA2, 0xE2, 0x32, 0x72, 0xB2, 0xF2,
        0x06, 0x46, 0x86, 0xC6, 0x16, 0x56, 0x96, 0xD6, 0x26, 0x66, 0xA6, 0xE6, 0x36, 0x76, 0xB6, 0xF6,
        0x0A, 0x4A, 0x8A, 0xCA, 0x1A, 0x5A, 0x9A, 0xDA, 0x2A, 0x6A, 0xAA, 0xEA, 0x3A, 0x7A, 0xBA, 0xFA,
        0x0E, 0x4E, 0x8E, 0xCE, 0x1E, 0x5E, 0x9E, 0xDE, 0x2E, 0x6E, 0xAE, 0xEE, 0x3E, 0x7E, 0xBE, 0xFE,
        0x03, 0x43, 0x83, 0xC3, 0x13, 0x53, 0x93, 0xD3, 0x23, 0x63, 0xA3, 0xE3, 0x33, 0x73, 0xB3, 0xF3,
        0x07, 0x47, 0x87, 0xC7, 0x17, 0x57, 0x97, 0xD7, 0x27, 0x67, 0xA7, 0xE7, 0x37, 0x77, 0xB7, 0xF7,
        0x0B, 0x4B, 0x8B, 0xCB, 0x1B, 0x5B, 0x9B, 0xDB, 0x2B, 0x6B, 0xAB, 0xEB, 0x3B, 0x7B, 0xBB, 0xFB,
        0x0F, 0x4F, 0x8F, 0xCF, 0x1F, 0x5F, 0x9F, 0xDF, 0x2F, 0x6F, 0xAF, 0xEF, 0x3F, 0x7F, 0xBF, 0xFF
    };

    if ((uLog2Length & 1) == 0)
    {
        // even powers of two
        const size_t uRev32 = 32 - uLog2Length;
        for (size_t uIndex = 0; uIndex < uLength; ++uIndex)
        {
            XMFLOAT4A f4a;
            XMStoreFloat4A(&f4a, pInput[uIndex]);

            // Sample index of lane x; lanes y/z/w add 1/2/3, which end up in the
            // top two bits of the reversed 32-bit value.
            const size_t n = uIndex * 4;
            const size_t uAddr = (static_cast<size_t>(cSwizzleTable[n & 0xff]) << 24) |
                                 (static_cast<size_t>(cSwizzleTable[(n >> 8) & 0xff]) << 16) |
                                 (static_cast<size_t>(cSwizzleTable[(n >> 16) & 0xff]) << 8) |
                                 (static_cast<size_t>(cSwizzleTable[(n >> 24)]));
            pfOutput[uAddr >> uRev32] = f4a.x;
            pfOutput[(0x40000000 | uAddr) >> uRev32] = f4a.y;
            pfOutput[(0x80000000 | uAddr) >> uRev32] = f4a.z;
            pfOutput[(0xC0000000 | uAddr) >> uRev32] = f4a.w;
        }
    }
    else
    {
        // odd powers of two
        const size_t uRev7 = size_t(1) << (uLog2Length - 3);   // output distance between the lanes of one vector
        const size_t uRev32 = 32 - (uLog2Length - 3);
        for (size_t uIndex = 0; uIndex < uLength; ++uIndex)
        {
            XMFLOAT4A f4a;
            XMStoreFloat4A(&f4a, pInput[uIndex]);

            const size_t n = (uIndex >> 1);
            const size_t uReversed = (static_cast<size_t>(cSwizzleTable[n & 0xff]) << 24) |
                                     (static_cast<size_t>(cSwizzleTable[(n >> 8) & 0xff]) << 16) |
                                     (static_cast<size_t>(cSwizzleTable[(n >> 16) & 0xff]) << 8) |
                                     (static_cast<size_t>(cSwizzleTable[(n >> 24)]));

            // uRev32 is 32 when uLog2Length == 3.  Shifting by the full width of the
            // type is undefined where size_t has 32 bits, so that case is spelled
            // out: no reversed bits remain and the base address is 0.
            const size_t uBase = (uRev32 < 32) ? (uReversed >> uRev32) : 0;

            size_t uAddr = uBase | ((uIndex & 1) * uRev7 * 4);
            pfOutput[uAddr] = f4a.x;
            uAddr += uRev7;
            pfOutput[uAddr] = f4a.y;
            uAddr += uRev7;
            pfOutput[uAddr] = f4a.z;
            uAddr += uRev7;
            pfOutput[uAddr] = f4a.w;
        }
    }
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
//  Convert complex components to polar form.
//
// REMARKS:
//  Each output element is 2 * sqrt((real/uLength)^2 + (imaginary/uLength)^2).
//
// PARAMETERS:
//  pOutput         - [out] output buffer, receives samples in polar form, must have at least uLength/4 elements
//  pInputReal      - [in]  input buffer (real components), must have at least uLength/4 elements
//  pInputImaginary - [in]  input buffer (imaginary components), must have at least uLength/4 elements
//  uLength         - [in]  FFT length in samples, must be a power of 2 >= 4
//----------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma warning(suppress: 6101)
#endif
inline void FFTPolar(
    _Out_writes_(uLength / 4) XMVECTOR* __restrict pOutput,
    _In_reads_(uLength / 4) const XMVECTOR* __restrict pInputReal,
    _In_reads_(uLength / 4) const XMVECTOR* __restrict pInputImaginary,
    _In_ const size_t uLength) noexcept
{
    using namespace DirectX;

    assert(pOutput);
    assert(pInputReal);
    assert(pInputImaginary);
    assert(uLength >= 4);
    _Analysis_assume_(uLength >= 4);
    assert(ISPOWEROF2(uLength));

    // Both components are normalized by the FFT length before the magnitude is taken.
    const float flScale = 1.0f / float(uLength);
    const XMVECTOR vScale = XMVectorReplicate(flScale);

    const size_t uVectorCount = uLength >> 2;
    for (size_t i = 0; i < uVectorCount; ++i)
    {
        const XMVECTOR vRe = XMVectorMultiply(pInputReal[i], vScale);
        const XMVECTOR vIm = XMVectorMultiply(pInputImaginary[i], vScale);

        const XMVECTOR vReSquared = XMVectorMultiply(vRe, vRe);
        const XMVECTOR vImSquared = XMVectorMultiply(vIm, vIm);
        const XMVECTOR vSumOfSquares = XMVectorAdd(vReSquared, vImSquared);

        const XMVECTOR vMagnitude = XMVectorSqrt(vSumOfSquares);

        // Doubling is done as an addition.
        pOutput[i] = XMVectorAdd(vMagnitude, vMagnitude);
    }
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
//  Deinterleaves audio samples
//
// REMARKS:
//  For example, audio of the form [LRLRLR] becomes [LLLRRR].
//  The buffers are accessed as plain float arrays, one float per sample.
//
// PARAMETERS:
//  pOutput       - [out] output buffer, receives samples in deinterleaved form, cannot overlap pInput, must have at least (uChannelCount*uFrameCount)/4 elements
//  pInput        - [in]  input buffer, cannot overlap pOutput, must have at least (uChannelCount*uFrameCount)/4 elements
//  uChannelCount - [in]  number of channels, must be > 1
//  uFrameCount   - [in]  number of frames of valid data, must be > 0
//----------------------------------------------------------------------------------
inline void Deinterleave (
    _Out_writes_((uChannelCount * uFrameCount) / 4) XMVECTOR* __restrict pOutput,
    _In_reads_((uChannelCount * uFrameCount) / 4) const XMVECTOR* __restrict pInput,
    _In_ const size_t uChannelCount,
    _In_ const size_t uFrameCount) noexcept
{
    assert(pOutput);
    assert(pInput);
    assert(uChannelCount > 1);
    assert(uFrameCount > 0);

    float* __restrict pfOutput = reinterpret_cast<float* __restrict>(pOutput);
    const float* __restrict pfInput = reinterpret_cast<const float* __restrict>(pInput);

    for (size_t uChannel = 0; uChannel < uChannelCount; ++uChannel)
    {
        // Start of this channel's contiguous run in the output.
        const size_t uOutputBase = uChannel * uFrameCount;

        for (size_t uFrame = 0; uFrame < uFrameCount; ++uFrame)
        {
            const size_t uInputIndex = uFrame * uChannelCount + uChannel;
            pfOutput[uOutputBase + uFrame] = pfInput[uInputIndex];
        }
    }
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
//  Interleaves audio samples
//
// REMARKS:
//  For example, audio of the form [LLLRRR] becomes [LRLRLR].
//  The buffers are accessed as plain float arrays, one float per sample.
//
// PARAMETERS:
//  pOutput       - [out] output buffer, receives samples in interleaved form, cannot overlap pInput, must have at least (uChannelCount*uFrameCount)/4 elements
//  pInput        - [in]  input buffer, cannot overlap pOutput, must have at least (uChannelCount*uFrameCount)/4 elements
//  uChannelCount - [in]  number of channels, must be > 1
//  uFrameCount   - [in]  number of frames of valid data, must be > 0
//----------------------------------------------------------------------------------
inline void Interleave(
    _Out_writes_((uChannelCount * uFrameCount) / 4) XMVECTOR* __restrict pOutput,
    _In_reads_((uChannelCount * uFrameCount) / 4) const XMVECTOR* __restrict pInput,
    _In_ const size_t uChannelCount,
    _In_ const size_t uFrameCount) noexcept
{
    assert(pOutput);
    assert(pInput);
    assert(uChannelCount > 1);
    assert(uFrameCount > 0);

    float* __restrict pfOutput = reinterpret_cast<float* __restrict>(pOutput);
    const float* __restrict pfInput = reinterpret_cast<const float* __restrict>(pInput);

    for (size_t uChannel = 0; uChannel < uChannelCount; ++uChannel)
    {
        // Start of this channel's contiguous run in the input.
        const size_t uInputBase = uChannel * uFrameCount;

        for (size_t uFrame = 0; uFrame < uFrameCount; ++uFrame)
        {
            const size_t uOutputIndex = uFrame * uChannelCount + uChannel;
            pfOutput[uOutputIndex] = pfInput[uInputBase + uFrame];
        }
    }
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
//  This function applies a 2^N-sample FFT and unswizzles the result such
//  that the samples are in order of increasing frequency.
//  Audio is first deinterleaved if multichannel.
//
// REMARKS:
//  The transform runs on local temporaries; the imaginary input is taken to
//  be zero, and the unswizzled results are written back per channel
//  (channel after channel) into pReal and pImaginary.
//
// PARAMETERS:
//  pReal         - [inout] real components, must have at least (1<<uLog2Length*uChannelCount)/4 elements
//  pImaginary    - [out]   imaginary components, must have at least (1<<uLog2Length*uChannelCount)/4 elements
//  pUnityTable   - [in]    unity table, must have at least (1<<uLog2Length) elements, see FFTInitializeUnityTable()
//  uChannelCount - [in]    number of channels, must be within [1, 6]
//  uLog2Length   - [in]    LOG (base 2) of FFT length in frames, must be within [2, 9]
//----------------------------------------------------------------------------------
inline void FFTInterleaved(
    _Inout_updates_(((1 << uLog2Length) * uChannelCount) / 4) XMVECTOR* __restrict pReal,
    _Out_writes_(((1 << uLog2Length) * uChannelCount) / 4) XMVECTOR* __restrict pImaginary,
    _In_reads_(1 << uLog2Length) const XMVECTOR* __restrict pUnityTable,
    _In_ const size_t uChannelCount,
    _In_ const size_t uLog2Length) noexcept
{
    assert(pReal);
    assert(pImaginary);
    assert(pUnityTable);
    assert(reinterpret_cast<uintptr_t>(pReal) % 16 == 0);
    assert(reinterpret_cast<uintptr_t>(pImaginary) % 16 == 0);
    assert(reinterpret_cast<uintptr_t>(pUnityTable) % 16 == 0);
    assert(uChannelCount > 0 && uChannelCount <= 6);
    assert(uLog2Length >= 2 && uLog2Length <= 9);

    // 768 vectors = 6 channels * 512 samples / 4 samples per vector (the asserted maximum).
    XM_ALIGNED_DATA(16) XMVECTOR vRealTemp[768];
    XM_ALIGNED_DATA(16) XMVECTOR vImaginaryTemp[768];

    const size_t uLength = size_t(1) << uLog2Length;
    const size_t uVectorsPerChannel = uLength >> 2;

    // Bring the real input into channel-major order inside the temporary.
    if (uChannelCount <= 1)
    {
        memcpy_s(vRealTemp, sizeof(vRealTemp), pReal, uVectorsPerChannel * sizeof(XMVECTOR));
    }
    else
    {
        Deinterleave(vRealTemp, pReal, uChannelCount, uLength);
    }

    // The imaginary input is all zeros.
    memset(vImaginaryTemp, 0, (uChannelCount * uVectorsPerChannel) * sizeof(XMVECTOR));

    // Transform every channel in place with the routine matching the length.
    for (size_t uChannel = 0; uChannel < uChannelCount; ++uChannel)
    {
        XMVECTOR* const pChannelReal = &vRealTemp[uChannel * uVectorsPerChannel];
        XMVECTOR* const pChannelImaginary = &vImaginaryTemp[uChannel * uVectorsPerChannel];

        if (uLength > 16)
        {
            FFT(pChannelReal, pChannelImaginary, pUnityTable, uLength);
            continue;
        }

        switch (uLength)
        {
        case 16:
            FFT16(pChannelReal, pChannelImaginary);
            break;

        case 8:
            FFT8(pChannelReal, pChannelImaginary);
            break;

        case 4:
            FFT4(pChannelReal, pChannelImaginary);
            break;

        default:
            break;
        }
    }

    // Reorder each channel's result into the caller's buffers.
    for (size_t uChannel = 0; uChannel < uChannelCount; ++uChannel)
    {
        const size_t uOffset = uChannel * uVectorsPerChannel;
        FFTUnswizzle(&pReal[uOffset], &vRealTemp[uOffset], uLog2Length);
        FFTUnswizzle(&pImaginary[uOffset], &vImaginaryTemp[uOffset], uLog2Length);
    }
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
//  This function applies a 2^N-sample inverse FFT.
//  Audio is interleaved if multichannel.
//
// REMARKS:
//  The inverse is computed with the forward FFT routines: the input is scaled
//  by 1/length with the sign of the imaginary part flipped, transformed, and
//  the unswizzled real part is written to pReal.  pImaginary is only read.
//
// PARAMETERS:
//  pReal         - [inout] real components, must have at least (1<<uLog2Length*uChannelCount)/4 elements
//  pImaginary    - [in]    imaginary components, must have at least (1<<uLog2Length*uChannelCount)/4 elements
//  pUnityTable   - [in]    unity table, must have at least (1<<uLog2Length) elements, see FFTInitializeUnityTable()
//  uChannelCount - [in]    number of channels, must be within [1, 6]
//  uLog2Length   - [in]    LOG (base 2) of FFT length in frames, must be within [2, 9]
//----------------------------------------------------------------------------------
inline void IFFTDeinterleaved(
    _Inout_updates_(((1 << uLog2Length) * uChannelCount) / 4) XMVECTOR* __restrict pReal,
    _In_reads_(((1 << uLog2Length) * uChannelCount) / 4) const XMVECTOR* __restrict pImaginary,
    _In_reads_(1 << uLog2Length) const XMVECTOR* __restrict pUnityTable,
    _In_ const size_t uChannelCount,
    _In_ const size_t uLog2Length) noexcept
{
    using namespace DirectX;

    assert(pReal);
    assert(pImaginary);
    assert(pUnityTable);
    assert(reinterpret_cast<uintptr_t>(pReal) % 16 == 0);
    assert(reinterpret_cast<uintptr_t>(pImaginary) % 16 == 0);
    assert(reinterpret_cast<uintptr_t>(pUnityTable) % 16 == 0);
    assert(uChannelCount > 0 && uChannelCount <= 6);
    _Analysis_assume_(uChannelCount > 0 && uChannelCount <= 6);
    assert(uLog2Length >= 2 && uLog2Length <= 9);
    _Analysis_assume_(uLog2Length >= 2 && uLog2Length <= 9);

    // 768 vectors = 6 channels * 512 samples / 4 samples per vector (the asserted maximum).
    XM_ALIGNED_DATA(16) XMVECTOR vRealTemp[768] = {};
    XM_ALIGNED_DATA(16) XMVECTOR vImaginaryTemp[768] = {};

    const size_t uLength = size_t(1) << uLog2Length;
    const size_t uVectorsPerChannel = uLength >> 2;

    // Scale by 1/length; the imaginary part additionally changes sign.
    const XMVECTOR vScaleReal = XMVectorReplicate(1.0f / float(uLength));
    const XMVECTOR vScaleImaginary = XMVectorReplicate(-1.0f / float(uLength));

    const size_t uTotalVectors = uChannelCount * uVectorsPerChannel;
    for (size_t uVector = 0; uVector < uTotalVectors; uVector++)
    {
        vRealTemp[uVector] = XMVectorMultiply(pReal[uVector], vScaleReal);
        vImaginaryTemp[uVector] = XMVectorMultiply(pImaginary[uVector], vScaleImaginary);
    }

    // Transform every channel in place with the routine matching the length.
    for (size_t uChannel = 0; uChannel < uChannelCount; ++uChannel)
    {
        XMVECTOR* const pChannelReal = &vRealTemp[uChannel * uVectorsPerChannel];
        XMVECTOR* const pChannelImaginary = &vImaginaryTemp[uChannel * uVectorsPerChannel];

        if (uLength > 16)
        {
            FFT(pChannelReal, pChannelImaginary, pUnityTable, uLength);
            continue;
        }

        switch (uLength)
        {
        case 16:
            FFT16(pChannelReal, pChannelImaginary);
            break;

        case 8:
            FFT8(pChannelReal, pChannelImaginary);
            break;

        case 4:
            FFT4(pChannelReal, pChannelImaginary);
            break;

        default:
            break;
        }
    }

    // Only the real part is kept; vImaginaryTemp is reused as the unswizzle destination.
    for (size_t uChannel = 0; uChannel < uChannelCount; ++uChannel)
    {
        const size_t uOffset = uChannel * uVectorsPerChannel;
        FFTUnswizzle(&vImaginaryTemp[uOffset], &vRealTemp[uOffset], uLog2Length);
    }

    if (uChannelCount > 1)
    {
        Interleave(pReal, vImaginaryTemp, uChannelCount, uLength);
    }
    else
    {
        memcpy_s(pReal, uLength * uChannelCount * sizeof(float), vImaginaryTemp, uVectorsPerChannel * sizeof(XMVECTOR));
    }
}
} // namespace XDSP
#ifdef __clang__
#pragma clang diagnostic pop
#endif
#ifdef _MSC_VER
#pragma warning(pop)
#endif

View File

@ -0,0 +1,121 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
# http://go.microsoft.com/fwlink/?LinkID=615560
# Builds the library and test suite using CMake.
schedules:
- cron: "0 0 * * *"
displayName: 'Nightly build'
branches:
include:
- main
trigger: none
pr: none
resources:
repositories:
- repository: self
type: git
ref: refs/heads/main
name: $(Year:yyyy).$(Month).$(DayOfMonth)$(Rev:.r)
variables:
VS_GENERATOR: 'Visual Studio 17 2022'
WIN10_SDK: '10.0.19041.0'
WIN11_SDK: '10.0.22000.0'
GITHUB_PAT: $(GITHUBPUBLICTOKEN)
pool:
vmImage: windows-2022
jobs:
- job: CMAKE_BUILD
displayName: CMake using VS Generator BUILD_TESTING=ON
cancelTimeoutInMinutes: 1
steps:
- checkout: self
clean: true
fetchTags: false
- task: CmdLine@2
displayName: Fetch Tests
inputs:
script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
- task: CMake@1
displayName: 'CMake (MSVC): Config x64'
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: '-G "$(VS_GENERATOR)" -A x64 -B out -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
- task: CMake@1
displayName: 'CMake (MSVC): Build x64 Debug'
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: --build out -v --config Debug
- task: CMake@1
displayName: 'CMake (MSVC): Build x64 Release'
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: --build out -v --config RelWithDebInfo
- task: CMake@1
displayName: 'CMake (MSVC): Config x86'
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: '-G "$(VS_GENERATOR)" -A Win32 -B out2 -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
- task: CMake@1
displayName: 'CMake (MSVC): Build x86 Debug'
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: --build out2 -v --config Debug
- task: CMake@1
displayName: 'CMake (MSVC): Build x86 Release'
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: --build out2 -v --config RelWithDebInfo
- task: CMake@1
displayName: 'CMake (MSVC): Config ARM64'
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: '-G "$(VS_GENERATOR)" -A ARM64 -B out3 -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
- task: CMake@1
displayName: 'CMake (MSVC): Build ARM64 Debug'
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: --build out3 -v --config Debug
- task: CMake@1
displayName: 'CMake (MSVC): Build ARM64 Release'
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: --build out3 -v --config RelWithDebInfo
- task: CMake@1
displayName: 'CMake (ClangCl): Config x64'
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: '-G "$(VS_GENERATOR)" -A x64 -T clangcl -B out4 -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
- task: CMake@1
displayName: 'CMake (ClangCl): Build x64 Debug'
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: --build out4 -v --config Debug
- task: CMake@1
displayName: 'CMake (ClangCl): Build x64 Release'
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: --build out4 -v --config RelWithDebInfo
- task: CMake@1
displayName: 'CMake (ClangCl): Config ARM64'
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: '-G "$(VS_GENERATOR)" -A ARM64 -T clangcl -B out5 -DCMAKE_SYSTEM_VERSION=$(WIN11_SDK)'
- task: CMake@1
displayName: 'CMake (ClangCl): Build ARM64 Debug'
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: --build out5 -v --config Debug
- task: CMake@1
displayName: 'CMake (ClangCl): Build ARM64 Release'
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: --build out5 -v --config RelWithDebInfo

View File

@ -0,0 +1,117 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
# http://go.microsoft.com/fwlink/?LinkID=615560
# Builds the library and test suite using CMake.
schedules:
- cron: "0 0 * * *"
displayName: 'Nightly build'
branches:
include:
- main
trigger:
branches:
include:
- main
paths:
include:
- CMakeLists.txt
pr:
branches:
include:
- main
paths:
include:
- CMakeLists.txt
resources:
repositories:
- repository: self
type: git
ref: refs/heads/main
name: $(Year:yyyy).$(Month).$(DayOfMonth)$(Rev:.r)
variables:
VS_GENERATOR: 'Visual Studio 16 2019'
WIN10_SDK: '10.0.19041.0'
GITHUB_PAT: $(GITHUBPUBLICTOKEN)
pool:
vmImage: windows-2019
jobs:
- job: CMAKE_BUILD
displayName: CMake using VS Generator
cancelTimeoutInMinutes: 1
steps:
- checkout: self
clean: true
fetchTags: false
- task: CmdLine@2
displayName: Fetch Tests
inputs:
script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
- task: CMake@1
displayName: CMake (MSVC x64)
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: '-G "$(VS_GENERATOR)" -A x64 -B out -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
- task: CMake@1
displayName: CMake (Build x64)
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: --build out -v
- task: CMake@1
displayName: CMake Test (MSVC x64)
inputs:
cwd: Tests
cmakeArgs: '-G "$(VS_GENERATOR)" -A x64 -B out -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
- task: CMake@1
displayName: CMake Test (Build x64)
inputs:
cwd: Tests
cmakeArgs: --build out -v
- task: CMake@1
displayName: CMake (MSVC ARM64)
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: '-G "$(VS_GENERATOR)" -A ARM64 -B out2 -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
- task: CMake@1
displayName: CMake (Build ARM64)
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: --build out2 -v
- task: CMake@1
displayName: CMake Test (MSVC ARM64)
inputs:
cwd: Tests
cmakeArgs: '-G "$(VS_GENERATOR)" -A ARM64 -B out2 -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
- task: CMake@1
displayName: CMake Test (Build ARM64)
inputs:
cwd: Tests
cmakeArgs: --build out2 -v
- task: CMake@1
displayName: CMake (ClangCl)
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: '-G "$(VS_GENERATOR)" -A x64 -T clangcl -B out3 -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
- task: CMake@1
displayName: CMake (Build)
inputs:
cwd: '$(Build.SourcesDirectory)'
cmakeArgs: --build out3 -v
- task: CMake@1
displayName: CMake Test (ClangCL)
inputs:
cwd: Tests
cmakeArgs: '-G "$(VS_GENERATOR)" -A x64 -T clangcl -B out3 -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
- task: CMake@1
displayName: CMake Test (Build)
inputs:
cwd: Tests
cmakeArgs: --build out3 -v

View File

@ -0,0 +1,290 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
# http://go.microsoft.com/fwlink/?LinkID=615560
# Builds the math3 test suite for DirectXMath.
schedules:
- cron: "0 0 * * *"
displayName: 'Nightly build'
branches:
include:
- main
trigger: none
pr: none
resources:
repositories:
- repository: self
type: git
ref: refs/heads/main
name: $(Year:yyyy).$(Month).$(DayOfMonth)$(Rev:.r)
pool:
vmImage: windows-2022
variables:
GITHUB_PAT: $(GITHUBPUBLICTOKEN)
jobs:
- job: BUILD_DEV17
displayName: 'Visual Studio 2022 (v143)'
cancelTimeoutInMinutes: 1
steps:
- checkout: self
clean: true
fetchTags: false
- task: DeleteFiles@1
displayName: Delete files from Tests
inputs:
SourceFolder: Tests
Contents: '**'
RemoveSourceFolder: true
RemoveDotFiles: true
- task: CmdLine@2
displayName: Fetch Tests
inputs:
script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
- task: VSBuild@1
displayName: Build solution math3_2022.sln x86dbg
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x86
configuration: Debug
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x86rel
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x86
configuration: Release
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x64dbg
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x64
configuration: Debug
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x64rel
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x64
configuration: Release
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln arm64dbg
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: ARM64
configuration: Debug
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln arm64rel
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: ARM64
configuration: Release
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x86dbg sse3
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x86
configuration: SSE3 Debug
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x86rel sse3
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x86
configuration: SSE3 Release
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x64dbg sse3
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x64
configuration: SSE3 Debug
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x64rel sse3
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x64
configuration: SSE3 Release
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x86dbg sse4
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x86
configuration: SSE4 Debug
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x86rel sse4
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x86
configuration: SSE4 Release
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x64dbg sse4
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x64
configuration: SSE4 Debug
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x64rel sse4
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x64
configuration: SSE4 Release
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x86dbg avx
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x86
configuration: AVX Debug
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x86rel avx
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x86
configuration: AVX Release
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x64dbg avx
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x64
configuration: AVX Debug
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x64rel avx
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x64
configuration: AVX Release
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x86dbg avx2
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x86
configuration: AVX2 Debug
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x86rel avx2
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x86
configuration: AVX2 Release
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x64dbg avx2
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x64
configuration: AVX2 Debug
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x64rel avx2
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x64
configuration: AVX2 Release
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x86dbg nointrinsics
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x86
configuration: NI Debug
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x86rel nointrinsics
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x86
configuration: NI Release
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x64dbg nointrinsics
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x64
configuration: NI Debug
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x64rel nointrinsics
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x64
configuration: NI Release
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln arm64dbg nointrinsics
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: ARM64
configuration: NI Debug
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln arm64rel nointrinsics
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: ARM64
configuration: NI Release
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x86dbg x87
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x86
configuration: x87 Debug
msbuildArchitecture: x64
- task: VSBuild@1
displayName: Build solution math3_2022.sln x86rel x87
inputs:
solution: Tests/math3/math3_2022.sln
vsVersion: 17.0
platform: x86
configuration: x87 Release
msbuildArchitecture: x64

View File

@ -0,0 +1,166 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
# http://go.microsoft.com/fwlink/?LinkID=615560
# Builds the library and test suite using the MinGW compiler.
schedules:
- cron: "0 0 * * *"
displayName: 'Nightly build'
branches:
include:
- main
trigger:
branches:
include:
- main
paths:
exclude:
- README.md
- HISTORY.md
- SECURITY.md
- LICENSE
pr:
branches:
include:
- main
paths:
exclude:
- README.md
- HISTORY.md
- SECURITY.md
- LICENSE
drafts: false
resources:
repositories:
- repository: self
type: git
ref: refs/heads/main
name: $(Year:yyyy).$(Month).$(DayOfMonth)$(Rev:.r)
pool:
vmImage: windows-2022
variables:
GITHUB_PAT: $(GITHUBPUBLICTOKEN)
URL_MINGW32: https://github.com/brechtsanders/winlibs_mingw/releases/download/12.2.0-14.0.6-10.0.0-ucrt-r2/winlibs-i686-posix-dwarf-gcc-12.2.0-llvm-14.0.6-mingw-w64ucrt-10.0.0-r2.zip
HASH_MINGW32: 'fcd1e11b896190da01c83d5b5fb0d37b7c61585e53446c2dab0009debc3915e757213882c35e35396329338de6f0222ba012e23a5af86932db45186a225d1272'
jobs:
- job: MINGW32_BUILD
displayName: 'Minimalist GNU for Windows (MinGW32)'
steps:
- checkout: self
clean: true
fetchTags: false
- task: CmdLine@2
displayName: Fetch Tests
inputs:
script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
workingDirectory: $(Build.SourcesDirectory)
- task: PowerShell@2
displayName: Install MinGW32
inputs:
targetType: inline
script: |
$ProgressPreference = 'SilentlyContinue'
Write-Host "Downloading winlibs..."
Invoke-WebRequest -Uri "$(URL_MINGW32)" -OutFile "gw32.zip"
Write-Host "Downloaded."
$fileHash = Get-FileHash -Algorithm SHA512 gw32.zip | ForEach { $_.Hash} | Out-String
$filehash = $fileHash.Trim()
Write-Host "##[debug]SHA512: " $fileHash
if ($fileHash -ne '$(HASH_MINGW32)') {
Write-Error -Message "##[error]Computed hash does not match!" -ErrorAction Stop
}
Write-Host "Extracting winlibs..."
Expand-Archive -LiteralPath 'gw32.zip'
Write-Host "Extracted."
Write-Host "Added to path: $env:BUILD_SOURCESDIRECTORY\gw32\mingw32\bin"
Write-Host "##vso[task.prependpath]$env:BUILD_SOURCESDIRECTORY\gw32\mingw32\bin"
workingDirectory: $(Build.SourcesDirectory)
- task: CmdLine@2
displayName: GCC version
inputs:
script: g++ --version
- task: CMake@1
displayName: CMake (MinGW32) Dbg
inputs:
cwd: Tests
cmakeArgs: -B out -DCMAKE_BUILD_TYPE="Debug" -DDXMATH_ARCHITECTURE=x86 -DCMAKE_CXX_COMPILER="g++.exe" -G "MinGW Makefiles"
- task: CMake@1
displayName: CMake (MinGW32) Build Dbg
inputs:
cwd: Tests
cmakeArgs: --build out
- task: CMake@1
displayName: CMake (MinGW32) Rel
inputs:
cwd: Tests
cmakeArgs: -B out2 -DCMAKE_BUILD_TYPE="RelWithDebInfo" -DDXMATH_ARCHITECTURE=x86 -DCMAKE_CXX_COMPILER="g++.exe" -G "MinGW Makefiles"
- task: CMake@1
displayName: CMake (MinGW32) Build Rel
inputs:
cwd: Tests
cmakeArgs: --build out2
- task: CMake@1
displayName: CMake (MinGW32) Dbg NI
inputs:
cwd: Tests
cmakeArgs: -B out3 -DCMAKE_BUILD_TYPE="Debug" -DBUILD_NO_INTRINSICS=ON -DDXMATH_ARCHITECTURE=x86 -DCMAKE_CXX_COMPILER="g++.exe" -G "MinGW Makefiles"
- task: CMake@1
displayName: CMake (MinGW32) Build Dbg NI
inputs:
cwd: Tests
cmakeArgs: --build out3
- job: MINGW64_BUILD
displayName: 'Minimalist GNU for Windows (MinGW-W64) BUILD_TESTING=ON'
steps:
- checkout: self
clean: true
fetchTags: false
- task: CmdLine@2
displayName: Fetch Tests
inputs:
script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
workingDirectory: $(Build.SourcesDirectory)
- task: CmdLine@2
displayName: GCC version
inputs:
script: g++ --version
- task: CMake@1
displayName: CMake (MinGW-W64) Dbg
inputs:
cwd: Tests
cmakeArgs: -B out -DCMAKE_BUILD_TYPE="Debug" -DDXMATH_ARCHITECTURE=x64 -DCMAKE_CXX_COMPILER="g++.exe" -G "MinGW Makefiles"
- task: CMake@1
displayName: CMake (MinGW-W64) Build Dbg
inputs:
cwd: Tests
cmakeArgs: --build out
- task: CMake@1
displayName: CMake (MinGW-W64) Rel
inputs:
cwd: Tests
cmakeArgs: -B out2 -DCMAKE_BUILD_TYPE="RelWithDebInfo" -DDXMATH_ARCHITECTURE=x64 -DCMAKE_CXX_COMPILER="g++.exe" -G "MinGW Makefiles"
- task: CMake@1
displayName: CMake (MinGW-W64) Build Rel
inputs:
cwd: Tests
cmakeArgs: --build out2
- task: CMake@1
displayName: CMake (MinGW-W64) Dbg NI
inputs:
cwd: Tests
cmakeArgs: -B out3 -DCMAKE_BUILD_TYPE="Debug" -DBUILD_NO_INTRINSICS=ON -DDXMATH_ARCHITECTURE=x64 -DCMAKE_CXX_COMPILER="g++.exe" -G "MinGW Makefiles"
- task: CMake@1
displayName: CMake (MinGW-W64) Build Dbg NI
inputs:
cwd: Tests
cmakeArgs: --build out3

View File

@ -0,0 +1,66 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
# http://go.microsoft.com/fwlink/?LinkID=615560
# Builds the math3 test suite for Windows Subsystem for Linux (WSL)
schedules:
- cron: "0 3 * * *"
displayName: 'Nightly build'
branches:
include:
- main
trigger: none
pr: none
resources:
repositories:
- repository: self
type: git
ref: refs/heads/main
name: $(Year:yyyy).$(Month).$(DayOfMonth)$(Rev:.r)
pool:
vmImage: ubuntu-22.04
variables:
GITHUB_PAT: $(GITHUBPUBLICTOKEN)
jobs:
- job: BUILD_WSL
displayName: 'Windows Subsystem for Linux (WSL)'
steps:
- checkout: self
clean: true
fetchTags: false
- task: CmdLine@2
displayName: Fetch tests
inputs:
script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
- task: CMake@1
displayName: DirectXMath Tests
inputs:
cwd: Tests
cmakeArgs: .
- task: PowerShell@2
displayName: Fetch SAL.H
inputs:
targetType: inline
script: |
$ProgressPreference = 'SilentlyContinue'
Invoke-WebRequest -Uri https://raw.githubusercontent.com/dotnet/runtime/v8.0.1/src/coreclr/pal/inc/rt/sal.h -OutFile $(Build.SourcesDirectory)/Inc/sal.h
$fileHash = Get-FileHash -Algorithm SHA512 $(Build.SourcesDirectory)/Inc/sal.h | ForEach { $_.Hash} | Out-String
$filehash = $fileHash.Trim()
Write-Host "##[debug]SHA512: " $filehash
if ($fileHash -ne "0f5a80b97564217db2ba3e4624cc9eb308e19cc9911dae21d983c4ab37003f4756473297ba81b386c498514cedc1ef5a3553d7002edc09aeb6a1335df973095f") {
Write-Error -Message "##[error]Computed hash does not match!" -ErrorAction Stop
}
- task: CMake@1
displayName: DirectXMath Tests Build
inputs:
cwd: Tests
cmakeArgs: --build . -v

View File

@ -0,0 +1,85 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
# http://go.microsoft.com/fwlink/?LinkID=615560
# Builds the math3 test suite for Windows Subsystem for Linux (WSL)
schedules:
- cron: "0 3 * * *"
displayName: 'Nightly build'
branches:
include:
- main
trigger:
branches:
include:
- main
paths:
exclude:
- README.md
- HISTORY.md
- SECURITY.md
- LICENSE
pr:
branches:
include:
- main
paths:
exclude:
- README.md
- HISTORY.md
- SECURITY.md
- LICENSE
drafts: false
resources:
repositories:
- repository: self
type: git
ref: refs/heads/main
name: $(Year:yyyy).$(Month).$(DayOfMonth)$(Rev:.r)
pool:
vmImage: ubuntu-20.04
variables:
GITHUB_PAT: $(GITHUBPUBLICTOKEN)
jobs:
- job: BUILD_WSL
displayName: 'Windows Subsystem for Linux (WSL)'
steps:
- checkout: self
clean: true
fetchTags: false
- task: CmdLine@2
displayName: Fetch tests
inputs:
script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
- task: CMake@1
displayName: DirectXMath Tests
inputs:
cwd: Tests
cmakeArgs: .
- task: PowerShell@2
displayName: Fetch SAL.H
inputs:
targetType: inline
script: |
$ProgressPreference = 'SilentlyContinue'
Invoke-WebRequest -Uri https://raw.githubusercontent.com/dotnet/runtime/v8.0.1/src/coreclr/pal/inc/rt/sal.h -OutFile $(Build.SourcesDirectory)/Inc/sal.h
$fileHash = Get-FileHash -Algorithm SHA512 $(Build.SourcesDirectory)/Inc/sal.h | ForEach { $_.Hash} | Out-String
$filehash = $fileHash.Trim()
Write-Host "##[debug]SHA512: " $filehash
if ($fileHash -ne "0f5a80b97564217db2ba3e4624cc9eb308e19cc9911dae21d983c4ab37003f4756473297ba81b386c498514cedc1ef5a3553d7002edc09aeb6a1335df973095f") {
Write-Error -Message "##[error]Computed hash does not match!" -ErrorAction Stop
}
- task: CMake@1
displayName: DirectXMath Tests Build
inputs:
cwd: Tests
cmakeArgs: --build . -v

View File

@ -0,0 +1,557 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
# http://go.microsoft.com/fwlink/?LinkID=615560
# Builds the math3 test suite for DirectXMath.
schedules:
- cron: "0 0 * * *"
displayName: 'Nightly build'
branches:
include:
- main
trigger:
branches:
include:
- main
paths:
exclude:
- README.md
- HISTORY.md
- SECURITY.md
- LICENSE
pr:
branches:
include:
- main
paths:
exclude:
- README.md
- HISTORY.md
- SECURITY.md
- LICENSE
drafts: false
resources:
repositories:
- repository: self
type: git
ref: refs/heads/main
name: $(Year:yyyy).$(Month).$(DayOfMonth)$(Rev:.r)
variables:
GITHUB_PAT: $(GITHUBPUBLICTOKEN)
Codeql.Enabled: true
pool:
vmImage: windows-2019
jobs:
- job: BUILD_DEV16
displayName: 'Visual Studio 2019 (v142)'
cancelTimeoutInMinutes: 1
steps:
- checkout: self
clean: true
fetchTags: false
- task: DeleteFiles@1
displayName: Delete files from Tests
inputs:
SourceFolder: Tests
Contents: '**'
RemoveSourceFolder: true
RemoveDotFiles: true
- task: CmdLine@2
displayName: Fetch Tests
inputs:
script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
- task: VSBuild@1
displayName: Build solution math3_2019.sln x86dbg
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x86
configuration: Debug
- task: VSBuild@1
displayName: Build solution math3_2019.sln x86rel
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x86
configuration: Release
- task: VSBuild@1
displayName: Build solution math3_2019.sln x64dbg
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x64
configuration: Debug
- task: VSBuild@1
displayName: Build solution math3_2019.sln x64rel
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x64
configuration: Release
- task: VSBuild@1
displayName: Build solution math3_2019.sln arm64dbg
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: ARM64
configuration: Debug
- task: VSBuild@1
displayName: Build solution math3_2019.sln arm64rel
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: ARM64
configuration: Release
- task: VSBuild@1
displayName: Build solution math3_2019.sln x86dbg sse3
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x86
configuration: SSE3 Debug
- task: VSBuild@1
displayName: Build solution math3_2019.sln x86rel sse3
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x86
configuration: SSE3 Release
- task: VSBuild@1
displayName: Build solution math3_2019.sln x64dbg sse3
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x64
configuration: SSE3 Debug
- task: VSBuild@1
displayName: Build solution math3_2019.sln x64rel sse3
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x64
configuration: SSE3 Release
- task: VSBuild@1
displayName: Build solution math3_2019.sln x86dbg sse4
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x86
configuration: SSE4 Debug
- task: VSBuild@1
displayName: Build solution math3_2019.sln x86rel sse4
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x86
configuration: SSE4 Release
- task: VSBuild@1
displayName: Build solution math3_2019.sln x64dbg sse4
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x64
configuration: SSE4 Debug
- task: VSBuild@1
displayName: Build solution math3_2019.sln x64rel sse4
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x64
configuration: SSE4 Release
- task: VSBuild@1
displayName: Build solution math3_2019.sln x86dbg avx
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x86
configuration: AVX Debug
- task: VSBuild@1
displayName: Build solution math3_2019.sln x86rel avx
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x86
configuration: AVX Release
- task: VSBuild@1
displayName: Build solution math3_2019.sln x64dbg avx
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x64
configuration: AVX Debug
- task: VSBuild@1
displayName: Build solution math3_2019.sln x64rel avx
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x64
configuration: AVX Release
- task: VSBuild@1
displayName: Build solution math3_2019.sln x86dbg avx2
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x86
configuration: AVX2 Debug
- task: VSBuild@1
displayName: Build solution math3_2019.sln x86rel avx2
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x86
configuration: AVX2 Release
- task: VSBuild@1
displayName: Build solution math3_2019.sln x64dbg avx2
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x64
configuration: AVX2 Debug
- task: VSBuild@1
displayName: Build solution math3_2019.sln x64rel avx2
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x64
configuration: AVX2 Release
- task: VSBuild@1
displayName: Build solution math3_2019.sln x86dbg nointrinsics
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x86
configuration: NI Debug
- task: VSBuild@1
displayName: Build solution math3_2019.sln x86rel nointrinsics
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x86
configuration: NI Release
- task: VSBuild@1
displayName: Build solution math3_2019.sln x64dbg nointrinsics
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x64
configuration: NI Debug
- task: VSBuild@1
displayName: Build solution math3_2019.sln x64rel nointrinsics
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x64
configuration: NI Release
- task: VSBuild@1
displayName: Build solution math3_2019.sln arm64dbg nointrinsics
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: ARM64
configuration: NI Debug
- task: VSBuild@1
displayName: Build solution math3_2019.sln arm64rel nointrinsics
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: ARM64
configuration: NI Release
- task: VSBuild@1
displayName: Build solution math3_2019.sln x86dbg x87
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x86
configuration: x87 Debug
- task: VSBuild@1
displayName: Build solution math3_2019.sln x86rel x87
inputs:
solution: Tests/math3/math3_2019.sln
vsVersion: 16.0
platform: x86
configuration: x87 Release
- task: VSBuild@1
displayName: Build solution shmath_2019.sln x64dbg
inputs:
solution: Tests/shmath/shmath_2019.sln
vsVersion: 16.0
platform: x64
configuration: Debug
- task: VSBuild@1
displayName: Build solution shmath_2019.sln x64rel
inputs:
solution: Tests/shmath/shmath_2019.sln
vsVersion: 16.0
platform: x64
configuration: Release
- task: VSBuild@1
displayName: Build solution shmath_2019.sln arm64dbg
inputs:
solution: Tests/shmath/shmath_2019.sln
vsVersion: 16.0
platform: ARM64
configuration: Debug
- task: VSBuild@1
displayName: Build solution shmath_2019.sln arm64rel
inputs:
solution: Tests/shmath/shmath_2019.sln
vsVersion: 16.0
platform: ARM64
configuration: Release
- task: VSBuild@1
displayName: Build solution XDSPTest_2019 x64dbg
inputs:
solution: Tests/xdsp/XDSPTest_2019.sln
vsVersion: 16.0
platform: x64
configuration: Debug
- task: VSBuild@1
displayName: Build solution XDSPTest_2019 x64rel
inputs:
solution: Tests/xdsp/XDSPTest_2019.sln
vsVersion: 16.0
platform: x64
configuration: Release
- task: VSBuild@1
displayName: Build solution XDSPTest_2019 arm64dbg
inputs:
solution: Tests/xdsp/XDSPTest_2019.sln
vsVersion: 16.0
platform: ARM64
configuration: Debug
- task: VSBuild@1
displayName: Build solution XDSPTest_2019 arm64rel
inputs:
solution: Tests/xdsp/XDSPTest_2019.sln
vsVersion: 16.0
platform: ARM64
configuration: Release
- job: BUILD_DEV15
displayName: 'Visual Studio 2019 (v141)'
steps:
- checkout: self
clean: true
fetchTags: false
- task: CmdLine@2
displayName: Fetch Tests
inputs:
script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
- task: VSBuild@1
displayName: Build solution math3_2017.sln x86dbg
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x86
configuration: Debug
- task: VSBuild@1
displayName: Build solution math3_2017.sln x86rel
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x86
configuration: Release
- task: VSBuild@1
displayName: Build solution math3_2017.sln x64dbg
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x64
configuration: Debug
- task: VSBuild@1
displayName: Build solution math3_2017.sln x64rel
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x64
configuration: Release
- task: VSBuild@1
displayName: Build solution math3_2017.sln x86dbg sse3
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x86
configuration: SSE3 Debug
- task: VSBuild@1
displayName: Build solution math3_2017.sln x86rel sse3
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x86
configuration: SSE3 Release
- task: VSBuild@1
displayName: Build solution math3_2017.sln x64dbg sse3
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x64
configuration: SSE3 Debug
- task: VSBuild@1
displayName: Build solution math3_2017.sln x64rel sse3
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x64
configuration: SSE3 Release
- task: VSBuild@1
displayName: Build solution math3_2017.sln x86dbg sse4
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x86
configuration: SSE4 Debug
- task: VSBuild@1
displayName: Build solution math3_2017.sln x86rel sse4
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x86
configuration: SSE4 Release
- task: VSBuild@1
displayName: Build solution math3_2017.sln x64dbg sse4
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x64
configuration: SSE4 Debug
- task: VSBuild@1
displayName: Build solution math3_2017.sln x64rel sse4
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x64
configuration: SSE4 Release
- task: VSBuild@1
displayName: Build solution math3_2017.sln x86dbg avx
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x86
configuration: AVX Debug
- task: VSBuild@1
displayName: Build solution math3_2017.sln x86rel avx
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x86
configuration: AVX Release
- task: VSBuild@1
displayName: Build solution math3_2017.sln x64dbg avx
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x64
configuration: AVX Debug
- task: VSBuild@1
displayName: Build solution math3_2017.sln x64rel avx
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x64
configuration: AVX Release
- task: VSBuild@1
displayName: Build solution math3_2017.sln x86dbg avx2
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x86
configuration: AVX2 Debug
- task: VSBuild@1
displayName: Build solution math3_2017.sln x86rel avx2
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x86
configuration: AVX2 Release
- task: VSBuild@1
displayName: Build solution math3_2017.sln x64dbg avx2
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x64
configuration: AVX2 Debug
- task: VSBuild@1
displayName: Build solution math3_2017.sln x64rel avx2
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x64
configuration: AVX2 Release
- task: VSBuild@1
displayName: Build solution math3_2017.sln x86dbg nointrinsics
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x86
configuration: NI Debug
- task: VSBuild@1
displayName: Build solution math3_2017.sln x86rel nointrinsics
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x86
configuration: NI Release
- task: VSBuild@1
displayName: Build solution math3_2017.sln x64dbg nointrinsics
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x64
configuration: NI Debug
- task: VSBuild@1
displayName: Build solution math3_2017.sln x64rel nointrinsics
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x64
configuration: NI Release
- task: VSBuild@1
displayName: Build solution math3_2017.sln x86dbg x87
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x86
configuration: x87 Debug
- task: VSBuild@1
displayName: Build solution math3_2017.sln x86rel x87
inputs:
solution: Tests/math3/math3_2017.sln
vsVersion: 16.0
platform: x86
configuration: x87 Release
- task: VSBuild@1
displayName: Build solution shmath_2017.sln x64dbg
inputs:
solution: Tests/shmath/shmath_2017.sln
vsVersion: 16.0
platform: x64
configuration: Debug
- task: VSBuild@1
displayName: Build solution shmath_2017.sln x64rel
inputs:
solution: Tests/shmath/shmath_2017.sln
vsVersion: 16.0
platform: x64
configuration: Release
- task: VSBuild@1
displayName: Build solution XDSPTest_2017 x64dbg
inputs:
solution: Tests/xdsp/XDSPTest_2017.sln
vsVersion: 16.0
platform: x64
configuration: Debug
- task: VSBuild@1
displayName: Build solution XDSPTest_2017 x64rel
inputs:
solution: Tests/xdsp/XDSPTest_2017.sln
vsVersion: 16.0
platform: x64
configuration: Release

View File

@ -0,0 +1,86 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
# http://go.microsoft.com/fwlink/?LinkID=615560
# Runs various SDL recommended tools on the code.
schedules:
- cron: "0 3 * * 0,3,5"
displayName: 'Three times a week'
branches:
include:
- main
trigger: none
pr: none
resources:
repositories:
- repository: self
type: git
ref: refs/heads/main
name: $(Year:yyyy).$(Month).$(DayOfMonth)$(Rev:.r)
variables:
VS_GENERATOR: 'Visual Studio 17 2022'
GITHUB_PAT: $(GITHUBPUBLICTOKEN)
pool:
vmImage: windows-2022
jobs:
- job: SDL_BUILD
displayName: 'Build using required SDL tools'
workspace:
clean: all
steps:
- checkout: self
clean: true
fetchTags: false
- task: NodeTool@0
displayName: 'NPM install'
inputs:
versionSpec: 14.x
- task: securedevelopmentteam.vss-secure-development-tools.build-task-credscan.CredScan@3
displayName: 'Run Credential Scanner'
inputs:
debugMode: false
folderSuppression: false
- task: PoliCheck@2
displayName: 'Run PoliCheck'
inputs:
result: PoliCheck.xml
- task: CmdLine@2
displayName: Fetch Tests
inputs:
script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
- task: Armory@2
displayName: Run ARMory
- task: CMake@1
displayName: 'CMake (MSVC): Config x64'
inputs:
cwd: '$(Build.SourcesDirectory)/Tests/headertest'
cmakeArgs: '-G "$(VS_GENERATOR)" -A x64 -B out'
- task: CodeQL3000Init@0
inputs:
Enabled: true
- task: VSBuild@1
displayName: 'Build C++ with CodeQL'
inputs:
solution: '$(Build.SourcesDirectory)/Tests/headertest/out/headertest.sln'
vsVersion: 17.0
platform: x64
configuration: Release
msbuildArchitecture: x64
- task: CodeQL3000Finalize@0
condition: always()
- task: securedevelopmentteam.vss-secure-development-tools.build-task-postanalysis.PostAnalysis@2
displayName: 'Post Analysis'
inputs:
GdnBreakAllTools: true
GdnBreakPolicy: 'Microsoft'
GdnBreakPolicyMinSev: 'Error'
- task: ComponentGovernanceComponentDetection@0
displayName: Component Detection

View File

@ -0,0 +1,5 @@
# Package configuration file template for this project.
# NOTE(review): the PACKAGE_INIT placeholder below is presumably expanded by
# configure_package_config_file() from CMakePackageConfigHelpers, which is also
# what normally provides check_required_components() - confirm against the
# CMakeLists.txt that configures this template.
@PACKAGE_INIT@
# Load the exported targets file that is expected to sit next to this file.
include(${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@-targets.cmake)
# Validate the components requested by the consumer for this package.
check_required_components("@PROJECT_NAME@")

View File

@ -0,0 +1,10 @@
prefix=@CMAKE_INSTALL_PREFIX@
libdir=@DIRECTXMATH_LIBDIR_FOR_PKG_CONFIG@
includedir=@DIRECTXMATH_INCLUDEDIR_FOR_PKG_CONFIG@
Name: @PROJECT_NAME@
Description: @PROJECT_DESCRIPTION@
URL: @PROJECT_HOMEPAGE_URL@
Version: @PROJECT_VERSION@
Cflags: -I${includedir}
Libs:

View File

@ -0,0 +1,23 @@
# This module provides a function for joining path segments,
# as known from most programming languages.
#
# SPDX-License-Identifier: (MIT OR CC0-1.0)
# Copyright 2020 Jan Tojnar
# https://github.com/jtojnar/cmake-snips
#
# Modelled after Python's os.path.join
# https://docs.python.org/3.7/library/os.path.html#os.path.join
# Windows not supported
# join_paths(<out_var> <first_segment> [<segment>...])
#
# Joins path segments with "/" and stores the result in <out_var> in the
# caller's scope.
#
#   * Empty segments after the first are ignored.
#   * An absolute segment discards everything accumulated before it.
#   * If nothing has been accumulated yet (empty first segment), the next
#     non-empty segment is taken as-is instead of being prefixed with "/",
#     so a relative path is never silently turned into an absolute one.
#   * If the accumulated path already ends in "/", no extra separator is
#     inserted, avoiding doubled separators such as "/usr//lib".
#
# Example:
#   join_paths(result "/usr" "lib" "pkgconfig")   # result = /usr/lib/pkgconfig
function(join_paths joined_path first_path_segment)
    set(temp_path "${first_path_segment}")
    foreach(current_segment IN LISTS ARGN)
        # Guard clause: empty segments contribute nothing.
        if("${current_segment}" STREQUAL "")
            continue()
        endif()
        if(IS_ABSOLUTE "${current_segment}" OR "${temp_path}" STREQUAL "")
            # Absolute segment restarts the path; an empty prefix must not
            # gain a leading "/".
            set(temp_path "${current_segment}")
        elseif("${temp_path}" MATCHES "/$")
            # Prefix already ends with a separator; do not add another one.
            string(APPEND temp_path "${current_segment}")
        else()
            string(APPEND temp_path "/${current_segment}")
        endif()
    endforeach()
    # Return the result to the caller through the named output variable.
    set(${joined_path} "${temp_path}" PARENT_SCOPE)
endfunction()