feat(gx): add directxmath for MinGW

2025-07-16 13:49:05 +03:00 · 2024-09-07 13:54:54 -04:00 · 2024-09-07 13:54:54 -04:00 · 3e77eb935a
commit 3e77eb935a
parent 0d09dee4b3
51 changed files with 49251 additions and 12 deletions
--- a/src/gx/CMakeLists.txt
+++ b/src/gx/CMakeLists.txt
@ -6,24 +6,24 @@ file(GLOB GX_SOURCES
    "texture/*.cpp"
 )

-if(WHOA_SYSTEM_WIN)
+if (WHOA_SYSTEM_WIN)
    file(GLOB D3D_SOURCES "d3d/*.cpp")
    list(APPEND GX_SOURCES ${D3D_SOURCES})
-endif()
+endif ()

-if(WHOA_SYSTEM_MAC)
+if (WHOA_SYSTEM_MAC)
    file(GLOB GLL_SOURCES "gll/*.cpp" "gll/*.mm")
    set_source_files_properties(${GLL_SOURCES}
        PROPERTIES COMPILE_FLAGS "-x objective-c++"
    )
    list(APPEND GX_SOURCES ${GLL_SOURCES})
-endif()
+endif ()

 # Build OpenGL/SDL graphics device if enabled
-if(WHOA_BUILD_GLSDL)
+if (WHOA_BUILD_GLSDL)
    file(GLOB GLSDL_SOURCES "glsdl/*.cpp")
    list(APPEND GX_SOURCES ${GLSDL_SOURCES})
-endif()
+endif ()

 add_library(gx STATIC ${GX_SOURCES})

@ -46,12 +46,20 @@ target_link_libraries(gx
        tempest
 )

-if(WHOA_SYSTEM_WIN)
+if (WHOA_SYSTEM_WIN)
    target_link_libraries(gx
        PRIVATE
            d3d9.lib
    )
-endif()
+
+    # MSVC toolchains already provide the DirectXMath headers; other Windows compilers (e.g. MinGW) link the vendored copy
+    if (NOT MSVC)
+        target_link_libraries(gx
+            PRIVATE
+                DirectXMath
+        )
+    endif ()
+endif ()

 # Link SDL2 and GLEW for GLSDL
 if (WHOA_BUILD_GLSDL)
@ -60,12 +68,12 @@ if (WHOA_BUILD_GLSDL)
            SDL2::SDL2-static
            libglew_static
        )
-endif()
+endif ()

-if(WHOA_SYSTEM_MAC)
+if (WHOA_SYSTEM_MAC)
    target_link_libraries(gx
        PRIVATE
            "-framework AppKit"
            "-framework OpenGL"
    )
-endif()
+endif ()
--- a/src/gx/d3d/CGxDeviceD3d.cpp
+++ b/src/gx/d3d/CGxDeviceD3d.cpp
@ -4,7 +4,7 @@
 #include "gx/texture/CGxTex.hpp"
 #include "math/Utils.hpp"
 #include <algorithm>
-#include <directxmath.h>
+#include <DirectXMath.h>

 int32_t CGxDeviceD3d::s_clientAdjustWidth;
 int32_t CGxDeviceD3d::s_clientAdjustHeight;
--- a/vendor/directxmath-3.19.0/.gitattributes
+++ b/vendor/directxmath-3.19.0/.gitattributes
@ -0,0 +1,8 @@
+# Auto detect text files and perform LF normalization
+* text=auto
+
+# Explicitly declare code/VS files as CRLF
+*.cpp eol=crlf
+*.cmd eol=crlf
+*.h eol=crlf
+*.inl eol=crlf
--- a/vendor/directxmath-3.19.0/.gitignore
+++ b/vendor/directxmath-3.19.0/.gitignore
@ -0,0 +1,24 @@
+*.psess
+*.vsp
+*.log
+*.err
+*.wrn
+*.suo
+*.sdf
+*.user
+*.i
+*.vspscc
+*.opensdf
+*.opendb
+*.ipch
+*.cache
+*.tlog
+*.lastbuildstate
+*.ilk
+*.VC.db
+*.nupkg
+.vs
+/Tests
+/wiki
+/out
+/CMakeUserPresets.json
--- a/vendor/directxmath-3.19.0/.nuget/directxmath.nuspec
+++ b/vendor/directxmath-3.19.0/.nuget/directxmath.nuspec
@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="utf-8"?>
+<package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
+    <metadata minClientVersion="2.8.6">
+        <id>directxmath</id>
+        <version>0.0.0-SpecifyVersionOnCommandline</version>
+        <title>DirectXMath</title>
+        <authors>Microsoft</authors>
+        <owners>microsoft,directxtk</owners>
+        <summary>DirectXMath is an all inline SIMD C++ linear algebra library for use in games and graphics apps.</summary>
+        <description>The DirectXMath API provides SIMD-friendly C++ types and functions for common linear algebra and graphics math operations common to DirectX applications. The library provides optimized versions for Windows 32-bit (x86), Windows 64-bit (x64), and Windows on ARM through SSE2 and ARM-NEON intrinsics support in the Visual Studio compiler.</description>
+        <releaseNotes>Matches the February 2024 release.</releaseNotes>
+        <projectUrl>http://go.microsoft.com/fwlink/?LinkID=615560</projectUrl>
+        <repository type="git" url="https://github.com/microsoft/DirectXMath.git" />
+        <icon>images\icon.jpg</icon>
+        <readme>docs\README.md</readme>
+        <license type="expression">MIT</license>
+        <requireLicenseAcceptance>false</requireLicenseAcceptance>
+        <copyright>&#169; Microsoft Corporation. All rights reserved.</copyright>
+        <tags>C++  native  DirectX  math nativepackage</tags>
+    </metadata>
+
+    <files>
+
+        <file target="docs" src="*.md" />
+
+        <file target="include" src="Inc\*" />
+
+        <file src=".nuget/directxmath.targets" target="build\native" />
+
+        <file src=".nuget/icon.jpg" target="images\" />
+
+    </files>
+</package>
--- a/vendor/directxmath-3.19.0/.nuget/directxmath.targets
+++ b/vendor/directxmath-3.19.0/.nuget/directxmath.targets
@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+
+  <ItemDefinitionGroup>
+    <ClCompile>
+      <PreprocessorDefinitions>HAS_DIRECTXMATH;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(MSBuildThisFileDirectory)..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+  </ItemDefinitionGroup>
+
+</Project>
--- a/vendor/directxmath-3.19.0/.nuget/icon.jpg
+++ b/vendor/directxmath-3.19.0/.nuget/icon.jpg
--- a/vendor/directxmath-3.19.0/.nuget/signconfig.xml
+++ b/vendor/directxmath-3.19.0/.nuget/signconfig.xml
@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<SignConfigXML>
+  <job dest="__OUTPATHROOT__" certSubject="NuGet" jobname="NugetSigningTest">
+    <file src="__INPATHROOT__\directxmath*.nupkg" signType="CP-401405" dest="__OUTPATHROOT__\directxmath*.nupkg" />
+  </job>
+</SignConfigXML>
--- a/vendor/directxmath-3.19.0/CMakeLists.txt
+++ b/vendor/directxmath-3.19.0/CMakeLists.txt
@ -0,0 +1,113 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+cmake_minimum_required (VERSION 3.20)
+
+set(DIRECTXMATH_VERSION 3.1.9)
+
+project(DirectXMath
+  VERSION ${DIRECTXMATH_VERSION}
+  DESCRIPTION "DirectXMath SIMD C++ math library"
+  HOMEPAGE_URL "https://go.microsoft.com/fwlink/?LinkID=615560"
+  LANGUAGES CXX)
+
+include(GNUInstallDirs)
+
+#--- Library
+set(LIBRARY_HEADERS
+    Inc/DirectXCollision.h
+    Inc/DirectXCollision.inl
+    Inc/DirectXColors.h
+    Inc/DirectXMath.h
+    Inc/DirectXMathConvert.inl
+    Inc/DirectXMathMatrix.inl
+    Inc/DirectXMathMisc.inl
+    Inc/DirectXMathVector.inl
+    Inc/DirectXPackedVector.h
+    Inc/DirectXPackedVector.inl)
+
+add_library(${PROJECT_NAME} INTERFACE)
+
+target_include_directories(${PROJECT_NAME} INTERFACE
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/Inc>
+  $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/directxmath>)
+
+target_compile_features(${PROJECT_NAME} INTERFACE cxx_std_11)
+
+#--- Package
+include(CMakePackageConfigHelpers)
+
+string(TOLOWER ${PROJECT_NAME} PACKAGE_NAME)
+
+write_basic_package_version_file(
+  ${PACKAGE_NAME}-config-version.cmake
+  VERSION ${DIRECTXMATH_VERSION}
+  COMPATIBILITY AnyNewerVersion
+  ARCH_INDEPENDENT)
+
+install(TARGETS ${PROJECT_NAME}
+  EXPORT ${PROJECT_NAME}-targets
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/build/${PROJECT_NAME}-config.cmake.in
+  ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}-config.cmake
+  INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PACKAGE_NAME})
+
+install(EXPORT ${PROJECT_NAME}-targets
+  FILE ${PROJECT_NAME}-targets.cmake
+  NAMESPACE Microsoft::
+  DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PACKAGE_NAME})
+
+install(FILES ${LIBRARY_HEADERS}
+  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/directxmath)
+
+install(FILES
+    ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}-config.cmake
+    ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}-config-version.cmake
+  DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PACKAGE_NAME})
+
+# Create pkg-config file
+include(build/JoinPaths.cmake)
+# from: https://github.com/jtojnar/cmake-snips#concatenating-paths-when-building-pkg-config-files
+join_paths(DIRECTXMATH_INCLUDEDIR_FOR_PKG_CONFIG "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}")
+join_paths(DIRECTXMATH_LIBDIR_FOR_PKG_CONFIG "\${prefix}"     "${CMAKE_INSTALL_LIBDIR}")
+
+configure_file(
+    "${CMAKE_CURRENT_SOURCE_DIR}/build/DirectXMath.pc.in"
+    "${CMAKE_CURRENT_BINARY_DIR}/DirectXMath.pc" @ONLY)
+
+# Install the pkg-config file
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/DirectXMath.pc"
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
+
+#--- Target architecture detection
+if(DEFINED VCPKG_TARGET_ARCHITECTURE)
+    set(DXMATH_ARCHITECTURE ${VCPKG_TARGET_ARCHITECTURE})
+elseif(CMAKE_GENERATOR_PLATFORM MATCHES "^[Ww][Ii][Nn]32$")
+    set(DXMATH_ARCHITECTURE x86)
+elseif(CMAKE_GENERATOR_PLATFORM MATCHES "^[Xx]64$")
+    set(DXMATH_ARCHITECTURE x64)
+elseif(CMAKE_GENERATOR_PLATFORM MATCHES "^[Aa][Rr][Mm]$")
+    set(DXMATH_ARCHITECTURE arm)
+elseif(CMAKE_GENERATOR_PLATFORM MATCHES "^[Aa][Rr][Mm]64$")
+    set(DXMATH_ARCHITECTURE arm64)
+elseif(CMAKE_VS_PLATFORM_NAME_DEFAULT MATCHES "^[Ww][Ii][Nn]32$")
+    set(DXMATH_ARCHITECTURE x86)
+elseif(CMAKE_VS_PLATFORM_NAME_DEFAULT MATCHES "^[Xx]64$")
+    set(DXMATH_ARCHITECTURE x64)
+elseif(CMAKE_VS_PLATFORM_NAME_DEFAULT MATCHES "^[Aa][Rr][Mm]$")
+    set(DXMATH_ARCHITECTURE arm)
+elseif(CMAKE_VS_PLATFORM_NAME_DEFAULT MATCHES "^[Aa][Rr][Mm]64$")
+    set(DXMATH_ARCHITECTURE arm64)
+elseif(NOT (DEFINED DXMATH_ARCHITECTURE))
+    set(DXMATH_ARCHITECTURE "x64")
+endif()
+
+#--- Test suite
+include(CTest)
+if(BUILD_TESTING AND WIN32 AND (NOT WINDOWS_STORE) AND (EXISTS "${CMAKE_CURRENT_LIST_DIR}/Tests/CMakeLists.txt"))
+  enable_testing()
+  add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/Tests)
+endif()
--- a/vendor/directxmath-3.19.0/CMakePresets.json
+++ b/vendor/directxmath-3.19.0/CMakePresets.json
@ -0,0 +1,175 @@
+{
+  "version": 2,
+  "configurePresets": [
+    {
+      "name": "base",
+      "displayName": "Basic Config",
+      "description": "Basic build using Ninja generator",
+      "generator": "Ninja",
+      "hidden": true,
+      "binaryDir": "${sourceDir}/out/build/${presetName}",
+      "cacheVariables": { "CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}" }
+    },
+
+    {
+      "name": "x64",
+      "architecture": {
+        "value": "x64",
+        "strategy": "external"
+      },
+      "cacheVariables": { "DXMATH_ARCHITECTURE": "x64" },
+      "hidden": true
+    },
+    {
+      "name": "x86",
+      "architecture": {
+        "value": "x86",
+        "strategy": "external"
+      },
+      "cacheVariables": { "DXMATH_ARCHITECTURE": "x86" },
+      "hidden": true
+    },
+    {
+      "name": "ARM",
+      "architecture": {
+        "value": "arm",
+        "strategy": "external"
+      },
+      "cacheVariables": { "DXMATH_ARCHITECTURE": "arm" },
+      "hidden": true
+    },
+    {
+      "name": "ARM64",
+      "architecture": {
+        "value": "arm64",
+        "strategy": "external"
+      },
+      "cacheVariables": { "DXMATH_ARCHITECTURE": "arm64" },
+      "hidden": true
+    },
+
+    {
+      "name": "Debug",
+      "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" },
+      "hidden": true
+    },
+    {
+      "name": "Release",
+      "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" },
+      "hidden": true
+    },
+
+    {
+      "name": "OneCore",
+      "cacheVariables": { "BUILD_FOR_ONECORE": true },
+      "hidden": true
+    },
+    {
+      "name": "AVX",
+      "cacheVariables": { "BUILD_AVX_TEST": true },
+      "hidden": true
+    },
+    {
+      "name": "AVX2",
+      "cacheVariables": { "BUILD_AVX2_TEST": true },
+      "hidden": true
+    },
+    {
+      "name": "F16C",
+      "cacheVariables": { "BUILD_F16C_TEST": true },
+      "hidden": true
+    },
+    {
+      "name": "NI",
+      "cacheVariables": { "BUILD_NO_INTRINSICS": true },
+      "hidden": true
+    },
+
+    {
+      "name": "MSVC",
+      "hidden": true,
+      "cacheVariables": {
+        "CMAKE_CXX_COMPILER": "cl.exe"
+      },
+      "toolset": {
+        "value": "host=x64",
+        "strategy": "external"
+      }
+    },
+    {
+      "name": "Clang",
+      "hidden": true,
+      "cacheVariables": {
+        "CMAKE_CXX_COMPILER": "clang-cl.exe"
+      },
+      "toolset": {
+        "value": "host=x64",
+        "strategy": "external"
+      }
+    },
+    {
+      "name": "GNUC",
+      "hidden": true,
+      "cacheVariables": {
+        "CMAKE_CXX_COMPILER": "g++.exe"
+      },
+      "toolset": {
+        "value": "host=x64",
+        "strategy": "external"
+      }
+    },
+    {
+      "name": "Intel",
+      "hidden": true,
+      "cacheVariables": {
+        "CMAKE_CXX_COMPILER": "icl.exe"
+      },
+      "toolset": {
+        "value": "host=x64",
+        "strategy": "external"
+      }
+    },
+    {
+      "name": "IntelLLVM",
+      "hidden": true,
+      "cacheVariables": {
+        "CMAKE_CXX_COMPILER": "icx.exe"
+      },
+      "toolset": {
+        "value": "host=x64",
+        "strategy": "external"
+      }
+    },
+
+    { "name": "x64-Debug"    , "description": "MSVC for x64 (Debug) - SSE/SSE2", "inherits": [ "base", "x64", "Debug", "MSVC" ] },
+    { "name": "x64-Release"  , "description": "MSVC for x64 (Release) - SSE/SSE2", "inherits": [ "base", "x64", "Release", "MSVC" ] },
+    { "name": "x86-Debug"    , "description": "MSVC for x86 (Debug) - SSE/SSE2", "inherits": [ "base", "x86", "Debug", "MSVC" ] },
+    { "name": "x86-Release"  , "description": "MSVC for x86 (Release) - SSE/SSE2", "inherits": [ "base", "x86", "Release", "MSVC" ] },
+    { "name": "arm-Debug"    , "description": "MSVC for ARM (Debug) - ARM-NEON", "inherits": [ "base", "ARM", "Debug", "MSVC" ] },
+    { "name": "arm-Release"  , "description": "MSVC for ARM (Release) - ARM-NEON", "inherits": [ "base", "ARM", "Release", "MSVC" ] },
+    { "name": "arm64-Debug"  , "description": "MSVC for ARM64 (Debug) - ARM-NEON", "inherits": [ "base", "ARM64", "Debug", "MSVC" ] },
+    { "name": "arm64-Release", "description": "MSVC for ARM64 (Release) - ARM-NEON", "inherits": [ "base", "ARM64", "Release", "MSVC" ] },
+
+    { "name": "x64-Debug-Clang"    , "description": "Clang/LLVM for x64 (Debug) - SSE/SSE2", "inherits": [ "base", "x64", "Debug", "Clang" ] },
+    { "name": "x64-Release-Clang"  , "description": "Clang/LLVM for x64 (Release) - SSE/SSE2", "inherits": [ "base", "x64", "Release", "Clang" ] },
+    { "name": "x86-Debug-Clang"    , "description": "Clang/LLVM for x86 (Debug) - SSE/SSE2", "inherits": [ "base", "x86", "Debug", "Clang" ], "environment": { "CXXFLAGS": "-m32" } },
+    { "name": "x86-Release-Clang"  , "description": "Clang/LLVM for x86 (Release) - SSE/SSE2", "inherits": [ "base", "x86", "Release", "Clang" ], "environment": { "CXXFLAGS": "-m32" } },
+    { "name": "arm64-Debug-Clang"  , "description": "Clang/LLVM for AArch64 (Debug) - ARM-NEON", "inherits": [ "base", "ARM64", "Debug", "Clang" ], "environment": { "CXXFLAGS": "--target=arm64-pc-windows-msvc" } },
+    { "name": "arm64-Release-Clang", "description": "Clang/LLVM for AArch64 (Release) - ARM-NEON", "inherits": [ "base", "ARM64", "Release", "Clang" ], "environment": { "CXXFLAGS": "--target=arm64-pc-windows-msvc" } }
+  ],
+  "testPresets": [
+    { "name": "x64-Debug"    , "configurePreset": "x64-Debug" },
+    { "name": "x64-Release"  , "configurePreset": "x64-Release" },
+    { "name": "x86-Debug"    , "configurePreset": "x86-Debug" },
+    { "name": "x86-Release"  , "configurePreset": "x86-Release" },
+    { "name": "arm64-Debug"  , "configurePreset": "arm64-Debug" },
+    { "name": "arm64-Release", "configurePreset": "arm64-Release" },
+
+    { "name": "x64-Debug-Clang"    , "configurePreset": "x64-Debug-Clang" },
+    { "name": "x64-Release-Clang"  , "configurePreset": "x64-Release-Clang" },
+    { "name": "x86-Debug-Clang"    , "configurePreset": "x86-Debug-Clang" },
+    { "name": "x86-Release-Clang"  , "configurePreset": "x86-Release-Clang" },
+    { "name": "arm64-Debug-Clang"  , "configurePreset": "arm64-Debug-Clang" },
+    { "name": "arm64-Release-Clang", "configurePreset": "arm64-Release-Clang" }
+  ]
+}
--- a/vendor/directxmath-3.19.0/Extensions/DirectXMathAVX.h
+++ b/vendor/directxmath-3.19.0/Extensions/DirectXMathAVX.h
@ -0,0 +1,275 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathAVX.h -- AVX (version 1) extensions for SIMD C++ Math library
+//
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
+#error AVX not supported on ARM platform
+#endif
+
+#include <DirectXMath.h>
+
+namespace DirectX
+{
+
+namespace AVX
+{
+
+inline bool XMVerifyAVXSupport()
+{
+    // Should return true for AMD Bulldozer, Intel "Sandy Bridge", and Intel "Ivy Bridge" or later processors
+    // with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
+
+    // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
+    int CPUInfo[4] = {-1};
+#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
+    __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+    __cpuid( CPUInfo, 0 );
+#endif
+
+    if ( CPUInfo[0] < 1  )
+        return false;
+
+#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
+    __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+    __cpuid(CPUInfo, 1 );
+#endif
+
+    // We check for AVX, OSXSAVE, SSE4.1, and SSE3 (CPUID leaf 1, ECX bits 28, 27, 19, and 0); all must be set
+    return ( (CPUInfo[2] & 0x18080001) == 0x18080001 );
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVectorReplicatePtr( _In_  const float *pValue )
+{
+    return _mm_broadcast_ss( pValue );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSplatX( FXMVECTOR V )
+{
+    return _mm_permute_ps( V, _MM_SHUFFLE(0, 0, 0, 0) );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSplatY( FXMVECTOR V )
+{
+    return _mm_permute_ps( V, _MM_SHUFFLE(1, 1, 1, 1) );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSplatZ( FXMVECTOR V )
+{
+    return _mm_permute_ps( V, _MM_SHUFFLE(2, 2, 2, 2) );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSplatW( FXMVECTOR V )
+{
+    return _mm_permute_ps( V, _MM_SHUFFLE(3, 3, 3, 3) );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSwizzle( FXMVECTOR V, uint32_t E0, uint32_t E1, uint32_t E2, uint32_t E3 )
+{
+    assert( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) );
+    _Analysis_assume_( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) );
+
+    unsigned int elem[4] = { E0, E1, E2, E3 };
+    __m128i vControl = _mm_loadu_si128( reinterpret_cast<const __m128i *>(&elem[0]) );
+    return _mm_permutevar_ps( V, vControl );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorPermute( FXMVECTOR V1, FXMVECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW )
+{
+    assert( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 );
+    _Analysis_assume_( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 );
+
+    static const XMVECTORU32 three = { { { 3, 3, 3, 3 } } };
+
+    XM_ALIGNED_DATA(16) unsigned int elem[4] = { PermuteX, PermuteY, PermuteZ, PermuteW };
+    __m128i vControl = _mm_load_si128( reinterpret_cast<const __m128i *>(&elem[0]) );
+    
+    __m128i vSelect = _mm_cmpgt_epi32( vControl, three );
+    vControl = _mm_castps_si128( _mm_and_ps( _mm_castsi128_ps( vControl ), three ) );
+
+    __m128 shuffled1 = _mm_permutevar_ps( V1, vControl );
+    __m128 shuffled2 = _mm_permutevar_ps( V2, vControl );
+
+    __m128 masked1 = _mm_andnot_ps( _mm_castsi128_ps( vSelect ), shuffled1 );
+    __m128 masked2 = _mm_and_ps( _mm_castsi128_ps( vSelect ), shuffled2 );
+
+    return _mm_or_ps( masked1, masked2 );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2, uint32_t Elements)
+{
+    assert( Elements < 4 );
+    _Analysis_assume_( Elements < 4 );
+    return AVX::XMVectorPermute(V1, V2, Elements, ((Elements) + 1), ((Elements) + 2), ((Elements) + 3));
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V, uint32_t Elements)
+{
+    assert( Elements < 4 );
+    _Analysis_assume_( Elements < 4 );
+    return AVX::XMVectorSwizzle( V, Elements & 3, (Elements + 1) & 3, (Elements + 2) & 3, (Elements + 3) & 3 );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V, uint32_t Elements)
+{
+    assert( Elements < 4 );
+    _Analysis_assume_( Elements < 4 );
+    return AVX::XMVectorSwizzle( V, (4 - (Elements)) & 3, (5 - (Elements)) & 3, (6 - (Elements)) & 3, (7 - (Elements)) & 3 );
+}
+
+
+//-------------------------------------------------------------------------------------
+// Permute Templates
+//-------------------------------------------------------------------------------------
+
+namespace Internal
+{
+    // Slow path fallback for permutes that do not map to a single SSE opcode.
+    template<uint32_t Shuffle, bool WhichX, bool WhichY, bool WhichZ, bool WhichW> struct PermuteHelper
+    {
+        static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2)
+        {
+            static const XMVECTORU32 selectMask =
+            { { {
+                WhichX ? 0xFFFFFFFF : 0,
+				WhichY ? 0xFFFFFFFF : 0,
+				WhichZ ? 0xFFFFFFFF : 0,
+				WhichW ? 0xFFFFFFFF : 0,
+            } } };
+
+            XMVECTOR shuffled1 = _mm_permute_ps(v1, Shuffle);
+            XMVECTOR shuffled2 = _mm_permute_ps(v2, Shuffle);
+
+            XMVECTOR masked1 = _mm_andnot_ps(selectMask, shuffled1);
+            XMVECTOR masked2 = _mm_and_ps(selectMask, shuffled2);
+
+            return _mm_or_ps(masked1, masked2);
+        }
+    };
+
+    // Fast path for permutes that only read from the first vector.
+    template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, false, false>
+    {
+        static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { (v2); return _mm_permute_ps(v1, Shuffle); }
+    };
+
+    // Fast path for permutes that only read from the second vector.
+    template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, true, true>
+    {
+        static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2){ (v1); return _mm_permute_ps(v2, Shuffle); }
+    };
+
+    // Fast path for permutes that read XY from the first vector, ZW from the second.
+    template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, true, true>
+    {
+        static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v1, v2, Shuffle); }
+    };
+
+    // Fast path for permutes that read XY from the second vector, ZW from the first.
+    template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, false, false>
+    {
+        static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v2, v1, Shuffle); }
+    };
+};
+
+// General permute template
+template<uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW>
+    inline XMVECTOR XM_CALLCONV XMVectorPermute(FXMVECTOR V1, FXMVECTOR V2)
+{
+    static_assert(PermuteX <= 7, "PermuteX template parameter out of range");
+    static_assert(PermuteY <= 7, "PermuteY template parameter out of range");
+    static_assert(PermuteZ <= 7, "PermuteZ template parameter out of range");
+    static_assert(PermuteW <= 7, "PermuteW template parameter out of range");
+
+    const uint32_t Shuffle = _MM_SHUFFLE(PermuteW & 3, PermuteZ & 3, PermuteY & 3, PermuteX & 3);
+
+    const bool WhichX = PermuteX > 3;
+    const bool WhichY = PermuteY > 3;
+    const bool WhichZ = PermuteZ > 3;
+    const bool WhichW = PermuteW > 3;
+
+    return AVX::Internal::PermuteHelper<Shuffle, WhichX, WhichY, WhichZ, WhichW>::Permute(V1, V2);
+}
+
+// Special-case permute templates
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,3>(FXMVECTOR V1, FXMVECTOR) { return V1; }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,7>(FXMVECTOR, FXMVECTOR V2) { return V2; }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x1); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x2); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x3); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x4); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x5); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x6); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x7); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x8); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x9); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xA); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xB); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xC); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xD); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xE); }
+
+
+//-------------------------------------------------------------------------------------
+// Swizzle Templates
+//-------------------------------------------------------------------------------------
+
+// General swizzle template
+template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t SwizzleW>
+    inline XMVECTOR XM_CALLCONV XMVectorSwizzle(FXMVECTOR V)
+{
+    static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
+    static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
+    static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
+    static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");
+
+    return _mm_permute_ps( V, _MM_SHUFFLE( SwizzleW, SwizzleZ, SwizzleY, SwizzleX ) );
+}
+
+// Specialized swizzles
+template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,1,2,3>(FXMVECTOR V) { return V; }
+template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,0,2,2>(FXMVECTOR V) { return _mm_moveldup_ps(V); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1,1,3,3>(FXMVECTOR V) { return _mm_movehdup_ps(V); }
+
+
+//-------------------------------------------------------------------------------------
+// Other Templates
+//-------------------------------------------------------------------------------------
+
+template<uint32_t Elements>
+    inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2)
+{
+    static_assert( Elements < 4, "Elements template parameter out of range" );
+    return AVX::XMVectorPermute<Elements, (Elements + 1), (Elements + 2), (Elements + 3)>(V1, V2);
+}
+
+template<uint32_t Elements>
+    inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V)
+{
+    static_assert( Elements < 4, "Elements template parameter out of range" );
+    return AVX::XMVectorSwizzle<Elements & 3, (Elements + 1) & 3, (Elements + 2) & 3, (Elements + 3) & 3>(V);
+}
+
+template<uint32_t Elements>
+    inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V)
+{
+    static_assert( Elements < 4, "Elements template parameter out of range" );
+    return AVX::XMVectorSwizzle<(4 - Elements) & 3, (5 - Elements) & 3, (6 - Elements) & 3, (7 - Elements) & 3>(V);
+}
+
+} // namespace AVX
+
+} // namespace DirectX
--- a/vendor/directxmath-3.19.0/Extensions/DirectXMathAVX2.h
+++ b/vendor/directxmath-3.19.0/Extensions/DirectXMathAVX2.h
--- a/vendor/directxmath-3.19.0/Extensions/DirectXMathBE.h
+++ b/vendor/directxmath-3.19.0/Extensions/DirectXMathBE.h
@ -0,0 +1,95 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathBE.h -- Big-endian swap extensions for SIMD C++ Math library
+//
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#if (defined(_M_IX86) || defined(_M_X64) || __i386__ || __x86_64__) && !defined(_M_HYBRID_X86_ARM64)
+#include <tmmintrin.h>
+#endif
+
+#include <DirectXMath.h>
+
+namespace DirectX
+{
+
+inline XMVECTOR XM_CALLCONV XMVectorEndian
+(
+    FXMVECTOR V 
+)
+{
+#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+    static const XMVECTORU32 idx = { { { 0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu } } };
+
+    uint8x8x2_t tbl;
+    tbl.val[0] = vreinterpret_u8_f32(vget_low_f32(V));
+    tbl.val[1] = vreinterpret_u8_f32(vget_high_f32(V));
+
+    const uint8x8_t rL = vtbl2_u8(tbl, vget_low_u32(idx));
+    const uint8x8_t rH = vtbl2_u8(tbl, vget_high_u32(idx));
+    return vcombine_f32(vreinterpret_f32_u8(rL), vreinterpret_f32_u8(rH));
+#else
+    XMVECTORU32 E;
+    E.v = V;
+    uint32_t value = E.u[0];
+    E.u[0] = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 0xFF0000) >> 8) | (value >> 24) );
+    value = E.u[1];
+    E.u[1] = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 0xFF0000) >> 8) | (value >> 24) );
+    value = E.u[2];
+    E.u[2] = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 0xFF0000) >> 8) | (value >> 24) );
+    value = E.u[3];
+    E.u[3] = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 0xFF0000) >> 8) | (value >> 24) );
+    return E.v;
+#endif
+}
+
+
+#if (defined(_M_IX86) || defined(_M_X64) || __i386__ || __x86_64__) && !defined(_M_HYBRID_X86_ARM64)
+namespace SSSE3
+{
+
+inline bool XMVerifySSSE3Support()
+{
+    // Should return true on AMD Bulldozer, Intel Core i7/i5/i3, Intel Atom, or later processors
+
+    // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
+    int CPUInfo[4] = { -1 };
+#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
+    __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+    __cpuid(CPUInfo, 0);
+#endif
+
+    if ( CPUInfo[0] < 1  )
+        return false;
+
+#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
+    __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+    __cpuid(CPUInfo, 1);
+#endif
+
+    // Check for the SSSE3 instruction set (CPUID leaf 1, ECX bit 9).
+    return ( (CPUInfo[2] & 0x200) != 0 );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorEndian
+(
+    FXMVECTOR V 
+)
+{
+    static const XMVECTORU32 idx = { { { 0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu } } };
+   
+    __m128i Result = _mm_shuffle_epi8( _mm_castps_si128(V), idx );
+    return _mm_castsi128_ps( Result );
+}
+
+} // namespace SSSE3
+#endif // X86 || X64
+
+} // namespace DirectX
--- a/vendor/directxmath-3.19.0/Extensions/DirectXMathF16C.h
+++ b/vendor/directxmath-3.19.0/Extensions/DirectXMathF16C.h
@ -0,0 +1,471 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathF16C.h -- F16C/CVT16 extensions for SIMD C++ Math library
+//
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
+#error F16C not supported on ARM platform
+#endif
+
+#include <DirectXMath.h>
+#include <DirectXPackedVector.h>
+
+namespace DirectX
+{
+
+namespace F16C
+{
+
+inline bool XMVerifyF16CSupport()
+{
+    // Runtime probe for the F16C half-precision conversion instructions via CPUID.
+    // Expected to report true for AMD "Piledriver" and Intel "Ivy Bridge" processors
+    // with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
+    //
+    // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
+
+    // CPUID leaf 1 ECX bits that must all be set:
+    // F16C (bit 29), AVX (bit 28), OSXSAVE (bit 27) and SSE4.1 (bit 19).
+    const int requiredBits = 0x38080000;
+
+    // Slots receive EAX, EBX, ECX, EDX in that order.
+    int regs[4] = { -1 };
+
+    // Leaf 0: EAX holds the highest standard leaf the processor implements.
+#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
+    __cpuid(0, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 0);
+#endif
+
+    const bool hasFeatureLeaf = ( regs[0] >= 1 );
+    if ( !hasFeatureLeaf )
+        return false;
+
+    // Leaf 1: processor feature flags.
+#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
+    __cpuid(1, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 1);
+#endif
+
+    return ( (regs[2] & requiredBits) == requiredBits );
+}
+
+
+//-------------------------------------------------------------------------------------
+// Data conversion
+//-------------------------------------------------------------------------------------
+
+inline float XMConvertHalfToFloat( PackedVector::HALF Value )
+{
+    // Put the 16-bit pattern in the low lane, widen it with F16C and return lane 0 as a float.
+    const __m128i packed = _mm_cvtsi32_si128( static_cast<int>(Value) );
+    return _mm_cvtss_f32( _mm_cvtph_ps( packed ) );
+}
+
+inline PackedVector::HALF XMConvertFloatToHalf( float Value )
+{
+    // Narrow the scalar with F16C (rounding immediate 0) and return the low 16 bits of the result.
+    const __m128i packed = _mm_cvtps_ph( _mm_set_ss( Value ), 0 );
+    return static_cast<PackedVector::HALF>( _mm_cvtsi128_si32( packed ) );
+}
+
+inline float* XMConvertHalfToFloatStream
+(
+    _Out_writes_bytes_(sizeof(float) + OutputStride * (HalfCount - 1)) float* pOutputStream,
+    _In_ size_t      OutputStride,
+    _In_reads_bytes_(2 + InputStride * (HalfCount - 1)) const PackedVector::HALF* pInputStream,
+    _In_ size_t      InputStride,
+    _In_ size_t      HalfCount
+)
+{
+    // Converts HalfCount half-precision values to single precision using F16C.
+    //
+    // pOutputStream / OutputStride: destination of the floats and the distance in bytes between
+    //     consecutive outputs (asserted to be at least sizeof(float)).
+    // pInputStream / InputStride: source of the halves and the distance in bytes between
+    //     consecutive inputs (asserted to be at least sizeof(HALF)).
+    // HalfCount: number of elements to convert.
+    // Returns pOutputStream.
+    //
+    // Elements are handled four at a time, with a dedicated loop for each combination of
+    // packed/scattered input and packed/scattered output; leftovers are converted one by one.
+    // The paths that use non-temporal stores finish with a store fence.
+    using namespace PackedVector;
+
+    assert(pOutputStream);
+    assert(pInputStream);
+
+    assert(InputStride >= sizeof(HALF));
+    assert(OutputStride >= sizeof(float));
+
+    // Byte pointers so that the strides can be applied directly.
+    auto pHalf = reinterpret_cast<const uint8_t*>(pInputStream);
+    auto pFloat = reinterpret_cast<uint8_t*>(pOutputStream);
+
+    size_t i = 0;
+    size_t four = HalfCount >> 2;
+    if (four > 0)
+    {
+        if (InputStride == sizeof(HALF))
+        {
+            if (OutputStride == sizeof(float))
+            {
+                if ((reinterpret_cast<uintptr_t>(pFloat) & 0xF) == 0)
+                {
+                    // Packed input, aligned & packed output
+                    for (size_t j = 0; j < four; ++j)
+                    {
+                        __m128i HV = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(pHalf));
+                        pHalf += InputStride * 4;
+
+                        __m128 FV = _mm_cvtph_ps(HV);
+
+                        _mm_stream_ps(reinterpret_cast<float*>(pFloat), FV);
+                        pFloat += OutputStride * 4;
+                        i += 4;
+                    }
+
+                    // Non-temporal stores are weakly ordered: fence so the converted data is
+                    // globally visible before any store the caller issues afterwards.
+                    _mm_sfence();
+                }
+                else
+                {
+                    // Packed input, packed output
+                    for (size_t j = 0; j < four; ++j)
+                    {
+                        __m128i HV = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(pHalf));
+                        pHalf += InputStride * 4;
+
+                        __m128 FV = _mm_cvtph_ps(HV);
+
+                        _mm_storeu_ps(reinterpret_cast<float*>(pFloat), FV);
+                        pFloat += OutputStride * 4;
+                        i += 4;
+                    }
+                }
+            }
+            else
+            {
+                // Packed input, scattered output
+                for (size_t j = 0; j < four; ++j)
+                {
+                    __m128i HV = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(pHalf));
+                    pHalf += InputStride * 4;
+
+                    __m128 FV = _mm_cvtph_ps(HV);
+
+                    // Lane 0 goes out as a scalar store; lanes 1-3 are extracted as raw
+                    // 32-bit patterns and written through an int pointer.
+                    _mm_store_ss(reinterpret_cast<float*>(pFloat), FV);
+                    pFloat += OutputStride;
+                    *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 1);
+                    pFloat += OutputStride;
+                    *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 2);
+                    pFloat += OutputStride;
+                    *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 3);
+                    pFloat += OutputStride;
+                    i += 4;
+                }
+            }
+        }
+        else if (OutputStride == sizeof(float))
+        {
+            if ((reinterpret_cast<uintptr_t>(pFloat) & 0xF) == 0)
+            {
+                // Scattered input, aligned & packed output
+                for (size_t j = 0; j < four; ++j)
+                {
+                    uint16_t H1 = *reinterpret_cast<const HALF*>(pHalf);
+                    pHalf += InputStride;
+                    uint16_t H2 = *reinterpret_cast<const HALF*>(pHalf);
+                    pHalf += InputStride;
+                    uint16_t H3 = *reinterpret_cast<const HALF*>(pHalf);
+                    pHalf += InputStride;
+                    uint16_t H4 = *reinterpret_cast<const HALF*>(pHalf);
+                    pHalf += InputStride;
+
+                    // Gather the four halves into the low 64 bits of one register.
+                    __m128i HV = _mm_setzero_si128();
+                    HV = _mm_insert_epi16(HV, H1, 0);
+                    HV = _mm_insert_epi16(HV, H2, 1);
+                    HV = _mm_insert_epi16(HV, H3, 2);
+                    HV = _mm_insert_epi16(HV, H4, 3);
+                    __m128 FV = _mm_cvtph_ps(HV);
+
+                    _mm_stream_ps(reinterpret_cast<float*>(pFloat), FV);
+                    pFloat += OutputStride * 4;
+                    i += 4;
+                }
+
+                // Non-temporal stores are weakly ordered: fence so the converted data is
+                // globally visible before any store the caller issues afterwards.
+                _mm_sfence();
+            }
+            else
+            {
+                // Scattered input, packed output
+                for (size_t j = 0; j < four; ++j)
+                {
+                    uint16_t H1 = *reinterpret_cast<const HALF*>(pHalf);
+                    pHalf += InputStride;
+                    uint16_t H2 = *reinterpret_cast<const HALF*>(pHalf);
+                    pHalf += InputStride;
+                    uint16_t H3 = *reinterpret_cast<const HALF*>(pHalf);
+                    pHalf += InputStride;
+                    uint16_t H4 = *reinterpret_cast<const HALF*>(pHalf);
+                    pHalf += InputStride;
+
+                    __m128i HV = _mm_setzero_si128();
+                    HV = _mm_insert_epi16(HV, H1, 0);
+                    HV = _mm_insert_epi16(HV, H2, 1);
+                    HV = _mm_insert_epi16(HV, H3, 2);
+                    HV = _mm_insert_epi16(HV, H4, 3);
+                    __m128 FV = _mm_cvtph_ps(HV);
+
+                    _mm_storeu_ps(reinterpret_cast<float*>(pFloat), FV);
+                    pFloat += OutputStride * 4;
+                    i += 4;
+                }
+
+            }
+        }
+        else
+        {
+            // Scattered input, scattered output
+            for (size_t j = 0; j < four; ++j)
+            {
+                uint16_t H1 = *reinterpret_cast<const HALF*>(pHalf);
+                pHalf += InputStride;
+                uint16_t H2 = *reinterpret_cast<const HALF*>(pHalf);
+                pHalf += InputStride;
+                uint16_t H3 = *reinterpret_cast<const HALF*>(pHalf);
+                pHalf += InputStride;
+                uint16_t H4 = *reinterpret_cast<const HALF*>(pHalf);
+                pHalf += InputStride;
+
+                __m128i HV = _mm_setzero_si128();
+                HV = _mm_insert_epi16(HV, H1, 0);
+                HV = _mm_insert_epi16(HV, H2, 1);
+                HV = _mm_insert_epi16(HV, H3, 2);
+                HV = _mm_insert_epi16(HV, H4, 3);
+                __m128 FV = _mm_cvtph_ps(HV);
+
+                _mm_store_ss(reinterpret_cast<float*>(pFloat), FV);
+                pFloat += OutputStride;
+                *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 1);
+                pFloat += OutputStride;
+                *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 2);
+                pFloat += OutputStride;
+                *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 3);
+                pFloat += OutputStride;
+                i += 4;
+            }
+        }
+    }
+
+    // Remaining (HalfCount % 4) elements, or everything when fewer than four were requested.
+    for (; i < HalfCount; ++i)
+    {
+        *reinterpret_cast<float*>(pFloat) = XMConvertHalfToFloat(reinterpret_cast<const HALF*>(pHalf)[0]);
+        pHalf += InputStride;
+        pFloat += OutputStride;
+    }
+
+    return pOutputStream;
+}
+
+
+inline PackedVector::HALF* XMConvertFloatToHalfStream
+(
+    _Out_writes_bytes_(2 + OutputStride * (FloatCount - 1)) PackedVector::HALF* pOutputStream,
+    _In_ size_t       OutputStride,
+    _In_reads_bytes_(sizeof(float) + InputStride * (FloatCount - 1)) const float* pInputStream,
+    _In_ size_t       InputStride,
+    _In_ size_t       FloatCount
+)
+{
+    // Converts FloatCount single-precision values to half precision using F16C.
+    //
+    // Both strides are byte distances between consecutive elements. Groups of four are
+    // converted with one instruction, using a loop specialised for the packed/scattered
+    // layout of each side; the remaining elements are converted one at a time.
+    // Returns pOutputStream.
+    using namespace PackedVector;
+
+    assert(pOutputStream);
+    assert(pInputStream);
+
+    assert(InputStride >= sizeof(float));
+    assert(OutputStride >= sizeof(HALF));
+
+    // Byte cursors so that the strides can be applied directly.
+    auto src = reinterpret_cast<const uint8_t*>(pInputStream);
+    auto dst = reinterpret_cast<uint8_t*>(pOutputStream);
+
+    const bool packedInput = ( InputStride == sizeof(float) );
+    const bool packedOutput = ( OutputStride == sizeof(HALF) );
+    const size_t quadCount = FloatCount >> 2;
+
+    if (quadCount > 0)
+    {
+        if (packedInput)
+        {
+            // A 16-byte aligned start stays aligned, because packed input advances 16 bytes per group.
+            const bool alignedInput = ( (reinterpret_cast<uintptr_t>(src) & 0xF) == 0 );
+
+            if (packedOutput)
+            {
+                if (alignedInput)
+                {
+                    // Aligned and packed input, packed output
+                    for (size_t q = 0; q < quadCount; ++q)
+                    {
+                        const __m128 floats = _mm_load_ps(reinterpret_cast<const float*>(src));
+                        src += InputStride * 4;
+
+                        const __m128i halves = _mm_cvtps_ph(floats, 0);
+
+                        _mm_storel_epi64(reinterpret_cast<__m128i*>(dst), halves);
+                        dst += OutputStride * 4;
+                    }
+                }
+                else
+                {
+                    // Packed input, packed output
+                    for (size_t q = 0; q < quadCount; ++q)
+                    {
+                        const __m128 floats = _mm_loadu_ps(reinterpret_cast<const float*>(src));
+                        src += InputStride * 4;
+
+                        const __m128i halves = _mm_cvtps_ph(floats, 0);
+
+                        _mm_storel_epi64(reinterpret_cast<__m128i*>(dst), halves);
+                        dst += OutputStride * 4;
+                    }
+                }
+            }
+            else if (alignedInput)
+            {
+                // Aligned & packed input, scattered output
+                for (size_t q = 0; q < quadCount; ++q)
+                {
+                    const __m128 floats = _mm_load_ps(reinterpret_cast<const float*>(src));
+                    src += InputStride * 4;
+
+                    const __m128i halves = _mm_cvtps_ph(floats, 0);
+
+                    *reinterpret_cast<HALF*>(dst) = static_cast<HALF>(_mm_extract_epi16(halves, 0));
+                    dst += OutputStride;
+                    *reinterpret_cast<HALF*>(dst) = static_cast<HALF>(_mm_extract_epi16(halves, 1));
+                    dst += OutputStride;
+                    *reinterpret_cast<HALF*>(dst) = static_cast<HALF>(_mm_extract_epi16(halves, 2));
+                    dst += OutputStride;
+                    *reinterpret_cast<HALF*>(dst) = static_cast<HALF>(_mm_extract_epi16(halves, 3));
+                    dst += OutputStride;
+                }
+            }
+            else
+            {
+                // Packed input, scattered output
+                for (size_t q = 0; q < quadCount; ++q)
+                {
+                    const __m128 floats = _mm_loadu_ps(reinterpret_cast<const float*>(src));
+                    src += InputStride * 4;
+
+                    const __m128i halves = _mm_cvtps_ph(floats, 0);
+
+                    *reinterpret_cast<HALF*>(dst) = static_cast<HALF>(_mm_extract_epi16(halves, 0));
+                    dst += OutputStride;
+                    *reinterpret_cast<HALF*>(dst) = static_cast<HALF>(_mm_extract_epi16(halves, 1));
+                    dst += OutputStride;
+                    *reinterpret_cast<HALF*>(dst) = static_cast<HALF>(_mm_extract_epi16(halves, 2));
+                    dst += OutputStride;
+                    *reinterpret_cast<HALF*>(dst) = static_cast<HALF>(_mm_extract_epi16(halves, 3));
+                    dst += OutputStride;
+                }
+            }
+        }
+        else if (packedOutput)
+        {
+            // Scattered input, packed output
+            for (size_t q = 0; q < quadCount; ++q)
+            {
+                // The first element is a scalar load, the other three are broadcasts;
+                // the blends below pick one lane from each to assemble the group.
+                const __m128 lane0 = _mm_load_ss(reinterpret_cast<const float*>(src));
+                src += InputStride;
+
+                const __m128 lane1 = _mm_broadcast_ss(reinterpret_cast<const float*>(src));
+                src += InputStride;
+
+                const __m128 lane2 = _mm_broadcast_ss(reinterpret_cast<const float*>(src));
+                src += InputStride;
+
+                const __m128 lane3 = _mm_broadcast_ss(reinterpret_cast<const float*>(src));
+                src += InputStride;
+
+                const __m128 lower = _mm_blend_ps(lane0, lane1, 0x2);
+                const __m128 upper = _mm_blend_ps(lane2, lane3, 0x8);
+                const __m128 floats = _mm_blend_ps(lower, upper, 0xC);
+
+                const __m128i halves = _mm_cvtps_ph(floats, 0);
+
+                _mm_storel_epi64(reinterpret_cast<__m128i*>(dst), halves);
+                dst += OutputStride * 4;
+            }
+        }
+        else
+        {
+            // Scattered input, scattered output
+            for (size_t q = 0; q < quadCount; ++q)
+            {
+                const __m128 lane0 = _mm_load_ss(reinterpret_cast<const float*>(src));
+                src += InputStride;
+
+                const __m128 lane1 = _mm_broadcast_ss(reinterpret_cast<const float*>(src));
+                src += InputStride;
+
+                const __m128 lane2 = _mm_broadcast_ss(reinterpret_cast<const float*>(src));
+                src += InputStride;
+
+                const __m128 lane3 = _mm_broadcast_ss(reinterpret_cast<const float*>(src));
+                src += InputStride;
+
+                const __m128 lower = _mm_blend_ps(lane0, lane1, 0x2);
+                const __m128 upper = _mm_blend_ps(lane2, lane3, 0x8);
+                const __m128 floats = _mm_blend_ps(lower, upper, 0xC);
+
+                const __m128i halves = _mm_cvtps_ph(floats, 0);
+
+                *reinterpret_cast<HALF*>(dst) = static_cast<HALF>(_mm_extract_epi16(halves, 0));
+                dst += OutputStride;
+                *reinterpret_cast<HALF*>(dst) = static_cast<HALF>(_mm_extract_epi16(halves, 1));
+                dst += OutputStride;
+                *reinterpret_cast<HALF*>(dst) = static_cast<HALF>(_mm_extract_epi16(halves, 2));
+                dst += OutputStride;
+                *reinterpret_cast<HALF*>(dst) = static_cast<HALF>(_mm_extract_epi16(halves, 3));
+                dst += OutputStride;
+            }
+        }
+    }
+
+    // Every vector path above consumes exactly quadCount groups of four,
+    // so the scalar tail starts right after them.
+    for (size_t i = quadCount * 4; i < FloatCount; ++i)
+    {
+        *reinterpret_cast<HALF*>(dst) = XMConvertFloatToHalf(reinterpret_cast<const float*>(src)[0]);
+        src += InputStride;
+        dst += OutputStride;
+    }
+
+    return pOutputStream;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Half2
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMLoadHalf2( _In_ const PackedVector::XMHALF2* pSource )
+{
+    assert(pSource);
+
+    // Fetch 32 bits from pSource with a scalar load, then widen the half lanes with F16C.
+    const __m128 raw = _mm_load_ss( reinterpret_cast<const float*>(pSource) );
+    const __m128i halves = _mm_castps_si128( raw );
+    return _mm_cvtph_ps( halves );
+}
+
+inline void XM_CALLCONV XMStoreHalf2( _Out_ PackedVector::XMHALF2* pDestination, _In_ FXMVECTOR V )
+{
+    assert(pDestination);
+
+    // Narrow V with F16C and write only the low 32 bits of the result through a scalar store.
+    const __m128i halves = _mm_cvtps_ph( V, 0 );
+    const __m128 raw = _mm_castsi128_ps( halves );
+    _mm_store_ss( reinterpret_cast<float*>(pDestination), raw );
+}
+
+
+//-------------------------------------------------------------------------------------
+// Half4
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMLoadHalf4( _In_ const PackedVector::XMHALF4* pSource )
+{
+    assert(pSource);
+
+    // Fetch 64 bits from pSource into the low half of a register and widen them with F16C.
+    const __m128i halves = _mm_loadl_epi64( reinterpret_cast<const __m128i*>(pSource) );
+    return _mm_cvtph_ps( halves );
+}
+
+inline void XM_CALLCONV XMStoreHalf4( _Out_ PackedVector::XMHALF4* pDestination, _In_ FXMVECTOR V )
+{
+    assert(pDestination);
+
+    // Narrow all four lanes with F16C and write the low 64 bits of the result.
+    const __m128i halves = _mm_cvtps_ph( V, 0 );
+    _mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), halves );
+}
+
+} // namespace F16C
+
+} // namespace DirectX
--- a/vendor/directxmath-3.19.0/Extensions/DirectXMathFMA3.h
+++ b/vendor/directxmath-3.19.0/Extensions/DirectXMathFMA3.h
@ -0,0 +1,391 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathFMA3.h -- FMA3 extensions for SIMD C++ Math library
+//
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
+#error FMA3 not supported on ARM platform
+#endif
+
+#include <DirectXMath.h>
+
+namespace DirectX
+{
+
+namespace FMA3
+{
+
+inline bool XMVerifyFMA3Support()
+{
+    // Runtime probe for the FMA3 instruction set via CPUID.
+    // Expected to report true for AMD "Piledriver" and Intel "Haswell" processors
+    // with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
+    //
+    // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
+
+    // CPUID leaf 1 ECX bits that must all be set:
+    // AVX (bit 28), OSXSAVE (bit 27) and FMA3 (bit 12).
+    const int requiredBits = 0x18001000;
+
+    // Slots receive EAX, EBX, ECX, EDX in that order.
+    int regs[4] = {-1};
+
+    // Leaf 0: EAX holds the highest standard leaf the processor implements.
+#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
+    __cpuid(0, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 0);
+#endif
+
+    const bool hasFeatureLeaf = ( regs[0] >= 1 );
+    if ( !hasFeatureLeaf )
+        return false;
+
+    // Leaf 1: processor feature flags.
+#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
+    __cpuid(1, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 1);
+#endif
+
+    return ( (regs[2] & requiredBits) == requiredBits );
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd
+(
+    FXMVECTOR V1,
+    FXMVECTOR V2,
+    FXMVECTOR V3
+)
+{
+    // Per-lane V1 * V2 + V3 as a single fused multiply-add.
+    const XMVECTOR fused = _mm_fmadd_ps( V1, V2, V3 );
+    return fused;
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract
+(
+    FXMVECTOR V1,
+    FXMVECTOR V2,
+    FXMVECTOR V3
+)
+{
+    // Per-lane V3 - (V1 * V2) as a single fused negated multiply-add.
+    const XMVECTOR fused = _mm_fnmadd_ps( V1, V2, V3 );
+    return fused;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector2
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVector2Transform
+(
+    FXMVECTOR V,
+    CXMMATRIX M
+)
+{
+    // Computes V.x * row0 + (V.y * row1 + row3) with two fused multiply-adds.
+    const XMVECTOR splatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1));
+    const XMVECTOR splatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0));
+
+    const XMVECTOR partial = _mm_fmadd_ps( splatY, M.r[1], M.r[3] );
+    return _mm_fmadd_ps( splatX, M.r[0], partial );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2TransformCoord
+(
+    FXMVECTOR V,
+    CXMMATRIX M
+)
+{
+    // Computes V.x * row0 + (V.y * row1 + row3), then divides every lane by the resulting w.
+    const XMVECTOR splatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1));
+    const XMVECTOR splatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0));
+
+    const XMVECTOR partial = _mm_fmadd_ps( splatY, M.r[1], M.r[3] );
+    const XMVECTOR transformed = _mm_fmadd_ps( splatX, M.r[0], partial );
+
+    const XMVECTOR splatW = _mm_permute_ps(transformed,_MM_SHUFFLE(3,3,3,3));
+    return _mm_div_ps( transformed, splatW );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2TransformNormal
+(
+    FXMVECTOR V,
+    CXMMATRIX M
+)
+{
+    // Computes V.x * row0 + V.y * row1; row3 is not added.
+    const XMVECTOR splatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1));
+    const XMVECTOR splatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0));
+
+    const XMVECTOR partial = _mm_mul_ps( splatY, M.r[1] );
+    return _mm_fmadd_ps( splatX, M.r[0], partial );
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector3
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVector3Transform
+(
+    FXMVECTOR V,
+    CXMMATRIX M
+)
+{
+    // Computes V.x * row0 + (V.y * row1 + (V.z * row2 + row3)) with three fused multiply-adds.
+    const XMVECTOR splatZ = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2));
+    const XMVECTOR splatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1));
+    const XMVECTOR splatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0));
+
+    XMVECTOR acc = _mm_fmadd_ps( splatZ, M.r[2], M.r[3] );
+    acc = _mm_fmadd_ps( splatY, M.r[1], acc );
+    return _mm_fmadd_ps( splatX, M.r[0], acc );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3TransformCoord
+(
+    FXMVECTOR V,
+    CXMMATRIX M
+)
+{
+    // Computes V.x * row0 + (V.y * row1 + (V.z * row2 + row3)),
+    // then divides every lane by the resulting w.
+    const XMVECTOR splatZ = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2));
+    const XMVECTOR splatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1));
+    const XMVECTOR splatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0));
+
+    XMVECTOR acc = _mm_fmadd_ps( splatZ, M.r[2], M.r[3] );
+    acc = _mm_fmadd_ps( splatY, M.r[1], acc );
+    acc = _mm_fmadd_ps( splatX, M.r[0], acc );
+
+    const XMVECTOR splatW = _mm_permute_ps(acc,_MM_SHUFFLE(3,3,3,3));
+    return _mm_div_ps( acc, splatW );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3TransformNormal
+(
+    FXMVECTOR V,
+    CXMMATRIX M
+)
+{
+    // Computes V.x * row0 + (V.y * row1 + V.z * row2); row3 is not added.
+    const XMVECTOR splatZ = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2));
+    const XMVECTOR splatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1));
+    const XMVECTOR splatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0));
+
+    XMVECTOR acc = _mm_mul_ps( splatZ, M.r[2] );
+    acc = _mm_fmadd_ps( splatY, M.r[1], acc );
+    return _mm_fmadd_ps( splatX, M.r[0], acc );
+}
+
+XMMATRIX XM_CALLCONV XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2); // Forward declaration: the inline definition appears later in this header; XMVector3Project/XMVector3Unproject below call it first.
+
+inline XMVECTOR XM_CALLCONV XMVector3Project
+(
+    FXMVECTOR V,
+    float    ViewportX,
+    float    ViewportY,
+    float    ViewportWidth,
+    float    ViewportHeight,
+    float    ViewportMinZ,
+    float    ViewportMaxZ,
+    CXMMATRIX Projection,
+    CXMMATRIX View,
+    CXMMATRIX World
+)
+{
+    // Transforms V by World * View * Projection (with the divide by w), then applies the
+    // viewport scale and offset in one fused multiply-add.
+    const float halfWidth = ViewportWidth * 0.5f;
+    const float halfHeight = ViewportHeight * 0.5f;
+
+    const XMVECTOR viewportScale = XMVectorSet(halfWidth, -halfHeight, ViewportMaxZ - ViewportMinZ, 0.0f);
+    const XMVECTOR viewportOffset = XMVectorSet(ViewportX + halfWidth, ViewportY + halfHeight, ViewportMinZ, 0.0f);
+
+    const XMMATRIX worldView = FMA3::XMMatrixMultiply(World, View);
+    const XMMATRIX worldViewProjection = FMA3::XMMatrixMultiply(worldView, Projection);
+
+    const XMVECTOR transformed = FMA3::XMVector3TransformCoord(V, worldViewProjection);
+
+    return FMA3::XMVectorMultiplyAdd(transformed, viewportScale, viewportOffset);
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3Unproject
+(
+    FXMVECTOR V,
+    float     ViewportX,
+    float     ViewportY,
+    float     ViewportWidth,
+    float     ViewportHeight,
+    float     ViewportMinZ,
+    float     ViewportMaxZ,
+    CXMMATRIX Projection,
+    CXMMATRIX View,
+    CXMMATRIX World
+)
+{
+    // Undoes the viewport scale/offset on V, then transforms the result by the inverse of
+    // World * View * Projection (with the divide by w).
+    static const XMVECTORF32 bias = { { { -1.0f, 1.0f, 0.0f, 0.0f } } };
+
+    // Reciprocal of the viewport scale; the w lane is 1 so its reciprocal stays finite.
+    const XMVECTOR inverseScale = XMVectorReciprocal(
+        XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f));
+
+    const XMVECTOR negatedOrigin = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f);
+    const XMVECTOR inverseOffset = FMA3::XMVectorMultiplyAdd(inverseScale, negatedOrigin, bias.v);
+
+    const XMMATRIX worldView = FMA3::XMMatrixMultiply(World, View);
+    const XMMATRIX worldViewProjection = FMA3::XMMatrixMultiply(worldView, Projection);
+    const XMMATRIX inverseTransform = XMMatrixInverse(nullptr, worldViewProjection);
+
+    const XMVECTOR unscaled = FMA3::XMVectorMultiplyAdd(V, inverseScale, inverseOffset);
+
+    return FMA3::XMVector3TransformCoord(unscaled, inverseTransform);
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector4
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVector4Transform
+(
+    FXMVECTOR V,
+    CXMMATRIX M
+)
+{
+    // Computes V.x * row0 + (V.y * row1 + (V.z * row2 + V.w * row3)):
+    // one multiply followed by three fused multiply-adds.
+    const XMVECTOR splatW = _mm_permute_ps(V,_MM_SHUFFLE(3,3,3,3));
+    const XMVECTOR splatZ = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2));
+    const XMVECTOR splatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1));
+    const XMVECTOR splatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0));
+
+    XMVECTOR acc = _mm_mul_ps( splatW, M.r[3] );
+    acc = _mm_fmadd_ps( splatZ, M.r[2], acc );
+    acc = _mm_fmadd_ps( splatY, M.r[1], acc );
+    return _mm_fmadd_ps( splatX, M.r[0], acc );
+}
+
+
+//-------------------------------------------------------------------------------------
+// Matrix
+//-------------------------------------------------------------------------------------
+
+inline XMMATRIX XM_CALLCONV XMMatrixMultiply
+(
+    CXMMATRIX M1,
+    CXMMATRIX M2
+)
+{
+    // Row i of the product is M1[i].x * M2.row0 + M1[i].y * M2.row1
+    //                        + M1[i].z * M2.row2 + M1[i].w * M2.row3,
+    // evaluated as one multiply followed by three fused multiply-adds.
+    XMMATRIX product;
+    for (size_t row = 0; row < 4; ++row)
+    {
+        const XMVECTOR lhs = M1.r[row];
+
+        // Splat each component of the left-hand row across a full vector.
+        const XMVECTOR splatX = _mm_permute_ps(lhs,_MM_SHUFFLE(0,0,0,0));
+        const XMVECTOR splatY = _mm_permute_ps(lhs,_MM_SHUFFLE(1,1,1,1));
+        const XMVECTOR splatZ = _mm_permute_ps(lhs,_MM_SHUFFLE(2,2,2,2));
+        const XMVECTOR splatW = _mm_permute_ps(lhs,_MM_SHUFFLE(3,3,3,3));
+
+        XMVECTOR acc = _mm_mul_ps(splatX,M2.r[0]);
+        acc = _mm_fmadd_ps(splatY,M2.r[1],acc);
+        acc = _mm_fmadd_ps(splatZ,M2.r[2],acc);
+        acc = _mm_fmadd_ps(splatW,M2.r[3],acc);
+
+        product.r[row] = acc;
+    }
+    return product;
+}
+
+inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
+(
+    FXMMATRIX M1,
+    CXMMATRIX M2
+)
+{
+    // Step 1: compute the four rows of M1 * M2, each as one multiply followed by
+    // three fused multiply-adds.
+    __m128 rows[4];
+    for (size_t row = 0; row < 4; ++row)
+    {
+        const XMVECTOR lhs = M1.r[row];
+
+        // Splat each component of the left-hand row across a full vector.
+        const XMVECTOR splatX = _mm_permute_ps(lhs,_MM_SHUFFLE(0,0,0,0));
+        const XMVECTOR splatY = _mm_permute_ps(lhs,_MM_SHUFFLE(1,1,1,1));
+        const XMVECTOR splatZ = _mm_permute_ps(lhs,_MM_SHUFFLE(2,2,2,2));
+        const XMVECTOR splatW = _mm_permute_ps(lhs,_MM_SHUFFLE(3,3,3,3));
+
+        XMVECTOR acc = _mm_mul_ps(splatX,M2.r[0]);
+        acc = _mm_fmadd_ps(splatY,M2.r[1],acc);
+        acc = _mm_fmadd_ps(splatZ,M2.r[2],acc);
+        acc = _mm_fmadd_ps(splatW,M2.r[3],acc);
+
+        rows[row] = acc;
+    }
+
+    // Step 2: transpose the product with two rounds of shuffles.
+    // Below, x/y/z/w before the dot name product rows 0..3 and after the dot name components.
+
+    // x.x,x.y,y.x,y.y
+    const XMVECTOR lowXY = _mm_shuffle_ps(rows[0],rows[1],_MM_SHUFFLE(1,0,1,0));
+    // x.z,x.w,y.z,y.w
+    const XMVECTOR highXY = _mm_shuffle_ps(rows[0],rows[1],_MM_SHUFFLE(3,2,3,2));
+    // z.x,z.y,w.x,w.y
+    const XMVECTOR lowZW = _mm_shuffle_ps(rows[2],rows[3],_MM_SHUFFLE(1,0,1,0));
+    // z.z,z.w,w.z,w.w
+    const XMVECTOR highZW = _mm_shuffle_ps(rows[2],rows[3],_MM_SHUFFLE(3,2,3,2));
+
+    XMMATRIX transposed;
+    // x.x,y.x,z.x,w.x
+    transposed.r[0] = _mm_shuffle_ps(lowXY, lowZW,_MM_SHUFFLE(2,0,2,0));
+    // x.y,y.y,z.y,w.y
+    transposed.r[1] = _mm_shuffle_ps(lowXY, lowZW,_MM_SHUFFLE(3,1,3,1));
+    // x.z,y.z,z.z,w.z
+    transposed.r[2] = _mm_shuffle_ps(highXY, highZW,_MM_SHUFFLE(2,0,2,0));
+    // x.w,y.w,z.w,w.w
+    transposed.r[3] = _mm_shuffle_ps(highXY, highZW,_MM_SHUFFLE(3,1,3,1));
+    return transposed;
+}
+
+} // namespace FMA3
+
+} // namespace DirectX
--- a/vendor/directxmath-3.19.0/Extensions/DirectXMathFMA4.h
+++ b/vendor/directxmath-3.19.0/Extensions/DirectXMathFMA4.h
@ -0,0 +1,415 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathFMA4.h -- FMA4 extensions for SIMD C++ Math library
+//
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
+#error FMA4 not supported on ARM platform
+#endif
+
+#include <DirectXMath.h>
+#include <ammintrin.h>
+
+#ifdef __GNUC__
+#include <x86intrin.h>
+#endif
+
+namespace DirectX
+{
+
+namespace FMA4
+{
+
+// Runtime check for FMA4: true only when CPUID reports both AVX and OSXSAVE (leaf 1)
+// and the FMA4 bit (extended leaf 0x80000001). Should return true for AMD Bulldozer processors
+// with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012).
+inline bool XMVerifyFMA4Support()
+{
+    // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
+    int regs[4] = { -1 }; // EAX, EBX, ECX, EDX as filled in by __cpuid
+
+    // Leaf 0: EAX reports the highest standard leaf; leaf 1 must be available.
+#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
+    __cpuid(0, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 0);
+#endif
+    if (regs[0] < 1)
+        return false;
+
+    // Leaf 1: ECX must have both AVX and OSXSAVE set (required to access FMA4).
+#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
+    __cpuid(1, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 1);
+#endif
+    const int kAvxAndOsxsave = 0x18000000;
+    if ((regs[2] & kAvxAndOsxsave) != kAvxAndOsxsave)
+        return false;
+
+    // Leaf 0x80000000: EAX reports the highest extended leaf; 0x80000001 must be available.
+#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
+    __cpuid(0x80000000, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 0x80000000);
+#endif
+    if (uint32_t(regs[0]) < 0x80000001u)
+        return false;
+
+    // Leaf 0x80000001: the FMA4 flag is ECX bit 16.
+#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
+    __cpuid(0x80000001, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 0x80000001);
+#endif
+    return (regs[2] & 0x10000) != 0;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector
+//-------------------------------------------------------------------------------------
+
+// Fused multiply-add: each lane of the result is (V1 * V2) + V3,
+// computed with the FMA4 _mm_macc_ps intrinsic.
+inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd(
+    FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR V3
+)
+{
+    const XMVECTOR vFused = _mm_macc_ps(V1, V2, V3);
+    return vFused;
+}
+
+// Fused negative multiply-add: each lane of the result is V3 - (V1 * V2),
+// computed with the FMA4 _mm_nmacc_ps intrinsic.
+inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract(
+    FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR V3
+)
+{
+    const XMVECTOR vFused = _mm_nmacc_ps(V1, V2, V3);
+    return vFused;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector2
+//-------------------------------------------------------------------------------------
+
+// Transforms the 2D point (V.x, V.y) by M: V.x * M.r[0] + V.y * M.r[1] + M.r[3].
+// The Z and W lanes of V are not read.
+inline XMVECTOR XM_CALLCONV XMVector2Transform(
+    FXMVECTOR V, CXMMATRIX M
+)
+{
+    const XMVECTOR vSplatY = _mm_permute_ps(V, _MM_SHUFFLE(1, 1, 1, 1));
+    const XMVECTOR vSplatX = _mm_permute_ps(V, _MM_SHUFFLE(0, 0, 0, 0));
+    // Fold the Y term onto M.r[3] first, then accumulate the X term.
+    const XMVECTOR vYPlusRow3 = _mm_macc_ps(vSplatY, M.r[1], M.r[3]);
+    return _mm_macc_ps(vSplatX, M.r[0], vYPlusRow3);
+}
+
+// Transforms the 2D point (V.x, V.y) by M (V.x * M.r[0] + V.y * M.r[1] + M.r[3]) and then
+// divides every lane of that result by its W lane. There is no guard against W == 0.
+inline XMVECTOR XM_CALLCONV XMVector2TransformCoord(
+    FXMVECTOR V, CXMMATRIX M
+)
+{
+    const XMVECTOR vSplatY = _mm_permute_ps(V, _MM_SHUFFLE(1, 1, 1, 1));
+    const XMVECTOR vSplatX = _mm_permute_ps(V, _MM_SHUFFLE(0, 0, 0, 0));
+    const XMVECTOR vYPlusRow3 = _mm_macc_ps(vSplatY, M.r[1], M.r[3]);
+    const XMVECTOR vTransformed = _mm_macc_ps(vSplatX, M.r[0], vYPlusRow3);
+    // Broadcast the transformed W and divide through by it.
+    const XMVECTOR vSplatW = _mm_permute_ps(vTransformed, _MM_SHUFFLE(3, 3, 3, 3));
+    return _mm_div_ps(vTransformed, vSplatW);
+}
+
+// Transforms the 2D direction (V.x, V.y) by M: V.x * M.r[0] + V.y * M.r[1].
+// Unlike XMVector2Transform, M.r[3] is not added.
+inline XMVECTOR XM_CALLCONV XMVector2TransformNormal(
+    FXMVECTOR V, CXMMATRIX M
+)
+{
+    const XMVECTOR vSplatY = _mm_permute_ps(V, _MM_SHUFFLE(1, 1, 1, 1));
+    const XMVECTOR vSplatX = _mm_permute_ps(V, _MM_SHUFFLE(0, 0, 0, 0));
+    // Plain multiply for the Y term, fused multiply-add for the X term.
+    const XMVECTOR vYTerm = _mm_mul_ps(vSplatY, M.r[1]);
+    return _mm_macc_ps(vSplatX, M.r[0], vYTerm);
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector3
+//-------------------------------------------------------------------------------------
+
+// Transforms the 3D point (V.x, V.y, V.z) by M:
+// V.x * M.r[0] + V.y * M.r[1] + V.z * M.r[2] + M.r[3]. The W lane of V is not read.
+inline XMVECTOR XM_CALLCONV XMVector3Transform(
+    FXMVECTOR V, CXMMATRIX M
+)
+{
+    const XMVECTOR vSplatZ = _mm_permute_ps(V, _MM_SHUFFLE(2, 2, 2, 2));
+    const XMVECTOR vSplatY = _mm_permute_ps(V, _MM_SHUFFLE(1, 1, 1, 1));
+    const XMVECTOR vSplatX = _mm_permute_ps(V, _MM_SHUFFLE(0, 0, 0, 0));
+    // Accumulate in Z, Y, X order, starting from M.r[3].
+    const XMVECTOR vZPlusRow3 = _mm_macc_ps(vSplatZ, M.r[2], M.r[3]);
+    const XMVECTOR vYZPlusRow3 = _mm_macc_ps(vSplatY, M.r[1], vZPlusRow3);
+    return _mm_macc_ps(vSplatX, M.r[0], vYZPlusRow3);
+}
+
+// Transforms the 3D point (V.x, V.y, V.z) by M (including M.r[3]) and then divides every
+// lane of that result by its W lane. There is no guard against W == 0.
+inline XMVECTOR XM_CALLCONV XMVector3TransformCoord(
+    FXMVECTOR V, CXMMATRIX M
+)
+{
+    const XMVECTOR vSplatZ = _mm_permute_ps(V, _MM_SHUFFLE(2, 2, 2, 2));
+    const XMVECTOR vSplatY = _mm_permute_ps(V, _MM_SHUFFLE(1, 1, 1, 1));
+    const XMVECTOR vSplatX = _mm_permute_ps(V, _MM_SHUFFLE(0, 0, 0, 0));
+    const XMVECTOR vZPlusRow3 = _mm_macc_ps(vSplatZ, M.r[2], M.r[3]);
+    const XMVECTOR vYZPlusRow3 = _mm_macc_ps(vSplatY, M.r[1], vZPlusRow3);
+    const XMVECTOR vTransformed = _mm_macc_ps(vSplatX, M.r[0], vYZPlusRow3);
+    // Broadcast the transformed W and divide through by it.
+    const XMVECTOR vSplatW = _mm_permute_ps(vTransformed, _MM_SHUFFLE(3, 3, 3, 3));
+    return _mm_div_ps(vTransformed, vSplatW);
+}
+
+// Transforms the 3D direction (V.x, V.y, V.z) by M:
+// V.x * M.r[0] + V.y * M.r[1] + V.z * M.r[2]. M.r[3] is not added.
+inline XMVECTOR XM_CALLCONV XMVector3TransformNormal(
+    FXMVECTOR V, CXMMATRIX M
+)
+{
+    const XMVECTOR vSplatZ = _mm_permute_ps(V, _MM_SHUFFLE(2, 2, 2, 2));
+    const XMVECTOR vSplatY = _mm_permute_ps(V, _MM_SHUFFLE(1, 1, 1, 1));
+    const XMVECTOR vSplatX = _mm_permute_ps(V, _MM_SHUFFLE(0, 0, 0, 0));
+    // Plain multiply for the Z term, fused multiply-adds for Y and X.
+    const XMVECTOR vZTerm = _mm_mul_ps(vSplatZ, M.r[2]);
+    const XMVECTOR vYZTerm = _mm_macc_ps(vSplatY, M.r[1], vZTerm);
+    return _mm_macc_ps(vSplatX, M.r[0], vYZTerm);
+}
+
+XMMATRIX XM_CALLCONV XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2); // forward declaration: defined later in this namespace, called by XMVector3Project and XMVector3Unproject below
+
+// Projects V into viewport space using the FMA4 helpers of this namespace:
+//   1. transforms V by World * View * Projection, including the divide by W;
+//   2. multiplies by (Width/2, -Height/2, MaxZ - MinZ, 0) and adds
+//      (X + Width/2, Y + Height/2, MinZ, 0).
+inline XMVECTOR XM_CALLCONV XMVector3Project(
+    FXMVECTOR V,
+    float     ViewportX,
+    float     ViewportY,
+    float     ViewportWidth,
+    float     ViewportHeight,
+    float     ViewportMinZ,
+    float     ViewportMaxZ,
+    CXMMATRIX Projection,
+    CXMMATRIX View,
+    CXMMATRIX World)
+{
+    const float halfWidth = ViewportWidth * 0.5f;
+    const float halfHeight = ViewportHeight * 0.5f;
+    const XMVECTOR vScale = XMVectorSet(halfWidth, -halfHeight, ViewportMaxZ - ViewportMinZ, 0.0f);
+    const XMVECTOR vOffset = XMVectorSet(ViewportX + halfWidth, ViewportY + halfHeight, ViewportMinZ, 0.0f);
+
+    // Combined transform: World, then View, then Projection.
+    const XMMATRIX mWorldView = FMA4::XMMatrixMultiply(World, View);
+    const XMMATRIX mWorldViewProj = FMA4::XMMatrixMultiply(mWorldView, Projection);
+
+    // Transform with perspective divide, then apply the viewport scale and offset.
+    const XMVECTOR vProjected = FMA4::XMVector3TransformCoord(V, mWorldViewProj);
+    return FMA4::XMVectorMultiplyAdd(vProjected, vScale, vOffset);
+}
+
+// Inverse of XMVector3Project: undoes the viewport scale/offset on V, then transforms the
+// result by the inverse of World * View * Projection (with the divide by W).
+inline XMVECTOR XM_CALLCONV XMVector3Unproject(
+    FXMVECTOR V,
+    float     ViewportX,
+    float     ViewportY,
+    float     ViewportWidth,
+    float     ViewportHeight,
+    float     ViewportMinZ,
+    float     ViewportMaxZ,
+    CXMMATRIX Projection,
+    CXMMATRIX View,
+    CXMMATRIX World)
+{
+    static const XMVECTORF32 kBias = { { { -1.0f, 1.0f, 0.0f, 0.0f } } };
+
+    // Reciprocal of the viewport scale; the W lane is set to 1.0f before taking the reciprocal.
+    const XMVECTOR vScale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f);
+    const XMVECTOR vInvScale = XMVectorReciprocal(vScale);
+    const XMVECTOR vNegOrigin = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f);
+    const XMVECTOR vOffset = FMA4::XMVectorMultiplyAdd(vInvScale, vNegOrigin, kBias.v);
+
+    // Invert the combined World * View * Projection transform.
+    const XMMATRIX mWorldView = FMA4::XMMatrixMultiply(World, View);
+    const XMMATRIX mWorldViewProj = FMA4::XMMatrixMultiply(mWorldView, Projection);
+    const XMMATRIX mInverse = XMMatrixInverse(nullptr, mWorldViewProj);
+
+    const XMVECTOR vUnscaled = FMA4::XMVectorMultiplyAdd(V, vInvScale, vOffset);
+    return FMA4::XMVector3TransformCoord(vUnscaled, mInverse);
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector4
+//-------------------------------------------------------------------------------------
+
+// Transforms the full 4D vector V by M:
+// V.x * M.r[0] + V.y * M.r[1] + V.z * M.r[2] + V.w * M.r[3].
+inline XMVECTOR XM_CALLCONV XMVector4Transform(
+    FXMVECTOR V, CXMMATRIX M
+)
+{
+    const XMVECTOR vSplatW = _mm_permute_ps(V, _MM_SHUFFLE(3, 3, 3, 3));
+    const XMVECTOR vSplatZ = _mm_permute_ps(V, _MM_SHUFFLE(2, 2, 2, 2));
+    const XMVECTOR vSplatY = _mm_permute_ps(V, _MM_SHUFFLE(1, 1, 1, 1));
+    const XMVECTOR vSplatX = _mm_permute_ps(V, _MM_SHUFFLE(0, 0, 0, 0));
+    // Plain multiply for the W term, then fused multiply-adds in Z, Y, X order.
+    const XMVECTOR vWTerm = _mm_mul_ps(vSplatW, M.r[3]);
+    const XMVECTOR vZWTerm = _mm_macc_ps(vSplatZ, M.r[2], vWTerm);
+    const XMVECTOR vYZWTerm = _mm_macc_ps(vSplatY, M.r[1], vZWTerm);
+    return _mm_macc_ps(vSplatX, M.r[0], vYZWTerm);
+}
+
+
+//-------------------------------------------------------------------------------------
+// Matrix
+//-------------------------------------------------------------------------------------
+
+inline XMMATRIX XM_CALLCONV XMMatrixMultiply // FMA4 version: returns the 4x4 product M1 * M2, built one result row at a time
+(
+    CXMMATRIX M1, // left operand; each of its rows is splatted lane by lane
+    CXMMATRIX M2 // right operand; its rows are the multiply-accumulate inputs
+)
+{
+    XMMATRIX mResult;
+    // Use vW to hold the original row
+    XMVECTOR vW = M1.r[0];
+    // Splat the component X,Y,Z then W
+    XMVECTOR vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
+    XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+    XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+    vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); // overwritten last: the three splats above still read the original row from vW
+    // Perform the operation on the first row
+    vX = _mm_mul_ps(vX,M2.r[0]);
+    vX = _mm_macc_ps(vY,M2.r[1],vX);
+    vX = _mm_macc_ps(vZ,M2.r[2],vX);
+    vX = _mm_macc_ps(vW,M2.r[3],vX);
+    mResult.r[0] = vX; // result row 0 = x*M2.r[0] + y*M2.r[1] + z*M2.r[2] + w*M2.r[3], with (x,y,z,w) = M1.r[0]
+    // Repeat for the other 3 rows
+    vW = M1.r[1]; // second row of M1
+    vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
+    vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+    vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+    vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+    vX = _mm_mul_ps(vX,M2.r[0]);
+    vX = _mm_macc_ps(vY,M2.r[1],vX);
+    vX = _mm_macc_ps(vZ,M2.r[2],vX);
+    vX = _mm_macc_ps(vW,M2.r[3],vX);
+    mResult.r[1] = vX;
+    vW = M1.r[2]; // third row of M1
+    vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
+    vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+    vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+    vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+    vX = _mm_mul_ps(vX,M2.r[0]);
+    vX = _mm_macc_ps(vY,M2.r[1],vX);
+    vX = _mm_macc_ps(vZ,M2.r[2],vX);
+    vX = _mm_macc_ps(vW,M2.r[3],vX);
+    mResult.r[2] = vX;
+    vW = M1.r[3]; // fourth row of M1
+    vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
+    vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+    vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+    vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+    vX = _mm_mul_ps(vX,M2.r[0]);
+    vX = _mm_macc_ps(vY,M2.r[1],vX);
+    vX = _mm_macc_ps(vZ,M2.r[2],vX);
+    vX = _mm_macc_ps(vW,M2.r[3],vX);
+    mResult.r[3] = vX;
+    return mResult;
+}
+
+inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose // FMA4 version: returns the transpose of the 4x4 product M1 * M2
+(
+    FXMMATRIX M1, // left operand; each of its rows is splatted lane by lane
+    CXMMATRIX M2 // right operand; its rows are the multiply-accumulate inputs
+)
+{
+    // Use vW to hold the original row
+    XMVECTOR vW = M1.r[0];
+    // Splat the component X,Y,Z then W
+    XMVECTOR vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
+    XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+    XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+    vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); // overwritten last: the three splats above still read the original row from vW
+    // Perform the operation on the first row
+    vX = _mm_mul_ps(vX,M2.r[0]);
+    vX = _mm_macc_ps(vY,M2.r[1],vX);
+    vX = _mm_macc_ps(vZ,M2.r[2],vX);
+    vX = _mm_macc_ps(vW,M2.r[3],vX);
+    __m128 r0 = vX; // row 0 of the (untransposed) product
+    // Repeat for the other 3 rows
+    vW = M1.r[1];
+    vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
+    vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+    vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+    vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+    vX = _mm_mul_ps(vX,M2.r[0]);
+    vX = _mm_macc_ps(vY,M2.r[1],vX);
+    vX = _mm_macc_ps(vZ,M2.r[2],vX);
+    vX = _mm_macc_ps(vW,M2.r[3],vX);
+    __m128 r1 = vX; // row 1 of the product
+    vW = M1.r[2];
+    vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
+    vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+    vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+    vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+    vX = _mm_mul_ps(vX,M2.r[0]);
+    vX = _mm_macc_ps(vY,M2.r[1],vX);
+    vX = _mm_macc_ps(vZ,M2.r[2],vX);
+    vX = _mm_macc_ps(vW,M2.r[3],vX);
+    __m128 r2 = vX; // row 2 of the product
+    vW = M1.r[3];
+    vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
+    vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+    vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+    vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+    vX = _mm_mul_ps(vX,M2.r[0]);
+    vX = _mm_macc_ps(vY,M2.r[1],vX);
+    vX = _mm_macc_ps(vZ,M2.r[2],vX);
+    vX = _mm_macc_ps(vW,M2.r[3],vX);
+    __m128 r3 = vX; // row 3 of the product
+
+    // x.x,x.y,y.x,y.y   (x,y,z,w below name product rows r0..r3; the suffix is the lane)
+    XMVECTOR vTemp1 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(1,0,1,0));
+    // x.z,x.w,y.z,y.w
+    XMVECTOR vTemp3 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(3,2,3,2));
+    // z.x,z.y,w.x,w.y
+    XMVECTOR vTemp2 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(1,0,1,0));
+    // z.z,z.w,w.z,w.w
+    XMVECTOR vTemp4 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(3,2,3,2));
+
+    XMMATRIX mResult;
+    // x.x,y.x,z.x,w.x   (column 0 of the product becomes result row 0)
+    mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
+    // x.y,y.y,z.y,w.y
+    mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
+    // x.z,y.z,z.z,w.z
+    mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
+    // x.w,y.w,z.w,w.w
+    mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
+    return mResult;
+}
+
+} // namespace FMA4
+
+} // namespace DirectX
--- a/vendor/directxmath-3.19.0/Extensions/DirectXMathSSE3.h
+++ b/vendor/directxmath-3.19.0/Extensions/DirectXMathSSE3.h
@ -0,0 +1,111 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathSSE3.h -- SSE3 extensions for SIMD C++ Math library
+//
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
+#error SSE3 not supported on ARM platform
+#endif
+
+#include <pmmintrin.h>
+
+#include <DirectXMath.h>
+
+namespace DirectX
+{
+
+namespace SSE3
+{
+
+// Runtime check for SSE3 through CPUID leaf 1.
+// Should return true on AMD Athlon 64, AMD Phenom, and Intel Pentium 4 or later processors.
+inline bool XMVerifySSE3Support()
+{
+    // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
+    int regs[4] = { -1 }; // EAX, EBX, ECX, EDX as filled in by __cpuid
+    // Leaf 0: EAX reports the highest standard leaf; leaf 1 must be available.
+#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
+    __cpuid(0, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 0);
+#endif
+    if (regs[0] < 1)
+        return false;
+
+#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
+    __cpuid(1, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 1);
+#endif
+    // Only the SSE3 flag (ECX bit 0) is tested. SSSE3 instructions are not used.
+    return (regs[2] & 0x1) != 0;
+}
+
+// 2D dot product using the SSE3 horizontal add.
+// V1.x*V2.x + V1.y*V2.y is returned in all four lanes.
+inline XMVECTOR XM_CALLCONV XMVector2Dot(
+    FXMVECTOR V1, FXMVECTOR V2
+)
+{
+    const XMVECTOR vProducts = _mm_mul_ps(V1, V2);
+    const XMVECTOR vPairSums = _mm_hadd_ps(vProducts, vProducts); // lane 0 = x product + y product
+    return _mm_shuffle_ps(vPairSums, vPairSums, _MM_SHUFFLE(0, 0, 0, 0));
+}
+
+// Squared length of (V.x, V.y): the SSE3 2D dot product of V with itself,
+// so the value is present in every lane.
+inline XMVECTOR XM_CALLCONV XMVector2LengthSq(FXMVECTOR V)
+{ return SSE3::XMVector2Dot(V, V); }
+
+// 3D dot product using two SSE3 horizontal adds.
+// The lane products are masked with g_XMMask3 first so the W product does not contribute.
+inline XMVECTOR XM_CALLCONV XMVector3Dot(
+    FXMVECTOR V1, FXMVECTOR V2
+)
+{
+    const XMVECTOR vProducts = _mm_mul_ps(V1, V2);
+    const XMVECTOR vMasked = _mm_and_ps(vProducts, g_XMMask3);
+    const XMVECTOR vPairSums = _mm_hadd_ps(vMasked, vMasked);
+    return _mm_hadd_ps(vPairSums, vPairSums); // second pass sums the pairs into every lane
+}
+
+// Squared length of (V.x, V.y, V.z): the SSE3 3D dot product of V with itself,
+// so the value is present in every lane.
+inline XMVECTOR XM_CALLCONV XMVector3LengthSq(FXMVECTOR V)
+{ return SSE3::XMVector3Dot(V, V); }
+
+// 4D dot product using two SSE3 horizontal adds.
+// The sum of all four lane products is returned in every lane.
+inline XMVECTOR XM_CALLCONV XMVector4Dot(
+    FXMVECTOR V1, FXMVECTOR V2
+)
+{
+    const XMVECTOR vProducts = _mm_mul_ps(V1, V2);
+    const XMVECTOR vPairSums = _mm_hadd_ps(vProducts, vProducts); // (x+y, z+w, x+y, z+w)
+    return _mm_hadd_ps(vPairSums, vPairSums);
+}
+
+// Squared length of the full 4D vector: the SSE3 4D dot product of V with itself,
+// so the value is present in every lane.
+inline XMVECTOR XM_CALLCONV XMVector4LengthSq(FXMVECTOR V)
+{ return SSE3::XMVector4Dot(V, V); }
+
+// Returns (V.x, V.x, V.z, V.z) using the SSE3 _mm_moveldup_ps intrinsic,
+// which duplicates the even-indexed lanes.
+inline XMVECTOR XM_CALLCONV XMVectorSwizzle_0022(FXMVECTOR V)
+{ return _mm_moveldup_ps(V); }
+
+// Returns (V.y, V.y, V.w, V.w) using the SSE3 _mm_movehdup_ps intrinsic,
+// which duplicates the odd-indexed lanes.
+inline XMVECTOR XM_CALLCONV XMVectorSwizzle_1133(FXMVECTOR V)
+{ return _mm_movehdup_ps(V); }
+
+} // namespace SSE3
+
+} // namespace DirectX
--- a/vendor/directxmath-3.19.0/Extensions/DirectXMathSSE4.h
+++ b/vendor/directxmath-3.19.0/Extensions/DirectXMathSSE4.h
@ -0,0 +1,417 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathSSE4.h -- SSE4.1 extensions for SIMD C++ Math library
+//
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
+#error SSE4 not supported on ARM platform
+#endif
+
+#include <smmintrin.h>
+
+#include <DirectXMath.h>
+
+namespace DirectX
+{
+
+namespace SSE4
+{
+
+// Runtime check for SSE4.1 through CPUID leaf 1. Should return true on AMD Bulldozer,
+// Intel Core 2 ("Penryn"), and Intel Core i7 ("Nehalem") or later processors.
+inline bool XMVerifySSE4Support()
+{
+    // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
+    int regs[4] = { -1 }; // EAX, EBX, ECX, EDX as filled in by __cpuid
+    // Leaf 0: EAX reports the highest standard leaf; leaf 1 must be available.
+#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
+    __cpuid(0, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 0);
+#endif
+    if (regs[0] < 1)
+        return false;
+
+#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
+    __cpuid(1, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 1);
+#endif
+    // Only the SSE4.1 flag (ECX bit 19) is tested. SSE4.2 instructions are not used.
+    return (regs[2] & 0x80000) != 0;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector
+//-------------------------------------------------------------------------------------
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wundefined-reinterpret-cast"
+#endif
+
+inline void XM_CALLCONV XMVectorGetYPtr(_Out_ float *y, _In_ FXMVECTOR V) // stores lane 1 (Y) of V into *y; y must not be null
+{
+    assert( y != nullptr );
+    _MM_EXTRACT_FLOAT( *y, V, 1 ); // typed float store; the former int* cast of y broke strict aliasing on GCC/MinGW/clang
+}
+
+inline void XM_CALLCONV XMVectorGetZPtr(_Out_ float *z, _In_ FXMVECTOR V) // stores lane 2 (Z) of V into *z; z must not be null
+{
+    assert( z != nullptr );
+    _MM_EXTRACT_FLOAT( *z, V, 2 ); // typed float store; the former int* cast of z broke strict aliasing on GCC/MinGW/clang
+}
+
+inline void XM_CALLCONV XMVectorGetWPtr(_Out_ float *w, _In_ FXMVECTOR V) // stores lane 3 (W) of V into *w; w must not be null
+{
+    assert( w != nullptr );
+    _MM_EXTRACT_FLOAT( *w, V, 3 ); // typed float store; the former int* cast of w broke strict aliasing on GCC/MinGW/clang
+}
+
+// Returns the raw 32-bit pattern held in lane 1 (Y) of V as an unsigned integer.
+inline uint32_t XM_CALLCONV XMVectorGetIntY(FXMVECTOR V)
+{
+    return static_cast<uint32_t>(_mm_extract_epi32(_mm_castps_si128(V), 1));
+}
+
+// Returns the raw 32-bit pattern held in lane 2 (Z) of V as an unsigned integer.
+inline uint32_t XM_CALLCONV XMVectorGetIntZ(FXMVECTOR V)
+{
+    return static_cast<uint32_t>(_mm_extract_epi32(_mm_castps_si128(V), 2));
+}
+
+// Returns the raw 32-bit pattern held in lane 3 (W) of V as an unsigned integer.
+inline uint32_t XM_CALLCONV XMVectorGetIntW(FXMVECTOR V)
+{
+    return static_cast<uint32_t>(_mm_extract_epi32(_mm_castps_si128(V), 3));
+}
+
+// Stores the raw 32-bit pattern of lane 1 (Y) of V into *y; y must not be null.
+inline void XM_CALLCONV XMVectorGetIntYPtr(_Out_ uint32_t *y, _In_ FXMVECTOR V)
+{
+    assert(y != nullptr);
+    *y = static_cast<uint32_t>(_mm_extract_epi32(_mm_castps_si128(V), 1));
+}
+
+// Stores the raw 32-bit pattern of lane 2 (Z) of V into *z; z must not be null.
+inline void XM_CALLCONV XMVectorGetIntZPtr(_Out_ uint32_t *z, _In_ FXMVECTOR V)
+{
+    assert(z != nullptr);
+    *z = static_cast<uint32_t>(_mm_extract_epi32(_mm_castps_si128(V), 2));
+}
+
+// Stores the raw 32-bit pattern of lane 3 (W) of V into *w; w must not be null.
+inline void XM_CALLCONV XMVectorGetIntWPtr(_Out_ uint32_t *w, _In_ FXMVECTOR V)
+{
+    assert(w != nullptr);
+    *w = static_cast<uint32_t>(_mm_extract_epi32(_mm_castps_si128(V), 3));
+}
+
+// Returns a copy of V with lane 1 (Y) replaced by y.
+inline XMVECTOR XM_CALLCONV XMVectorSetY(FXMVECTOR V, float y)
+{
+    const XMVECTOR vScalar = _mm_set_ss(y); // y sits in lane 0
+    return _mm_insert_ps(V, vScalar, 0x10); // 0x10: copy source lane 0 into destination lane 1
+}
+
+// Returns a copy of V with lane 2 (Z) replaced by z.
+inline XMVECTOR XM_CALLCONV XMVectorSetZ(FXMVECTOR V, float z)
+{
+    const XMVECTOR vScalar = _mm_set_ss(z); // z sits in lane 0
+    return _mm_insert_ps(V, vScalar, 0x20); // 0x20: copy source lane 0 into destination lane 2
+}
+
+// Returns a copy of V with lane 3 (W) replaced by w.
+inline XMVECTOR XM_CALLCONV XMVectorSetW(FXMVECTOR V, float w)
+{
+    const XMVECTOR vScalar = _mm_set_ss(w); // w sits in lane 0
+    return _mm_insert_ps(V, vScalar, 0x30); // 0x30: copy source lane 0 into destination lane 3
+}
+
+// Returns a copy of V whose lane 1 (Y) holds the raw 32-bit pattern y.
+inline XMVECTOR XM_CALLCONV XMVectorSetIntY(FXMVECTOR V, uint32_t y)
+{
+    const __m128i vPatched = _mm_insert_epi32(_mm_castps_si128(V), static_cast<int>(y), 1);
+    return _mm_castsi128_ps(vPatched);
+}
+
+// Returns a copy of V whose lane 2 (Z) holds the raw 32-bit pattern z.
+inline XMVECTOR XM_CALLCONV XMVectorSetIntZ(FXMVECTOR V, uint32_t z)
+{
+    const __m128i vPatched = _mm_insert_epi32(_mm_castps_si128(V), static_cast<int>(z), 2);
+    return _mm_castsi128_ps(vPatched);
+}
+
+// Returns a copy of V whose lane 3 (W) holds the raw 32-bit pattern w.
+inline XMVECTOR XM_CALLCONV XMVectorSetIntW(FXMVECTOR V, uint32_t w)
+{
+    const __m128i vPatched = _mm_insert_epi32(_mm_castps_si128(V), static_cast<int>(w), 3);
+    return _mm_castsi128_ps(vPatched);
+}
+
+// Rounds every lane of V with SSE4.1 _mm_round_ps in round-to-nearest mode,
+// with floating-point exceptions suppressed (_MM_FROUND_NO_EXC).
+inline XMVECTOR XM_CALLCONV XMVectorRound(FXMVECTOR V)
+{ return _mm_round_ps(V, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); }
+
+// Rounds every lane of V toward zero with SSE4.1 _mm_round_ps,
+// with floating-point exceptions suppressed (_MM_FROUND_NO_EXC).
+inline XMVECTOR XM_CALLCONV XMVectorTruncate(FXMVECTOR V)
+{ return _mm_round_ps(V, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); }
+
+// Applies the SSE4.1 _mm_floor_ps intrinsic to V:
+// every lane is rounded down toward negative infinity.
+inline XMVECTOR XM_CALLCONV XMVectorFloor(FXMVECTOR V)
+{ return _mm_floor_ps(V); }
+
+// Applies the SSE4.1 _mm_ceil_ps intrinsic to V:
+// every lane is rounded up toward positive infinity.
+inline XMVECTOR XM_CALLCONV XMVectorCeiling(FXMVECTOR V)
+{ return _mm_ceil_ps(V); }
+
+
+//-------------------------------------------------------------------------------------
+// Vector2
+//-------------------------------------------------------------------------------------
+
+// 2D dot product using SSE4.1 _mm_dp_ps. Mask 0x3f: the high nibble 0x3 multiplies only
+// lanes X and Y, the low nibble 0xf writes the sum to all four lanes.
+inline XMVECTOR XM_CALLCONV XMVector2Dot(FXMVECTOR V1, FXMVECTOR V2)
+{ return _mm_dp_ps(V1, V2, 0x3f); }
+
+// Squared length of (V.x, V.y): the SSE4 2D dot product of V with itself,
+// so the value is present in every lane.
+inline XMVECTOR XM_CALLCONV XMVector2LengthSq(FXMVECTOR V)
+{ return SSE4::XMVector2Dot(V, V); }
+
+// Estimate of 1 / |V.xy| in every lane: approximate _mm_rsqrt_ps applied to the 2D dot product of V with itself.
+inline XMVECTOR XM_CALLCONV XMVector2ReciprocalLengthEst(FXMVECTOR V)
+{
+    return _mm_rsqrt_ps(_mm_dp_ps(V, V, 0x3f));
+}
+
+// 1 / |V.xy| in every lane, computed with a full-precision square root followed by a divide.
+inline XMVECTOR XM_CALLCONV XMVector2ReciprocalLength(FXMVECTOR V)
+{
+    const XMVECTOR vLength = _mm_sqrt_ps(_mm_dp_ps(V, V, 0x3f));
+    return _mm_div_ps(g_XMOne, vLength);
+}
+
+// |V.xy| in every lane. Despite the Est suffix this takes the full-precision _mm_sqrt_ps of the 2D dot product.
+inline XMVECTOR XM_CALLCONV XMVector2LengthEst(FXMVECTOR V)
+{
+    return _mm_sqrt_ps(_mm_dp_ps(V, V, 0x3f));
+}
+
+// |V.xy| in every lane: square root of the 2D dot product of V with itself.
+inline XMVECTOR XM_CALLCONV XMVector2Length(FXMVECTOR V)
+{
+    return _mm_sqrt_ps(_mm_dp_ps(V, V, 0x3f));
+}
+
+// Approximate normalization: scales V by the _mm_rsqrt_ps estimate of 1 / |V.xy|. No zero or infinity handling.
+inline XMVECTOR XM_CALLCONV XMVector2NormalizeEst(FXMVECTOR V)
+{
+    const XMVECTOR vInvLength = _mm_rsqrt_ps(_mm_dp_ps(V, V, 0x3f));
+    return _mm_mul_ps(vInvLength, V);
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2Normalize( FXMVECTOR V ) // returns V / |V.xy|; all zeros if the length is 0, QNaN if the squared length is infinite
+{
+    XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x3f ); // x*x + y*y in every lane
+    // Prepare for the division
+    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
+    // Create zero with a single instruction
+    XMVECTOR vZeroMask = _mm_setzero_ps();
+    // Test for a divide by zero (Must be FP to detect -0.0)
+    vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
+    // Failsafe on zero (Or epsilon) length vectors
+    // vLengthSq is reused as a mask: all ones where the squared length is not infinity
+    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
+    // Divide to perform the normalization
+    vResult = _mm_div_ps(V,vResult);
+    // Zero the lanes where the length was zero (the divide by zero is discarded)
+    vResult = _mm_and_ps(vResult,vZeroMask);
+    // Select qnan or result based on infinite length
+    XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
+    XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
+    vResult = _mm_or_ps(vTemp1,vTemp2);
+    return vResult;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector3
+//-------------------------------------------------------------------------------------
+
+// 3D dot product using SSE4.1 _mm_dp_ps. Mask 0x7f: the high nibble 0x7 multiplies only
+// lanes X, Y and Z, the low nibble 0xf writes the sum to all four lanes.
+inline XMVECTOR XM_CALLCONV XMVector3Dot(FXMVECTOR V1, FXMVECTOR V2)
+{ return _mm_dp_ps(V1, V2, 0x7f); }
+
+// Squared length of (V.x, V.y, V.z): the SSE4 3D dot product of V with itself,
+// so the value is present in every lane.
+inline XMVECTOR XM_CALLCONV XMVector3LengthSq(FXMVECTOR V)
+{ return SSE4::XMVector3Dot(V, V); }
+
+// Estimate of 1 / |V.xyz| in every lane: approximate _mm_rsqrt_ps applied to the 3D dot product of V with itself.
+inline XMVECTOR XM_CALLCONV XMVector3ReciprocalLengthEst(FXMVECTOR V)
+{
+    return _mm_rsqrt_ps(_mm_dp_ps(V, V, 0x7f));
+}
+
+// 1 / |V.xyz| in every lane, computed with a full-precision square root followed by a divide.
+inline XMVECTOR XM_CALLCONV XMVector3ReciprocalLength(FXMVECTOR V)
+{
+    const XMVECTOR vLength = _mm_sqrt_ps(_mm_dp_ps(V, V, 0x7f));
+    return _mm_div_ps(g_XMOne, vLength);
+}
+
+// |V.xyz| in every lane. Despite the Est suffix this takes the full-precision _mm_sqrt_ps of the 3D dot product.
+inline XMVECTOR XM_CALLCONV XMVector3LengthEst(FXMVECTOR V)
+{
+    return _mm_sqrt_ps(_mm_dp_ps(V, V, 0x7f));
+}
+
+// |V.xyz| in every lane: square root of the 3D dot product of V with itself.
+inline XMVECTOR XM_CALLCONV XMVector3Length(FXMVECTOR V)
+{
+    return _mm_sqrt_ps(_mm_dp_ps(V, V, 0x7f));
+}
+
+// Approximate normalization: scales V by the _mm_rsqrt_ps estimate of 1 / |V.xyz|. No zero or infinity handling.
+inline XMVECTOR XM_CALLCONV XMVector3NormalizeEst(FXMVECTOR V)
+{
+    const XMVECTOR vInvLength = _mm_rsqrt_ps(_mm_dp_ps(V, V, 0x7f));
+    return _mm_mul_ps(vInvLength, V);
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3Normalize( FXMVECTOR V ) // returns V / |V.xyz|; all zeros if the length is 0, QNaN if the squared length is infinite
+{
+    XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x7f ); // x*x + y*y + z*z in every lane
+    // Prepare for the division
+    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
+    // Create zero with a single instruction
+    XMVECTOR vZeroMask = _mm_setzero_ps();
+    // Test for a divide by zero (Must be FP to detect -0.0)
+    vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
+    // Failsafe on zero (Or epsilon) length vectors
+    // vLengthSq is reused as a mask: all ones where the squared length is not infinity
+    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
+    // Divide to perform the normalization
+    vResult = _mm_div_ps(V,vResult);
+    // Zero the lanes where the length was zero (the divide by zero is discarded)
+    vResult = _mm_and_ps(vResult,vZeroMask);
+    // Select qnan or result based on infinite length
+    XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
+    XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
+    vResult = _mm_or_ps(vTemp1,vTemp2);
+    return vResult;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector4
+//-------------------------------------------------------------------------------------
+
+// 4D dot product using SSE4.1 _mm_dp_ps. Mask 0xff: the high nibble 0xf multiplies all
+// four lanes, the low nibble 0xf writes the sum to all four lanes.
+inline XMVECTOR XM_CALLCONV XMVector4Dot(FXMVECTOR V1, FXMVECTOR V2)
+{ return _mm_dp_ps(V1, V2, 0xff); }
+
+// Squared length of the full 4D vector: the SSE4 4D dot product of V with itself,
+// so the value is present in every lane.
+inline XMVECTOR XM_CALLCONV XMVector4LengthSq(FXMVECTOR V)
+{ return SSE4::XMVector4Dot(V, V); }
+
+// Estimate of 1 / |V| in every lane: approximate _mm_rsqrt_ps applied to the 4D dot product of V with itself.
+inline XMVECTOR XM_CALLCONV XMVector4ReciprocalLengthEst(FXMVECTOR V)
+{
+    return _mm_rsqrt_ps(_mm_dp_ps(V, V, 0xff));
+}
+
+// 1 / |V| in every lane, computed with a full-precision square root followed by a divide.
+inline XMVECTOR XM_CALLCONV XMVector4ReciprocalLength(FXMVECTOR V)
+{
+    const XMVECTOR vLength = _mm_sqrt_ps(_mm_dp_ps(V, V, 0xff));
+    return _mm_div_ps(g_XMOne, vLength);
+}
+
+// |V| in every lane. Despite the Est suffix this takes the full-precision _mm_sqrt_ps of the 4D dot product.
+inline XMVECTOR XM_CALLCONV XMVector4LengthEst(FXMVECTOR V)
+{
+    return _mm_sqrt_ps(_mm_dp_ps(V, V, 0xff));
+}
+
+// |V| in every lane: square root of the 4D dot product of V with itself.
+inline XMVECTOR XM_CALLCONV XMVector4Length(FXMVECTOR V)
+{
+    return _mm_sqrt_ps(_mm_dp_ps(V, V, 0xff));
+}
+
+// Approximate normalization: scales V by the _mm_rsqrt_ps estimate of 1 / |V|. No zero or infinity handling.
+inline XMVECTOR XM_CALLCONV XMVector4NormalizeEst(FXMVECTOR V)
+{
+    const XMVECTOR vInvLength = _mm_rsqrt_ps(_mm_dp_ps(V, V, 0xff));
+    return _mm_mul_ps(vInvLength, V);
+}
+
+inline XMVECTOR XM_CALLCONV XMVector4Normalize( FXMVECTOR V ) // returns V / |V|; all zeros if the length is 0, QNaN if the squared length is infinite
+{
+    XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0xff ); // x*x + y*y + z*z + w*w in every lane
+    // Prepare for the division
+    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
+    // Create zero with a single instruction
+    XMVECTOR vZeroMask = _mm_setzero_ps();
+    // Test for a divide by zero (Must be FP to detect -0.0)
+    vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
+    // Failsafe on zero (Or epsilon) length vectors
+    // vLengthSq is reused as a mask: all ones where the squared length is not infinity
+    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
+    // Divide to perform the normalization
+    vResult = _mm_div_ps(V,vResult);
+    // Zero the lanes where the length was zero (the divide by zero is discarded)
+    vResult = _mm_and_ps(vResult,vZeroMask);
+    // Select qnan or result based on infinite length
+    XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
+    XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
+    vResult = _mm_or_ps(vTemp1,vTemp2);
+    return vResult;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Plane
+//-------------------------------------------------------------------------------------
+
+// Approximate plane normalization: scales all four lanes of P by the _mm_rsqrt_ps estimate of 1 / |P.xyz|.
+inline XMVECTOR XM_CALLCONV XMPlaneNormalizeEst(FXMVECTOR P)
+{
+    const XMVECTOR vInvLength = _mm_rsqrt_ps(_mm_dp_ps(P, P, 0x7f));
+    return _mm_mul_ps(vInvLength, P);
+}
+
+inline XMVECTOR XM_CALLCONV XMPlaneNormalize( FXMVECTOR P ) // divides all four lanes of P by |P.xyz|; returns all zeros if the squared length is infinite
+{
+    XMVECTOR vLengthSq = _mm_dp_ps( P, P, 0x7f ); // x*x + y*y + z*z in every lane (W is excluded from the length)
+    // Prepare for the division
+    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
+    // Note: there is no zero-length guard in this function; only an infinite length is handled
+    // vLengthSq is reused as a mask: all ones where the squared length is not infinity
+    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
+    // Divide to perform the normalization
+    vResult = _mm_div_ps(P,vResult);
+    // Where the squared length was infinity, set the result to zero
+    vResult = _mm_and_ps(vResult,vLengthSq);
+    return vResult;
+}
+
+} // namespace SSE4
+
+} // namespace DirectX
--- a/vendor/directxmath-3.19.0/HISTORY.md
+++ b/vendor/directxmath-3.19.0/HISTORY.md
@ -0,0 +1,208 @@
+# DirectXMath
+
+https://github.com/Microsoft/DirectXMath
+
+Release available for download on [GitHub](https://github.com/microsoft/DirectXMath/releases)
+
+## Release History
+
+### February 2024 (3.19)
+* Fix to address MinGW issue with ``__cpuid`` in cpuid.h vs. intrin.h
+* Additional updates for clang/LLVM and GNUC
+* Minor comment updates
+
+### December 2023 (3.18b)
+* Hot-fix to address ``-Wunsafe-buffer-usage`` warnings from clang v16
+* Hot-fix to address MinGW issue with ``__cpuid`` in cpuid.h vs. intrin.h
+* CMake project updates including pkg-config file generation
+
+### December 2022 (3.18)
+* C++20 spaceship operators for XMFLOAT2, XMFLOAT3, etc. when building with ``/std:c++20 /Zc:_cplusplus``
+* Improved conformance for ARM64 when using `/Zc:arm64-aliased-neon-types-`
+* Minor code review
+* CMake project updated to require 3.20 or later
+* Added Azure Dev Ops Pipeline YAML files
+
+### May 2022 (3.17b)
+* Hot-fix to address ``-Wreserved-identifier`` warnings with clang v13
+* C++20 spaceship operators for XMFLOAT2, XMFLOAT3, etc. when building with ``/std:c++20 /Zc:_cplusplus``
+* Minor CMake project update
+
+### January 2022 (3.17)
+* Added ColorsLinear namespace to DirectXColors.h with linear versions of .NET colors
+* Optimized the ``XMMatrixRotationRollPitchYaw(FromVector)`` functions
+* Fixed overread problem for 16bpp GPU types Load functions:
+  * ``XMUNIBBLE4``, ``XMU555``, ``XMU565``, ``XMBYTEN2``, ``XMBYTE2``, ``XMUBYTEN2``, ``XMUBYTE2``
+* ``XM_CACHE_LINE_SIZE`` updated for ARM/ARM64 targets to 128 bytes
+* A few comments added to improve IntelliSense experience
+* Conformance improvements for GNU compiler
+* Minor code cleanup
+
+### January 2021 (3.16b)
+* Hot-fixes to resolve build breaks for clang/LLVM and GCC on ARM64
+* ``XM_ALIGNED_DATA`` and ``XM_ALIGNED_STRUCT`` macros updated to use C++17 ``alignas`` when available
+
+### December 2020 (3.16)
+* Added ``XMVectorLog10`` / ``XMVectorExp10``
+* Added ``XMColorRGBToYUV_UHD`` / ``XMColorYUVToRGB_UHD`` for Rec. 2020 YUV
+* Added optional ``rhcoords`` parameter for BoundingFrustum ``CreateFromMatrix``
+* Added use of Intel&reg; Short Vector Matrix Library (SVML) supported by VS 2019
+  * Opt-in with ``_XM_SVML_INTRINSICS_``; opt-out with ``_XM_DISABLE_INTEL_SVML_``
+* Fixed denorm handling for ``XMConvertFloatToHalf``
+* Fixed flush (too small for denorm) handling for ``XMStoreFloat3PK``
+* Fixed clamping bug in ``XMStoreByteN4``
+* Cleaned up ARM-NEON intrinsics type issues for improved portability on GNUC
+* Fixed ``GXMVECTOR`` for x86 ``__vectorcall``
+* Code review
+
+### April 2020 (3.15)
+* Added ``XMMatrixVectorTensorProduct`` for creating a matrix from two vectors
+* Use of m256 registers and FMA3 with ``/arch:AVX2`` for stream and some matrix functions
+* Optimized load/stores for SSE2 float2 & float3 functions
+* Optimized some instruction choices for better AMD CPU support
+* Improved conformance for clang/LLVM, GCC, and MinGW compilers
+* Code review (``constexpr`` / ``noexcept`` usage)
+* Retired VS 2015 support
+
+### August 2019 (3.14)
+* Added float control around IsNan functions to resolve issue with VS 2019 with ``/fp:fast``
+* XMVerifyCPUSupport updated for clang/LLVM cpuid implementation on x86/x64
+* Added support for clang/LLVM built-in platform defines as well as the MSVC ones
+* Cleaned up ARM-NEON intrinsics type issues for improved portability
+* Removed unneeded malloc.h include in DirectXMath.h
+* Whitespace cleanup
+
+### July 2018 (3.13)
+* ``XMFLOAT3X4``, ``XMFLOAT3X4A``, and associated Load/Store functions
+* Move/copy constructors and assignment operators for C++ types
+* Minor fix for XMVectorClamp behavior with NaN
+* Fixed compilation warnings with VS 2017 (15.7 update), Intel C++ 18.0 compiler, and clang 6
+* Retired VS 2013 support
+* Minor code cleanup
+
+### February 2018 (3.12)
+* ARM64 use of fused multiply-accumulate intrinsics
+* Conformance fix for XMConvertFloatToHalf
+* Minor code cleanup
+
+### June 2017 (3.11)
+* AVX optimization of XMMatrixMultiply and XMMatrixMultiplyTranspose
+* AVX2 optimization for XMVectorSplatX
+* FMA3 optimization of XMVectorMultiplyAdd and XMVectorNegativeMultiplySubtract (implied by /arch:AVX2)
+* Conformance fixes to support compilation with Clang 3.7
+
+### January 2017 (3.10)
+* Added XMVectorSum for horizontal adds
+* ARMv8 intrinsics use for ARM64 platform (division, rounding, half-precision conversion)
+* Added SSE3 codepaths using opt-in ``_XM_SSE3_INTRINSICS_``
+* XMVectorRound fix for no-intrinsics to match round to nearest (even)
+* XMStoreFloat3SE fix when max channel isn't a perfect power of 2
+* constexpr conformance fix and workaround for compiler bug in VS 2015 RTM
+* Remove support for VS 2012 compilers
+* Remove ``__vector4i`` deprecated type
+
+### June 2016 (3.09)
+* Includes support for additional optimizations when built with /arch:AVX or /arch:AVX2
+* Added use of constexpr for type constructors, XMConvertToRadians, and XMConvertToDegrees
+* Marked ``__vector4i``, ``XMXDEC4``, ``XMDECN4``, ``XMDEC4``, and associated Load & Store functions as deprecated.
+  + These are vestiges of Xbox 360 support and will be removed in a future release
+* Renamed parameter in XMMatrixPerspectiveFov* to reduce user confusion when relying on IntelliSense
+* XMU565, XMUNIBBLE4 constructors take uint8_t instead of int8_t
+
+### May 2016
+* DirectXMath 3.08 released under the MIT license
+
+### November 2015 (3.08)
+* Added use of ``_mm_sfence`` for Stream methods
+* Fixed bug with non-uniform scaling transforms for BoundingOrientedBox
+* Added asserts for Near/FarZ in XMMatrix* methods
+* Added use of ``=default`` for PODs with VS 2013/2015
+* Additional SSE and ARM-NEON optimizations for PackedVector functions
+
+### April 2015 (3.07)
+* Fix customer reported bugs in BoundingBox methods
+* Fix customer reported bug in XMStoreFloat3SE
+* Fix customer reported bug in XMVectorATan2, XMVectorATan2Est
+* Fix customer reported bug in XMVectorRound
+
+### October 2013 (3.06)
+* Fixed load/store of XMFLOAT3SE to properly match the ``DXGI_FORMAT_R9G9B9E5_SHAREDEXP``
+* Added ``XMLoadUDecN4_XR`` and ``XMStoreUDecN4_XR`` to match ``DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM``
+* Added ``XMColorRGBToSRGB`` and ``XMColorSRGBToRGB`` to convert linear RGB <-> sRGB
+
+### July 2013 (3.05)
+* Use x86/x64 ``__vectorcall`` calling-convention when available (``XM_CALLCONV``, ``HXMVECTOR``, ``FXMMATRIX`` introduced)
+* Fixed bug with XMVectorFloor and XMVectorCeiling when given whole odd numbers (i.e. 105.0)
+* Improved XMVectorRound algorithm
+* ARM-NEON optimizations for XMVectorExp2, XMVectorLog2, XMVectorExpE, and XMVectorLogE
+* ARM-NEON code paths use multiply-by-scalar intrinsics when supported
+* Additional optimizations for ARM-NEON Stream functions
+* Fixed potential warning C4723 using ``operator/`` or ``operator/=``
+
+### March 2013 (3.04)
+* ``XMVectorExp2``, ``XMVectorLog2``, ``XMVectorExpE``, and ``XMVectorLogE`` functions added to provide base-e support in addition to the existing base-2 support
+* ``XMVectorExp`` and ``XMVectorLog`` are now aliases for XMVectorExp2 and XMVectorLog2
+* Additional optimizations for Stream functions
+* XMVector3Cross now ensures w component is zero on ARM
+* XMConvertHalfToFloat and XMConvertFloatToHalf now use IEEE 754 standard float16 behavior for INF/QNAN
+* Updated matrix version Transform for BoundingOrientedBox and BoundingFrustum to handle scaling
+
+### March 2012 (3.03)
+* *breaking change* Removed union members from XMMATRIX type to make it a fully 'opaque' type
+* Marked single-parameter C++ constructors for XMFLOAT2, XMFLOAT2A, XMFLOAT3, XMFLOAT3A, XMFLOAT4, and XMFLOAT4A explicit
+
+### February 2012 (3.02)
+* ARM-NEON intrinsics (selected by default for the ARM platform)
+* Reworked XMVectorPermute, change of ``XM_PERMUTE_`` defines, removal of XMVectorPermuteControl
+* Addition of ``XM_SWIZZLE_`` defines
+* Optimizations for transcendental functions
+* Template forms for permute, swizzle, shift-left, rotate-left, rotate-right, and insert
+* Removal of deprecated types and functions
+  + ``XM_CACHE_LINE_SIZE`` define, XMVectorExpEst, XMVectorLogEst, XMVectorPowEst, XMVectorSinHEst, XMVectorCosHEst, XMVectorTanHEst, XMVector2InBoundsR, XMVector3InBoundsR, XMVector4InBoundsR
+* Removed ``XM_STRICT_VECTOR4``; XMVECTOR in NO-INTRINSICS always defined without .x, .y, .z, .w, .v, or .u
+* Additional bounding types
+* SAL fixes and improvements
+
+### September 2011 (3.00)
+* Renamed and reorganized the headers
+* Introduced C++ namespaces
+* Removed the Xbox 360-specific GPU types
+  + HENDN3, XMHEND3, XMUHENDN3, XMUHEND3, XMDHENN3, XMDHEN3, XMUDHENN3, XMUDHEN3, XMXICON4, XMXICO4, XMICON4, XMICO4, XMUICON4, XMUICO4
+
+### July 2012 (XNAMath 2.05)
+* Template forms have been added for `XMVectorPermute`, `XMVectorSwizzle`, `XMVectorShiftLeft`, `XMVectorRotateLeft`, `XMVectorRotateRight`, and `XMVectorInsert`
+* The `XM_STRICT_XMMATRIX` compilation define has been added for opaque `XMMATRIX`.
+* Stream stride and count arguments have been changed to `size_t`
+* The ``pDeterminant`` parameter of `XMMatrixInverse` is now optional
+* Additional operator= overloads for `XMBYTEN4`, `XMBYTE4`, `XMUBYTEN4`, and `XMUBYTE4` types are now available
+
+### February 2011 (XNAMath 2.04)
+* Addition of new data types and associated load-store functions:
+  + `XMBYTEN2, XMBYTE2, XMUBYTEN2, XMUBYTE2`
+  + `XMLoadByteN2, XMLoadByte2, XMLoadUByteN2, XMLoadUByte2`
+  + `XMStoreByteN2, XMStoreByte2, XMStoreUByteN2, XMStoreUByte2`
+  + `XMINT2, XMUINT2, XMINT3, XMUINT3, XMINT4, XMUINT4`
+  + `XMLoadSInt2, XMLoadUInt2, XMLoadSInt3, XMLoadUInt3, XMLoadSInt4, XMLoadUInt4`
+  + `XMStoreSInt2, XMStoreUInt2, XMStoreSInt3, XMStoreUInt3, XMStoreSInt4, XMStoreUInt4`
+* Marked most single-parameter C++ constructors with `explicit` keyword
+* Corrected range issues with SSE implementations of `XMVectorFloor` and `XMVectorCeiling`
+
+
+### June 2010 (XNAMath 2.03)
+* Addition of ``XMVectorDivide`` to optimize SSE2 vector division operations
+* Unified handling of floating-point specials between the Windows SSE2 and no-intrinsics implementations
+* Use of Visual Studio style SAL annotations
+* Modifications to the C++ declarations for `XMFLOAT2A/3A/4A/4X3A/4X4A` to better support these types in C++ templates
+
+### February 2010 (XNAMath 2.02)
+* Fixes to `XMStoreColor`, `XMQuaternionRotationMatrix`, `XMVectorATan2`, and `XMVectorATan2Est`
+
+### August 2009 (XNAMath 2.01)
+* Adds ``XM_STRICT_VECTOR4``. This opt-in directive disallows the usage of XboxMath-like  member accessors such as .x, .y, and .z. This makes it easier to write portable XNA Math code.
+* Added conversion support for the following Windows graphics formats:
+  + 16-bit color formats (565, 555X, 5551)
+  + 4-bits per channel color formats (4444)
+  + Unique Direct3D 10/11 formats (``DXGI_FORMAT_R9G9B9E5_SHAREDEXP`` and ``DXGI_FORMAT_R11G11B10_FLOAT``)
+
+### March 2009 (XNAMath 2.00)
+* Initial release (based on the Xbox 360 Xbox math library)
--- a/vendor/directxmath-3.19.0/Inc/DirectXCollision.h
+++ b/vendor/directxmath-3.19.0/Inc/DirectXCollision.h
@ -0,0 +1,370 @@
+//-------------------------------------------------------------------------------------
+// DirectXCollision.h -- C++ Collision Math library
+//
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "DirectXMath.h"
+
+namespace DirectX
+{
+
+    enum ContainmentType    // Result type of the Contains / ContainedBy tests declared in this header
+    {
+        DISJOINT = 0,       // No overlap (per the DirectXMath ContainmentType documentation)
+        INTERSECTS = 1,     // Partial overlap
+        CONTAINS = 2        // Full containment
+    };
+
+    enum PlaneIntersectionType    // Result type of the Intersects(Plane) tests declared in this header
+    {
+        FRONT = 0,          // Entirely in front of the plane (per the DirectXMath PlaneIntersectionType documentation)
+        INTERSECTING = 1,   // Straddles the plane
+        BACK = 2            // Entirely behind the plane
+    };
+
+    struct BoundingBox;
+    struct BoundingOrientedBox;
+    struct BoundingFrustum;
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable:4324 4820)
+    // C4324: alignment padding warnings
+    // C4820: Off by default noise
+#endif
+
+    //-------------------------------------------------------------------------------------
+    // Bounding sphere (center + radius); member functions are only declared here, DirectXCollision.inl is included further down
+    //-------------------------------------------------------------------------------------
+    struct BoundingSphere
+    {
+        XMFLOAT3 Center;            // Center of the sphere.
+        float Radius;               // Radius of the sphere.
+
+        // Creators
+        BoundingSphere() noexcept : Center(0, 0, 0), Radius(1.f) {}    // Default: radius 1 centered at the origin
+
+        BoundingSphere(const BoundingSphere&) = default;
+        BoundingSphere& operator=(const BoundingSphere&) = default;
+
+        BoundingSphere(BoundingSphere&&) = default;
+        BoundingSphere& operator=(BoundingSphere&&) = default;
+
+        constexpr BoundingSphere(_In_ const XMFLOAT3& center, _In_ float radius) noexcept
+            : Center(center), Radius(radius) {}
+
+        // Methods (all const: results are returned or written to the _Out_ argument)
+        void    XM_CALLCONV     Transform(_Out_ BoundingSphere& Out, _In_ FXMMATRIX M) const noexcept;
+        void    XM_CALLCONV     Transform(_Out_ BoundingSphere& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const noexcept;
+        // Transform the sphere by a matrix, or by scale / rotation / translation (Rotation is presumably a quaternion -- confirm in DirectXCollision.inl)
+
+        ContainmentType    XM_CALLCONV     Contains(_In_ FXMVECTOR Point) const noexcept;
+        ContainmentType    XM_CALLCONV     Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
+        ContainmentType Contains(_In_ const BoundingSphere& sh) const noexcept;
+        ContainmentType Contains(_In_ const BoundingBox& box) const noexcept;
+        ContainmentType Contains(_In_ const BoundingOrientedBox& box) const noexcept;
+        ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept;
+
+        bool Intersects(_In_ const BoundingSphere& sh) const noexcept;
+        bool Intersects(_In_ const BoundingBox& box) const noexcept;
+        bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept;
+        bool Intersects(_In_ const BoundingFrustum& fr) const noexcept;
+
+        bool    XM_CALLCONV     Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
+        // Triangle-sphere test (triangle given by its vertices V0, V1, V2)
+
+        PlaneIntersectionType    XM_CALLCONV     Intersects(_In_ FXMVECTOR Plane) const noexcept;
+        // Plane-sphere test; reports a PlaneIntersectionType value
+
+        bool    XM_CALLCONV     Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const noexcept;
+        // Ray-sphere test; Dist is an output (presumably the distance along the ray to the hit -- confirm in DirectXCollision.inl)
+
+        ContainmentType     XM_CALLCONV     ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
+            _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept;
+        // Test sphere against six planes (see BoundingFrustum::GetPlanes)
+
+        // Static methods (each writes the sphere it builds to Out)
+        static void CreateMerged(_Out_ BoundingSphere& Out, _In_ const BoundingSphere& S1, _In_ const BoundingSphere& S2) noexcept;
+
+        static void CreateFromBoundingBox(_Out_ BoundingSphere& Out, _In_ const BoundingBox& box) noexcept;
+        static void CreateFromBoundingBox(_Out_ BoundingSphere& Out, _In_ const BoundingOrientedBox& box) noexcept;
+
+        static void CreateFromPoints(_Out_ BoundingSphere& Out, _In_ size_t Count,
+            _In_reads_bytes_(sizeof(XMFLOAT3) + Stride * (Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride) noexcept;    // Stride is in bytes, as the buffer-size annotation shows
+
+        static void CreateFromFrustum(_Out_ BoundingSphere& Out, _In_ const BoundingFrustum& fr) noexcept;
+    };
+
+    //-------------------------------------------------------------------------------------
+    // Axis-aligned bounding box (center + per-axis extents); member functions are only declared here
+    //-------------------------------------------------------------------------------------
+    struct BoundingBox
+    {
+        static constexpr size_t CORNER_COUNT = 8;    // Number of corners written by GetCorners
+
+        XMFLOAT3 Center;            // Center of the box.
+        XMFLOAT3 Extents;           // Distance from the center to each side.
+
+        // Creators
+        BoundingBox() noexcept : Center(0, 0, 0), Extents(1.f, 1.f, 1.f) {}    // Default: extents of 1 on every axis, centered at the origin
+
+        BoundingBox(const BoundingBox&) = default;
+        BoundingBox& operator=(const BoundingBox&) = default;
+
+        BoundingBox(BoundingBox&&) = default;
+        BoundingBox& operator=(BoundingBox&&) = default;
+
+        constexpr BoundingBox(_In_ const XMFLOAT3& center, _In_ const XMFLOAT3& extents) noexcept
+            : Center(center), Extents(extents) {}
+
+        // Methods (all const: results are returned or written to the _Out_ argument)
+        void    XM_CALLCONV     Transform(_Out_ BoundingBox& Out, _In_ FXMMATRIX M) const noexcept;
+        void    XM_CALLCONV     Transform(_Out_ BoundingBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const noexcept;
+
+        void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const noexcept;
+        // Gets the 8 corners of the box; Corners must have room for 8 elements (see the _Out_writes_(8) annotation)
+
+        ContainmentType    XM_CALLCONV     Contains(_In_ FXMVECTOR Point) const noexcept;
+        ContainmentType    XM_CALLCONV     Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
+        ContainmentType Contains(_In_ const BoundingSphere& sh) const noexcept;
+        ContainmentType Contains(_In_ const BoundingBox& box) const noexcept;
+        ContainmentType Contains(_In_ const BoundingOrientedBox& box) const noexcept;
+        ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept;
+
+        bool Intersects(_In_ const BoundingSphere& sh) const noexcept;
+        bool Intersects(_In_ const BoundingBox& box) const noexcept;
+        bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept;
+        bool Intersects(_In_ const BoundingFrustum& fr) const noexcept;
+
+        bool    XM_CALLCONV     Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
+        // Triangle-Box test (triangle given by its vertices V0, V1, V2)
+
+        PlaneIntersectionType    XM_CALLCONV     Intersects(_In_ FXMVECTOR Plane) const noexcept;
+        // Plane-box test; reports a PlaneIntersectionType value
+
+        bool    XM_CALLCONV     Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const noexcept;
+        // Ray-Box test; Dist is an output (presumably the distance along the ray to the hit -- confirm in DirectXCollision.inl)
+
+        ContainmentType     XM_CALLCONV     ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
+            _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept;
+        // Test box against six planes (see BoundingFrustum::GetPlanes)
+
+        // Static methods (each writes the box it builds to Out)
+        static void CreateMerged(_Out_ BoundingBox& Out, _In_ const BoundingBox& b1, _In_ const BoundingBox& b2) noexcept;
+
+        static void CreateFromSphere(_Out_ BoundingBox& Out, _In_ const BoundingSphere& sh) noexcept;
+
+        static void    XM_CALLCONV     CreateFromPoints(_Out_ BoundingBox& Out, _In_ FXMVECTOR pt1, _In_ FXMVECTOR pt2) noexcept;
+        static void CreateFromPoints(_Out_ BoundingBox& Out, _In_ size_t Count,
+            _In_reads_bytes_(sizeof(XMFLOAT3) + Stride * (Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride) noexcept;    // Stride is in bytes, as the buffer-size annotation shows
+    };
+
+    //-------------------------------------------------------------------------------------
+    // Oriented bounding box (center + extents + orientation quaternion); member functions are only declared here
+    //-------------------------------------------------------------------------------------
+    struct BoundingOrientedBox
+    {
+        static constexpr size_t CORNER_COUNT = 8;    // Number of corners written by GetCorners
+
+        XMFLOAT3 Center;            // Center of the box.
+        XMFLOAT3 Extents;           // Distance from the center to each side.
+        XMFLOAT4 Orientation;       // Unit quaternion representing rotation (box -> world).
+
+        // Creators
+        BoundingOrientedBox() noexcept : Center(0, 0, 0), Extents(1.f, 1.f, 1.f), Orientation(0, 0, 0, 1.f) {}    // Default: extents of 1 at the origin, Orientation (0, 0, 0, 1)
+
+        BoundingOrientedBox(const BoundingOrientedBox&) = default;
+        BoundingOrientedBox& operator=(const BoundingOrientedBox&) = default;
+
+        BoundingOrientedBox(BoundingOrientedBox&&) = default;
+        BoundingOrientedBox& operator=(BoundingOrientedBox&&) = default;
+
+        constexpr BoundingOrientedBox(_In_ const XMFLOAT3& center, _In_ const XMFLOAT3& extents, _In_ const XMFLOAT4& orientation) noexcept
+            : Center(center), Extents(extents), Orientation(orientation) {}
+
+        // Methods (all const: results are returned or written to the _Out_ argument)
+        void    XM_CALLCONV     Transform(_Out_ BoundingOrientedBox& Out, _In_ FXMMATRIX M) const noexcept;
+        void    XM_CALLCONV     Transform(_Out_ BoundingOrientedBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const noexcept;
+
+        void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const noexcept;
+        // Gets the 8 corners of the box; Corners must have room for 8 elements (see the _Out_writes_(8) annotation)
+
+        ContainmentType    XM_CALLCONV     Contains(_In_ FXMVECTOR Point) const noexcept;
+        ContainmentType    XM_CALLCONV     Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
+        ContainmentType Contains(_In_ const BoundingSphere& sh) const noexcept;
+        ContainmentType Contains(_In_ const BoundingBox& box) const noexcept;
+        ContainmentType Contains(_In_ const BoundingOrientedBox& box) const noexcept;
+        ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept;
+
+        bool Intersects(_In_ const BoundingSphere& sh) const noexcept;
+        bool Intersects(_In_ const BoundingBox& box) const noexcept;
+        bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept;
+        bool Intersects(_In_ const BoundingFrustum& fr) const noexcept;
+
+        bool    XM_CALLCONV     Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
+        // Triangle-OrientedBox test (triangle given by its vertices V0, V1, V2)
+
+        PlaneIntersectionType    XM_CALLCONV     Intersects(_In_ FXMVECTOR Plane) const noexcept;
+        // Plane-OrientedBox test; reports a PlaneIntersectionType value
+
+        bool    XM_CALLCONV     Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const noexcept;
+        // Ray-OrientedBox test; Dist is an output (presumably the distance along the ray to the hit -- confirm in DirectXCollision.inl)
+
+        ContainmentType     XM_CALLCONV     ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
+            _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept;
+        // Test OrientedBox against six planes (see BoundingFrustum::GetPlanes)
+
+        // Static methods (each writes the box it builds to Out)
+        static void CreateFromBoundingBox(_Out_ BoundingOrientedBox& Out, _In_ const BoundingBox& box) noexcept;
+
+        static void CreateFromPoints(_Out_ BoundingOrientedBox& Out, _In_ size_t Count,
+            _In_reads_bytes_(sizeof(XMFLOAT3) + Stride * (Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride) noexcept;    // Stride is in bytes, as the buffer-size annotation shows
+    };
+
+    //-------------------------------------------------------------------------------------
+    // Bounding frustum (origin + orientation + four side slopes + near/far Z); member functions are only declared here
+    //-------------------------------------------------------------------------------------
+    struct BoundingFrustum
+    {
+        static constexpr size_t CORNER_COUNT = 8;    // Number of corners written by GetCorners
+
+        XMFLOAT3 Origin;            // Origin of the frustum (and projection).
+        XMFLOAT4 Orientation;       // Quaternion representing rotation.
+
+        float RightSlope;           // Positive X (X/Z)
+        float LeftSlope;            // Negative X
+        float TopSlope;             // Positive Y (Y/Z)
+        float BottomSlope;          // Negative Y
+        float Near, Far;            // Z of the near plane and far plane.
+
+        // Creators (the default frustum has slopes of +/-1, Near 0, Far 1, Origin at zero and Orientation (0, 0, 0, 1))
+        BoundingFrustum() noexcept :
+            Origin(0, 0, 0), Orientation(0, 0, 0, 1.f), RightSlope(1.f), LeftSlope(-1.f),
+            TopSlope(1.f), BottomSlope(-1.f), Near(0), Far(1.f) {}
+
+        BoundingFrustum(const BoundingFrustum&) = default;
+        BoundingFrustum& operator=(const BoundingFrustum&) = default;
+
+        BoundingFrustum(BoundingFrustum&&) = default;
+        BoundingFrustum& operator=(BoundingFrustum&&) = default;
+
+        constexpr BoundingFrustum(_In_ const XMFLOAT3& origin, _In_ const XMFLOAT4& orientation,
+            _In_ float rightSlope, _In_ float leftSlope, _In_ float topSlope, _In_ float bottomSlope,
+            _In_ float nearPlane, _In_ float farPlane) noexcept
+            : Origin(origin), Orientation(orientation),
+            RightSlope(rightSlope), LeftSlope(leftSlope), TopSlope(topSlope), BottomSlope(bottomSlope),
+            Near(nearPlane), Far(farPlane) {}
+        BoundingFrustum(_In_ CXMMATRIX Projection, bool rhcoords = false) noexcept;    // Builds from a projection matrix; rhcoords presumably selects right-handed coordinates -- confirm in DirectXCollision.inl
+
+        // Methods (all const: results are returned or written to the output arguments)
+        void    XM_CALLCONV     Transform(_Out_ BoundingFrustum& Out, _In_ FXMMATRIX M) const noexcept;
+        void    XM_CALLCONV     Transform(_Out_ BoundingFrustum& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const noexcept;
+
+        void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const noexcept;
+        // Gets the 8 corners of the frustum; Corners must have room for 8 elements (see the _Out_writes_(8) annotation)
+
+        ContainmentType    XM_CALLCONV     Contains(_In_ FXMVECTOR Point) const noexcept;
+        ContainmentType    XM_CALLCONV     Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
+        ContainmentType Contains(_In_ const BoundingSphere& sp) const noexcept;
+        ContainmentType Contains(_In_ const BoundingBox& box) const noexcept;
+        ContainmentType Contains(_In_ const BoundingOrientedBox& box) const noexcept;
+        ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept;
+        // Frustum-Frustum test
+
+        bool Intersects(_In_ const BoundingSphere& sh) const noexcept;
+        bool Intersects(_In_ const BoundingBox& box) const noexcept;
+        bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept;
+        bool Intersects(_In_ const BoundingFrustum& fr) const noexcept;
+
+        bool    XM_CALLCONV     Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
+        // Triangle-Frustum test (triangle given by its vertices V0, V1, V2)
+
+        PlaneIntersectionType    XM_CALLCONV     Intersects(_In_ FXMVECTOR Plane) const noexcept;
+        // Plane-Frustum test; reports a PlaneIntersectionType value
+
+        bool    XM_CALLCONV     Intersects(_In_ FXMVECTOR rayOrigin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const noexcept;
+        // Ray-Frustum test; Dist is an output (presumably the distance along the ray to the hit -- confirm in DirectXCollision.inl)
+
+        ContainmentType     XM_CALLCONV     ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
+            _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept;
+        // Test frustum against six planes (see BoundingFrustum::GetPlanes)
+
+        void GetPlanes(_Out_opt_ XMVECTOR* NearPlane, _Out_opt_ XMVECTOR* FarPlane, _Out_opt_ XMVECTOR* RightPlane,
+            _Out_opt_ XMVECTOR* LeftPlane, _Out_opt_ XMVECTOR* TopPlane, _Out_opt_ XMVECTOR* BottomPlane) const noexcept;
+        // Create 6 Planes representation of Frustum; every output pointer is annotated _Out_opt_, i.e. optional
+
+        // Static methods
+        static void     XM_CALLCONV     CreateFromMatrix(_Out_ BoundingFrustum& Out, _In_ FXMMATRIX Projection, bool rhcoords = false) noexcept;    // Writes the frustum built from Projection to Out
+    };
+
+    //-----------------------------------------------------------------------------
+    // Triangle intersection testing routines (free functions; triangles are passed as three vertex vectors).
+    //-----------------------------------------------------------------------------
+    namespace TriangleTests
+    {
+        bool                    XM_CALLCONV     Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _In_ FXMVECTOR V0, _In_ GXMVECTOR V1, _In_ HXMVECTOR V2, _Out_ float& Dist) noexcept;
+        // Ray-Triangle; Dist is an output (presumably the distance along the ray to the hit -- confirm in DirectXCollision.inl)
+
+        bool                    XM_CALLCONV     Intersects(_In_ FXMVECTOR A0, _In_ FXMVECTOR A1, _In_ FXMVECTOR A2, _In_ GXMVECTOR B0, _In_ HXMVECTOR B1, _In_ HXMVECTOR B2) noexcept;
+        // Triangle-Triangle (first triangle A0, A1, A2; second triangle B0, B1, B2)
+
+        PlaneIntersectionType   XM_CALLCONV     Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2, _In_ GXMVECTOR Plane) noexcept;
+        // Plane-Triangle; reports a PlaneIntersectionType value
+
+        ContainmentType         XM_CALLCONV     ContainedBy(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2,
+            _In_ GXMVECTOR Plane0, _In_ HXMVECTOR Plane1, _In_ HXMVECTOR Plane2,
+            _In_ CXMVECTOR Plane3, _In_ CXMVECTOR Plane4, _In_ CXMVECTOR Plane5) noexcept;
+        // Test a triangle against six planes at once (see BoundingFrustum::GetPlanes)
+    }
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+    /****************************************************************************
+     *
+     * Implementation
+     *
+     ****************************************************************************/
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4068 4365 4616 6001)
+     // C4068/4616: ignore unknown pragmas
+     // C4365: Off by default noise
+     // C6001: False positives
+#endif
+
+#ifdef _PREFAST_
+#pragma prefast(push)
+#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes")
+#pragma prefast(disable : 26495, "Union initialization confuses /analyze")
+#endif
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wfloat-equal"
+#pragma clang diagnostic ignored "-Wunknown-warning-option"
+#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
+#endif
+
+#include "DirectXCollision.inl"
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+#ifdef _PREFAST_
+#pragma prefast(pop)
+#endif
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+} // namespace DirectX
+
--- a/vendor/directxmath-3.19.0/Inc/DirectXCollision.inl
+++ b/vendor/directxmath-3.19.0/Inc/DirectXCollision.inl
--- a/vendor/directxmath-3.19.0/Inc/DirectXColors.h
+++ b/vendor/directxmath-3.19.0/Inc/DirectXColors.h
@ -0,0 +1,312 @@
+//-------------------------------------------------------------------------------------
+// DirectXColors.h -- C++ Color Math library
+//
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "DirectXMath.h"
+
+namespace DirectX
+{
+
+    namespace Colors
+    {
+        // Standard named colors as XMVECTORF32 constants: four floats in Red/Green/Blue/Alpha order, each in [0, 1], in sRGB colorspace
+        XMGLOBALCONST XMVECTORF32 AliceBlue = { { { 0.941176534f, 0.972549081f, 1.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 AntiqueWhite = { { { 0.980392218f, 0.921568692f, 0.843137324f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Aqua = { { { 0.f, 1.f, 1.f, 1.f } } }; // same value as Cyan
+        XMGLOBALCONST XMVECTORF32 Aquamarine = { { { 0.498039246f, 1.f, 0.831372619f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Azure = { { { 0.941176534f, 1.f, 1.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Beige = { { { 0.960784376f, 0.960784376f, 0.862745166f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Bisque = { { { 1.f, 0.894117713f, 0.768627524f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Black = { { { 0.f, 0.f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 BlanchedAlmond = { { { 1.f, 0.921568692f, 0.803921640f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Blue = { { { 0.f, 0.f, 1.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 BlueViolet = { { { 0.541176498f, 0.168627456f, 0.886274576f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Brown = { { { 0.647058845f, 0.164705887f, 0.164705887f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 BurlyWood = { { { 0.870588303f, 0.721568644f, 0.529411793f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 CadetBlue = { { { 0.372549027f, 0.619607866f, 0.627451003f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Chartreuse = { { { 0.498039246f, 1.f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Chocolate = { { { 0.823529482f, 0.411764741f, 0.117647067f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Coral = { { { 1.f, 0.498039246f, 0.313725501f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 CornflowerBlue = { { { 0.392156899f, 0.584313750f, 0.929411829f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Cornsilk = { { { 1.f, 0.972549081f, 0.862745166f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Crimson = { { { 0.862745166f, 0.078431375f, 0.235294133f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Cyan = { { { 0.f, 1.f, 1.f, 1.f } } }; // same value as Aqua
+        XMGLOBALCONST XMVECTORF32 DarkBlue = { { { 0.f, 0.f, 0.545098066f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkCyan = { { { 0.f, 0.545098066f, 0.545098066f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkGoldenrod = { { { 0.721568644f, 0.525490224f, 0.043137256f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkGray = { { { 0.662745118f, 0.662745118f, 0.662745118f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkGreen = { { { 0.f, 0.392156899f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkKhaki = { { { 0.741176486f, 0.717647076f, 0.419607878f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkMagenta = { { { 0.545098066f, 0.f, 0.545098066f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkOliveGreen = { { { 0.333333343f, 0.419607878f, 0.184313729f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkOrange = { { { 1.f, 0.549019635f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkOrchid = { { { 0.600000024f, 0.196078449f, 0.800000072f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkRed = { { { 0.545098066f, 0.f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkSalmon = { { { 0.913725555f, 0.588235319f, 0.478431404f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkSeaGreen = { { { 0.560784340f, 0.737254918f, 0.545098066f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkSlateBlue = { { { 0.282352954f, 0.239215702f, 0.545098066f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkSlateGray = { { { 0.184313729f, 0.309803933f, 0.309803933f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkTurquoise = { { { 0.f, 0.807843208f, 0.819607913f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkViolet = { { { 0.580392182f, 0.f, 0.827451050f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DeepPink = { { { 1.f, 0.078431375f, 0.576470613f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DeepSkyBlue = { { { 0.f, 0.749019623f, 1.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DimGray = { { { 0.411764741f, 0.411764741f, 0.411764741f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DodgerBlue = { { { 0.117647067f, 0.564705908f, 1.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Firebrick = { { { 0.698039234f, 0.133333340f, 0.133333340f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 FloralWhite = { { { 1.f, 0.980392218f, 0.941176534f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 ForestGreen = { { { 0.133333340f, 0.545098066f, 0.133333340f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Fuchsia = { { { 1.f, 0.f, 1.f, 1.f } } }; // same value as Magenta
+        XMGLOBALCONST XMVECTORF32 Gainsboro = { { { 0.862745166f, 0.862745166f, 0.862745166f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 GhostWhite = { { { 0.972549081f, 0.972549081f, 1.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Gold = { { { 1.f, 0.843137324f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Goldenrod = { { { 0.854902029f, 0.647058845f, 0.125490203f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Gray = { { { 0.501960814f, 0.501960814f, 0.501960814f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Green = { { { 0.f, 0.501960814f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 GreenYellow = { { { 0.678431392f, 1.f, 0.184313729f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Honeydew = { { { 0.941176534f, 1.f, 0.941176534f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 HotPink = { { { 1.f, 0.411764741f, 0.705882370f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 IndianRed = { { { 0.803921640f, 0.360784322f, 0.360784322f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Indigo = { { { 0.294117659f, 0.f, 0.509803951f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Ivory = { { { 1.f, 1.f, 0.941176534f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Khaki = { { { 0.941176534f, 0.901960850f, 0.549019635f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Lavender = { { { 0.901960850f, 0.901960850f, 0.980392218f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LavenderBlush = { { { 1.f, 0.941176534f, 0.960784376f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LawnGreen = { { { 0.486274540f, 0.988235354f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LemonChiffon = { { { 1.f, 0.980392218f, 0.803921640f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightBlue = { { { 0.678431392f, 0.847058892f, 0.901960850f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightCoral = { { { 0.941176534f, 0.501960814f, 0.501960814f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightCyan = { { { 0.878431439f, 1.f, 1.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightGoldenrodYellow = { { { 0.980392218f, 0.980392218f, 0.823529482f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightGray = { { { 0.827451050f, 0.827451050f, 0.827451050f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightGreen = { { { 0.564705908f, 0.933333397f, 0.564705908f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightPink = { { { 1.f, 0.713725507f, 0.756862819f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightSalmon = { { { 1.f, 0.627451003f, 0.478431404f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightSeaGreen = { { { 0.125490203f, 0.698039234f, 0.666666687f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightSkyBlue = { { { 0.529411793f, 0.807843208f, 0.980392218f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightSlateGray = { { { 0.466666698f, 0.533333361f, 0.600000024f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightSteelBlue = { { { 0.690196097f, 0.768627524f, 0.870588303f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightYellow = { { { 1.f, 1.f, 0.878431439f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Lime = { { { 0.f, 1.f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LimeGreen = { { { 0.196078449f, 0.803921640f, 0.196078449f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Linen = { { { 0.980392218f, 0.941176534f, 0.901960850f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Magenta = { { { 1.f, 0.f, 1.f, 1.f } } }; // same value as Fuchsia
+        XMGLOBALCONST XMVECTORF32 Maroon = { { { 0.501960814f, 0.f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumAquamarine = { { { 0.400000036f, 0.803921640f, 0.666666687f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumBlue = { { { 0.f, 0.f, 0.803921640f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumOrchid = { { { 0.729411781f, 0.333333343f, 0.827451050f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumPurple = { { { 0.576470613f, 0.439215720f, 0.858823597f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumSeaGreen = { { { 0.235294133f, 0.701960802f, 0.443137288f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumSlateBlue = { { { 0.482352972f, 0.407843173f, 0.933333397f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumSpringGreen = { { { 0.f, 0.980392218f, 0.603921592f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumTurquoise = { { { 0.282352954f, 0.819607913f, 0.800000072f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumVioletRed = { { { 0.780392230f, 0.082352944f, 0.521568656f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MidnightBlue = { { { 0.098039225f, 0.098039225f, 0.439215720f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MintCream = { { { 0.960784376f, 1.f, 0.980392218f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MistyRose = { { { 1.f, 0.894117713f, 0.882353008f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Moccasin = { { { 1.f, 0.894117713f, 0.709803939f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 NavajoWhite = { { { 1.f, 0.870588303f, 0.678431392f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Navy = { { { 0.f, 0.f, 0.501960814f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 OldLace = { { { 0.992156923f, 0.960784376f, 0.901960850f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Olive = { { { 0.501960814f, 0.501960814f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 OliveDrab = { { { 0.419607878f, 0.556862772f, 0.137254909f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Orange = { { { 1.f, 0.647058845f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 OrangeRed = { { { 1.f, 0.270588249f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Orchid = { { { 0.854902029f, 0.439215720f, 0.839215755f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 PaleGoldenrod = { { { 0.933333397f, 0.909803987f, 0.666666687f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 PaleGreen = { { { 0.596078455f, 0.984313786f, 0.596078455f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 PaleTurquoise = { { { 0.686274529f, 0.933333397f, 0.933333397f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 PaleVioletRed = { { { 0.858823597f, 0.439215720f, 0.576470613f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 PapayaWhip = { { { 1.f, 0.937254965f, 0.835294187f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 PeachPuff = { { { 1.f, 0.854902029f, 0.725490212f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Peru = { { { 0.803921640f, 0.521568656f, 0.247058839f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Pink = { { { 1.f, 0.752941251f, 0.796078503f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Plum = { { { 0.866666734f, 0.627451003f, 0.866666734f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 PowderBlue = { { { 0.690196097f, 0.878431439f, 0.901960850f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Purple = { { { 0.501960814f, 0.f, 0.501960814f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Red = { { { 1.f, 0.f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 RosyBrown = { { { 0.737254918f, 0.560784340f, 0.560784340f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 RoyalBlue = { { { 0.254901975f, 0.411764741f, 0.882353008f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SaddleBrown = { { { 0.545098066f, 0.270588249f, 0.074509807f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Salmon = { { { 0.980392218f, 0.501960814f, 0.447058856f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SandyBrown = { { { 0.956862807f, 0.643137276f, 0.376470625f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SeaGreen = { { { 0.180392161f, 0.545098066f, 0.341176480f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SeaShell = { { { 1.f, 0.960784376f, 0.933333397f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Sienna = { { { 0.627451003f, 0.321568638f, 0.176470593f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Silver = { { { 0.752941251f, 0.752941251f, 0.752941251f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SkyBlue = { { { 0.529411793f, 0.807843208f, 0.921568692f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SlateBlue = { { { 0.415686309f, 0.352941185f, 0.803921640f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SlateGray = { { { 0.439215720f, 0.501960814f, 0.564705908f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Snow = { { { 1.f, 0.980392218f, 0.980392218f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SpringGreen = { { { 0.f, 1.f, 0.498039246f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SteelBlue = { { { 0.274509817f, 0.509803951f, 0.705882370f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Tan = { { { 0.823529482f, 0.705882370f, 0.549019635f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Teal = { { { 0.f, 0.501960814f, 0.501960814f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Thistle = { { { 0.847058892f, 0.749019623f, 0.847058892f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Tomato = { { { 1.f, 0.388235331f, 0.278431386f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Transparent = { { { 0.f, 0.f, 0.f, 0.f } } }; // the only entry in this table whose alpha is 0
+        XMGLOBALCONST XMVECTORF32 Turquoise = { { { 0.250980407f, 0.878431439f, 0.815686345f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Violet = { { { 0.933333397f, 0.509803951f, 0.933333397f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Wheat = { { { 0.960784376f, 0.870588303f, 0.701960802f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 White = { { { 1.f, 1.f, 1.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 WhiteSmoke = { { { 0.960784376f, 0.960784376f, 0.960784376f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Yellow = { { { 1.f, 1.f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 YellowGreen = { { { 0.603921592f, 0.803921640f, 0.196078449f, 1.f } } };
+
+    } // namespace Colors
+
+    namespace ColorsLinear
+    {
+        // Standard named colors as XMVECTORF32 constants: four floats in Red/Green/Blue/Alpha order, each in [0, 1], in linear colorspace (presumably the sRGB-decoded counterparts of namespace Colors — TODO confirm)
+        XMGLOBALCONST XMVECTORF32 AliceBlue = { { { 0.871367335f, 0.938685894f, 1.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 AntiqueWhite = { { { 0.955973506f, 0.830770075f, 0.679542601f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Aqua = { { { 0.f, 1.f, 1.f, 1.f } } }; // same value as Cyan
+        XMGLOBALCONST XMVECTORF32 Aquamarine = { { { 0.212230787f, 1.f, 0.658374965f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Azure = { { { 0.871367335f, 1.f, 1.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Beige = { { { 0.913098991f, 0.913098991f, 0.715693772f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Bisque = { { { 1.f, 0.775822461f, 0.552011609f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Black = { { { 0.f, 0.f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 BlanchedAlmond = { { { 1.f, 0.830770075f, 0.610495746f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Blue = { { { 0.f, 0.f, 1.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 BlueViolet = { { { 0.254152179f, 0.024157630f, 0.760524750f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Brown = { { { 0.376262218f, 0.023153365f, 0.023153365f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 BurlyWood = { { { 0.730461001f, 0.479320228f, 0.242281199f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 CadetBlue = { { { 0.114435382f, 0.341914445f, 0.351532698f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Chartreuse = { { { 0.212230787f, 1.f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Chocolate = { { { 0.644479871f, 0.141263321f, 0.012983031f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Coral = { { { 1.f, 0.212230787f, 0.080219828f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 CornflowerBlue = { { { 0.127437726f, 0.300543845f, 0.846873462f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Cornsilk = { { { 1.f, 0.938685894f, 0.715693772f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Crimson = { { { 0.715693772f, 0.006995410f, 0.045186214f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Cyan = { { { 0.f, 1.f, 1.f, 1.f } } }; // same value as Aqua
+        XMGLOBALCONST XMVECTORF32 DarkBlue = { { { 0.f, 0.f, 0.258182913f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkCyan = { { { 0.f, 0.258182913f, 0.258182913f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkGoldenrod = { { { 0.479320228f, 0.238397658f, 0.003346536f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkGray = { { { 0.396755308f, 0.396755308f, 0.396755308f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkGreen = { { { 0.f, 0.127437726f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkKhaki = { { { 0.508881450f, 0.473531544f, 0.147027299f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkMagenta = { { { 0.258182913f, 0.f, 0.258182913f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkOliveGreen = { { { 0.090841733f, 0.147027299f, 0.028426038f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkOrange = { { { 1.f, 0.262250721f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkOrchid = { { { 0.318546832f, 0.031896040f, 0.603827536f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkRed = { { { 0.258182913f, 0.f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkSalmon = { { { 0.814846814f, 0.304987371f, 0.194617867f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkSeaGreen = { { { 0.274677366f, 0.502886593f, 0.258182913f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkSlateBlue = { { { 0.064803280f, 0.046665095f, 0.258182913f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkSlateGray = { { { 0.028426038f, 0.078187428f, 0.078187428f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkTurquoise = { { { 0.f, 0.617206752f, 0.637597024f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DarkViolet = { { { 0.296138316f, 0.f, 0.651405811f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DeepPink = { { { 1.f, 0.006995410f, 0.291770697f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DeepSkyBlue = { { { 0.f, 0.520995677f, 1.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DimGray = { { { 0.141263321f, 0.141263321f, 0.141263321f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 DodgerBlue = { { { 0.012983031f, 0.278894335f, 1.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Firebrick = { { { 0.445201248f, 0.015996292f, 0.015996292f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 FloralWhite = { { { 1.f, 0.955973506f, 0.871367335f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 ForestGreen = { { { 0.015996292f, 0.258182913f, 0.015996292f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Fuchsia = { { { 1.f, 0.f, 1.f, 1.f } } }; // same value as Magenta
+        XMGLOBALCONST XMVECTORF32 Gainsboro = { { { 0.715693772f, 0.715693772f, 0.715693772f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 GhostWhite = { { { 0.938685894f, 0.938685894f, 1.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Gold = { { { 1.f, 0.679542601f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Goldenrod = { { { 0.701102138f, 0.376262218f, 0.014443844f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Gray = { { { 0.215860531f, 0.215860531f, 0.215860531f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Green = { { { 0.f, 0.215860531f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 GreenYellow = { { { 0.417885154f, 1.f, 0.028426038f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Honeydew = { { { 0.871367335f, 1.f, 0.871367335f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 HotPink = { { { 1.f, 0.141263321f, 0.456411064f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 IndianRed = { { { 0.610495746f, 0.107023112f, 0.107023112f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Indigo = { { { 0.070360109f, 0.f, 0.223227978f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Ivory = { { { 1.f, 1.f, 0.871367335f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Khaki = { { { 0.871367335f, 0.791298151f, 0.262250721f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Lavender = { { { 0.791298151f, 0.791298151f, 0.955973506f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LavenderBlush = { { { 1.f, 0.871367335f, 0.913098991f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LawnGreen = { { { 0.201556295f, 0.973445475f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LemonChiffon = { { { 1.f, 0.955973506f, 0.610495746f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightBlue = { { { 0.417885154f, 0.686685443f, 0.791298151f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightCoral = { { { 0.871367335f, 0.215860531f, 0.215860531f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightCyan = { { { 0.745404482f, 1.f, 1.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightGoldenrodYellow = { { { 0.955973506f, 0.955973506f, 0.644479871f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightGray = { { { 0.651405811f, 0.651405811f, 0.651405811f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightGreen = { { { 0.278894335f, 0.854992807f, 0.278894335f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightPink = { { { 1.f, 0.467783839f, 0.533276618f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightSalmon = { { { 1.f, 0.351532698f, 0.194617867f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightSeaGreen = { { { 0.014443844f, 0.445201248f, 0.401977867f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightSkyBlue = { { { 0.242281199f, 0.617206752f, 0.955973506f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightSlateGray = { { { 0.184475034f, 0.246201396f, 0.318546832f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightSteelBlue = { { { 0.434153706f, 0.552011609f, 0.730461001f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LightYellow = { { { 1.f, 1.f, 0.745404482f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Lime = { { { 0.f, 1.f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 LimeGreen = { { { 0.031896040f, 0.610495746f, 0.031896040f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Linen = { { { 0.955973506f, 0.871367335f, 0.791298151f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Magenta = { { { 1.f, 0.f, 1.f, 1.f } } }; // same value as Fuchsia
+        XMGLOBALCONST XMVECTORF32 Maroon = { { { 0.215860531f, 0.f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumAquamarine = { { { 0.132868364f, 0.610495746f, 0.401977867f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumBlue = { { { 0.f, 0.f, 0.610495746f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumOrchid = { { { 0.491020888f, 0.090841733f, 0.651405811f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumPurple = { { { 0.291770697f, 0.162029430f, 0.708376050f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumSeaGreen = { { { 0.045186214f, 0.450785846f, 0.165132239f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumSlateBlue = { { { 0.198069349f, 0.138431653f, 0.854992807f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumSpringGreen = { { { 0.f, 0.955973506f, 0.323143244f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumTurquoise = { { { 0.064803280f, 0.637597024f, 0.603827536f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MediumVioletRed = { { { 0.571125031f, 0.007499032f, 0.234550655f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MidnightBlue = { { { 0.009721218f, 0.009721218f, 0.162029430f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MintCream = { { { 0.913098991f, 1.f, 0.955973506f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 MistyRose = { { { 1.f, 0.775822461f, 0.752942443f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Moccasin = { { { 1.f, 0.775822461f, 0.462077051f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 NavajoWhite = { { { 1.f, 0.730461001f, 0.417885154f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Navy = { { { 0.f, 0.f, 0.215860531f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 OldLace = { { { 0.982250869f, 0.913098991f, 0.791298151f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Olive = { { { 0.215860531f, 0.215860531f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 OliveDrab = { { { 0.147027299f, 0.270497859f, 0.016807375f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Orange = { { { 1.f, 0.376262218f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 OrangeRed = { { { 1.f, 0.059511241f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Orchid = { { { 0.701102138f, 0.162029430f, 0.672443330f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 PaleGoldenrod = { { { 0.854992807f, 0.806952477f, 0.401977867f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 PaleGreen = { { { 0.313988745f, 0.964686573f, 0.313988745f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 PaleTurquoise = { { { 0.428690553f, 0.854992807f, 0.854992807f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 PaleVioletRed = { { { 0.708376050f, 0.162029430f, 0.291770697f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 PapayaWhip = { { { 1.f, 0.863157392f, 0.665387452f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 PeachPuff = { { { 1.f, 0.701102138f, 0.485149980f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Peru = { { { 0.610495746f, 0.234550655f, 0.049706575f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Pink = { { { 1.f, 0.527115345f, 0.597202003f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Plum = { { { 0.723055363f, 0.351532698f, 0.723055363f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 PowderBlue = { { { 0.434153706f, 0.745404482f, 0.791298151f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Purple = { { { 0.215860531f, 0.f, 0.215860531f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Red = { { { 1.f, 0.f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 RosyBrown = { { { 0.502886593f, 0.274677366f, 0.274677366f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 RoyalBlue = { { { 0.052860655f, 0.141263321f, 0.752942443f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SaddleBrown = { { { 0.258182913f, 0.059511241f, 0.006512091f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Salmon = { { { 0.955973506f, 0.215860531f, 0.168269455f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SandyBrown = { { { 0.904661357f, 0.371237785f, 0.116970696f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SeaGreen = { { { 0.027320892f, 0.258182913f, 0.095307484f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SeaShell = { { { 1.f, 0.913098991f, 0.854992807f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Sienna = { { { 0.351532698f, 0.084376216f, 0.026241222f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Silver = { { { 0.527115345f, 0.527115345f, 0.527115345f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SkyBlue = { { { 0.242281199f, 0.617206752f, 0.830770075f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SlateBlue = { { { 0.144128501f, 0.102241747f, 0.610495746f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SlateGray = { { { 0.162029430f, 0.215860531f, 0.278894335f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Snow = { { { 1.f, 0.955973506f, 0.955973506f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SpringGreen = { { { 0.f, 1.f, 0.212230787f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 SteelBlue = { { { 0.061246071f, 0.223227978f, 0.456411064f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Tan = { { { 0.644479871f, 0.456411064f, 0.262250721f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Teal = { { { 0.f, 0.215860531f, 0.215860531f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Thistle = { { { 0.686685443f, 0.520995677f, 0.686685443f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Tomato = { { { 1.f, 0.124771863f, 0.063010029f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Transparent = { { { 0.f, 0.f, 0.f, 0.f } } }; // the only entry in this table whose alpha is 0
+        XMGLOBALCONST XMVECTORF32 Turquoise = { { { 0.051269468f, 0.745404482f, 0.630757332f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Violet = { { { 0.854992807f, 0.223227978f, 0.854992807f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Wheat = { { { 0.913098991f, 0.730461001f, 0.450785846f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 White = { { { 1.f, 1.f, 1.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 WhiteSmoke = { { { 0.913098991f, 0.913098991f, 0.913098991f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 Yellow = { { { 1.f, 1.f, 0.f, 1.f } } };
+        XMGLOBALCONST XMVECTORF32 YellowGreen = { { { 0.323143244f, 0.610495746f, 0.031896040f, 1.f } } };
+
+    } // namespace ColorsLinear
+
+} // namespace DirectX
+
--- a/vendor/directxmath-3.19.0/Inc/DirectXMath.h
+++ b/vendor/directxmath-3.19.0/Inc/DirectXMath.h
--- a/vendor/directxmath-3.19.0/Inc/DirectXMathConvert.inl
+++ b/vendor/directxmath-3.19.0/Inc/DirectXMathConvert.inl
--- a/vendor/directxmath-3.19.0/Inc/DirectXMathMatrix.inl
+++ b/vendor/directxmath-3.19.0/Inc/DirectXMathMatrix.inl
--- a/vendor/directxmath-3.19.0/Inc/DirectXMathMisc.inl
+++ b/vendor/directxmath-3.19.0/Inc/DirectXMathMisc.inl
--- a/vendor/directxmath-3.19.0/Inc/DirectXMathVector.inl
+++ b/vendor/directxmath-3.19.0/Inc/DirectXMathVector.inl
--- a/vendor/directxmath-3.19.0/Inc/DirectXPackedVector.h
+++ b/vendor/directxmath-3.19.0/Inc/DirectXPackedVector.h
--- a/vendor/directxmath-3.19.0/Inc/DirectXPackedVector.inl
+++ b/vendor/directxmath-3.19.0/Inc/DirectXPackedVector.inl
--- a/vendor/directxmath-3.19.0/LICENSE
+++ b/vendor/directxmath-3.19.0/LICENSE
@ -0,0 +1,21 @@
+    MIT License
+
+    Copyright (c) Microsoft Corporation.
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in all
+    copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+    SOFTWARE
--- a/vendor/directxmath-3.19.0/MatrixStack/DirectXMatrixStack.h
+++ b/vendor/directxmath-3.19.0/MatrixStack/DirectXMatrixStack.h
@ -0,0 +1,241 @@
+//-------------------------------------------------------------------------------------
+// DirectXMatrixStack.h -- DirectXMath C++ Matrix Stack
+//
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <new>
+
+#ifdef _WIN32
+#include <malloc.h>
+#endif
+
+#include <DirectXMath.h>
+
+
+namespace DirectX
+{
+    // Growable stack of XMMATRIX values kept in 16-byte-aligned heap storage.
+    //
+    // The stack always holds at least one entry (Pop() never removes the last
+    // one), and every mutating operation acts on the current top entry.
+    // For each transform there are two flavours:
+    //   - Foo():      top = top * M   (M is the right-hand operand)
+    //   - FooLocal(): top = M * top   (M is the left-hand operand)
+    // The type is move-only.
+    class MatrixStack
+    {
+    public:
+        // Creates a stack with room for startSize matrices and loads identity
+        // into its single initial entry.
+        // Throws std::bad_alloc if the storage cannot be allocated.
+        MatrixStack(size_t startSize = 16) noexcept(false) :
+            m_stackSize(0),
+            m_current(0),
+            m_stack(nullptr)
+        {
+            assert(startSize > 0);
+            // The assert vanishes in release builds; clamp so LoadIdentity()
+            // below never writes into a zero-length allocation.
+            Allocate(startSize > 0 ? startSize : 1);
+            LoadIdentity();
+        }
+
+        MatrixStack(MatrixStack&&) = default;
+        MatrixStack& operator= (MatrixStack&&) = default;
+
+        MatrixStack(MatrixStack const&) = delete;
+        MatrixStack& operator= (MatrixStack const&) = delete;
+
+        // Returns a copy of the top matrix.
+        const XMMATRIX XM_CALLCONV Top() const noexcept { return m_stack[m_current]; }
+
+        // Returns a pointer to the top matrix. The pointer refers to the
+        // internal storage, which is replaced whenever Push() has to grow it.
+        const XMMATRIX* GetTop() const noexcept { return &m_stack[m_current]; }
+
+        // Number of entries currently on the stack (always >= 1).
+        size_t Size() const noexcept { return (m_current + 1); }
+
+        // Removes the top entry; does nothing when only one entry remains.
+        void Pop()
+        {
+            if (m_current > 0)
+            {
+                --m_current;
+            }
+        }
+
+        // Duplicates the top matrix onto a new top entry, doubling the storage
+        // when it is full. Throws std::bad_alloc if growing fails; in that case
+        // the stack is left exactly as it was, because m_current is advanced
+        // only after the new slot is known to exist.
+        void Push()
+        {
+            const size_t next = m_current + 1;
+
+            if (next >= m_stackSize)
+            {
+                Allocate(m_stackSize * 2);
+            }
+
+            // Replicate the original top of the matrix stack.
+            m_stack[next] = m_stack[m_current];
+            m_current = next;
+        }
+
+        // Loads identity into the top of the matrix stack.
+        void LoadIdentity() noexcept
+        {
+            m_stack[m_current] = XMMatrixIdentity();
+        }
+
+        // Load a matrix into the top of the matrix stack.
+        void XM_CALLCONV LoadMatrix(FXMMATRIX matrix) noexcept
+        {
+            m_stack[m_current] = matrix;
+        }
+
+        // Multiply the top of the stack by a matrix (top * matrix), store result in top.
+        void XM_CALLCONV MultiplyMatrix(FXMMATRIX matrix) noexcept
+        {
+            m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], matrix);
+        }
+
+        // Pre-multiply the top of the stack by a matrix (matrix * top), store result in top.
+        void XM_CALLCONV MultiplyMatrixLocal(FXMMATRIX matrix) noexcept
+        {
+            m_stack[m_current] = XMMatrixMultiply(matrix, m_stack[m_current]);
+        }
+
+        // Add a rotation about X to stack top (top * R).
+        void XM_CALLCONV RotateX(float angle) noexcept
+        {
+            XMMATRIX mat = XMMatrixRotationX(angle);
+            m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], mat);
+        }
+
+        // Pre-multiplied variant of RotateX (R * top).
+        void XM_CALLCONV RotateXLocal(float angle) noexcept
+        {
+            XMMATRIX mat = XMMatrixRotationX(angle);
+            m_stack[m_current] = XMMatrixMultiply(mat, m_stack[m_current]);
+        }
+
+        // Add a rotation about Y to stack top (top * R).
+        void XM_CALLCONV RotateY(float angle) noexcept
+        {
+            XMMATRIX mat = XMMatrixRotationY(angle);
+            m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], mat);
+        }
+
+        // Pre-multiplied variant of RotateY (R * top).
+        void XM_CALLCONV RotateYLocal(float angle) noexcept
+        {
+            XMMATRIX mat = XMMatrixRotationY(angle);
+            m_stack[m_current] = XMMatrixMultiply(mat, m_stack[m_current]);
+        }
+
+        // Add a rotation about Z to stack top (top * R).
+        void XM_CALLCONV RotateZ(float angle) noexcept
+        {
+            XMMATRIX mat = XMMatrixRotationZ(angle);
+            m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], mat);
+        }
+
+        // Pre-multiplied variant of RotateZ (R * top).
+        void XM_CALLCONV RotateZLocal(float angle) noexcept
+        {
+            XMMATRIX mat = XMMatrixRotationZ(angle);
+            m_stack[m_current] = XMMatrixMultiply(mat, m_stack[m_current]);
+        }
+
+        // Add a rotation around an axis to stack top (top * R).
+        void XM_CALLCONV RotateAxis(FXMVECTOR axis, float angle) noexcept
+        {
+            XMMATRIX mat = XMMatrixRotationAxis(axis, angle);
+            m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], mat);
+        }
+
+        // Pre-multiplied variant of RotateAxis (R * top).
+        void XM_CALLCONV RotateAxisLocal(FXMVECTOR axis, float angle) noexcept
+        {
+            XMMATRIX mat = XMMatrixRotationAxis(axis, angle);
+            m_stack[m_current] = XMMatrixMultiply(mat, m_stack[m_current]);
+        }
+
+        // Add a rotation by roll/pitch/yaw to the stack top (top * R).
+        // Note the argument order: pitch, yaw, roll.
+        void RotateRollPitchYaw(float pitch, float yaw, float roll) noexcept
+        {
+            XMMATRIX mat = XMMatrixRotationRollPitchYaw(pitch, yaw, roll);
+            m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], mat);
+        }
+
+        // Pre-multiplied variant of RotateRollPitchYaw (R * top).
+        void RotateRollPitchYawLocal(float pitch, float yaw, float roll) noexcept
+        {
+            XMMATRIX mat = XMMatrixRotationRollPitchYaw(pitch, yaw, roll);
+            m_stack[m_current] = XMMatrixMultiply(mat, m_stack[m_current]);
+        }
+
+        // Add a rotation by a quaternion to the stack top (top * R).
+        void XM_CALLCONV RotateByQuaternion(FXMVECTOR quat) noexcept
+        {
+            XMMATRIX mat = XMMatrixRotationQuaternion(quat);
+            m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], mat);
+        }
+
+        // Pre-multiplied variant of RotateByQuaternion (R * top).
+        void XM_CALLCONV RotateByQuaternionLocal(FXMVECTOR quat) noexcept
+        {
+            XMMATRIX mat = XMMatrixRotationQuaternion(quat);
+            m_stack[m_current] = XMMatrixMultiply(mat, m_stack[m_current]);
+        }
+
+        // Add a scale to the stack top (top * S).
+        void Scale(float x, float y, float z) noexcept
+        {
+            XMMATRIX mat = XMMatrixScaling(x, y, z);
+            m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], mat);
+        }
+
+        // Pre-multiplied variant of Scale (S * top).
+        void ScaleLocal(float x, float y, float z) noexcept
+        {
+            XMMATRIX mat = XMMatrixScaling(x, y, z);
+            m_stack[m_current] = XMMatrixMultiply(mat, m_stack[m_current]);
+        }
+
+        // Add a translation to the stack top (top * T).
+        void Translate(float x, float y, float z) noexcept
+        {
+            XMMATRIX mat = XMMatrixTranslation(x, y, z);
+            m_stack[m_current] = XMMatrixMultiply(m_stack[m_current], mat);
+        }
+
+        // Pre-multiplied variant of Translate (T * top).
+        void TranslateLocal(float x, float y, float z) noexcept
+        {
+            XMMATRIX mat = XMMatrixTranslation(x, y, z);
+            m_stack[m_current] = XMMatrixMultiply(mat, m_stack[m_current]);
+        }
+
+    private:
+
+        // Releases storage obtained by Allocate(); must pair with the
+        // platform-specific allocator used there.
+        struct matrix_deleter
+        {
+            void operator()(void* p) noexcept
+            {
+#ifdef _WIN32
+                _aligned_free(p);
+#else
+                free(p);
+#endif
+            }
+        };
+
+        // Replaces the storage with a 16-byte-aligned buffer of newSize
+        // matrices, copying over the existing m_stackSize entries.
+        // Throws std::bad_alloc on failure, leaving the old storage intact.
+        void Allocate(size_t newSize)
+        {
+            // Reject sizes whose byte count would wrap around size_t.
+            if (newSize > SIZE_MAX / sizeof(XMMATRIX))
+                throw std::bad_alloc();
+
+#ifdef _WIN32
+            void* ptr = _aligned_malloc(newSize * sizeof(XMMATRIX), 16);
+#else
+            // This C++17 Standard Library function is currently NOT
+            // implemented for the Microsoft Standard C++ Library.
+            void* ptr = aligned_alloc(16, newSize * sizeof(XMMATRIX));
+#endif
+            if (!ptr)
+                throw std::bad_alloc();
+
+            if (m_stack)
+            {
+                assert(newSize >= m_stackSize);
+                memcpy(ptr, m_stack.get(), sizeof(XMMATRIX) * m_stackSize);
+            }
+
+            m_stack.reset(reinterpret_cast<XMMATRIX*>(ptr));
+            m_stackSize = newSize;
+        }
+
+        size_t                                      m_stackSize;    // capacity, in matrices
+        size_t                                      m_current;      // index of the top entry
+        std::unique_ptr<XMMATRIX[], matrix_deleter> m_stack;        // aligned storage
+    };
+} // namespace DirectX
--- a/vendor/directxmath-3.19.0/README.md
+++ b/vendor/directxmath-3.19.0/README.md
@ -0,0 +1,121 @@
+![DirectX Logo](https://raw.githubusercontent.com/wiki/Microsoft/DirectXMath/X_jpg.jpg)
+
+# DirectXMath
+
+https://github.com/Microsoft/DirectXMath
+
+Copyright (c) Microsoft Corporation.
+
+**February 2024**
+
+This package contains the DirectXMath library, an all inline SIMD C++ linear algebra library for use in games and graphics apps.
+
+This code is designed to build with Visual Studio 2019 (16.11), Visual Studio 2022, or clang/LLVM for Windows. It is recommended that you make use of the latest updates.
+
+These components are designed to work without requiring any content from the legacy DirectX SDK. For details, see [Where is the DirectX SDK?](https://aka.ms/dxsdk).
+
+## Directory Layout
+
+* ``Inc\``
+
+  + DirectXMath Files (in the DirectX C++ namespace)
+
+    * DirectXMath.h - Core library
+    * DirectXPackedVector.h - Load/Store functions and types for working with various compressed GPU formats
+    * DirectXColors.h - .NET-style Color defines in sRGB and linear color space
+    * DirectXCollision.h - Bounding volume collision library
+
+* ``Extensions\``
+
+  + Advanced instruction set variants for guarded codepaths
+
+    * DirectXMathSSE3.h - SSE3
+    * DirectXMathBE.h - Supplemental SSE3 (SSSE3)
+    * DirectXMathSSE4.h - SSE4.1
+    * DirectXMathAVX.h - Advanced Vector Extensions (AVX)
+    * DirectXMathAVX2.h - Advanced Vector Extensions 2 (AVX2)
+    * DirectXMathF16C.h - Half-precision conversions (F16C)
+    * DirectXMathFMA3.h - Fused multiply-accumulate (FMA3)
+    * DirectXMathFMA4.h - Fused multiply-accumulate (FMA4)
+
+* ``SHMath\``
+
+  + Spherical Harmonics math functions
+
+    * DirectXSH.h - Header for SHMath functions
+    * DirectXSH.cpp, DirectXSHD3D11.cpp, DirectXSHD3D12.cpp - Implementation
+
+* ``XDSP\``
+
+  + XDSP.h - Digital Signal Processing helper functions
+
+* ``build\``
+
+  + Contains YAML files for the build pipelines along with some miscellaneous build files and scripts.
+
+## Documentation
+
+Documentation is available on the [Microsoft Docs](https://docs.microsoft.com/en-us/windows/desktop/dxmath/directxmath-portal). Additional information can be found on the [project wiki](https://github.com/microsoft/DirectXMath/wiki).
+
+## Compiler support
+
+Officially the library is supported with Microsoft Visual C++ 2019 or later, clang/LLVM v12 or later, and GCC 9 or later. It should also compile with the Intel C++ and MinGW compilers.
+
+When building with clang/LLVM or other GNU C compilers, the ``_XM_NO_XMVECTOR_OVERLOADS_`` control define is set because these compilers do not support creating operator overloads for the ``XMVECTOR`` type. You can choose to enable this preprocessor define explicitly to do the same thing with Visual C++ for improved portability.
+
+To build for non-Windows platforms, you need to provide a ``sal.h`` header in your include path. You can obtain an open source version from [GitHub](https://raw.githubusercontent.com/dotnet/runtime/main/src/coreclr/pal/inc/rt/sal.h).
+
+With GCC, the SAL annotation preprocessor symbols can conflict with the GNU implementation of the Standard C++ Library. The workaround is to include the system headers before including DirectXMath:
+
+```
+#include <algorithm>
+#include <utility>
+
+#include <DirectXMath.h>
+```
+
+## Notices
+
+All content and source code for this package are subject to the terms of the [MIT License](https://github.com/microsoft/DirectXMath/blob/main/LICENSE).
+
+For the latest version of DirectXMath, bug reports, etc. please visit the project site on [GitHub](https://github.com/microsoft/DirectXMath).
+
+## Release Notes
+
+* The clang/LLVM toolset currently does not respect the ``float_control`` pragma for SSE intrinsics. Therefore, the use of ``/fp:fast`` is not recommended on clang/LLVM until this issue is fixed. See [55713](https://github.com/llvm/llvm-project/issues/55713).
+
+## Support
+
+For questions, consider using [Stack Overflow](https://stackoverflow.com/questions/tagged/directxmath) with the *directxmath* tag, or the [DirectX Discord Server](https://discord.gg/directx) in the *dx12-developers* or *dx9-dx11-developers* channel.
+
+For bug reports and feature requests, please use GitHub [issues](https://github.com/microsoft/DirectXMath/issues) for this project.
+
+## Contributing
+
+This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
+
+When you submit a pull request, a CLA bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA.
+
+## Code of Conduct
+
+This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
+
+## Trademarks
+
+This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party's policies.
+
+## Credits
+
+The xboxmath library was originated by Matt Bronder with contributions from Sakphong Chanbai and David Hefner for the Xbox 360.
+
+The xnamath library for the DirectX SDK and Xbox XDK was the work of Chuck Walbourn and Becky Heineman based on xboxmath, with contributions from Jeremy Gup, Dan Haffner, Matt Lee, Casey Meekhof, Rich Sauer, Jason Strayer, and Xiaoyue Zheng.
+
+The DirectXMath library for the Windows SDK and Xbox One XDK is the work of Chuck Walbourn based on xnamath, with contributions from Darren Anderson, Matt Lee, Aaron Rodriguez Hernandez, Yuichi Ito, Reza Nourai, Rich Sauer, and Jason Strayer.
+
+Thanks to Dave Eberly for his contributions particularly in improving the transcendental functions.
+
+Thanks to Bruce Dawson for his help with the rounding functions.
+
+Thanks to Andrew Farrier for the fixes to ``XMVerifyCPUSupport`` to properly support clang.
+
+Thanks to Scott Matloff for his help in getting the library updated to use Intel SVML for VS 2019.
--- a/vendor/directxmath-3.19.0/SECURITY.md
+++ b/vendor/directxmath-3.19.0/SECURITY.md
@ -0,0 +1,41 @@
+<!-- BEGIN MICROSOFT SECURITY.MD V0.0.7 BLOCK -->
+
+## Security
+
+Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
+
+If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below.
+
+## Reporting Security Issues
+
+**Please do not report security vulnerabilities through public GitHub issues.**
+
+Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report).
+
+If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com).  If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).
+
+You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 
+
+Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
+
+  * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
+  * Full paths of source file(s) related to the manifestation of the issue
+  * The location of the affected source code (tag/branch/commit or direct URL)
+  * Any special configuration required to reproduce the issue
+  * Step-by-step instructions to reproduce the issue
+  * Proof-of-concept or exploit code (if possible)
+  * Impact of the issue, including how an attacker might exploit the issue
+
+This information will help us triage your report more quickly.
+
+If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs.
+
+## Preferred Languages
+
+We prefer all communications to be in English.
+
+## Policy
+
+Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd).
+
+<!-- END MICROSOFT SECURITY.MD BLOCK -->
--- a/vendor/directxmath-3.19.0/SHMath/DirectXSH.cpp
+++ b/vendor/directxmath-3.19.0/SHMath/DirectXSH.cpp
--- a/vendor/directxmath-3.19.0/SHMath/DirectXSH.h
+++ b/vendor/directxmath-3.19.0/SHMath/DirectXSH.h
@ -0,0 +1,72 @@
+//-------------------------------------------------------------------------------------
+// DirectXSH.h -- C++ Spherical Harmonics Math Library
+//
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/p/?LinkId=262885
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#define DIRECTX_SHMATH_VERSION 106
+
+#include <DirectXMath.h>
+
+// Public interface of the spherical-harmonics (SH) math library.
+//
+// Every coefficient buffer is a flat float array; as the SAL annotations on
+// each parameter state, a buffer for a given `order` holds order*order floats.
+// NOTE(review): the float* return values presumably point at `result`
+// (SHProjectCubeMap passes XMSHScale's return value straight into XMSHAdd as
+// an input) — confirm against DirectXSH.cpp.
+namespace DirectX
+{
+	// Inclusive bounds on `order`. The D3D11 SHProjectCubeMap implementation
+	// returns E_INVALIDARG for orders outside this range.
+	// NOTE(review): presumably the other functions expect the same range — confirm.
+	constexpr size_t XM_SH_MINORDER = 2;
+	constexpr size_t XM_SH_MAXORDER = 6;
+
+	// Writes order*order coefficients for direction `dir` into `result`.
+	float* XM_CALLCONV XMSHEvalDirection(_Out_writes_(order*order) float *result, _In_ size_t order, _In_ FXMVECTOR dir) noexcept;
+
+	// Rotation of a coefficient set: by a rotation matrix, or by an angle about Z.
+	float* XM_CALLCONV XMSHRotate(_Out_writes_(order*order) float *result, _In_ size_t order, _In_ FXMMATRIX rotMatrix, _In_reads_(order*order) const float *input) noexcept;
+
+	float* XMSHRotateZ(_Out_writes_(order*order) float *result, _In_ size_t order, _In_ float angle, _In_reads_(order*order) const float *input) noexcept;
+
+	// Arithmetic on coefficient sets. SHProjectCubeMap calls XMSHAdd and
+	// XMSHScale with `result` aliasing an input, so in-place use is expected to work.
+	float* XMSHAdd(_Out_writes_(order*order) float *result, _In_ size_t order, _In_reads_(order*order) const float *inputA, _In_reads_(order*order) const float *inputB) noexcept;
+
+	float* XMSHScale(_Out_writes_(order*order) float *result, _In_ size_t order, _In_reads_(order*order) const float *input, _In_ float scale) noexcept;
+
+	float XMSHDot(_In_ size_t order, _In_reads_(order*order) const float *inputA, _In_reads_(order*order) const float *inputB) noexcept;
+
+	float* XMSHMultiply(_Out_writes_(order*order) float *result, _In_ size_t order, _In_reads_(order*order) const float *inputF, _In_reads_(order*order) const float *inputG) noexcept;
+
+	// Fixed-order variants of XMSHMultiply. The buffer lengths 4, 9, 16, 25
+	// and 36 are order*order for orders 2 through 6.
+	float* XMSHMultiply2(_Out_writes_(4) float *result, _In_reads_(4) const float *inputF, _In_reads_(4) const float *inputG) noexcept;
+
+	float* XMSHMultiply3(_Out_writes_(9) float *result, _In_reads_(9) const float *inputF, _In_reads_(9) const float *inputG) noexcept;
+
+	float* XMSHMultiply4(_Out_writes_(16) float *result, _In_reads_(16) const float *inputF, _In_reads_(16) const float *inputG) noexcept;
+
+	float* XMSHMultiply5(_Out_writes_(25) float *result, _In_reads_(25) const float *inputF, _In_reads_(25) const float *inputG) noexcept;
+
+	float* XMSHMultiply6(_Out_writes_(36) float *result, _In_reads_(36) const float *inputF, _In_reads_(36) const float *inputG) noexcept;
+
+	// Light evaluators. Each writes one coefficient set per colour channel:
+	// resultR is required, resultG and resultB are optional (_Out_writes_opt_).
+	bool XM_CALLCONV XMSHEvalDirectionalLight(
+		_In_ size_t order, _In_ FXMVECTOR dir, _In_ FXMVECTOR color,
+		_Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept;
+
+	bool XM_CALLCONV XMSHEvalSphericalLight(
+		_In_ size_t order, _In_ FXMVECTOR pos, _In_ float radius, _In_ FXMVECTOR color,
+		_Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept;
+
+	bool XM_CALLCONV XMSHEvalConeLight(
+		_In_ size_t order, _In_ FXMVECTOR dir, _In_ float radius, _In_ FXMVECTOR color,
+		_Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept;
+
+	bool XM_CALLCONV XMSHEvalHemisphereLight(
+		_In_ size_t order, _In_ FXMVECTOR dir, _In_ FXMVECTOR topColor, _In_ FXMVECTOR bottomColor,
+		_Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept;
+
+	// Direct3D 11 cube-map projection. Declared only when a D3D11 header has
+	// been included before this one. All three result buffers are optional.
+	#if defined(__d3d11_h__) || defined(__d3d11_x_h__)
+	HRESULT SHProjectCubeMap(
+		_In_ ID3D11DeviceContext *context, _In_ size_t order, _In_ ID3D11Texture2D *cubeMap,
+		_Out_writes_opt_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept;
+	#endif
+
+	// Direct3D 12 cube-map projection. Declared only when a D3D12 header has
+	// been included before this one; takes the six faces as CPU-side subresource data.
+	#if defined(__d3d12_h__) || defined(__d3d12_x_h__) || defined(__XBOX_D3D12_X__)
+	HRESULT SHProjectCubeMap(
+		_In_ size_t order, _In_ const D3D12_RESOURCE_DESC& desc, _In_ const D3D12_SUBRESOURCE_DATA cubeMap[6],
+		_Out_writes_opt_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept;
+	#endif
+} // namespace DirectX
--- a/vendor/directxmath-3.19.0/SHMath/DirectXSHD3D11.cpp
+++ b/vendor/directxmath-3.19.0/SHMath/DirectXSHD3D11.cpp
@ -0,0 +1,385 @@
+//-------------------------------------------------------------------------------------
+// DirectXSHD3D11.cpp -- C++ Spherical Harmonics Math Library
+//
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/p/?LinkId=262885
+//-------------------------------------------------------------------------------------
+
+#ifdef _MSC_VER
+#pragma warning( disable : 4616 4619 4061 4265 4626 5039 )
+// C4616/C4619 #pragma warning warnings
+// C4061 enumerator 'identifier' in switch of enum 'enumeration' is not explicitly handled by a case label
+// C4265 class has virtual functions, but destructor is not virtual
+// C4626 assignment operator was implicitly defined as deleted
+// C5039 pointer or reference to potentially throwing function passed to extern C function under - EHc
+
+#pragma warning(push)
+#pragma warning(disable: 4365)
+#endif
+#include <d3d11_1.h>
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#include "DirectXSH.h"
+
+#include <DirectXPackedVector.h>
+
+#include <cassert>
+#include <memory>
+#include <malloc.h>
+
+#include <wrl/client.h>
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wcovered-switch-default"
+#pragma clang diagnostic ignored "-Wswitch-enum"
+#pragma clang diagnostic ignored "-Wunknown-warning-option"
+#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
+#endif
+
+using namespace DirectX;
+
+using Microsoft::WRL::ComPtr;
+
+namespace
+{
+    struct aligned_deleter { void operator()(void* p) { _aligned_free(p); } };
+
+    using ScopedAlignedArrayXMVECTOR = std::unique_ptr<DirectX::XMVECTOR, aligned_deleter>;
+
+    //-------------------------------------------------------------------------------------
+    // This code is lifted from DirectXTex http://go.microsoft.com/fwlink/?LinkId=248926
+    // If you need additional DXGI format support, see DirectXTexConvert.cpp
+    //-------------------------------------------------------------------------------------
+    // The three macros below each expand to the complete body of one `case`
+    // in LoadScanline. They rely on that function's locals: pSource, size,
+    // dPtr (write cursor) and ePtr (one past the last destination element).
+    //
+    // Common behaviour: if `size` holds at least one whole `type`, convert
+    // elements with `func` until either the remaining source bytes cannot hold
+    // a whole element or the destination is full, then return true. Otherwise
+    // return false. Both paths return, so a case never falls through.
+
+    // LOAD_SCANLINE: stores func's result unchanged.
+#define LOAD_SCANLINE( type, func )\
+        if ( size >= sizeof(type) )\
+        {\
+            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
+            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
+            {\
+                if ( dPtr >= ePtr ) break;\
+                *(dPtr++) = func( sPtr++ );\
+            }\
+            return true;\
+        }\
+        return false;
+
+    // LOAD_SCANLINE3: for three-component sources. The loaded value is merged
+    // with `defvec` through XMVectorSelect using the g_XMSelect1110 mask.
+    // NOTE(review): presumably x/y/z come from the source and w from defvec — confirm.
+#define LOAD_SCANLINE3( type, func, defvec )\
+        if ( size >= sizeof(type) )\
+        {\
+            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
+            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
+            {\
+                XMVECTOR v = func( sPtr++ );\
+                if ( dPtr >= ePtr ) break;\
+                *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1110 );\
+            }\
+            return true;\
+        }\
+        return false;
+
+    // LOAD_SCANLINE2: for two-component sources. Same as LOAD_SCANLINE3 but
+    // merges with the g_XMSelect1100 mask.
+    // NOTE(review): presumably x/y come from the source and z/w from defvec — confirm.
+#define LOAD_SCANLINE2( type, func, defvec )\
+        if ( size >= sizeof(type) )\
+        {\
+            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
+            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
+            {\
+                XMVECTOR v = func( sPtr++ );\
+                if ( dPtr >= ePtr ) break;\
+                *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1100 );\
+            }\
+            return true;\
+        }\
+        return false;
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 6101)
+#endif
+    // Converts one row of pixel data stored in `format` into XMVECTOR values.
+    //
+    //   pDestination  16-byte-aligned output array of `count` vectors.
+    //   pSource/size  source row and its length in bytes.
+    //   format        one of the float DXGI formats handled by the switch below.
+    //
+    // Returns true when the row was converted. Returns false for a null
+    // pointer, an unsupported format, or a source shorter than one element.
+    // At most `count` vectors are written, and only whole source elements are read.
+    _Success_(return)
+        bool LoadScanline(
+            _Out_writes_(count) DirectX::XMVECTOR* pDestination,
+            size_t count,
+            _In_reads_bytes_(size) LPCVOID pSource,
+            size_t size,
+            DXGI_FORMAT format)
+    {
+        assert(pDestination && count > 0 && ((reinterpret_cast<uintptr_t>(pDestination) & 0xF) == 0));
+        assert(pSource && size > 0);
+
+        using namespace DirectX::PackedVector;
+
+        XMVECTOR* __restrict dPtr = pDestination;
+        // The asserts above vanish in release builds, so reject null pointers
+        // explicitly instead of dereferencing them in the cases below.
+        if (!dPtr || !pSource)
+            return false;
+
+        const XMVECTOR* ePtr = pDestination + count;
+
+        switch (format)
+        {
+        case DXGI_FORMAT_R32G32B32A32_FLOAT:
+        {
+            // Source layout already matches XMVECTOR: copy, clamped to the destination size.
+            size_t msize = (size > (sizeof(XMVECTOR)*count)) ? (sizeof(XMVECTOR)*count) : size;
+            memcpy_s(dPtr, sizeof(XMVECTOR)*count, pSource, msize);
+        }
+        return true;
+
+        case DXGI_FORMAT_R32G32B32_FLOAT:
+            LOAD_SCANLINE3(XMFLOAT3, XMLoadFloat3, g_XMIdentityR3)
+
+        case DXGI_FORMAT_R16G16B16A16_FLOAT:
+            LOAD_SCANLINE(XMHALF4, XMLoadHalf4)
+
+        case DXGI_FORMAT_R32G32_FLOAT:
+            LOAD_SCANLINE2(XMFLOAT2, XMLoadFloat2, g_XMIdentityR3)
+
+        case DXGI_FORMAT_R11G11B10_FLOAT:
+            LOAD_SCANLINE3(XMFLOAT3PK, XMLoadFloat3PK, g_XMIdentityR3)
+
+        case DXGI_FORMAT_R16G16_FLOAT:
+            LOAD_SCANLINE2(XMHALF2, XMLoadHalf2, g_XMIdentityR3)
+
+        case DXGI_FORMAT_R32_FLOAT:
+            if (size >= sizeof(float))
+            {
+                const float* __restrict sPtr = reinterpret_cast<const float*>(pSource);
+                // Bound the loop by whole elements, as the LOAD_SCANLINE* macros do,
+                // so a trailing partial float is never read when size is not a
+                // multiple of sizeof(float).
+                for (size_t icount = 0; icount < (size - sizeof(float) + 1); icount += sizeof(float))
+                {
+                    if (dPtr >= ePtr) break;
+                    XMVECTOR v = XMLoadFloat(sPtr++);
+                    *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v, g_XMSelect1000);
+                }
+                return true;
+            }
+            return false;
+
+        case DXGI_FORMAT_R16_FLOAT:
+            if (size >= sizeof(HALF))
+            {
+                const HALF * __restrict sPtr = reinterpret_cast<const HALF*>(pSource);
+                // Same whole-element bound as above, for 16-bit halves.
+                for (size_t icount = 0; icount < (size - sizeof(HALF) + 1); icount += sizeof(HALF))
+                {
+                    if (dPtr >= ePtr) break;
+                    *(dPtr++) = XMVectorSet(XMConvertHalfToFloat(*sPtr++), 0.f, 0.f, 1.f);
+                }
+                return true;
+            }
+            return false;
+
+        default:
+            return false;
+        }
+    }
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+} // namespace anonymous
+
+//-------------------------------------------------------------------------------------
+// Projects a function represented in a cube map into spherical harmonics.
+//
+// http://msdn.microsoft.com/en-us/library/windows/desktop/ff476300.aspx
+//
+// Parameters:
+//   context   device context used to copy and map the texture (required).
+//   order     SH order; must lie in [XM_SH_MINORDER, XM_SH_MAXORDER].
+//   cubeMap   source texture (required): a square, non-multisampled array of
+//             exactly 6 slices in one of the float formats listed below.
+//   resultR/G/B  optional output buffers of order*order floats each; a null
+//             pointer skips that channel.
+//
+// Returns:
+//   S_OK           on success.
+//   E_INVALIDARG   for a null context/cubeMap or an out-of-range order.
+//   E_FAIL         for an unsuitable texture layout or format, or when a row
+//                  cannot be converted.
+//   E_OUTOFMEMORY  when the scanline buffer cannot be allocated.
+//   Any failure HRESULT from CreateTexture2D or Map is passed through.
+//
+// Non-null result buffers are zeroed before accumulation, so they may hold
+// partial sums if the function fails part-way through the faces.
+//-------------------------------------------------------------------------------------
+_Use_decl_annotations_
+HRESULT DirectX::SHProjectCubeMap(
+    ID3D11DeviceContext *context,
+    size_t order,
+    ID3D11Texture2D *cubeMap,
+    float *resultR,
+    float *resultG,
+    float* resultB) noexcept
+{
+    if (!context || !cubeMap)
+        return E_INVALIDARG;
+
+    if (order < XM_SH_MINORDER || order > XM_SH_MAXORDER)
+        return E_INVALIDARG;
+
+    D3D11_TEXTURE2D_DESC desc;
+    cubeMap->GetDesc(&desc);
+
+    // Must be six square, single-sampled faces.
+    if ((desc.ArraySize != 6)
+        || (desc.Width != desc.Height)
+        || (desc.SampleDesc.Count > 1))
+        return E_FAIL;
+
+    // Keep this list in sync with the formats LoadScanline can convert.
+    switch (desc.Format)
+    {
+    case DXGI_FORMAT_R32G32B32A32_FLOAT:
+    case DXGI_FORMAT_R32G32B32_FLOAT:
+    case DXGI_FORMAT_R16G16B16A16_FLOAT:
+    case DXGI_FORMAT_R32G32_FLOAT:
+    case DXGI_FORMAT_R11G11B10_FLOAT:
+    case DXGI_FORMAT_R16G16_FLOAT:
+    case DXGI_FORMAT_R32_FLOAT:
+    case DXGI_FORMAT_R16_FLOAT:
+        // See LoadScanline to support more pixel formats
+        break;
+
+    default:
+        return E_FAIL;
+    }
+
+    //--- Create a staging resource copy (if needed) to be able to read data
+    // `texture` is a non-owning pointer: it refers either to the caller's
+    // cubeMap or to `staging`, whose ComPtr releases it on every return path.
+    ID3D11Texture2D* texture = nullptr;
+
+    ComPtr<ID3D11Texture2D> staging;
+    if (!(desc.CPUAccessFlags & D3D11_CPU_ACCESS_READ))
+    {
+        D3D11_TEXTURE2D_DESC sdesc = desc;
+        sdesc.BindFlags = 0;
+        sdesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
+        sdesc.Usage = D3D11_USAGE_STAGING;
+
+        ComPtr<ID3D11Device> device;
+        context->GetDevice(&device);
+
+        HRESULT hr = device->CreateTexture2D(&sdesc, nullptr, &staging);
+        if (FAILED(hr))
+            return hr;
+
+        context->CopyResource(staging.Get(), cubeMap);
+
+        texture = staging.Get();
+    }
+    else
+        texture = cubeMap;
+
+    assert(texture != nullptr);
+
+    //--- Setup for SH projection
+    // One row of converted pixels, reused for every row of every face.
+    ScopedAlignedArrayXMVECTOR scanline(reinterpret_cast<XMVECTOR*>(_aligned_malloc(sizeof(XMVECTOR)*desc.Width, 16)));
+    if (!scanline)
+        return E_OUTOFMEMORY;
+
+    assert(desc.Width > 0);
+    float fSize = static_cast<float>(desc.Width);
+    float fPicSize = 1.0f / fSize;
+
+    // index from [0,W-1], f(0) maps to -1 + 1/W, f(W-1) maps to 1 - 1/W
+    // linear function x*S + B, 1st constraint means B is (-1+1/W), plug into
+    // second and solve for S: S = 2*(1-1/W)/(W-1). The old code that did
+    // this was incorrect - but only for computing the differential solid
+    // angle, where the final value was 1.0 instead of 1-1/W...
+
+    float fB = -1.0f + 1.0f / fSize;
+    // A 1x1 face has a single texel at the centre, so the slope is irrelevant.
+    float fS = (desc.Width > 1) ? (2.0f*(1.0f - 1.0f / fSize) / (fSize - 1.0f)) : 0.f;
+
+    // clear out accumulation variables
+    // fWt sums the per-texel weights and is used for the final normalization.
+    float fWt = 0.0f;
+
+    if (resultR)
+        memset(resultR, 0, sizeof(float)*order*order);
+    if (resultG)
+        memset(resultG, 0, sizeof(float)*order*order);
+    if (resultB)
+        memset(resultB, 0, sizeof(float)*order*order);
+
+    // shBuff holds the basis evaluation for the current texel;
+    // shBuffB is scratch for the colour-weighted copy. Both are sized for the largest order.
+    float shBuff[XM_SH_MAXORDER*XM_SH_MAXORDER] = {};
+    float shBuffB[XM_SH_MAXORDER*XM_SH_MAXORDER] = {};
+
+    //--- Process each face of the cubemap
+    for (UINT face = 0; face < 6; ++face)
+    {
+        // Only mip level 0 of each face is read.
+        UINT dindex = D3D11CalcSubresource(0, face, desc.MipLevels);
+
+        D3D11_MAPPED_SUBRESOURCE mapped;
+        HRESULT hr = context->Map(texture, dindex, D3D11_MAP_READ, 0, &mapped);
+        if (FAILED(hr))
+            return hr;
+
+        const uint8_t *pSrc = reinterpret_cast<const uint8_t*>(mapped.pData);
+        for (UINT y = 0; y < desc.Height; ++y)
+        {
+            XMVECTOR* ptr = scanline.get();
+            if (!LoadScanline(ptr, desc.Width, pSrc, mapped.RowPitch, desc.Format))
+            {
+                // Unmap before bailing out so the subresource is not left mapped.
+                context->Unmap(texture, dindex);
+                return E_FAIL;
+            }
+
+            const float v = float(y) * fS + fB;
+
+            XMVECTOR* pixel = ptr;
+            for (UINT x = 0; x < desc.Width; ++x, ++pixel)
+            {
+                const float u = float(x) * fS + fB;
+
+                // Direction through the centre of texel (x, y) on this face.
+                float ix, iy, iz;
+                switch (face)
+                {
+                case 0: // Positive X
+                    iz = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize;
+                    iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
+                    ix = 1.0f;
+                    break;
+
+                case 1: // Negative X
+                    iz = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
+                    iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
+                    ix = -1;
+                    break;
+
+                case 2: // Positive Y
+                    iz = -1.0f + (2.0f * float(y) + 1.0f) * fPicSize;
+                    iy = 1.0f;
+                    ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
+                    break;
+
+                case 3: // Negative Y
+                    iz = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
+                    iy = -1.0f;
+                    ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
+                    break;
+
+                case 4: // Positive Z
+                    iz = 1.0f;
+                    iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
+                    ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
+                    break;
+
+                case 5: // Negative Z
+                    iz = -1.0f;
+                    iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
+                    ix = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize;
+                    break;
+
+                default:
+                    ix = iy = iz = 0.f;
+                    assert(false);
+                    break;
+                }
+
+                XMVECTOR dir = XMVectorSet(ix, iy, iz, 0);
+                dir = XMVector3Normalize(dir);
+
+                // Differential solid angle weight for this texel: 4 / (1 + u^2 + v^2)^(3/2).
+                const float fDiffSolid = 4.0f / ((1.0f + u * u + v * v)*sqrtf(1.0f + u * u + v * v));
+                fWt += fDiffSolid;
+
+                XMSHEvalDirection(shBuff, order, dir);
+
+                XMFLOAT3A clr;
+                XMStoreFloat3A(&clr, *pixel);
+
+                // result += basis * (channel value * weight), per requested channel.
+                if (resultR) XMSHAdd(resultR, order, resultR, XMSHScale(shBuffB, order, shBuff, clr.x*fDiffSolid));
+                if (resultG) XMSHAdd(resultG, order, resultG, XMSHScale(shBuffB, order, shBuff, clr.y*fDiffSolid));
+                if (resultB) XMSHAdd(resultB, order, resultB, XMSHScale(shBuffB, order, shBuff, clr.z*fDiffSolid));
+            }
+
+            // Advance by the mapped pitch, which may exceed Width * bytes-per-pixel.
+            pSrc += mapped.RowPitch;
+        }
+
+        context->Unmap(texture, dindex);
+    }
+
+    // Rescale so the accumulated weights sum to the full sphere, 4*pi.
+    const float fNormProj = (4.0f*XM_PI) / fWt;
+
+    if (resultR) XMSHScale(resultR, order, resultR, fNormProj);
+    if (resultG) XMSHScale(resultG, order, resultG, fNormProj);
+    if (resultB) XMSHScale(resultB, order, resultB, fNormProj);
+
+    return S_OK;
+}
--- a/vendor/directxmath-3.19.0/SHMath/DirectXSHD3D12.cpp
+++ b/vendor/directxmath-3.19.0/SHMath/DirectXSHD3D12.cpp
@ -0,0 +1,341 @@
+//-------------------------------------------------------------------------------------
+// DirectXSHD3D12.cpp -- C++ Spherical Harmonics Math Library
+//
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/p/?LinkId=262885
+//-------------------------------------------------------------------------------------
+
+#ifdef _MSC_VER
+#pragma warning( disable : 4616 4619 4061 4265 4626 5039 )
+// C4616/C4619 #pragma warning warnings
+// C4061 enumerator 'identifier' in switch of enum 'enumeration' is not explicitly handled by a case label
+// C4265 class has virtual functions, but destructor is not virtual
+// C4626 assignment operator was implicitly defined as deleted
+// C5039 pointer or reference to potentially throwing function passed to extern C function under -EHc
+#endif
+
+#include <d3d12.h>
+
+#include "DirectXSH.h"
+
+#include <DirectXPackedVector.h>
+
+#include <cassert>
+#include <memory>
+#include <malloc.h>
+
+#include <wrl/client.h>
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wcovered-switch-default"
+#pragma clang diagnostic ignored "-Wswitch-enum"
+#pragma clang diagnostic ignored "-Wunknown-warning-option"
+#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
+#endif
+
+using namespace DirectX;
+
+using Microsoft::WRL::ComPtr;
+
+namespace
+{
+    // Deleter for std::unique_ptr that releases a buffer with _aligned_free.
+    struct aligned_deleter { void operator()(void* p) { _aligned_free(p); } };
+
+    // Owning pointer to an XMVECTOR buffer that is released through aligned_deleter.
+    using ScopedAlignedArrayXMVECTOR = std::unique_ptr<DirectX::XMVECTOR, aligned_deleter>;
+
+    //-------------------------------------------------------------------------------------
+    // This code is lifted from DirectXTex http://go.microsoft.com/fwlink/?LinkId=248926
+    // If you need additional DXGI format support, see DirectXTexConvert.cpp
+    //-------------------------------------------------------------------------------------
+
+    // The LOAD_SCANLINE* macros are only meant to be expanded inside LoadScanline: they use
+    // its locals (pSource, size, dPtr, ePtr) and return from it. Each one walks the source in
+    // whole elements of 'type' (the loop bound "size - sizeof(type) + 1" stops before a
+    // trailing partial element), converts every element with 'func', and stops early once
+    // the destination is full. They return true if at least one whole element fits in
+    // 'size', false otherwise.
+
+    // Stores the converted vector unchanged.
+#define LOAD_SCANLINE( type, func )\
+        if ( size >= sizeof(type) )\
+        {\
+            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
+            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
+            {\
+                if ( dPtr >= ePtr ) break;\
+                *(dPtr++) = func( sPtr++ );\
+            }\
+            return true;\
+        }\
+        return false;
+
+    // Keeps x, y, z from the converted vector and takes w from 'defvec'.
+#define LOAD_SCANLINE3( type, func, defvec )\
+        if ( size >= sizeof(type) )\
+        {\
+            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
+            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
+            {\
+                XMVECTOR v = func( sPtr++ );\
+                if ( dPtr >= ePtr ) break;\
+                *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1110 );\
+            }\
+            return true;\
+        }\
+        return false;
+
+    // Keeps x, y from the converted vector and takes z, w from 'defvec'.
+#define LOAD_SCANLINE2( type, func, defvec )\
+        if ( size >= sizeof(type) )\
+        {\
+            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
+            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
+            {\
+                XMVECTOR v = func( sPtr++ );\
+                if ( dPtr >= ePtr ) break;\
+                *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1100 );\
+            }\
+            return true;\
+        }\
+        return false;
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 6101)
+#endif
+    // Converts one row of pixels stored as 'format' into XMVECTOR values.
+    //
+    // pDestination - receives up to 'count' vectors; asserted to be non-null and 16-byte aligned.
+    // count        - capacity of pDestination, in vectors.
+    // pSource      - start of the source row.
+    // size         - number of readable bytes at pSource.
+    // format       - one of the floating-point DXGI formats handled by the switch below.
+    //
+    // Channels the source format does not carry are taken from g_XMIdentityR3 (the R16_FLOAT
+    // path writes 0, 0, 0, 1 explicitly).
+    //
+    // Returns false when pDestination is null, the format is not handled, or 'size' is
+    // smaller than one source element. Returns true otherwise, even if the source held
+    // fewer than 'count' elements (the remaining destination entries are left untouched).
+    _Success_(return)
+        bool LoadScanline(
+            _Out_writes_(count) DirectX::XMVECTOR* pDestination,
+            size_t count,
+            _In_reads_bytes_(size) LPCVOID pSource,
+            size_t size,
+            DXGI_FORMAT format)
+    {
+        assert(pDestination && count > 0 && ((reinterpret_cast<uintptr_t>(pDestination) & 0xF) == 0));
+        assert(pSource && size > 0);
+
+        using namespace DirectX::PackedVector;
+
+        XMVECTOR* __restrict dPtr = pDestination;
+        if (!dPtr)
+            return false;
+
+        // One past the last writable destination element.
+        const XMVECTOR* ePtr = pDestination + count;
+
+        switch (format)
+        {
+        case DXGI_FORMAT_R32G32B32A32_FLOAT:
+        {
+            // Same layout as XMVECTOR: copy the bytes, clamped to the destination capacity.
+            size_t msize = (size > (sizeof(XMVECTOR)*count)) ? (sizeof(XMVECTOR)*count) : size;
+            memcpy_s(dPtr, sizeof(XMVECTOR)*count, pSource, msize);
+        }
+        return true;
+
+        case DXGI_FORMAT_R32G32B32_FLOAT:
+            LOAD_SCANLINE3(XMFLOAT3, XMLoadFloat3, g_XMIdentityR3)
+
+        case DXGI_FORMAT_R16G16B16A16_FLOAT:
+            LOAD_SCANLINE(XMHALF4, XMLoadHalf4)
+
+        case DXGI_FORMAT_R32G32_FLOAT:
+            LOAD_SCANLINE2(XMFLOAT2, XMLoadFloat2, g_XMIdentityR3)
+
+        case DXGI_FORMAT_R11G11B10_FLOAT:
+            LOAD_SCANLINE3(XMFLOAT3PK, XMLoadFloat3PK, g_XMIdentityR3)
+
+        case DXGI_FORMAT_R16G16_FLOAT:
+            LOAD_SCANLINE2(XMHALF2, XMLoadHalf2, g_XMIdentityR3)
+
+        case DXGI_FORMAT_R32_FLOAT:
+            if (size >= sizeof(float))
+            {
+                const float* __restrict sPtr = reinterpret_cast<const float*>(pSource);
+                // Only whole floats are read: the bound matches the LOAD_SCANLINE* macros so a
+                // 'size' that is not a multiple of sizeof(float) cannot cause a read past the
+                // end of the source row. The subtraction cannot wrap because size >= sizeof(float).
+                for (size_t icount = 0; icount < (size - sizeof(float) + 1); icount += sizeof(float))
+                {
+                    XMVECTOR v = XMLoadFloat(sPtr++);
+                    if (dPtr >= ePtr) break;
+                    *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v, g_XMSelect1000);
+                }
+                return true;
+            }
+            return false;
+
+        case DXGI_FORMAT_R16_FLOAT:
+            if (size >= sizeof(HALF))
+            {
+                const HALF * __restrict sPtr = reinterpret_cast<const HALF*>(pSource);
+                // Only whole halves are read (see the R32_FLOAT case above).
+                for (size_t icount = 0; icount < (size - sizeof(HALF) + 1); icount += sizeof(HALF))
+                {
+                    if (dPtr >= ePtr) break;
+                    *(dPtr++) = XMVectorSet(XMConvertHalfToFloat(*sPtr++), 0.f, 0.f, 1.f);
+                }
+                return true;
+            }
+            return false;
+
+        default:
+            return false;
+        }
+    }
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+} // namespace anonymous
+
+//-------------------------------------------------------------------------------------
+// Projects a function represented in a cube map into spherical harmonics.
+//
+// order       - SH order; E_INVALIDARG unless it lies in [XM_SH_MINORDER, XM_SH_MAXORDER].
+// desc        - description of the cube map: must be a 2D texture with 6 array slices,
+//               square, non-empty, not multisampled, and in one of the floating-point
+//               formats accepted below; otherwise E_FAIL.
+// cubeMap     - one entry per face, visited in the order +X, -X, +Y, -Y, +Z, -Z. Rows are
+//               read RowPitch bytes apart. A null pData yields E_POINTER.
+// resultR/G/B - optional outputs (each may be null). Every non-null buffer is zeroed and
+//               then receives order*order coefficients.
+//
+// Returns S_OK on success, E_OUTOFMEMORY if the scanline buffer cannot be allocated.
+// When E_POINTER or a scanline failure is reported part-way through, the non-null result
+// buffers have already been cleared and may hold partial sums.
+//
+// http://msdn.microsoft.com/en-us/library/windows/desktop/ff476300.aspx
+//-------------------------------------------------------------------------------------
+_Use_decl_annotations_
+HRESULT DirectX::SHProjectCubeMap(
+    size_t order,
+    const D3D12_RESOURCE_DESC& desc,
+    const D3D12_SUBRESOURCE_DATA cubeMap[6],
+    float *resultR,
+    float *resultG,
+    float *resultB) noexcept
+{
+    if (order < XM_SH_MINORDER || order > XM_SH_MAXORDER)
+        return E_INVALIDARG;
+
+    if (desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE2D
+        || (desc.DepthOrArraySize != 6)
+        || (desc.Width != desc.Height)
+        || (desc.SampleDesc.Count > 1))
+        return E_FAIL;
+
+    switch (desc.Format)
+    {
+    case DXGI_FORMAT_R32G32B32A32_FLOAT:
+    case DXGI_FORMAT_R32G32B32_FLOAT:
+    case DXGI_FORMAT_R16G16B16A16_FLOAT:
+    case DXGI_FORMAT_R32G32_FLOAT:
+    case DXGI_FORMAT_R11G11B10_FLOAT:
+    case DXGI_FORMAT_R16G16_FLOAT:
+    case DXGI_FORMAT_R32_FLOAT:
+    case DXGI_FORMAT_R16_FLOAT:
+        // See LoadScanline to support more pixel formats
+        break;
+
+    default:
+        return E_FAIL;
+    }
+
+    // An empty face contributes no texels: the accumulated weight would stay at zero and
+    // the final 4*pi/fWt normalisation would turn every coefficient into NaN while still
+    // reporting success. Reject it like the other unusable descriptions.
+    if (desc.Width == 0)
+        return E_FAIL;
+
+    //--- Setup for SH projection
+    ScopedAlignedArrayXMVECTOR scanline(reinterpret_cast<XMVECTOR*>(_aligned_malloc(static_cast<size_t>(sizeof(XMVECTOR)*desc.Width), 16)));
+    if (!scanline)
+        return E_OUTOFMEMORY;
+
+    float fSize = static_cast<float>(desc.Width);
+    float fPicSize = 1.0f / fSize;
+
+    // index from [0,W-1], f(0) maps to -1 + 1/W, f(W-1) maps to 1 - 1/w
+    // linear function x*S +B, 1st constraint means B is (-1+1/W), plug into
+    // second and solve for S: S = 2*(1-1/W)/(W-1). The old code that did
+    // this was incorrect - but only for computing the differential solid
+    // angle, where the final value was 1.0 instead of 1-1/w...
+
+    float fB = -1.0f + 1.0f / fSize;
+    float fS = (desc.Width > 1) ? (2.0f*(1.0f - 1.0f / fSize) / (fSize - 1.0f)) : 0.f;
+
+    // clear out accumulation variables
+    float fWt = 0.0f;
+
+    if (resultR)
+        memset(resultR, 0, sizeof(float)*order*order);
+    if (resultG)
+        memset(resultG, 0, sizeof(float)*order*order);
+    if (resultB)
+        memset(resultB, 0, sizeof(float)*order*order);
+
+    // shBuff holds the basis evaluated for the current texel direction; shBuffB is scratch
+    // space for the colour-weighted copy that is added into each result.
+    float shBuff[XM_SH_MAXORDER*XM_SH_MAXORDER] = {};
+    float shBuffB[XM_SH_MAXORDER*XM_SH_MAXORDER] = {};
+
+    //--- Process each face of the cubemap
+    for (UINT face = 0; face < 6; ++face)
+    {
+        if (!cubeMap[face].pData)
+            return E_POINTER;
+
+        const uint8_t *pSrc = reinterpret_cast<const uint8_t*>(cubeMap[face].pData);
+        for (UINT y = 0; y < desc.Height; ++y)
+        {
+            // Convert the current row to float4 pixels.
+            XMVECTOR* ptr = scanline.get();
+            if (!LoadScanline(ptr, static_cast<size_t>(desc.Width), pSrc, static_cast<size_t>(cubeMap[face].RowPitch), desc.Format))
+            {
+                return E_FAIL;
+            }
+
+            const float v = float(y) * fS + fB;
+
+            XMVECTOR* pixel = ptr;
+            for (UINT x = 0; x < desc.Width; ++x, ++pixel)
+            {
+                const float u = float(x) * fS + fB;
+
+                // Direction through the centre of texel (x, y) on this face.
+                float ix, iy, iz;
+                switch (face)
+                {
+                case 0: // Positive X
+                    iz = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize;
+                    iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
+                    ix = 1.0f;
+                    break;
+
+                case 1: // Negative X
+                    iz = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
+                    iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
+                    ix = -1;
+                    break;
+
+                case 2: // Positive Y
+                    iz = -1.0f + (2.0f * float(y) + 1.0f) * fPicSize;
+                    iy = 1.0f;
+                    ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
+                    break;
+
+                case 3: // Negative Y
+                    iz = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
+                    iy = -1.0f;
+                    ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
+                    break;
+
+                case 4: // Positive Z
+                    iz = 1.0f;
+                    iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
+                    ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
+                    break;
+
+                case 5: // Negative Z
+                    iz = -1.0f;
+                    iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
+                    ix = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize;
+                    break;
+
+                default:
+                    ix = iy = iz = 0.f;
+                    assert(false);
+                    break;
+                }
+
+                XMVECTOR dir = XMVectorSet(ix, iy, iz, 0);
+                dir = XMVector3Normalize(dir);
+
+                // Differential solid angle weight of this texel.
+                const float fDiffSolid = 4.0f / ((1.0f + u * u + v * v)*sqrtf(1.0f + u * u + v * v));
+                fWt += fDiffSolid;
+
+                XMSHEvalDirection(shBuff, order, dir);
+
+                XMFLOAT3A clr;
+                XMStoreFloat3A(&clr, *pixel);
+
+                // Accumulate basis * colour * weight into every requested channel.
+                if (resultR) XMSHAdd(resultR, order, resultR, XMSHScale(shBuffB, order, shBuff, clr.x*fDiffSolid));
+                if (resultG) XMSHAdd(resultG, order, resultG, XMSHScale(shBuffB, order, shBuff, clr.y*fDiffSolid));
+                if (resultB) XMSHAdd(resultB, order, resultB, XMSHScale(shBuffB, order, shBuff, clr.z*fDiffSolid));
+            }
+
+            pSrc += cubeMap[face].RowPitch;
+        }
+    }
+
+    // Rescale so that the accumulated weights sum to the area of the unit sphere (4*pi).
+    const float fNormProj = (4.0f*XM_PI) / fWt;
+
+    if (resultR) XMSHScale(resultR, order, resultR, fNormProj);
+    if (resultG) XMSHScale(resultG, order, resultG, fNormProj);
+    if (resultB) XMSHScale(resultB, order, resultB, fNormProj);
+
+    return S_OK;
+}
--- a/vendor/directxmath-3.19.0/Stereo3D/Stereo3DMatrixHelper.cpp
+++ b/vendor/directxmath-3.19.0/Stereo3D/Stereo3DMatrixHelper.cpp
@ -0,0 +1,257 @@
+//-------------------------------------------------------------------------------------
+// Stereo3DMatrixHelper.cpp -- SIMD C++ Math helper for Stereo 3D matrices
+//
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//-------------------------------------------------------------------------------------
+
+#include "Stereo3DMatrixHelper.h"
+
+using namespace DirectX;
+
+namespace
+{
+    // Computes the quantities shared by the left- and right-handed stereo projections.
+    //
+    // Always writes:
+    //   *zNearHeight        - cos(FovAngleY / 2) / sin(FovAngleY / 2)
+    //   *zNearWidth         - *zNearHeight / AspectRatio
+    //   *fVirtualProjection - horizontal offset derived from the near-plane eye separation
+    //
+    // Returns false when the pixel resolution is too low to offer a comfortable stereo
+    // experience (the outputs are still written), true otherwise.
+    inline bool StereoProjectionHelper
+    (
+        const STEREO_PARAMETERS& stereoParameters,
+        _Out_ float* fVirtualProjection,
+        _Out_ float* zNearWidth,
+        _Out_ float* zNearHeight,
+        float FovAngleY,
+        float AspectRatio,
+        float NearZ
+    )
+    {
+        // Most people have difficulty fusing images into 3D when the separation equals even
+        // just the human average, so the interocular distance is halved to let a larger
+        // subset of viewers see full 3D.
+        //
+        // The conservative setting should always be used. Its only drawback is a less
+        // impressive 3D effect on smaller screens; to simulate a larger screen, use the
+        // liberal settings (debug only).
+        //
+        //   Conservative: max acuity angle 0.8 degrees, interocular distance 1.25 inches
+        //   Liberal:      max acuity angle 1.6 degrees, interocular distance 2.5 inches
+        //
+        // The maximum visual acuity angle allowed is 3.2 degrees for a physical scene and
+        // 1.6 degrees for a virtual one, so an object may not appear closer to the viewer
+        // than 1.6 degrees (halved for most half-angle calculations).
+
+        static const float maxStereoDistance = 780; // inches (should be between 10 and 20m)
+        static const float maxVisualAcuityAngle = 1.6f * (XM_PI / 180.0f);  // radians
+        static const float interocularDistance = 1.25f; // inches
+
+        const float exaggeration = stereoParameters.fStereoExaggerationFactor;
+
+        // Physical display extents, treating the display size as the diagonal.
+        const float displayHeight = stereoParameters.fDisplaySizeInches / sqrtf(AspectRatio * AspectRatio + 1.0f);
+        const float displayWidth = displayHeight * AspectRatio;
+        const float halfPixelWidth = displayWidth / stereoParameters.fPixelResolutionWidth * 0.5f;
+
+        // Half-quantities scaled by the exaggeration factor.
+        const float halfInterocular = 0.5f * interocularDistance * exaggeration;
+        const float halfMaxAcuityAngle = maxVisualAcuityAngle * 0.5f * exaggeration;
+        const float halfFovAngle = FovAngleY / 2.0f;
+
+        // Pull the maximum stereo distance in by the distance that half a pixel subtends.
+        const float maxSeparationAngle = atanf(halfInterocular / maxStereoDistance);
+        const float maxSeparationDistance = halfPixelWidth / tanf(maxSeparationAngle);
+        const float refinedMaxStereoDistance = maxStereoDistance - maxSeparationDistance;
+
+        // Pixel resolution is too low to offer a comfortable stereo experience when either
+        // condition inside the negation holds.
+        const bool comfortable =
+            !(refinedMaxStereoDistance < 0.0f || maxSeparationDistance > 0.1f * maxStereoDistance);
+
+        const float refinedMaxSeparationAngle = atanf(halfInterocular / refinedMaxStereoDistance);
+        const float physicalZNearDistance = halfInterocular / tanf(halfMaxAcuityAngle);
+        const float nearZSeparation =
+            tanf(refinedMaxSeparationAngle) * (refinedMaxStereoDistance - physicalZNearDistance);
+
+        *zNearHeight = cosf(halfFovAngle) / sinf(halfFovAngle);
+        *zNearWidth = *zNearHeight / AspectRatio;
+        *fVirtualProjection = (nearZSeparation * NearZ * (*zNearWidth * 4.0f)) / (2.0f * NearZ);
+
+        return comfortable;
+    }
+}
+
+//------------------------------------------------------------------------------
+
+void DirectX::StereoCreateDefaultParameters
+(
+    STEREO_PARAMETERS& stereoParameters
+)
+{
+    // Neutral stereo tuning: full separation, no exaggeration.
+    stereoParameters.fStereoExaggerationFactor = 1.0f;
+    stereoParameters.fStereoSeparationFactor = 1.0f;
+
+    // Assumed setup: a 22" LCD monitor rendering 1920x1200, viewed from 2' away.
+    stereoParameters.fDisplaySizeInches = 22.0f;
+    stereoParameters.fPixelResolutionWidth = 1920.0f;
+    stereoParameters.fPixelResolutionHeight = 1200.0f;
+    stereoParameters.fViewerDistanceInches = 24.0f;
+}
+
+//------------------------------------------------------------------------------
+
+XMMATRIX DirectX::StereoProjectionFovLH
+(
+    _In_opt_ const STEREO_PARAMETERS* pStereoParameters,
+    STEREO_CHANNEL Channel,
+    float FovAngleY,
+    float AspectRatio,
+    float NearZ,
+    float FarZ,
+    STEREO_MODE StereoMode
+)
+{
+    // Builds the left-handed perspective projection for one eye. A null pStereoParameters
+    // selects the values produced by StereoCreateDefaultParameters.
+    assert(Channel == STEREO_CHANNEL_LEFT || Channel == STEREO_CHANNEL_RIGHT);
+    assert(StereoMode == STEREO_MODE_NORMAL || StereoMode == STEREO_MODE_INVERTED);
+    assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
+    assert(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f));
+    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
+
+    STEREO_PARAMETERS fallbackParameters = {};
+    if (!pStereoParameters)
+    {
+        StereoCreateDefaultParameters(fallbackParameters);
+        pStereoParameters = &fallbackParameters;
+    }
+
+    assert(pStereoParameters->fStereoSeparationFactor >= 0.0f && pStereoParameters->fStereoSeparationFactor <= 1.0f);
+    assert(pStereoParameters->fStereoExaggerationFactor >= 1.0f && pStereoParameters->fStereoExaggerationFactor <= 2.0f);
+
+    // Only the virtual projection offset is consumed here; the near-plane extents are
+    // required outputs of the helper, and its comfort verdict is not used.
+    float eyeOffset = 0.0f;
+    float nearWidth = 0.0f;
+    float nearHeight = 0.0f;
+    StereoProjectionHelper(*pStereoParameters, &eyeOffset, &nearWidth, &nearHeight, FovAngleY, AspectRatio, NearZ);
+
+    // Incorporate the developer-defined bias.
+    eyeOffset *= pStereoParameters->fStereoSeparationFactor;
+
+    const float invertedAngle = atanf(eyeOffset / (2.0f * NearZ));
+
+    const XMMATRIX proj = XMMatrixPerspectiveFovLH(FovAngleY, AspectRatio, NearZ, FarZ);
+
+    // Any channel other than the left one is treated as the right eye.
+    const bool leftEye = (Channel == STEREO_CHANNEL_LEFT);
+
+    // By applying a translation, we are forcing our cameras to be parallel: the left eye
+    // is shifted by -offset along X, the right eye by +offset.
+    const XMMATRIX trans = XMMatrixTranslation(leftEye ? -eyeOffset : eyeOffset, 0, 0);
+
+    if (StereoMode > STEREO_MODE_NORMAL)
+    {
+        // Inverted mode additionally rotates about Y, with the opposite sign per eye.
+        const XMMATRIX rots = XMMatrixRotationY(leftEye ? invertedAngle : -invertedAngle);
+        return XMMatrixMultiply(XMMatrixMultiply(rots, trans), proj);
+    }
+
+    return XMMatrixMultiply(trans, proj);
+}
+
+//------------------------------------------------------------------------------
+
+XMMATRIX DirectX::StereoProjectionFovRH
+(
+    _In_opt_ const STEREO_PARAMETERS* pStereoParameters,
+    STEREO_CHANNEL Channel,
+    float FovAngleY,
+    float AspectRatio,
+    float NearZ,
+    float FarZ,
+    STEREO_MODE StereoMode
+)
+{
+    // Builds the right-handed perspective projection for one eye. A null pStereoParameters
+    // selects the values produced by StereoCreateDefaultParameters.
+    assert(Channel == STEREO_CHANNEL_LEFT || Channel == STEREO_CHANNEL_RIGHT);
+    assert(StereoMode == STEREO_MODE_NORMAL || StereoMode == STEREO_MODE_INVERTED);
+    assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
+    assert(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f));
+    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
+
+    STEREO_PARAMETERS fallbackParameters = {};
+    if (!pStereoParameters)
+    {
+        StereoCreateDefaultParameters(fallbackParameters);
+        pStereoParameters = &fallbackParameters;
+    }
+
+    assert(pStereoParameters->fStereoSeparationFactor >= 0.0f && pStereoParameters->fStereoSeparationFactor <= 1.0f);
+    assert(pStereoParameters->fStereoExaggerationFactor >= 1.0f && pStereoParameters->fStereoExaggerationFactor <= 2.0f);
+
+    // Only the virtual projection offset is consumed here; the near-plane extents are
+    // required outputs of the helper, and its comfort verdict is not used.
+    float eyeOffset = 0.0f;
+    float nearWidth = 0.0f;
+    float nearHeight = 0.0f;
+    StereoProjectionHelper(*pStereoParameters, &eyeOffset, &nearWidth, &nearHeight, FovAngleY, AspectRatio, NearZ);
+
+    // Incorporate the developer-defined bias.
+    eyeOffset *= pStereoParameters->fStereoSeparationFactor;
+
+    const float invertedAngle = atanf(eyeOffset / (2.0f * NearZ));
+
+    const XMMATRIX proj = XMMatrixPerspectiveFovRH(FovAngleY, AspectRatio, NearZ, FarZ);
+
+    // Any channel other than the left one is treated as the right eye.
+    const bool leftEye = (Channel == STEREO_CHANNEL_LEFT);
+
+    // By applying a translation, we are forcing our cameras to be parallel: the left eye
+    // is shifted by -offset along X, the right eye by +offset.
+    const XMMATRIX trans = XMMatrixTranslation(leftEye ? -eyeOffset : eyeOffset, 0, 0);
+
+    if (StereoMode > STEREO_MODE_NORMAL)
+    {
+        // Inverted mode additionally rotates about Y, with the opposite sign per eye.
+        const XMMATRIX rots = XMMatrixRotationY(leftEye ? invertedAngle : -invertedAngle);
+        return XMMatrixMultiply(XMMatrixMultiply(rots, trans), proj);
+    }
+
+    return XMMatrixMultiply(trans, proj);
+}
--- a/vendor/directxmath-3.19.0/Stereo3D/Stereo3DMatrixHelper.h
+++ b/vendor/directxmath-3.19.0/Stereo3D/Stereo3DMatrixHelper.h
@ -0,0 +1,64 @@
+//-------------------------------------------------------------------------------------
+// Stereo3DMatrixHelper.h -- SIMD C++ Math helper for Stereo 3D matrices
+//
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "DirectXMath.h"
+
+namespace DirectX
+{
+    // Enumeration for stereo channels (left and right): selects which eye a projection is built for.
+    enum STEREO_CHANNEL
+    {
+        STEREO_CHANNEL_LEFT = 0,
+        STEREO_CHANNEL_RIGHT
+    };
+
+    // Enumeration for stereo mode (normal or inverted).
+    enum STEREO_MODE
+    {
+        STEREO_MODE_NORMAL = 0,  // the projection is preceded by an X translation only
+        STEREO_MODE_INVERTED,    // a rotation about Y is applied before the X translation
+    };
+
+    //------------------------------------------------------------------------------
+    //
+    // Stereo calibration settings
+    //
+    // * Viewer distance to the display
+    // * Physical display size
+    // * Render resolution
+    //
+    // The stereo separation factor indicates how much separation is between the left and right
+    // eyes.  0 is no separation, 1 is full separation. It defaults to 1.0.
+    //
+    // The debug stereo exaggeration factor indicates how much to increase the interocular spacing and
+    // maximum acuity angle from comfortable defaults.  For retail builds, this value should always
+    // be 1.0, but during development, on small screens, this value can be raised to up to 2.0 in
+    // order to exaggerate the 3D effect.  Values over 1.0 may cause discomfort on normal sized
+    // displays. It defaults to 1.0.
+    // The defaults quoted on the fields below are the values written by StereoCreateDefaultParameters.
+    struct STEREO_PARAMETERS
+    {
+        float fViewerDistanceInches;      // default 24.0
+        float fDisplaySizeInches;         // default 22.0; used as the diagonal when deriving display height and width
+        float fPixelResolutionWidth;      // default 1920.0
+        float fPixelResolutionHeight;     // default 1200.0
+        float fStereoSeparationFactor;    // default 1.0; the projection functions assert it lies in [0, 1]
+        float fStereoExaggerationFactor;  // default 1.0; the projection functions assert it lies in [1, 2]
+    };
+    // Overwrites every field of stereoParameters with the defaults listed on the struct above.
+    void StereoCreateDefaultParameters(STEREO_PARAMETERS& stereoParameters);
+    // Left-handed stereo perspective projection for one eye; a null pStereoParameters selects the defaults.
+    XMMATRIX StereoProjectionFovLH(_In_opt_ const STEREO_PARAMETERS* pStereoParameters,
+        STEREO_CHANNEL Channel, float FovAngleY, float AspectRatio, float NearZ, float FarZ,
+        STEREO_MODE StereoMode = STEREO_MODE_NORMAL);
+    // Right-handed stereo perspective projection for one eye; a null pStereoParameters selects the defaults.
+    XMMATRIX StereoProjectionFovRH(_In_opt_ const STEREO_PARAMETERS* pStereoParameters,
+        STEREO_CHANNEL Channel, float FovAngleY, float AspectRatio, float NearZ, float FarZ,
+        STEREO_MODE StereoMode = STEREO_MODE_NORMAL);
+}
--- a/vendor/directxmath-3.19.0/XDSP/XDSP.h
+++ b/vendor/directxmath-3.19.0/XDSP/XDSP.h
@ -0,0 +1,880 @@
+//--------------------------------------------------------------------------------------
+// File: XDSP.h
+//
+// DirectXMath based Digital Signal Processing (DSP) functions for audio,
+// primarily Fast Fourier Transform (FFT)
+//
+// All buffer parameters must be 16-byte aligned
+//
+// All FFT functions support only single-precision floating-point audio
+//
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615557
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#include <cassert>
+#include <DirectXMath.h>
+
+#include <cstdint>
+#include <cstring>
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 6001 6262)
+#endif
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunknown-warning-option"
+#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
+#endif
+
+namespace XDSP
+{
+    using XMVECTOR = DirectX::XMVECTOR;
+    using FXMVECTOR = DirectX::FXMVECTOR;
+    using GXMVECTOR = DirectX::GXMVECTOR;
+    using CXMVECTOR = DirectX::CXMVECTOR;
+    using XMFLOAT4A = DirectX::XMFLOAT4A;
+
+    // Returns true when n is a non-zero power of two, i.e. exactly one bit of n is set.
+    inline bool ISPOWEROF2(size_t n)
+    {
+        if (n == 0)
+        {
+            return false;   // zero has no bit set and is rejected explicitly
+        }
+        // Clearing the lowest set bit leaves zero only when a single bit was set.
+        return (n & (n - 1)) == 0;
+    }
+
+    // Parallel multiplication of four complex numbers, assuming real and imaginary values are stored in separate vectors.
+    inline void XM_CALLCONV vmulComplex(
+        _Out_ XMVECTOR& rResult, _Out_ XMVECTOR& iResult,
+        _In_ FXMVECTOR r1, _In_ FXMVECTOR i1, _In_ FXMVECTOR r2, _In_ GXMVECTOR i2) noexcept
+    {
+        // Lane-wise complex product with real and imaginary parts kept in separate vectors:
+        //   (r1 + j*i1) * (r2 + j*i2) = (r1*r2 - i1*i2) + j*(r1*i2 + r2*i1)
+        // Both partial products are formed before either output is written.
+        const XMVECTOR vRealTimesReal = DirectX::XMVectorMultiply(r1, r2);
+        const XMVECTOR vRealTimesImag = DirectX::XMVectorMultiply(r1, i2);
+
+        // real part: r1*r2 - i1*i2
+        rResult = DirectX::XMVectorNegativeMultiplySubtract(i1, i2, vRealTimesReal);
+
+        // imaginary part: r2*i1 + r1*i2
+        iResult = DirectX::XMVectorMultiplyAdd(r2, i1, vRealTimesImag);
+    }
+
+    inline void XM_CALLCONV vmulComplex(
+        _Inout_ XMVECTOR& r1, _Inout_ XMVECTOR& i1, _In_ FXMVECTOR r2, _In_ FXMVECTOR i2) noexcept
+    {
+        // In-place lane-wise complex product: (r1, i1) is replaced by (r1, i1) * (r2, i2),
+        //   (r1 + j*i1) * (r2 + j*i2) = (r1*r2 - i1*i2) + j*(r1*i2 + r2*i1)
+        // Both products that need the incoming r1 are taken before r1 is overwritten.
+        const XMVECTOR vRealTimesReal = DirectX::XMVectorMultiply(r1, r2);
+        const XMVECTOR vRealTimesImag = DirectX::XMVectorMultiply(r1, i2);
+
+        // real part: r1*r2 - i1*i2 (still reads the incoming i1)
+        r1 = DirectX::XMVectorNegativeMultiplySubtract(i1, i2, vRealTimesReal);
+
+        // imaginary part: r2*i1 + r1*i2
+        i1 = DirectX::XMVectorMultiplyAdd(r2, i1, vRealTimesImag);
+    }
+
+    //----------------------------------------------------------------------------------
+    // Radix-4 decimation-in-time FFT butterfly.
+    // This version assumes that all four elements of the butterfly are
+    // adjacent in a single vector.
+    //
+    // Compute the product of the complex input vector and the
+    // 4-element DFT matrix:
+    //     | 1  1  1  1 |    | (r1X,i1X) |
+    //     | 1 -j -1  j |    | (r1Y,i1Y) |
+    //     | 1 -1  1 -1 |    | (r1Z,i1Z) |
+    //     | 1  j -1 -j |    | (r1W,i1W) |
+    //
+    // This matrix can be decomposed into two simpler ones to reduce the
+    // number of additions needed. The decomposed matrices look like this:
+    //     | 1  0  1  0 |    | 1  0  1  0 |
+    //     | 0  1  0 -j |    | 1  0 -1  0 |
+    //     | 1  0 -1  0 |    | 0  1  0  1 |
+    //     | 0  1  0  j |    | 0  1  0 -1 |
+    //
+    // Combine as follows:
+    //          | 1  0  1  0 |   | (r1X,i1X) |         | (r1X + r1Z, i1X + i1Z) |
+    // Temp   = | 1  0 -1  0 | * | (r1Y,i1Y) |       = | (r1X - r1Z, i1X - i1Z) |
+    //          | 0  1  0  1 |   | (r1Z,i1Z) |         | (r1Y + r1W, i1Y + i1W) |
+    //          | 0  1  0 -1 |   | (r1W,i1W) |         | (r1Y - r1W, i1Y - i1W) |
+    //
+    //          | 1  0  1  0 |   | (rTempX,iTempX) |   | (rTempX + rTempZ, iTempX + iTempZ) |
+    // Result = | 0  1  0 -j | * | (rTempY,iTempY) | = | (rTempY + iTempW, iTempY - rTempW) |
+    //          | 1  0 -1  0 |   | (rTempZ,iTempZ) |   | (rTempX - rTempZ, iTempX - iTempZ) |
+    //          | 0  1  0  j |   | (rTempW,iTempW) |   | (rTempY - iTempW, iTempY + rTempW) |
+    //----------------------------------------------------------------------------------
+    // In-place 4-point butterfly on one vector pair: on entry r1/i1 hold the real and
+    // imaginary parts of four complex values (one per lane, X..W); on exit they hold
+    // the corresponding results.
+    inline void ButterflyDIT4_1 (_Inout_ XMVECTOR& r1, _Inout_ XMVECTOR& i1) noexcept
+    {
+        using namespace DirectX;
+
+        // sign constants for radix-4 butterflies
+        static const XMVECTORF32 vDFT4SignBits1 = { { { 1.0f, -1.0f, 1.0f, -1.0f } } };
+        static const XMVECTORF32 vDFT4SignBits2 = { { { 1.0f, 1.0f, -1.0f, -1.0f } } };
+        static const XMVECTORF32 vDFT4SignBits3 = { { { 1.0f, -1.0f, -1.0f, 1.0f } } };
+
+        // calculating Temp
+        // [r1X| r1X|r1Y| r1Y] + [r1Z|-r1Z|r1W|-r1W]
+        // [i1X| i1X|i1Y| i1Y] + [i1Z|-i1Z|i1W|-i1W]
+        const XMVECTOR r1L = XMVectorSwizzle<0, 0, 1, 1>(r1);
+        const XMVECTOR r1H = XMVectorSwizzle<2, 2, 3, 3>(r1);
+
+        const XMVECTOR i1L = XMVectorSwizzle<0, 0, 1, 1>(i1);
+        const XMVECTOR i1H = XMVectorSwizzle<2, 2, 3, 3>(i1);
+
+        // Temp = low half + (high half * [+,-,+,-])
+        const XMVECTOR rTemp = XMVectorMultiplyAdd(r1H, vDFT4SignBits1, r1L);
+        const XMVECTOR iTemp = XMVectorMultiplyAdd(i1H, vDFT4SignBits1, i1L);
+
+        // calculating Result
+        const XMVECTOR rZrWiZiW = XMVectorPermute<2, 3, 6, 7>(rTemp, iTemp);   // [rTempZ|rTempW|iTempZ|iTempW]
+        const XMVECTOR rZiWrZiW = XMVectorSwizzle<0, 3, 0, 3>(rZrWiZiW);       // [rTempZ|iTempW|rTempZ|iTempW]
+        const XMVECTOR iZrWiZrW = XMVectorSwizzle<2, 1, 2, 1>(rZrWiZiW);       // [iTempZ|rTempW|iTempZ|rTempW]
+
+        // [rTempX| rTempY| rTempX| rTempY] + [rTempZ| iTempW|-rTempZ|-iTempW]
+        // [iTempX| iTempY| iTempX| iTempY] + [iTempZ|-rTempW|-iTempZ| rTempW]
+        const XMVECTOR rTempL = XMVectorSwizzle<0, 1, 0, 1>(rTemp);
+        const XMVECTOR iTempL = XMVectorSwizzle<0, 1, 0, 1>(iTemp);
+
+        // The sign vectors apply the +/- pattern shown in the two comment lines above.
+        r1 = XMVectorMultiplyAdd(rZiWrZiW, vDFT4SignBits2, rTempL);
+        i1 = XMVectorMultiplyAdd(iZrWiZrW, vDFT4SignBits3, iTempL);
+    }
+
+    //----------------------------------------------------------------------------------
+    // Radix-4 decimation-in-time FFT butterfly.
+    // This version assumes that elements of the butterfly are
+    // in different vectors, so that each vector in the input
+    // contains elements from four different butterflies.
+    // The four separate butterflies are processed in parallel.
+    //
+    // The calculations here are the same as the ones in the single-vector
+    // radix-4 DFT, but instead of being done on a single vector (X,Y,Z,W)
+    // they are done in parallel on sixteen independent complex values.
+    // There is no interdependence between the vector elements:
+    // | 1  0  1  0 |    | (rIn0,iIn0) |               | (rIn0 + rIn2, iIn0 + iIn2) |
+    // | 1  0 -1  0 | *  | (rIn1,iIn1) |  =   Temp   = | (rIn0 - rIn2, iIn0 - iIn2) |
+    // | 0  1  0  1 |    | (rIn2,iIn2) |               | (rIn1 + rIn3, iIn1 + iIn3) |
+    // | 0  1  0 -1 |    | (rIn3,iIn3) |               | (rIn1 - rIn3, iIn1 - iIn3) |
+    //
+    //          | 1  0  1  0 |   | (rTemp0,iTemp0) |   | (rTemp0 + rTemp2, iTemp0 + iTemp2) |
+    // Result = | 0  1  0 -j | * | (rTemp1,iTemp1) | = | (rTemp1 + iTemp3, iTemp1 - rTemp3) |
+    //          | 1  0 -1  0 |   | (rTemp2,iTemp2) |   | (rTemp0 - rTemp2, iTemp0 - iTemp2) |
+    //          | 0  1  0  j |   | (rTemp3,iTemp3) |   | (rTemp1 - iTemp3, iTemp1 + rTemp3) |
+    //----------------------------------------------------------------------------------
+    // Radix-4 butterfly across four vector pairs, followed by a twiddle multiply.
+    //
+    //  r0..r3, i0..i3       - [inout] real / imaginary parts of the four butterfly inputs;
+    //                         overwritten with the four outputs
+    //  pUnityTableReal      - [in] real twiddle factors; read at uStride, uStride*2 and uStride*3
+    //  pUnityTableImaginary - [in] imaginary twiddle factors; read at the same three offsets
+    //  uStride              - [in] distance, in vectors, between the twiddle rows that are read
+    //  fLast                - [in] when true, every output vector is additionally passed through
+    //                         ButterflyDIT4_1 before being stored
+    inline void ButterflyDIT4_4(
+        _Inout_ XMVECTOR& r0,
+        _Inout_ XMVECTOR& r1,
+        _Inout_ XMVECTOR& r2,
+        _Inout_ XMVECTOR& r3,
+        _Inout_ XMVECTOR& i0,
+        _Inout_ XMVECTOR& i1,
+        _Inout_ XMVECTOR& i2,
+        _Inout_ XMVECTOR& i3,
+        _In_reads_(uStride * 4) const XMVECTOR* __restrict pUnityTableReal,
+        _In_reads_(uStride * 4) const XMVECTOR* __restrict pUnityTableImaginary,
+        _In_ size_t uStride,
+        _In_ const bool fLast) noexcept
+    {
+        using namespace DirectX;
+
+        // Debug-only precondition checks: non-null, 16-byte aligned tables, power-of-two stride.
+        assert(pUnityTableReal);
+        assert(pUnityTableImaginary);
+        assert(reinterpret_cast<uintptr_t>(pUnityTableReal) % 16 == 0);
+        assert(reinterpret_cast<uintptr_t>(pUnityTableImaginary) % 16 == 0);
+        assert(ISPOWEROF2(uStride));
+
+        // calculating Temp
+        // Temp0 = In0 + In2, Temp2 = In1 + In3 (sums)
+        const XMVECTOR rTemp0 = XMVectorAdd(r0, r2);
+        const XMVECTOR iTemp0 = XMVectorAdd(i0, i2);
+
+        const XMVECTOR rTemp2 = XMVectorAdd(r1, r3);
+        const XMVECTOR iTemp2 = XMVectorAdd(i1, i3);
+
+        // Temp1 = In0 - In2, Temp3 = In1 - In3 (differences)
+        const XMVECTOR rTemp1 = XMVectorSubtract(r0, r2);
+        const XMVECTOR iTemp1 = XMVectorSubtract(i0, i2);
+
+        const XMVECTOR rTemp3 = XMVectorSubtract(r1, r3);
+        const XMVECTOR iTemp3 = XMVectorSubtract(i1, i3);
+
+        // Second stage; Temp4..Temp7 are the four butterfly outputs before the twiddle multiply.
+        XMVECTOR rTemp4 = XMVectorAdd(rTemp0, rTemp2);
+        XMVECTOR iTemp4 = XMVectorAdd(iTemp0, iTemp2);
+
+        // (rTemp1 + iTemp3, iTemp1 - rTemp3): Temp1 combined with -j * Temp3
+        XMVECTOR rTemp5 = XMVectorAdd(rTemp1, iTemp3);
+        XMVECTOR iTemp5 = XMVectorSubtract(iTemp1, rTemp3);
+
+        XMVECTOR rTemp6 = XMVectorSubtract(rTemp0, rTemp2);
+        XMVECTOR iTemp6 = XMVectorSubtract(iTemp0, iTemp2);
+
+        // (rTemp1 - iTemp3, iTemp1 + rTemp3): Temp1 combined with +j * Temp3
+        XMVECTOR rTemp7 = XMVectorSubtract(rTemp1, iTemp3);
+        XMVECTOR iTemp7 = XMVectorAdd(iTemp1, rTemp3);
+
+        // calculating Result
+        // Temp4 is left unmultiplied: table row 0 is never read here (the original note
+        // calls that first multiply "always trivial").
+        vmulComplex(rTemp5, iTemp5, pUnityTableReal[uStride], pUnityTableImaginary[uStride]);
+        vmulComplex(rTemp6, iTemp6, pUnityTableReal[uStride * 2], pUnityTableImaginary[uStride * 2]);
+        vmulComplex(rTemp7, iTemp7, pUnityTableReal[uStride * 3], pUnityTableImaginary[uStride * 3]);
+
+        if (fLast)
+        {
+            // Final stage: finish each vector with the single-vector 4-point butterfly.
+            ButterflyDIT4_1(rTemp4, iTemp4);
+            ButterflyDIT4_1(rTemp5, iTemp5);
+            ButterflyDIT4_1(rTemp6, iTemp6);
+            ButterflyDIT4_1(rTemp7, iTemp7);
+        }
+
+        // Write the results back over the inputs.
+        r0 = rTemp4;    i0 = iTemp4;
+        r1 = rTemp5;    i1 = iTemp5;
+        r2 = rTemp6;    i2 = iTemp6;
+        r3 = rTemp7;    i3 = iTemp7;
+    }
+
+    //==================================================================================
+    // F-U-N-C-T-I-O-N-S
+    //==================================================================================
+
+    //----------------------------------------------------------------------------------
+    // DESCRIPTION:
+    //  4-sample FFT.
+    //
+    // PARAMETERS:
+    //  pReal      - [inout] real components, must have at least uCount elements
+    //  pImaginary - [inout] imaginary components, must have at least uCount elements
+    //  uCount     - [in]    number of FFT iterations
+    //----------------------------------------------------------------------------------
+    inline void FFT4(
+        _Inout_updates_(uCount) XMVECTOR* __restrict pReal,
+        _Inout_updates_(uCount) XMVECTOR* __restrict pImaginary,
+        _In_ const size_t uCount = 1) noexcept
+    {
+        // Runs uCount independent 4-sample FFTs in place. Each XMVECTOR pair
+        // (pReal[k], pImaginary[k]) holds one complete 4-sample transform.
+        // uCount now carries the same _In_ annotation as in FFT8/FFT16/FFT.
+
+        // Debug-only precondition checks: non-null, 16-byte aligned buffers and a
+        // power-of-two iteration count.
+        assert(pReal);
+        assert(pImaginary);
+        assert(reinterpret_cast<uintptr_t>(pReal) % 16 == 0);
+        assert(reinterpret_cast<uintptr_t>(pImaginary) % 16 == 0);
+        assert(ISPOWEROF2(uCount));
+
+        for (size_t uIndex = 0; uIndex < uCount; ++uIndex)
+        {
+            // All four samples of one transform sit in a single vector pair.
+            ButterflyDIT4_1(pReal[uIndex], pImaginary[uIndex]);
+        }
+    }
+
+    //----------------------------------------------------------------------------------
+    // DESCRIPTION:
+    //  8-sample FFT.
+    //
+    // PARAMETERS:
+    //  pReal      - [inout] real components, must have at least uCount*2 elements
+    //  pImaginary - [inout] imaginary components, must have at least uCount*2 elements
+    //  uCount     - [in]    number of FFT iterations
+    //----------------------------------------------------------------------------------
+    inline void FFT8(
+        _Inout_updates_(uCount * 2) XMVECTOR* __restrict pReal,
+        _Inout_updates_(uCount * 2) XMVECTOR* __restrict pImaginary,
+        _In_ const size_t uCount = 1) noexcept
+    {
+        using namespace DirectX;
+
+        assert(pReal);
+        assert(pImaginary);
+        assert(reinterpret_cast<uintptr_t>(pReal) % 16 == 0);
+        assert(reinterpret_cast<uintptr_t>(pImaginary) % 16 == 0);
+        assert(ISPOWEROF2(uCount));
+
+        // Twiddle factors applied to the odd-sample transform: the "first" pair feeds the
+        // first output vector, the "second" pair (the same values with opposite sign, up
+        // to the sign of zero) feeds the second output vector.
+        static const XMVECTORF32 vTwiddleFirstReal  = { { { 1.0f, 0.70710677f, 0.0f, -0.70710677f } } };
+        static const XMVECTORF32 vTwiddleFirstImag  = { { { 0.0f, -0.70710677f, -1.0f, -0.70710677f } } };
+        static const XMVECTORF32 vTwiddleSecondReal = { { { -1.0f, -0.70710677f, 0.0f, 0.70710677f } } };
+        static const XMVECTORF32 vTwiddleSecondImag = { { { 0.0f, 0.70710677f, 1.0f, 0.70710677f } } };
+
+        for (size_t uIndex = 0; uIndex < uCount; ++uIndex)
+        {
+            // One 8-sample transform occupies two consecutive vectors per buffer.
+            XMVECTOR& vRealFirst  = pReal[uIndex * 2];
+            XMVECTOR& vRealSecond = pReal[uIndex * 2 + 1];
+            XMVECTOR& vImagFirst  = pImaginary[uIndex * 2];
+            XMVECTOR& vImagSecond = pImaginary[uIndex * 2 + 1];
+
+            // Split the eight samples into odd- and even-indexed halves.
+            XMVECTOR vOddReal  = XMVectorPermute<1, 3, 5, 7>(vRealFirst, vRealSecond);
+            XMVECTOR vEvenReal = XMVectorPermute<0, 2, 4, 6>(vRealFirst, vRealSecond);
+            XMVECTOR vOddImag  = XMVectorPermute<1, 3, 5, 7>(vImagFirst, vImagSecond);
+            XMVECTOR vEvenImag = XMVectorPermute<0, 2, 4, 6>(vImagFirst, vImagSecond);
+
+            // 4-point transform of each half.
+            ButterflyDIT4_1(vOddReal, vOddImag);
+            ButterflyDIT4_1(vEvenReal, vEvenImag);
+
+            // Combine: output = even + twiddle * odd, once per output vector.
+            XMVECTOR vProductReal, vProductImag;
+
+            vmulComplex(vProductReal, vProductImag, vOddReal, vOddImag, vTwiddleFirstReal, vTwiddleFirstImag);
+            vRealFirst = XMVectorAdd(vEvenReal, vProductReal);
+            vImagFirst = XMVectorAdd(vEvenImag, vProductImag);
+
+            vmulComplex(vProductReal, vProductImag, vOddReal, vOddImag, vTwiddleSecondReal, vTwiddleSecondImag);
+            vRealSecond = XMVectorAdd(vEvenReal, vProductReal);
+            vImagSecond = XMVectorAdd(vEvenImag, vProductImag);
+        }
+    }
+
+    //----------------------------------------------------------------------------------
+    // DESCRIPTION:
+    //  16-sample FFT.
+    //
+    // PARAMETERS:
+    //  pReal      - [inout] real components, must have at least uCount*4 elements
+    //  pImaginary - [inout] imaginary components, must have at least uCount*4 elements
+    //  uCount     - [in]    number of FFT iterations
+    //----------------------------------------------------------------------------------
+    inline void FFT16(
+        _Inout_updates_(uCount * 4) XMVECTOR* __restrict pReal,
+        _Inout_updates_(uCount * 4) XMVECTOR* __restrict pImaginary,
+        _In_ const size_t uCount = 1) noexcept
+    {
+        using namespace DirectX;
+
+        assert(pReal);
+        assert(pImaginary);
+        assert(reinterpret_cast<uintptr_t>(pReal) % 16 == 0);
+        assert(reinterpret_cast<uintptr_t>(pImaginary) % 16 == 0);
+        assert(ISPOWEROF2(uCount));
+
+        // Hard-coded twiddle factors for the 16-sample case: row k, lane m holds
+        // cos(2*pi*k*m/16) in the real table and -sin(2*pi*k*m/16) in the imaginary one.
+        static const XMVECTORF32 aTwiddleReal[4] =
+        {
+            { { { 1.0f, 1.0f, 1.0f, 1.0f } } },
+            { { { 1.0f, 0.92387950f, 0.70710677f, 0.38268343f } } },
+            { { { 1.0f, 0.70710677f, -4.3711388e-008f, -0.70710677f } } },
+            { { { 1.0f, 0.38268343f, -0.70710677f, -0.92387950f } } }
+        };
+        static const XMVECTORF32 aTwiddleImaginary[4] =
+        {
+            { { { -0.0f, -0.0f, -0.0f, -0.0f } } },
+            { { { -0.0f, -0.38268343f, -0.70710677f, -0.92387950f } } },
+            { { { -0.0f, -0.70710677f, -1.0f, -0.70710677f } } },
+            { { { -0.0f, -0.92387950f, -0.70710677f, 0.38268343f } } }
+        };
+
+        // The butterfly takes plain XMVECTOR tables; view the constant arrays as such once.
+        const XMVECTOR* const pTwiddleReal = reinterpret_cast<const XMVECTOR*>(aTwiddleReal);
+        const XMVECTOR* const pTwiddleImaginary = reinterpret_cast<const XMVECTOR*>(aTwiddleImaginary);
+
+        for (size_t uIndex = 0; uIndex < uCount; ++uIndex)
+        {
+            // One 16-sample transform occupies four consecutive vectors per buffer.
+            XMVECTOR* const pR = pReal + uIndex * 4;
+            XMVECTOR* const pI = pImaginary + uIndex * 4;
+
+            // Table stride 1, and fLast = true so the single-vector butterflies run as well.
+            ButterflyDIT4_4(pR[0], pR[1], pR[2], pR[3],
+                pI[0], pI[1], pI[2], pI[3],
+                pTwiddleReal,
+                pTwiddleImaginary,
+                1, true);
+        }
+    }
+
+    //----------------------------------------------------------------------------------
+    // DESCRIPTION:
+    //  2^N-sample FFT.
+    //
+    // REMARKS:
+    //  For FFTs length 16 and below, call FFT16(), FFT8(), or FFT4().
+    //
+    // PARAMETERS:
+    //  pReal       - [inout] real components, must have at least (uLength*uCount)/4 elements
+    //  pImaginary  - [inout] imaginary components, must have at least (uLength*uCount)/4 elements
+    //  pUnityTable - [in]    unity table, must have at least uLength*uCount elements, see FFTInitializeUnityTable()
+    //  uLength     - [in]    FFT length in samples, must be a power of 2 > 16
+    //  uCount      - [in]    number of FFT iterations
+    //----------------------------------------------------------------------------------
+    // Recursive radix-4 FFT: performs one butterfly pass over the whole buffer, then hands
+    // the data to a quarter-length FFT (four times as many iterations) until a fixed-size
+    // FFT16/FFT8/FFT4 finishes the job.
+    inline void FFT (
+        _Inout_updates_((uLength * uCount) / 4) XMVECTOR* __restrict pReal,
+        _Inout_updates_((uLength * uCount) / 4) XMVECTOR* __restrict pImaginary,
+        _In_reads_(uLength * uCount) const XMVECTOR* __restrict pUnityTable,
+        _In_ const size_t uLength,
+        _In_ const size_t uCount = 1) noexcept
+    {
+        // Debug-only precondition checks: non-null, 16-byte aligned buffers, power-of-two
+        // sizes and a length above 16.
+        assert(pReal);
+        assert(pImaginary);
+        assert(pUnityTable);
+        assert(reinterpret_cast<uintptr_t>(pReal) % 16 == 0);
+        assert(reinterpret_cast<uintptr_t>(pImaginary) % 16 == 0);
+        assert(reinterpret_cast<uintptr_t>(pUnityTable) % 16 == 0);
+        assert(uLength > 16);
+        _Analysis_assume_(uLength > 16);
+        assert(ISPOWEROF2(uLength));
+        assert(ISPOWEROF2(uCount));
+
+        // Table for this stage: uLength/4 vectors of real parts followed by the imaginary parts.
+        const XMVECTOR* __restrict pUnityTableReal = pUnityTable;
+        const XMVECTOR* __restrict pUnityTableImaginary = pUnityTable + (uLength >> 2);
+        const size_t uTotal              = uCount * uLength;     // samples across all iterations
+        const size_t uTotal_vectors      = uTotal >> 2;          // four samples per XMVECTOR
+        const size_t uStage_vectors      = uLength >> 2;         // vectors per single FFT
+        const size_t uStage_vectors_mask = uStage_vectors - 1;
+        const size_t uStride        = uLength >> 4; // stride between butterfly elements
+        const size_t uStrideMask    = uStride - 1;
+        const size_t uStride2       = uStride * 2;
+        const size_t uStride3       = uStride * 3;
+        const size_t uStrideInvMask = ~uStrideMask;
+
+        // Each butterfly consumes four vectors, so there are uTotal_vectors / 4 of them.
+        for (size_t uIndex=0; uIndex < (uTotal_vectors >> 2); ++uIndex)
+        {
+            // Map the butterfly number to its first vector: the low bits select the position
+            // inside a run of uStride vectors, the remaining bits select the run, and runs
+            // start 4 * uStride vectors apart.
+            const size_t n = ((uIndex & uStrideInvMask) << 2) + (uIndex & uStrideMask);
+            ButterflyDIT4_4(pReal[n],
+                            pReal[n + uStride],
+                            pReal[n + uStride2],
+                            pReal[n + uStride3],
+                            pImaginary[n ],
+                            pImaginary[n + uStride],
+                            pImaginary[n + uStride2],
+                            pImaginary[n + uStride3],
+                            pUnityTableReal + (n & uStage_vectors_mask),
+                            pUnityTableImaginary + (n & uStage_vectors_mask),
+                            uStride, false);
+        }
+
+        // Continue with quarter-length transforms; the table for the next stage starts
+        // uLength / 2 vectors further on.
+        if (uLength > 16 * 4)
+        {
+            FFT(pReal, pImaginary, pUnityTable + (uLength >> 1), uLength >> 2, uCount * 4);
+        }
+        else if (uLength == 16 * 4)
+        {
+            FFT16(pReal, pImaginary, uCount * 4);
+        }
+        else if (uLength == 8 * 4)
+        {
+            FFT8(pReal, pImaginary, uCount * 4);
+        }
+        else if (uLength == 4 * 4)
+        {
+            // Only reachable when asserts are compiled out: uLength > 16 is asserted above.
+            FFT4(pReal, pImaginary, uCount * 4);
+        }
+    }
+
+    //----------------------------------------------------------------------------------
+    // DESCRIPTION:
+    //  Initializes unity roots lookup table used by FFT functions.
+    //  Once initialized, the table need not be initialized again unless a
+    //  different FFT length is desired.
+    //
+    // REMARKS:
+    //  The unity tables of FFT length 16 and below are hard coded into the
+    //  respective FFT functions and so need not be initialized.
+    //
+    // PARAMETERS:
+    //  pUnityTable - [out] unity table, receives unity roots lookup table, must have at least uLength elements
+    //  uLength     - [in]  FFT length in frames, must be a power of 2 > 16
+    //----------------------------------------------------------------------------------
+    // Fills pUnityTable with one block of cosine / negated-sine values per recursive FFT
+    // stage. Note that uLength is modified while the function runs: it is divided by four
+    // before the loop and again at the top of every pass.
+    inline void FFTInitializeUnityTable (_Out_writes_(uLength) XMVECTOR* __restrict pUnityTable, _In_ size_t uLength) noexcept
+    {
+        using namespace DirectX;
+
+        assert(pUnityTable);
+        assert(uLength > 16);
+        _Analysis_assume_(uLength > 16);
+        assert(ISPOWEROF2(uLength));
+
+        // initialize unity table for recursive FFT lengths: uLength, uLength/4, uLength/16... > 16
+        // pUnityTable[0 to uLength*4-1] contains real components for current FFT length
+        // pUnityTable[uLength*4 to uLength*8-1] contains imaginary components for current FFT length
+        // (in the two lines above, uLength is the per-pass value used inside the loop and
+        // the indices count XMVECTORs)
+        static const XMVECTORF32 vXM0123 = { { { 0.0f, 1.0f, 2.0f, 3.0f } } };
+        uLength >>= 2;
+        // Angle step per sample: XM_PIDIV2 divided by a quarter of the requested length.
+        XMVECTOR vlStep = XMVectorReplicate(XM_PIDIV2 / float(uLength));
+        do
+        {
+            uLength >>= 2;
+            // Lane sample numbers, starting at 0,1,2,3 and advancing by four per vector.
+            XMVECTOR vJP = vXM0123;
+            for (size_t j = 0; j < uLength; ++j)
+            {
+                XMVECTOR vSin, vCos;
+                XMVECTOR viJP, vlS;
+
+                // Row 0: real part one, imaginary part zero.
+                pUnityTable[j] = g_XMOne;
+                pUnityTable[j + uLength * 4] = XMVectorZero();
+
+                // Row 1: angle = sample number * step; store cos and -sin.
+                vlS = XMVectorMultiply(vJP, vlStep);
+                XMVectorSinCos(&vSin, &vCos, vlS);
+                pUnityTable[j + uLength] = vCos;
+                pUnityTable[j + uLength * 5] = XMVectorMultiply(vSin, g_XMNegativeOne);
+
+                // Row 2: twice the angle of row 1.
+                viJP = XMVectorAdd(vJP, vJP);
+                vlS = XMVectorMultiply(viJP, vlStep);
+                XMVectorSinCos(&vSin, &vCos, vlS);
+                pUnityTable[j + uLength * 2] = vCos;
+                pUnityTable[j + uLength * 6] = XMVectorMultiply(vSin, g_XMNegativeOne);
+
+                // Row 3: three times the angle of row 1.
+                viJP = XMVectorAdd(viJP, vJP);
+                vlS = XMVectorMultiply(viJP, vlStep);
+                XMVectorSinCos(&vSin, &vCos, vlS);
+                pUnityTable[j + uLength * 3] = vCos;
+                pUnityTable[j + uLength * 7] = XMVectorMultiply(vSin, g_XMNegativeOne);
+
+                vJP = XMVectorAdd(vJP, g_XMFour);
+            }
+            // Next stage: four times the angle step, written after the 8 * uLength vectors
+            // produced by this pass.
+            vlStep = XMVectorMultiply(vlStep, g_XMFour);
+            pUnityTable += uLength * 8;
+        } while (uLength > 4);
+    }
+
+    //----------------------------------------------------------------------------------
+    // DESCRIPTION:
+    //  The FFT functions generate output in bit reversed order.
+    //  Use this function to re-arrange them into order of increasing frequency.
+    //
+    // REMARKS:
+    //  Exponential values and bits correspond, so the reversed upper index can be omitted depending on the number of exponents.
+    //
+    // PARAMETERS:
+    //  pOutput     - [out] output buffer, receives samples in order of increasing frequency, cannot overlap pInput, must have at least (1<<uLog2Length)/4 elements
+    //  pInput      - [in]  input buffer, samples in bit reversed order as generated by FFT functions, cannot overlap pOutput, must have at least (1<<uLog2Length)/4 elements
+    //  uLog2Length - [in]  LOG (base 2) of FFT length in samples, must be >= 2
+    //----------------------------------------------------------------------------------
+    inline void FFTUnswizzle (
+        _Out_writes_((1 << uLog2Length) / 4) XMVECTOR* __restrict pOutput,
+        _In_reads_((1 << uLog2Length) / 4) const XMVECTOR* __restrict pInput,
+        _In_ const size_t uLog2Length) noexcept
+    {
+        // Scatters every lane of every input vector to its reordered position in pOutput,
+        // which is addressed as a flat float array. The destination index comes from
+        // reversing the order of the 2-bit groups of the source index.
+        assert(pOutput);
+        assert(pInput);
+        assert(uLog2Length >= 2);
+        _Analysis_assume_(uLog2Length >= 2);
+
+        float* __restrict pfOutput = reinterpret_cast<float*>(pOutput);
+        const size_t uLength = size_t(1) << (uLog2Length - 2);   // number of input vectors
+
+        // cSwizzleTable[b] is byte b with the order of its four 2-bit groups reversed
+        // (e.g. 0x01 -> 0x40, 0x1B -> 0xE4).
+        static const unsigned char cSwizzleTable[256] = {
+            0x00, 0x40, 0x80, 0xC0, 0x10, 0x50, 0x90, 0xD0, 0x20, 0x60, 0xA0, 0xE0, 0x30, 0x70, 0xB0, 0xF0,
+            0x04, 0x44, 0x84, 0xC4, 0x14, 0x54, 0x94, 0xD4, 0x24, 0x64, 0xA4, 0xE4, 0x34, 0x74, 0xB4, 0xF4,
+            0x08, 0x48, 0x88, 0xC8, 0x18, 0x58, 0x98, 0xD8, 0x28, 0x68, 0xA8, 0xE8, 0x38, 0x78, 0xB8, 0xF8,
+            0x0C, 0x4C, 0x8C, 0xCC, 0x1C, 0x5C, 0x9C, 0xDC, 0x2C, 0x6C, 0xAC, 0xEC, 0x3C, 0x7C, 0xBC, 0xFC,
+            0x01, 0x41, 0x81, 0xC1, 0x11, 0x51, 0x91, 0xD1, 0x21, 0x61, 0xA1, 0xE1, 0x31, 0x71, 0xB1, 0xF1,
+            0x05, 0x45, 0x85, 0xC5, 0x15, 0x55, 0x95, 0xD5, 0x25, 0x65, 0xA5, 0xE5, 0x35, 0x75, 0xB5, 0xF5,
+            0x09, 0x49, 0x89, 0xC9, 0x19, 0x59, 0x99, 0xD9, 0x29, 0x69, 0xA9, 0xE9, 0x39, 0x79, 0xB9, 0xF9,
+            0x0D, 0x4D, 0x8D, 0xCD, 0x1D, 0x5D, 0x9D, 0xDD, 0x2D, 0x6D, 0xAD, 0xED, 0x3D, 0x7D, 0xBD, 0xFD,
+            0x02, 0x42, 0x82, 0xC2, 0x12, 0x52, 0x92, 0xD2, 0x22, 0x62, 0xA2, 0xE2, 0x32, 0x72, 0xB2, 0xF2,
+            0x06, 0x46, 0x86, 0xC6, 0x16, 0x56, 0x96, 0xD6, 0x26, 0x66, 0xA6, 0xE6, 0x36, 0x76, 0xB6, 0xF6,
+            0x0A, 0x4A, 0x8A, 0xCA, 0x1A, 0x5A, 0x9A, 0xDA, 0x2A, 0x6A, 0xAA, 0xEA, 0x3A, 0x7A, 0xBA, 0xFA,
+            0x0E, 0x4E, 0x8E, 0xCE, 0x1E, 0x5E, 0x9E, 0xDE, 0x2E, 0x6E, 0xAE, 0xEE, 0x3E, 0x7E, 0xBE, 0xFE,
+            0x03, 0x43, 0x83, 0xC3, 0x13, 0x53, 0x93, 0xD3, 0x23, 0x63, 0xA3, 0xE3, 0x33, 0x73, 0xB3, 0xF3,
+            0x07, 0x47, 0x87, 0xC7, 0x17, 0x57, 0x97, 0xD7, 0x27, 0x67, 0xA7, 0xE7, 0x37, 0x77, 0xB7, 0xF7,
+            0x0B, 0x4B, 0x8B, 0xCB, 0x1B, 0x5B, 0x9B, 0xDB, 0x2B, 0x6B, 0xAB, 0xEB, 0x3B, 0x7B, 0xBB, 0xFB,
+            0x0F, 0x4F, 0x8F, 0xCF, 0x1F, 0x5F, 0x9F, 0xDF, 0x2F, 0x6F, 0xAF, 0xEF, 0x3F, 0x7F, 0xBF, 0xFF
+        };
+        if ((uLog2Length & 1) == 0)
+        {
+            // even powers of two
+            // The shift count is at most 30 here because uLog2Length >= 2.
+            const size_t uRev32 = 32 - uLog2Length;
+            for (size_t uIndex = 0; uIndex < uLength; ++uIndex)
+            {
+                XMFLOAT4A f4a;
+                XMStoreFloat4A(&f4a, pInput[uIndex]);
+                const size_t n = uIndex * 4;   // sample index of lane x
+                // Reverse the 2-bit groups of the 32-bit index: swap the byte order and
+                // reverse the groups inside each byte through the table.
+                const size_t uAddr = (static_cast<size_t>(cSwizzleTable[n & 0xff]) << 24) |
+                    (static_cast<size_t>(cSwizzleTable[(n >> 8) & 0xff]) << 16) |
+                    (static_cast<size_t>(cSwizzleTable[(n >> 16) & 0xff]) << 8) |
+                    (static_cast<size_t>(cSwizzleTable[(n >> 24)]));
+                // Lanes y, z, w are samples n+1, n+2, n+3; their lowest 2-bit group becomes
+                // the top group (bits 30-31) after the reversal.
+                pfOutput[uAddr >> uRev32] = f4a.x;
+                pfOutput[(0x40000000 | uAddr) >> uRev32] = f4a.y;
+                pfOutput[(0x80000000 | uAddr) >> uRev32] = f4a.z;
+                pfOutput[(0xC0000000 | uAddr) >> uRev32] = f4a.w;
+            }
+        }
+        else
+        {
+            // odd powers of two
+            const size_t uRev7 = size_t(1) << (uLog2Length - 3);   // output distance between lanes
+            const size_t uRev32 = 32 - (uLog2Length - 3);
+            for (size_t uIndex = 0; uIndex < uLength; ++uIndex)
+            {
+                XMFLOAT4A f4a;
+                XMStoreFloat4A(&f4a, pInput[uIndex]);
+                const size_t n = (uIndex >> 1);
+                // uRev32 is 32 when uLog2Length == 3. Shifting a 32-bit size_t by 32 is
+                // undefined behaviour, so the reversed index is assembled in a 64-bit
+                // integer, where the shift is always well defined. The value is the same
+                // as before wherever size_t is already 64 bits wide.
+                const uint64_t uReversed =
+                    (static_cast<uint64_t>(cSwizzleTable[n & 0xff]) << 24) |
+                    (static_cast<uint64_t>(cSwizzleTable[(n >> 8) & 0xff]) << 16) |
+                    (static_cast<uint64_t>(cSwizzleTable[(n >> 16) & 0xff]) << 8) |
+                    (static_cast<uint64_t>(cSwizzleTable[(n >> 24)]));
+                // Odd-numbered input vectors land 4 * uRev7 floats after their even partner.
+                size_t uAddr = static_cast<size_t>(uReversed >> uRev32) |
+                    ((uIndex & 1) * uRev7 * 4);
+                pfOutput[uAddr] = f4a.x;
+                uAddr += uRev7;
+                pfOutput[uAddr] = f4a.y;
+                uAddr += uRev7;
+                pfOutput[uAddr] = f4a.z;
+                uAddr += uRev7;
+                pfOutput[uAddr] = f4a.w;
+            }
+        }
+    }
+
+    //----------------------------------------------------------------------------------
+    // DESCRIPTION:
+    //  Convert complex components to polar form.
+    //
+    // PARAMETERS:
+    //  pOutput         - [out] output buffer, receives samples in polar form, must have at least uLength/4 elements
+    //  pInputReal      - [in]  input buffer (real components), must have at least uLength/4 elements
+    //  pInputImaginary - [in]  input buffer (imaginary components), must have at least uLength/4 elements
+    //  uLength         - [in]  FFT length in samples, must be a power of 2 >= 4
+    //----------------------------------------------------------------------------------
+#ifdef _MSC_VER
+#pragma warning(suppress: 6101)
+#endif
+    inline void FFTPolar(
+        _Out_writes_(uLength / 4) XMVECTOR* __restrict pOutput,
+        _In_reads_(uLength / 4) const XMVECTOR* __restrict pInputReal,
+        _In_reads_(uLength / 4) const XMVECTOR* __restrict pInputImaginary,
+        _In_ const size_t uLength) noexcept
+    {
+        using namespace DirectX;
+
+        assert(pOutput);
+        assert(pInputReal);
+        assert(pInputImaginary);
+        assert(uLength >= 4);
+        _Analysis_assume_(uLength >= 4);
+        assert(ISPOWEROF2(uLength));
+
+        // Per sample: output = 2 * sqrt((real / uLength)^2 + (imaginary / uLength)^2).
+        // The 1 / uLength scale is applied to each component before squaring.
+        const XMVECTOR vScale = XMVectorReplicate(1.0f / float(uLength));
+        const size_t uVectorCount = uLength >> 2;   // four samples per XMVECTOR
+
+        for (size_t uVec = 0; uVec < uVectorCount; ++uVec)
+        {
+            const XMVECTOR vRe = XMVectorMultiply(pInputReal[uVec], vScale);
+            const XMVECTOR vIm = XMVectorMultiply(pInputImaginary[uVec], vScale);
+
+            const XMVECTOR vSumOfSquares = XMVectorAdd(XMVectorMultiply(vRe, vRe), XMVectorMultiply(vIm, vIm));
+            const XMVECTOR vMagnitude = XMVectorSqrt(vSumOfSquares);
+
+            // Doubling is done by adding the magnitude to itself.
+            pOutput[uVec] = XMVectorAdd(vMagnitude, vMagnitude);
+        }
+    }
+
+    //----------------------------------------------------------------------------------
+    // DESCRIPTION:
+    //  Deinterleaves audio samples
+    //
+    // REMARKS:
+    //  For example, audio of the form [LRLRLR] becomes [LLLRRR].
+    //
+    // PARAMETERS:
+    //  pOutput       - [out] output buffer, receives samples in deinterleaved form, cannot overlap pInput, must have at least (uChannelCount*uFrameCount)/4 elements
+    //  pInput        - [in]  input buffer, cannot overlap pOutput, must have at least (uChannelCount*uFrameCount)/4 elements
+    //  uChannelCount - [in]  number of channels, must be > 1
+    //  uFrameCount   - [in]  number of frames of valid data, must be > 0
+    //----------------------------------------------------------------------------------
+    inline void Deinterleave (
+        _Out_writes_((uChannelCount * uFrameCount) / 4) XMVECTOR* __restrict pOutput,
+        _In_reads_((uChannelCount * uFrameCount) / 4) const XMVECTOR* __restrict pInput,
+        _In_ const size_t uChannelCount,
+        _In_ const size_t uFrameCount) noexcept
+    {
+        assert(pOutput);
+        assert(pInput);
+        assert(uChannelCount > 1);
+        assert(uFrameCount > 0);
+
+        // Both buffers are walked one float at a time.
+        float* __restrict pfDst = reinterpret_cast<float* __restrict>(pOutput);
+        const float* __restrict pfSrc = reinterpret_cast<const float* __restrict>(pInput);
+
+        for (size_t uChannel = 0; uChannel < uChannelCount; ++uChannel)
+        {
+            // Channel uChannel occupies one contiguous run of uFrameCount floats in the output.
+            float* const pfPlane = pfDst + uChannel * uFrameCount;
+
+            for (size_t uFrame = 0; uFrame < uFrameCount; ++uFrame)
+            {
+                // In the input, the channels of one frame sit next to each other.
+                pfPlane[uFrame] = pfSrc[uFrame * uChannelCount + uChannel];
+            }
+        }
+    }
+
+    //----------------------------------------------------------------------------------
+    // DESCRIPTION:
+    //  Interleaves audio samples
+    //
+    // REMARKS:
+    //  For example, audio of the form [LLLRRR] becomes [LRLRLR].
+    //
+    // PARAMETERS:
+    //  pOutput       - [out] output buffer, receives samples in interleaved form, cannot overlap pInput, must have at least (uChannelCount*uFrameCount)/4 elements
+    //  pInput        - [in]  input buffer, cannot overlap pOutput, must have at least (uChannelCount*uFrameCount)/4 elements
+    //  uChannelCount - [in]  number of channels, must be > 1
+    //  uFrameCount   - [in]  number of frames of valid data, must be > 0
+    //----------------------------------------------------------------------------------
+    inline void Interleave(
+        _Out_writes_((uChannelCount * uFrameCount) / 4) XMVECTOR* __restrict pOutput,
+        _In_reads_((uChannelCount * uFrameCount) / 4) const XMVECTOR* __restrict pInput,
+        _In_ const size_t uChannelCount,
+        _In_ const size_t uFrameCount) noexcept
+    {
+        assert(pOutput);
+        assert(pInput);
+        assert(uChannelCount > 1);
+        assert(uFrameCount > 0);
+
+        // Both buffers are addressed as flat arrays of individual float samples.
+        const float* __restrict pfPlanar = reinterpret_cast<const float* __restrict>(pInput);
+        float* __restrict pfInterleaved = reinterpret_cast<float* __restrict>(pOutput);
+
+        // The input is consumed front to back (channel 0's frames, then channel 1's, ...),
+        // so the read index simply counts up across both loops.
+        size_t uRead = 0;
+        for (size_t uChannel = 0; uChannel < uChannelCount; ++uChannel)
+        {
+            // Within one channel the destination samples are uChannelCount floats apart,
+            // starting at offset uChannel inside the first frame.
+            size_t uWrite = uChannel;
+            for (size_t uFrame = 0; uFrame < uFrameCount; ++uFrame)
+            {
+                pfInterleaved[uWrite] = pfPlanar[uRead];
+                ++uRead;
+                uWrite += uChannelCount;
+            }
+        }
+    }
+
+    //----------------------------------------------------------------------------------
+    // DESCRIPTION:
+    //  This function applies a 2^N-sample FFT and unswizzles the result such
+    //  that the samples are in order of increasing frequency.
+    //  Audio is first deinterleaved if multichannel.
+    //
+    // PARAMETERS:
+    //  pReal         - [inout] real components, must have at least ((1<<uLog2Length)*uChannelCount)/4 elements
+    //  pImaginary    - [out]   imaginary components, must have at least ((1<<uLog2Length)*uChannelCount)/4 elements
+    //  pUnityTable   - [in]    unity table, must have at least (1<<uLog2Length) elements, see FFTInitializeUnityTable()
+    //  uChannelCount - [in]    number of channels, must be within [1, 6]
+    //  uLog2Length   - [in]    LOG (base 2) of FFT length in frames, must be within [2, 9]
+    //----------------------------------------------------------------------------------
+    inline void FFTInterleaved(
+        _Inout_updates_(((1 << uLog2Length) * uChannelCount) / 4) XMVECTOR* __restrict pReal,
+        _Out_writes_(((1 << uLog2Length) * uChannelCount) / 4) XMVECTOR* __restrict pImaginary,
+        _In_reads_(1 << uLog2Length) const XMVECTOR* __restrict pUnityTable,
+        _In_ const size_t uChannelCount,
+        _In_ const size_t uLog2Length) noexcept
+    {
+        assert(pReal);
+        assert(pImaginary);
+        assert(pUnityTable);
+        assert(reinterpret_cast<uintptr_t>(pReal) % 16 == 0);
+        assert(reinterpret_cast<uintptr_t>(pImaginary) % 16 == 0);
+        assert(reinterpret_cast<uintptr_t>(pUnityTable) % 16 == 0);
+        assert(uChannelCount > 0 && uChannelCount <= 6);
+        // Mirror the asserts as analysis hints (as IFFTDeinterleaved does) so static
+        // analysis can bound the indexing into the fixed-size scratch arrays below.
+        _Analysis_assume_(uChannelCount > 0 && uChannelCount <= 6);
+        assert(uLog2Length >= 2 && uLog2Length <= 9);
+        _Analysis_assume_(uLog2Length >= 2 && uLog2Length <= 9);
+
+        // Scratch buffers sized for the largest accepted input:
+        // 6 channels * ((1 << 9) >> 2) vectors per channel = 768 vectors.
+        XM_ALIGNED_DATA(16) XMVECTOR vRealTemp[768];
+        XM_ALIGNED_DATA(16) XMVECTOR vImaginaryTemp[768];
+        const size_t uLength = size_t(1) << uLog2Length;
+        const size_t uVectorsPerChannel = uLength >> 2;
+
+        // Stage the real input in channel-major order inside the scratch buffer.
+        if (uChannelCount > 1)
+        {
+            Deinterleave(vRealTemp, pReal, uChannelCount, uLength);
+        }
+        else
+        {
+            memcpy_s(vRealTemp, sizeof(vRealTemp), pReal, uVectorsPerChannel * sizeof(XMVECTOR));
+        }
+
+        // The imaginary input is all zeros; only the portion that will be used is cleared.
+        memset(vImaginaryTemp, 0, (uChannelCount * uVectorsPerChannel) * sizeof(XMVECTOR));
+
+        // Transform each channel in place, choosing the routine that matches the FFT length.
+        for (size_t uChannel = 0; uChannel < uChannelCount; ++uChannel)
+        {
+            XMVECTOR* const pChannelReal = &vRealTemp[uChannel * uVectorsPerChannel];
+            XMVECTOR* const pChannelImaginary = &vImaginaryTemp[uChannel * uVectorsPerChannel];
+
+            if (uLength > 16)
+            {
+                FFT(pChannelReal, pChannelImaginary, pUnityTable, uLength);
+            }
+            else if (uLength == 16)
+            {
+                FFT16(pChannelReal, pChannelImaginary);
+            }
+            else if (uLength == 8)
+            {
+                FFT8(pChannelReal, pChannelImaginary);
+            }
+            else if (uLength == 4)
+            {
+                FFT4(pChannelReal, pChannelImaginary);
+            }
+        }
+
+        // Unswizzle each channel's result from the scratch buffers into the caller's buffers.
+        for (size_t uChannel = 0; uChannel < uChannelCount; ++uChannel)
+        {
+            const size_t uOffset = uChannel * uVectorsPerChannel;
+            FFTUnswizzle(&pReal[uOffset], &vRealTemp[uOffset], uLog2Length);
+            FFTUnswizzle(&pImaginary[uOffset], &vImaginaryTemp[uOffset], uLog2Length);
+        }
+    }
+
+    //----------------------------------------------------------------------------------
+    // DESCRIPTION:
+    //  This function applies a 2^N-sample inverse FFT.
+    //  Audio is interleaved if multichannel.
+    //
+    // PARAMETERS:
+    //  pReal         - [inout] real components, must have at least ((1<<uLog2Length)*uChannelCount)/4 elements
+    //  pImaginary    - [in]    imaginary components, must have at least ((1<<uLog2Length)*uChannelCount)/4 elements
+    //  pUnityTable   - [in]    unity table, must have at least (1<<uLog2Length) elements, see FFTInitializeUnityTable()
+    //  uChannelCount - [in]    number of channels, must be within [1, 6]
+    //  uLog2Length   - [in]    LOG (base 2) of FFT length in frames, must be within [2, 9]
+    //----------------------------------------------------------------------------------
+    inline void IFFTDeinterleaved(
+        _Inout_updates_(((1 << uLog2Length) * uChannelCount) / 4) XMVECTOR* __restrict pReal,
+        _In_reads_(((1 << uLog2Length) * uChannelCount) / 4) const XMVECTOR* __restrict pImaginary,
+        _In_reads_(1 << uLog2Length) const XMVECTOR* __restrict pUnityTable,
+        _In_ const size_t uChannelCount,
+        _In_ const size_t uLog2Length) noexcept
+    {
+        using namespace DirectX;
+
+        assert(pReal);
+        assert(pImaginary);
+        assert(pUnityTable);
+        assert(reinterpret_cast<uintptr_t>(pReal) % 16 == 0);
+        assert(reinterpret_cast<uintptr_t>(pImaginary) % 16 == 0);
+        assert(reinterpret_cast<uintptr_t>(pUnityTable) % 16 == 0);
+        assert(uChannelCount > 0 && uChannelCount <= 6);
+        _Analysis_assume_(uChannelCount > 0 && uChannelCount <= 6);
+        assert(uLog2Length >= 2 && uLog2Length <= 9);
+        _Analysis_assume_(uLog2Length >= 2 && uLog2Length <= 9);
+
+        // Zero-initialized scratch buffers sized for the largest accepted input:
+        // 6 channels * ((1 << 9) >> 2) vectors per channel = 768 vectors.
+        XM_ALIGNED_DATA(16) XMVECTOR vRealTemp[768] = {};
+        XM_ALIGNED_DATA(16) XMVECTOR vImaginaryTemp[768] = {};
+
+        const size_t uLength = size_t(1) << uLog2Length;
+        const size_t uVectorsPerChannel = uLength >> 2;
+        const size_t uTotalVectors = uChannelCount * uVectorsPerChannel;
+
+        // Copy the input into the scratch buffers, scaling the real part by +1/uLength
+        // and the imaginary part by -1/uLength, so the forward FFT routines can be reused below.
+        const XMVECTOR vScaleReal = XMVectorReplicate(1.0f / float(uLength));
+        const XMVECTOR vScaleImaginary = XMVectorReplicate(-1.0f / float(uLength));
+        for (size_t uVector = 0; uVector < uTotalVectors; ++uVector)
+        {
+            vRealTemp[uVector] = XMVectorMultiply(pReal[uVector], vScaleReal);
+            vImaginaryTemp[uVector] = XMVectorMultiply(pImaginary[uVector], vScaleImaginary);
+        }
+
+        // Transform each channel in place, choosing the routine that matches the FFT length.
+        for (size_t uChannel = 0; uChannel < uChannelCount; ++uChannel)
+        {
+            XMVECTOR* const pChannelReal = &vRealTemp[uChannel * uVectorsPerChannel];
+            XMVECTOR* const pChannelImaginary = &vImaginaryTemp[uChannel * uVectorsPerChannel];
+
+            if (uLength > 16)
+            {
+                FFT(pChannelReal, pChannelImaginary, pUnityTable, uLength);
+            }
+            else if (uLength == 16)
+            {
+                FFT16(pChannelReal, pChannelImaginary);
+            }
+            else if (uLength == 8)
+            {
+                FFT8(pChannelReal, pChannelImaginary);
+            }
+            else if (uLength == 4)
+            {
+                FFT4(pChannelReal, pChannelImaginary);
+            }
+        }
+
+        // Only the real result is kept: it is unswizzled into vImaginaryTemp, which is
+        // reused here as the output staging buffer (its previous contents are overwritten).
+        for (size_t uChannel = 0; uChannel < uChannelCount; ++uChannel)
+        {
+            const size_t uOffset = uChannel * uVectorsPerChannel;
+            FFTUnswizzle(&vImaginaryTemp[uOffset], &vRealTemp[uOffset], uLog2Length);
+        }
+
+        // Hand the staged samples back through pReal, interleaving when multichannel.
+        if (uChannelCount > 1)
+        {
+            Interleave(pReal, vImaginaryTemp, uChannelCount, uLength);
+        }
+        else
+        {
+            memcpy_s(pReal, uLength * uChannelCount * sizeof(float), vImaginaryTemp, uVectorsPerChannel * sizeof(XMVECTOR));
+        }
+    }
+
+} // namespace XDSP
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
--- a/vendor/directxmath-3.19.0/build/DirectXMath-GitHub-CMake-Dev17.yml
+++ b/vendor/directxmath-3.19.0/build/DirectXMath-GitHub-CMake-Dev17.yml
@ -0,0 +1,121 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+# http://go.microsoft.com/fwlink/?LinkID=615560
+
+# Builds the library and test suite using CMake.
+
+schedules:
+- cron: "0 0 * * *"
+  displayName: 'Nightly build'
+  branches:
+    include:
+    - main
+
+trigger: none
+pr: none
+
+resources:
+  repositories:
+  - repository: self
+    type: git
+    ref: refs/heads/main
+
+name: $(Year:yyyy).$(Month).$(DayOfMonth)$(Rev:.r)
+
+variables:
+  VS_GENERATOR: 'Visual Studio 17 2022'
+  WIN10_SDK: '10.0.19041.0'
+  WIN11_SDK: '10.0.22000.0'
+  GITHUB_PAT: $(GITHUBPUBLICTOKEN)
+
+pool:
+  vmImage: windows-2022
+
+jobs:
+- job: CMAKE_BUILD
+  displayName: CMake using VS Generator BUILD_TESTING=ON
+  cancelTimeoutInMinutes: 1
+  steps:
+  - checkout: self
+    clean: true
+    fetchTags: false
+  - task: CmdLine@2
+    displayName: Fetch Tests
+    inputs:
+      script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
+  - task: CMake@1
+    displayName: 'CMake (MSVC): Config x64'
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: '-G "$(VS_GENERATOR)" -A x64 -B out -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
+  - task: CMake@1
+    displayName: 'CMake (MSVC): Build x64 Debug'
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: --build out -v --config Debug
+  - task: CMake@1
+    displayName: 'CMake (MSVC): Build x64 Release'
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: --build out -v --config RelWithDebInfo
+  - task: CMake@1
+    displayName: 'CMake (MSVC): Config x86'
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: '-G "$(VS_GENERATOR)" -A Win32 -B out2 -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
+  - task: CMake@1
+    displayName: 'CMake (MSVC): Build x86 Debug'
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: --build out2 -v --config Debug
+  - task: CMake@1
+    displayName: 'CMake (MSVC): Build x86 Release'
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: --build out2 -v --config RelWithDebInfo
+  - task: CMake@1
+    displayName: 'CMake (MSVC): Config ARM64'
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: '-G "$(VS_GENERATOR)" -A ARM64 -B out3 -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
+  - task: CMake@1
+    displayName: 'CMake (MSVC): Build ARM64 Debug'
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: --build out3 -v --config Debug
+  - task: CMake@1
+    displayName: 'CMake (MSVC): Build ARM64 Release'
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: --build out3 -v --config RelWithDebInfo
+  - task: CMake@1
+    displayName: 'CMake (ClangCl): Config x64'
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: '-G "$(VS_GENERATOR)" -A x64 -T clangcl -B out4 -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
+  - task: CMake@1
+    displayName: 'CMake (ClangCl): Build x64 Debug'
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: --build out4 -v --config Debug
+  - task: CMake@1
+    displayName: 'CMake (ClangCl): Build x64 Release'
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: --build out4 -v --config RelWithDebInfo
+  - task: CMake@1
+    displayName: 'CMake (ClangCl): Config ARM64'
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: '-G "$(VS_GENERATOR)" -A ARM64 -T clangcl -B out5 -DCMAKE_SYSTEM_VERSION=$(WIN11_SDK)'
+  - task: CMake@1
+    displayName: 'CMake (ClangCl): Build ARM64 Debug'
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: --build out5 -v --config Debug
+  - task: CMake@1
+    displayName: 'CMake (ClangCl): Build ARM64 Release'
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: --build out5 -v --config RelWithDebInfo
--- a/vendor/directxmath-3.19.0/build/DirectXMath-GitHub-CMake.yml
+++ b/vendor/directxmath-3.19.0/build/DirectXMath-GitHub-CMake.yml
@ -0,0 +1,117 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+# http://go.microsoft.com/fwlink/?LinkID=615560
+
+# Builds the library and test suite using CMake.
+
+schedules:
+- cron: "0 0 * * *"
+  displayName: 'Nightly build'
+  branches:
+    include:
+    - main
+
+trigger:
+  branches:
+    include:
+    - main
+  paths:
+    include:
+    - CMakeLists.txt
+pr:
+  branches:
+    include:
+    - main
+  paths:
+    include:
+    - CMakeLists.txt
+
+resources:
+  repositories:
+  - repository: self
+    type: git
+    ref: refs/heads/main
+
+name: $(Year:yyyy).$(Month).$(DayOfMonth)$(Rev:.r)
+
+variables:
+  VS_GENERATOR: 'Visual Studio 16 2019'
+  WIN10_SDK: '10.0.19041.0'
+  GITHUB_PAT: $(GITHUBPUBLICTOKEN)
+
+pool:
+  vmImage: windows-2019
+
+jobs:
+- job: CMAKE_BUILD
+  displayName: CMake using VS Generator
+  cancelTimeoutInMinutes: 1
+  steps:
+  - checkout: self
+    clean: true
+    fetchTags: false
+  - task: CmdLine@2
+    displayName: Fetch Tests
+    inputs:
+      script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
+  - task: CMake@1
+    displayName: CMake (MSVC x64)
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: '-G "$(VS_GENERATOR)" -A x64 -B out -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
+  - task: CMake@1
+    displayName: CMake (Build x64)
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: --build out -v
+  - task: CMake@1
+    displayName: CMake Test (MSVC x64)
+    inputs:
+      cwd: Tests
+      cmakeArgs: '-G "$(VS_GENERATOR)" -A x64 -B out -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
+  - task: CMake@1
+    displayName: CMake Test (Build x64)
+    inputs:
+      cwd: Tests
+      cmakeArgs: --build out -v
+  - task: CMake@1
+    displayName: CMake (MSVC ARM64)
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: '-G "$(VS_GENERATOR)" -A ARM64 -B out2 -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
+  - task: CMake@1
+    displayName: CMake (Build ARM64)
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: --build out2 -v
+  - task: CMake@1
+    displayName: CMake Test (MSVC ARM64)
+    inputs:
+      cwd: Tests
+      cmakeArgs: '-G "$(VS_GENERATOR)" -A ARM64 -B out2 -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
+  - task: CMake@1
+    displayName: CMake Test (Build ARM64)
+    inputs:
+      cwd: Tests
+      cmakeArgs: --build out2 -v
+  - task: CMake@1
+    displayName: CMake (ClangCl)
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: '-G "$(VS_GENERATOR)" -A x64 -T clangcl -B out3 -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
+  - task: CMake@1
+    displayName: CMake (Build)
+    inputs:
+      cwd: '$(Build.SourcesDirectory)'
+      cmakeArgs: --build out3 -v
+  - task: CMake@1
+    displayName: CMake Test (ClangCL)
+    inputs:
+      cwd: Tests
+      cmakeArgs: '-G "$(VS_GENERATOR)" -A x64 -T clangcl -B out3 -DCMAKE_SYSTEM_VERSION=$(WIN10_SDK)'
+  - task: CMake@1
+    displayName: CMake Test (Build)
+    inputs:
+      cwd: Tests
+      cmakeArgs: --build out3 -v
--- a/vendor/directxmath-3.19.0/build/DirectXMath-GitHub-Dev17.yml
+++ b/vendor/directxmath-3.19.0/build/DirectXMath-GitHub-Dev17.yml
@ -0,0 +1,290 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+# http://go.microsoft.com/fwlink/?LinkID=615560
+
+# Builds the math3 test suite for DirectXMath.
+
+schedules:
+- cron: "0 0 * * *"
+  displayName: 'Nightly build'
+  branches:
+    include:
+    - main
+
+trigger: none
+pr: none
+
+resources:
+  repositories:
+  - repository: self
+    type: git
+    ref: refs/heads/main
+
+name: $(Year:yyyy).$(Month).$(DayOfMonth)$(Rev:.r)
+
+pool:
+  vmImage: windows-2022
+
+variables:
+  GITHUB_PAT: $(GITHUBPUBLICTOKEN)
+
+jobs:
+- job: BUILD_DEV17
+  displayName: 'Visual Studio 2022 (v143)'
+  cancelTimeoutInMinutes: 1
+  steps:
+  - checkout: self
+    clean: true
+    fetchTags: false
+  - task: DeleteFiles@1
+    displayName: Delete files from Tests
+    inputs:
+      SourceFolder: Tests
+      Contents: '**'
+      RemoveSourceFolder: true
+      RemoveDotFiles: true
+  - task: CmdLine@2
+    displayName: Fetch Tests
+    inputs:
+      script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x86dbg
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x86
+      configuration: Debug
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x86rel
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x86
+      configuration: Release
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x64dbg
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x64
+      configuration: Debug
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x64rel
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x64
+      configuration: Release
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln arm64dbg
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: ARM64
+      configuration: Debug
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln arm64rel
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: ARM64
+      configuration: Release
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x86dbg sse3
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x86
+      configuration: SSE3 Debug
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x86rel sse3
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x86
+      configuration: SSE3 Release
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x64dbg sse3
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x64
+      configuration: SSE3 Debug
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x64rel sse3
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x64
+      configuration: SSE3 Release
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x86dbg sse4
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x86
+      configuration: SSE4 Debug
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x86rel sse4
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x86
+      configuration: SSE4 Release
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x64dbg sse4
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x64
+      configuration: SSE4 Debug
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x64rel sse4
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x64
+      configuration: SSE4 Release
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x86dbg avx
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x86
+      configuration: AVX Debug
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x86rel avx
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x86
+      configuration: AVX Release
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x64dbg avx
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x64
+      configuration: AVX Debug
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x64rel avx
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x64
+      configuration: AVX Release
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x86dbg avx2
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x86
+      configuration: AVX2 Debug
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x86rel avx2
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x86
+      configuration: AVX2 Release
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x64dbg avx2
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x64
+      configuration: AVX2 Debug
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x64rel avx2
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x64
+      configuration: AVX2 Release
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x86dbg nointrinsics
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x86
+      configuration: NI Debug
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x86rel nointrinsics
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x86
+      configuration: NI Release
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x64dbg nointrinsics
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x64
+      configuration: NI Debug
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x64rel nointrinsics
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x64
+      configuration: NI Release
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln arm64dbg nointrinsics
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: ARM64
+      configuration: NI Debug
+      msbuildArchitecture: x64
+  # ARM64 Release build of the test solution using the "NI" (no intrinsics) configuration.
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln arm64rel nointrinsics
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: ARM64
+      configuration: NI Release
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x86dbg x87
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x86
+      configuration: x87 Debug
+      msbuildArchitecture: x64
+  - task: VSBuild@1
+    displayName: Build solution math3_2022.sln x86rel x87
+    inputs:
+      solution: Tests/math3/math3_2022.sln
+      vsVersion: 17.0
+      platform: x86
+      configuration: x87 Release
+      msbuildArchitecture: x64
--- a/vendor/directxmath-3.19.0/build/DirectXMath-GitHub-MinGW.yml
+++ b/vendor/directxmath-3.19.0/build/DirectXMath-GitHub-MinGW.yml
@ -0,0 +1,166 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+# http://go.microsoft.com/fwlink/?LinkID=615560
+
+# Builds the library and test suite using the MinGW compiler.
+
+schedules:
+- cron: "0 0 * * *"
+  displayName: 'Nightly build'
+  branches:
+    include:
+    - main
+
+trigger:
+  branches:
+    include:
+    - main
+  paths:
+    exclude:
+    - README.md
+    - HISTORY.md
+    - SECURITY.md
+    - LICENSE
+pr:
+  branches:
+    include:
+    - main
+  paths:
+    exclude:
+    - README.md
+    - HISTORY.md
+    - SECURITY.md
+    - LICENSE
+  drafts: false
+
+resources:
+  repositories:
+  - repository: self
+    type: git
+    ref: refs/heads/main
+
+name: $(Year:yyyy).$(Month).$(DayOfMonth)$(Rev:.r)
+
+pool:
+  vmImage: windows-2022
+
+variables:
+  GITHUB_PAT: $(GITHUBPUBLICTOKEN)
+  URL_MINGW32: https://github.com/brechtsanders/winlibs_mingw/releases/download/12.2.0-14.0.6-10.0.0-ucrt-r2/winlibs-i686-posix-dwarf-gcc-12.2.0-llvm-14.0.6-mingw-w64ucrt-10.0.0-r2.zip
+  HASH_MINGW32: 'fcd1e11b896190da01c83d5b5fb0d37b7c61585e53446c2dab0009debc3915e757213882c35e35396329338de6f0222ba012e23a5af86932db45186a225d1272'
+
+jobs:
+- job: MINGW32_BUILD
+  displayName: 'Minimalist GNU for Windows (MinGW32)'
+  steps:
+  - checkout: self
+    clean: true
+    fetchTags: false
+  - task: CmdLine@2
+    displayName: Fetch Tests
+    inputs:
+      script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
+      workingDirectory: $(Build.SourcesDirectory)
+  # Installs the 32-bit MinGW toolchain for the following steps:
+  #   1. downloads the archive named by URL_MINGW32 to gw32.zip,
+  #   2. stops the step with an error unless its SHA-512 equals HASH_MINGW32,
+  #   3. extracts it and prepends gw32\mingw32\bin (under the sources directory) to PATH.
+  - task: PowerShell@2
+    displayName: Install MinGW32
+    inputs:
+      targetType: inline
+      script: |
+        $ProgressPreference = 'SilentlyContinue'
+        Write-Host "Downloading winlibs..."
+        Invoke-WebRequest -Uri "$(URL_MINGW32)" -OutFile "gw32.zip"
+        Write-Host "Downloaded."
+        $fileHash = Get-FileHash -Algorithm SHA512 gw32.zip | ForEach { $_.Hash} | Out-String
+        $filehash = $fileHash.Trim()
+        Write-Host "##[debug]SHA512: " $fileHash
+        if ($fileHash -ne '$(HASH_MINGW32)') {
+            Write-Error -Message "##[error]Computed hash does not match!" -ErrorAction Stop
+        }
+        Write-Host "Extracting winlibs..."
+        Expand-Archive -LiteralPath 'gw32.zip'
+        Write-Host "Extracted."
+        Write-Host "Added to path: $env:BUILD_SOURCESDIRECTORY\gw32\mingw32\bin"
+        Write-Host "##vso[task.prependpath]$env:BUILD_SOURCESDIRECTORY\gw32\mingw32\bin"
+
+      workingDirectory: $(Build.SourcesDirectory)
+  - task: CmdLine@2
+    displayName: GCC version
+    inputs:
+      script: g++ --version
+  - task: CMake@1
+    displayName: CMake (MinGW32) Dbg
+    inputs:
+      cwd: Tests
+      cmakeArgs: -B out -DCMAKE_BUILD_TYPE="Debug" -DDXMATH_ARCHITECTURE=x86 -DCMAKE_CXX_COMPILER="g++.exe" -G "MinGW Makefiles"
+  - task: CMake@1
+    displayName: CMake (MinGW32) Build Dbg
+    inputs:
+      cwd: Tests
+      cmakeArgs: --build out
+  - task: CMake@1
+    displayName: CMake (MinGW32) Rel
+    inputs:
+      cwd: Tests
+      cmakeArgs: -B out2 -DCMAKE_BUILD_TYPE="RelWithDebInfo" -DDXMATH_ARCHITECTURE=x86 -DCMAKE_CXX_COMPILER="g++.exe" -G "MinGW Makefiles"
+  - task: CMake@1
+    displayName: CMake (MinGW32) Build Rel
+    inputs:
+      cwd: Tests
+      cmakeArgs: --build out2
+  - task: CMake@1
+    displayName: CMake (MinGW32) Dbg NI
+    inputs:
+      cwd: Tests
+      cmakeArgs: -B out3 -DCMAKE_BUILD_TYPE="Debug" -DBUILD_NO_INTRINSICS=ON -DDXMATH_ARCHITECTURE=x86 -DCMAKE_CXX_COMPILER="g++.exe" -G "MinGW Makefiles"
+  - task: CMake@1
+    displayName: CMake (MinGW32) Build Dbg NI
+    inputs:
+      cwd: Tests
+      cmakeArgs: --build out3
+
+- job: MINGW64_BUILD
+  displayName: 'Minimalist GNU for Windows (MinGW-W64) BUILD_TESTING=ON'
+  steps:
+  - checkout: self
+    clean: true
+    fetchTags: false
+  - task: CmdLine@2
+    displayName: Fetch Tests
+    inputs:
+      script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
+      workingDirectory: $(Build.SourcesDirectory)
+  - task: CmdLine@2
+    displayName: GCC version
+    inputs:
+      script: g++ --version
+  - task: CMake@1
+    displayName: CMake (MinGW-W64) Dbg
+    inputs:
+      cwd: Tests
+      cmakeArgs: -B out -DCMAKE_BUILD_TYPE="Debug" -DDXMATH_ARCHITECTURE=x64 -DCMAKE_CXX_COMPILER="g++.exe" -G "MinGW Makefiles"
+  - task: CMake@1
+    displayName: CMake (MinGW-W64) Build Dbg
+    inputs:
+      cwd: Tests
+      cmakeArgs: --build out
+  - task: CMake@1
+    displayName: CMake (MinGW-W64) Rel
+    inputs:
+      cwd: Tests
+      cmakeArgs: -B out2 -DCMAKE_BUILD_TYPE="RelWithDebInfo" -DDXMATH_ARCHITECTURE=x64 -DCMAKE_CXX_COMPILER="g++.exe" -G "MinGW Makefiles"
+  - task: CMake@1
+    displayName: CMake (MinGW-W64) Build Rel
+    inputs:
+      cwd: Tests
+      cmakeArgs: --build out2
+  - task: CMake@1
+    displayName: CMake (MinGW-W64) Dbg NI
+    inputs:
+      cwd: Tests
+      cmakeArgs: -B out3 -DCMAKE_BUILD_TYPE="Debug" -DBUILD_NO_INTRINSICS=ON -DDXMATH_ARCHITECTURE=x64 -DCMAKE_CXX_COMPILER="g++.exe" -G "MinGW Makefiles"
+  - task: CMake@1
+    displayName: CMake (MinGW-W64) Build Dbg NI
+    inputs:
+      cwd: Tests
+      cmakeArgs: --build out3
--- a/vendor/directxmath-3.19.0/build/DirectXMath-GitHub-WSL-11.yml
+++ b/vendor/directxmath-3.19.0/build/DirectXMath-GitHub-WSL-11.yml
@ -0,0 +1,66 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+# http://go.microsoft.com/fwlink/?LinkID=615560
+
+# Builds the math3 test suite for Windows Subsystem for Linux (WSL)
+
+schedules:
+- cron: "0 3 * * *"
+  displayName: 'Nightly build'
+  branches:
+    include:
+    - main
+
+trigger: none
+pr: none
+
+resources:
+  repositories:
+  - repository: self
+    type: git
+    ref: refs/heads/main
+
+name: $(Year:yyyy).$(Month).$(DayOfMonth)$(Rev:.r)
+
+pool:
+  vmImage: ubuntu-22.04
+
+variables:
+  GITHUB_PAT: $(GITHUBPUBLICTOKEN)
+
+jobs:
+- job: BUILD_WSL
+  displayName: 'Windows Subsystem for Linux (WSL)'
+  steps:
+  - checkout: self
+    clean: true
+    fetchTags: false
+  - task: CmdLine@2
+    displayName: Fetch tests
+    inputs:
+      script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
+  - task: CMake@1
+    displayName: DirectXMath Tests
+    inputs:
+      cwd: Tests
+      cmakeArgs: .
+  # Downloads sal.h from the dotnet/runtime v8.0.1 tag into Inc/ under the sources
+  # directory, then stops the step with an error unless the file's SHA-512 equals the
+  # pinned value below.
+  # NOTE(review): presumably needed because the Linux image ships no SAL header - confirm.
+  - task: PowerShell@2
+    displayName: Fetch SAL.H
+    inputs:
+      targetType: inline
+      script: |
+        $ProgressPreference = 'SilentlyContinue'
+        Invoke-WebRequest -Uri https://raw.githubusercontent.com/dotnet/runtime/v8.0.1/src/coreclr/pal/inc/rt/sal.h -OutFile $(Build.SourcesDirectory)/Inc/sal.h
+        $fileHash = Get-FileHash -Algorithm SHA512 $(Build.SourcesDirectory)/Inc/sal.h | ForEach { $_.Hash} | Out-String
+        $filehash = $fileHash.Trim()
+        Write-Host "##[debug]SHA512: " $filehash
+        if ($fileHash -ne "0f5a80b97564217db2ba3e4624cc9eb308e19cc9911dae21d983c4ab37003f4756473297ba81b386c498514cedc1ef5a3553d7002edc09aeb6a1335df973095f") {
+            Write-Error -Message "##[error]Computed hash does not match!" -ErrorAction Stop
+        }
+
+  - task: CMake@1
+    displayName: DirectXMath Tests Build
+    inputs:
+      cwd: Tests
+      cmakeArgs: --build . -v
--- a/vendor/directxmath-3.19.0/build/DirectXMath-GitHub-WSL.yml
+++ b/vendor/directxmath-3.19.0/build/DirectXMath-GitHub-WSL.yml
@ -0,0 +1,85 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+# http://go.microsoft.com/fwlink/?LinkID=615560
+
+# Builds the math3 test suite for Windows Subsystem for Linux (WSL)
+
+schedules:
+- cron: "0 3 * * *"
+  displayName: 'Nightly build'
+  branches:
+    include:
+    - main
+
+trigger:
+  branches:
+    include:
+    - main
+  paths:
+    exclude:
+    - README.md
+    - HISTORY.md
+    - SECURITY.md
+    - LICENSE
+pr:
+  branches:
+    include:
+    - main
+  paths:
+    exclude:
+    - README.md
+    - HISTORY.md
+    - SECURITY.md
+    - LICENSE
+  drafts: false
+
+resources:
+  repositories:
+  - repository: self
+    type: git
+    ref: refs/heads/main
+
+name: $(Year:yyyy).$(Month).$(DayOfMonth)$(Rev:.r)
+
+# Default Microsoft-hosted agent image for the jobs in this pipeline.
+# ubuntu-20.04 is no longer offered as a hosted image, so a supported
+# release is used instead.
+pool:
+  vmImage: ubuntu-22.04
+
+variables:
+  GITHUB_PAT: $(GITHUBPUBLICTOKEN)
+
+jobs:
+- job: BUILD_WSL
+  displayName: 'Windows Subsystem for Linux (WSL)'
+  steps:
+  - checkout: self
+    clean: true
+    fetchTags: false
+  # Clone the separately hosted test suite into ./Tests.
+  # This pipeline's pool is an Ubuntu image, where CmdLine@2 runs the script
+  # with bash, so the token is read with shell syntax; cmd-style %VAR% is not
+  # expanded there and would be passed to git literally.
+  - task: CmdLine@2
+    displayName: Fetch tests
+    inputs:
+      script: git clone --quiet --no-tags https://${GITHUB_PAT}@github.com/walbourn/directxmathtest.git Tests
+  - task: CMake@1
+    displayName: DirectXMath Tests
+    inputs:
+      cwd: Tests
+      cmakeArgs: .
+  - task: PowerShell@2
+    displayName: Fetch SAL.H
+    inputs:
+      targetType: inline
+      script: |
+        $ProgressPreference = 'SilentlyContinue'
+        Invoke-WebRequest -Uri https://raw.githubusercontent.com/dotnet/runtime/v8.0.1/src/coreclr/pal/inc/rt/sal.h -OutFile $(Build.SourcesDirectory)/Inc/sal.h
+        $fileHash = Get-FileHash -Algorithm SHA512 $(Build.SourcesDirectory)/Inc/sal.h | ForEach { $_.Hash} | Out-String
+        $filehash = $fileHash.Trim()
+        Write-Host "##[debug]SHA512: " $filehash
+        if ($fileHash -ne "0f5a80b97564217db2ba3e4624cc9eb308e19cc9911dae21d983c4ab37003f4756473297ba81b386c498514cedc1ef5a3553d7002edc09aeb6a1335df973095f") {
+            Write-Error -Message "##[error]Computed hash does not match!" -ErrorAction Stop
+        }
+
+  - task: CMake@1
+    displayName: DirectXMath Tests Build
+    inputs:
+      cwd: Tests
+      cmakeArgs: --build . -v
--- a/vendor/directxmath-3.19.0/build/DirectXMath-GitHub.yml
+++ b/vendor/directxmath-3.19.0/build/DirectXMath-GitHub.yml
@ -0,0 +1,557 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+# http://go.microsoft.com/fwlink/?LinkID=615560
+
+# Builds the math3 test suite for DirectXMath.
+
+schedules:
+- cron: "0 0 * * *"
+  displayName: 'Nightly build'
+  branches:
+    include:
+    - main
+
+trigger:
+  branches:
+    include:
+    - main
+  paths:
+    exclude:
+    - README.md
+    - HISTORY.md
+    - SECURITY.md
+    - LICENSE
+pr:
+  branches:
+    include:
+    - main
+  paths:
+    exclude:
+    - README.md
+    - HISTORY.md
+    - SECURITY.md
+    - LICENSE
+  drafts: false
+
+resources:
+  repositories:
+  - repository: self
+    type: git
+    ref: refs/heads/main
+
+name: $(Year:yyyy).$(Month).$(DayOfMonth)$(Rev:.r)
+
+variables:
+  GITHUB_PAT: $(GITHUBPUBLICTOKEN)
+  Codeql.Enabled: true
+
+pool:
+  vmImage: windows-2019
+
+jobs:
+- job: BUILD_DEV16
+  displayName: 'Visual Studio 2019 (v142)'
+  cancelTimeoutInMinutes: 1
+  steps:
+  - checkout: self
+    clean: true
+    fetchTags: false
+  - task: DeleteFiles@1
+    displayName: Delete files from Tests
+    inputs:
+      SourceFolder: Tests
+      Contents: '**'
+      RemoveSourceFolder: true
+      RemoveDotFiles: true
+  - task: CmdLine@2
+    displayName: Fetch Tests
+    inputs:
+      script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x86dbg
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x86rel
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x64dbg
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x64rel
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln arm64dbg
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: ARM64
+      configuration: Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln arm64rel
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: ARM64
+      configuration: Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x86dbg sse3
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: SSE3 Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x86rel sse3
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: SSE3 Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x64dbg sse3
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: SSE3 Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x64rel sse3
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: SSE3 Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x86dbg sse4
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: SSE4 Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x86rel sse4
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: SSE4 Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x64dbg sse4
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: SSE4 Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x64rel sse4
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: SSE4 Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x86dbg avx
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: AVX Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x86rel avx
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: AVX Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x64dbg avx
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: AVX Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x64rel avx
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: AVX Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x86dbg avx2
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: AVX2 Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x86rel avx2
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: AVX2 Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x64dbg avx2
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: AVX2 Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x64rel avx2
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: AVX2 Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x86dbg nointrinsics
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: NI Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x86rel nointrinsics
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: NI Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x64dbg nointrinsics
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: NI Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x64rel nointrinsics
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: NI Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln arm64dbg nointrinsics
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: ARM64
+      configuration: NI Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln arm64rel nointrinsics
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: ARM64
+      configuration: NI Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x86dbg x87
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: x87 Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2019.sln x86rel x87
+    inputs:
+      solution: Tests/math3/math3_2019.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: x87 Release
+  - task: VSBuild@1
+    displayName: Build solution shmath_2019.sln x64dbg
+    inputs:
+      solution: Tests/shmath/shmath_2019.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: Debug
+  - task: VSBuild@1
+    displayName: Build solution shmath_2019.sln x64rel
+    inputs:
+      solution: Tests/shmath/shmath_2019.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: Release
+  - task: VSBuild@1
+    displayName: Build solution shmath_2019.sln arm64dbg
+    inputs:
+      solution: Tests/shmath/shmath_2019.sln
+      vsVersion: 16.0
+      platform: ARM64
+      configuration: Debug
+  - task: VSBuild@1
+    displayName: Build solution shmath_2019.sln arm64rel
+    inputs:
+      solution: Tests/shmath/shmath_2019.sln
+      vsVersion: 16.0
+      platform: ARM64
+      configuration: Release
+  - task: VSBuild@1
+    displayName: Build solution XDSPTest_2019 x64dbg
+    inputs:
+      solution: Tests/xdsp/XDSPTest_2019.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: Debug
+  - task: VSBuild@1
+    displayName: Build solution XDSPTest_2019 x64rel
+    inputs:
+      solution: Tests/xdsp/XDSPTest_2019.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: Release
+  - task: VSBuild@1
+    displayName: Build solution XDSPTest_2019 arm64dbg
+    inputs:
+      solution: Tests/xdsp/XDSPTest_2019.sln
+      vsVersion: 16.0
+      platform: ARM64
+      configuration: Debug
+  - task: VSBuild@1
+    displayName: Build solution XDSPTest_2019 arm64rel
+    inputs:
+      solution: Tests/xdsp/XDSPTest_2019.sln
+      vsVersion: 16.0
+      platform: ARM64
+      configuration: Release
+
+- job: BUILD_DEV15
+  displayName: 'Visual Studio 2019 (v141)'
+  steps:
+  - checkout: self
+    clean: true
+    fetchTags: false
+  - task: CmdLine@2
+    displayName: Fetch Tests
+    inputs:
+      script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x86dbg
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x86rel
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x64dbg
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x64rel
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x86dbg sse3
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: SSE3 Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x86rel sse3
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: SSE3 Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x64dbg sse3
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: SSE3 Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x64rel sse3
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: SSE3 Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x86dbg sse4
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: SSE4 Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x86rel sse4
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: SSE4 Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x64dbg sse4
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: SSE4 Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x64rel sse4
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: SSE4 Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x86dbg avx
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: AVX Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x86rel avx
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: AVX Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x64dbg avx
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: AVX Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x64rel avx
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: AVX Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x86dbg avx2
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: AVX2 Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x86rel avx2
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: AVX2 Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x64dbg avx2
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: AVX2 Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x64rel avx2
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: AVX2 Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x86dbg nointrinsics
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: NI Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x86rel nointrinsics
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: NI Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x64dbg nointrinsics
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: NI Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x64rel nointrinsics
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: NI Release
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x86dbg x87
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: x87 Debug
+  - task: VSBuild@1
+    displayName: Build solution math3_2017.sln x86rel x87
+    inputs:
+      solution: Tests/math3/math3_2017.sln
+      vsVersion: 16.0
+      platform: x86
+      configuration: x87 Release
+  - task: VSBuild@1
+    displayName: Build solution shmath_2017.sln x64dbg
+    inputs:
+      solution: Tests/shmath/shmath_2017.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: Debug
+  - task: VSBuild@1
+    displayName: Build solution shmath_2017.sln x64rel
+    inputs:
+      solution: Tests/shmath/shmath_2017.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: Release
+  - task: VSBuild@1
+    displayName: Build solution XDSPTest_2017 x64dbg
+    inputs:
+      solution: Tests/xdsp/XDSPTest_2017.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: Debug
+  - task: VSBuild@1
+    displayName: Build solution XDSPTest_2017 x64rel
+    inputs:
+      solution: Tests/xdsp/XDSPTest_2017.sln
+      vsVersion: 16.0
+      platform: x64
+      configuration: Release
--- a/vendor/directxmath-3.19.0/build/DirectXMath-SDL.yml
+++ b/vendor/directxmath-3.19.0/build/DirectXMath-SDL.yml
@ -0,0 +1,86 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+# http://go.microsoft.com/fwlink/?LinkID=615560
+
+# Runs various SDL recommended tools on the code.
+
+schedules:
+- cron: "0 3 * * 0,3,5"
+  displayName: 'Three times a week'
+  branches:
+    include:
+    - main
+
+trigger: none
+pr: none
+
+resources:
+  repositories:
+  - repository: self
+    type: git
+    ref: refs/heads/main
+
+name: $(Year:yyyy).$(Month).$(DayOfMonth)$(Rev:.r)
+
+variables:
+  VS_GENERATOR: 'Visual Studio 17 2022'
+  GITHUB_PAT: $(GITHUBPUBLICTOKEN)
+
+pool:
+  vmImage: windows-2022
+
+jobs:
+- job: SDL_BUILD
+  displayName: 'Build using required SDL tools'
+  workspace:
+    clean: all
+  steps:
+  - checkout: self
+    clean: true
+    fetchTags: false
+  - task: NodeTool@0
+    displayName: 'NPM install'
+    inputs:
+      versionSpec: 14.x
+  - task: securedevelopmentteam.vss-secure-development-tools.build-task-credscan.CredScan@3
+    displayName: 'Run Credential Scanner'
+    inputs:
+      debugMode: false
+      folderSuppression: false
+  - task: PoliCheck@2
+    displayName: 'Run PoliCheck'
+    inputs:
+      result: PoliCheck.xml
+  - task: CmdLine@2
+    displayName: Fetch Tests
+    inputs:
+      script: git clone --quiet --no-tags https://%GITHUB_PAT%@github.com/walbourn/directxmathtest.git Tests
+  - task: Armory@2
+    displayName: Run ARMory
+  - task: CMake@1
+    displayName: 'CMake (MSVC): Config x64'
+    inputs:
+      cwd: '$(Build.SourcesDirectory)/Tests/headertest'
+      cmakeArgs: '-G "$(VS_GENERATOR)" -A x64 -B out'
+  - task: CodeQL3000Init@0
+    inputs:
+      Enabled: true
+  - task: VSBuild@1
+    displayName: 'Build C++ with CodeQL'
+    inputs:
+      solution: '$(Build.SourcesDirectory)/Tests/headertest/out/headertest.sln'
+      vsVersion: 17.0
+      platform: x64
+      configuration: Release
+      msbuildArchitecture: x64
+  - task: CodeQL3000Finalize@0
+    condition: always()
+  - task: securedevelopmentteam.vss-secure-development-tools.build-task-postanalysis.PostAnalysis@2
+    displayName: 'Post Analysis'
+    inputs:
+      GdnBreakAllTools: true
+      GdnBreakPolicy: 'Microsoft'
+      GdnBreakPolicyMinSev: 'Error'
+  - task: ComponentGovernanceComponentDetection@0
+    displayName: Component Detection
--- a/vendor/directxmath-3.19.0/build/DirectXMath-config.cmake.in
+++ b/vendor/directxmath-3.19.0/build/DirectXMath-config.cmake.in
@ -0,0 +1,5 @@
+# Package configuration template. The at-sign placeholders below are
+# substituted when the build system configures this file.
+@PACKAGE_INIT@
+
+# Load the exported targets file located next to this config file. The path is
+# quoted so that it is always passed as a single argument.
+include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@-targets.cmake")
+
+# Presumably provided by the package-init expansion above: marks the package as
+# not found when a required component is missing.
+check_required_components("@PROJECT_NAME@")
--- a/vendor/directxmath-3.19.0/build/DirectXMath.pc.in
+++ b/vendor/directxmath-3.19.0/build/DirectXMath.pc.in
@ -0,0 +1,10 @@
+# pkg-config metadata template. The at-sign placeholders are filled in when
+# the build system configures this file.
+prefix=@CMAKE_INSTALL_PREFIX@
+libdir=@DIRECTXMATH_LIBDIR_FOR_PKG_CONFIG@
+includedir=@DIRECTXMATH_INCLUDEDIR_FOR_PKG_CONFIG@
+
+Name: @PROJECT_NAME@
+Description: @PROJECT_DESCRIPTION@
+URL: @PROJECT_HOMEPAGE_URL@
+Version: @PROJECT_VERSION@
+# Consumers only receive an include path; the includedir reference is resolved
+# by pkg-config from the variable defined above.
+Cflags: -I${includedir}
+# No libraries are listed for linking (presumably because the package is
+# header-only; confirm against the project's build script).
+Libs:
--- a/vendor/directxmath-3.19.0/build/JoinPaths.cmake
+++ b/vendor/directxmath-3.19.0/build/JoinPaths.cmake
@ -0,0 +1,23 @@
+# This module provides a function for joining paths,
+# as known from most languages
+#
+# SPDX-License-Identifier: (MIT OR CC0-1.0)
+# Copyright 2020 Jan Tojnar
+# https://github.com/jtojnar/cmake-snips
+#
+# Modelled after Python’s os.path.join
+# https://docs.python.org/3.7/library/os.path.html#os.path.join
+# Windows not supported
+# join_paths(<out-var> <first-segment> [<segment>...])
+#
+# Joins path segments with "/" and stores the result in <out-var> in the
+# caller's scope. Rules:
+#   * empty segments after the first are skipped;
+#   * an absolute segment discards everything joined before it;
+#   * no separator is inserted when the path so far is empty or already ends
+#     in "/", so an empty first segment does not turn a relative path into an
+#     absolute one, and "a/" joined with "b" yields "a/b" rather than "a//b".
+#
+# Example:
+#   join_paths(out "/usr" "lib" "pkgconfig")   # out = "/usr/lib/pkgconfig"
+function(join_paths joined_path first_path_segment)
+    set(temp_path "${first_path_segment}")
+    foreach(current_segment IN LISTS ARGN)
+        if(NOT ("${current_segment}" STREQUAL ""))
+            if(IS_ABSOLUTE "${current_segment}" OR "${temp_path}" STREQUAL "")
+                # Restart from this segment: it is absolute, or nothing has
+                # been accumulated yet.
+                set(temp_path "${current_segment}")
+            elseif("${temp_path}" MATCHES "/$")
+                # The separator is already present; do not double it.
+                set(temp_path "${temp_path}${current_segment}")
+            else()
+                set(temp_path "${temp_path}/${current_segment}")
+            endif()
+        endif()
+    endforeach()
+    # Hand the result back to the caller under the requested variable name.
+    set(${joined_path} "${temp_path}" PARENT_SCOPE)
+endfunction()