mame/3rdparty/bx/tests/simd_bench.cpp
Julian Sikorski 0837e7451a WIP: sync bgfx, bx and bimg with latest upstream (#5723)
* Sync with bgfx upstream revision b91d0b6

* Sync with bx upstream revision d60912b

* Sync with bimg upstream revision bd81f60

* Add astc-codec decoder

* Rename VertexDecl to VertexLayout

* Rename UniformType enum Int1 to Sampler.

* Add NVN stub

* Fix unused-const-variable error on macOS

* Drop redundant explicit language parameters
buildoptions_cpp are only applied to c++ files and buildoptions_objcpp are only
applied to objective c++ files. As such, hardcoding -x offers no benefit while
preventing overrides (such as one needed by 3rdparty/bgfx/src/renderer_vk.cpp on
macOS) from working.

* Re-introduce -x c++ in places where C code is compiled as C++ to prevent clang from throwing a warning

* Build bgfx as Objective-C++ on macOS
It is needed due to included headers

* Enable Direct3D12 and Vulkan bgfx rendering backends

* Enable building of spirv shaders

* Properly escape /c in cmd call

* Comment out dx12 bgfx renderer

* Honor VERBOSE setting during shaders build

* Only invert hlsl shader XYZ_TO_sRGB matrix for opengl

* Add spirv shaders

* OpenGL ES needs transposed matrix too

* Metal needs transposed matrix as well
2019-10-13 07:50:38 -04:00

134 lines
3.4 KiB
C++

/*
* Copyright 2010-2019 Branimir Karadzic. All rights reserved.
* License: https://github.com/bkaradzic/bx#license-bsd-2-clause
*/
#include <bx/allocator.h>
#include <bx/rng.h>
#include <bx/simd_t.h>
#include <bx/timer.h>
#include <stdio.h>
// Copies one 64 MiB scratch buffer into another.
//
// Called before every timed run; presumably the point is to push the benchmark
// data out of the CPU caches so each run starts cold (inferred from the name).
//
// Both buffers are function-local statics: they are allocated on the first call,
// reused by every later call and never released. The source buffer is never
// written, so the bytes being copied are whatever the allocation contained.
static void flushCache()
{
	// 1 << 26 bytes == 64 MiB per buffer.
	static const uint32_t scratchSize = 1 << 26;

	static uint8_t* scratchSrc = new uint8_t[scratchSize];
	static uint8_t* scratchDst = new uint8_t[scratchSize];

	bx::memCopy(scratchDst, scratchSrc, scratchSize);
}
// Signature shared by the reciprocal-square-root variants under test.
typedef bx::simd128_t (*SimdRsqrtFn)(bx::simd128_t _a);

// Applies `simdRsqrtFn` to the vectors in `_src` and stores the results at the
// same positions in `_dst`.
//
// Vectors are handled four at a time: all four are loaded, then all four are
// transformed, then all four are stored. Only `_numVertices/4` complete groups
// are processed, so up to three trailing vectors are left untouched when
// `_numVertices` is not a multiple of four.
template<SimdRsqrtFn simdRsqrtFn>
void simd_rsqrt_bench(bx::simd128_t* _dst, bx::simd128_t* _src, uint32_t _numVertices)
{
	const uint32_t numGroups = _numVertices/4;

	bx::simd128_t* in  = _src;
	bx::simd128_t* out = _dst;

	for (uint32_t group = 0; group < numGroups; ++group, in += 4, out += 4)
	{
		// Load the whole group before touching the destination.
		const bx::simd128_t v0 = bx::simd_ld(in + 0);
		const bx::simd128_t v1 = bx::simd_ld(in + 1);
		const bx::simd128_t v2 = bx::simd_ld(in + 2);
		const bx::simd128_t v3 = bx::simd_ld(in + 3);

		const bx::simd128_t r0 = simdRsqrtFn(v0);
		const bx::simd128_t r1 = simdRsqrtFn(v1);
		const bx::simd128_t r2 = simdRsqrtFn(v2);
		const bx::simd128_t r3 = simdRsqrtFn(v3);

		bx::simd_st(out + 0, r0);
		bx::simd_st(out + 1, r1);
		bx::simd_st(out + 2, r2);
		bx::simd_st(out + 3, r3);
	}
}
void simd_bench_pass(bx::simd128_t* _dst, bx::simd128_t* _src, uint32_t _numVertices)
{
const uint32_t numIterations = 10;
{
int64_t elapsed = 0;
for (uint32_t test = 0; test < numIterations; ++test)
{
flushCache();
elapsed += -bx::getHPCounter();
simd_rsqrt_bench<bx::simd_rsqrt_est>(_dst, _src, _numVertices);
elapsed += bx::getHPCounter();
}
printf(" simd_rsqrt_est: %15f\n", double(elapsed) );
}
{
int64_t elapsed = 0;
for (uint32_t test = 0; test < numIterations; ++test)
{
flushCache();
elapsed += -bx::getHPCounter();
simd_rsqrt_bench<bx::simd_rsqrt_nr>(_dst, _src, _numVertices);
elapsed += bx::getHPCounter();
}
printf(" simd_rsqrt_nr: %15f\n", double(elapsed) );
}
{
int64_t elapsed = 0;
for (uint32_t test = 0; test < numIterations; ++test)
{
flushCache();
elapsed += -bx::getHPCounter();
simd_rsqrt_bench<bx::simd_rsqrt_carmack>(_dst, _src, _numVertices);
elapsed += bx::getHPCounter();
}
printf("simd_rsqrt_carmack: %15f\n", double(elapsed) );
}
{
int64_t elapsed = 0;
for (uint32_t test = 0; test < numIterations; ++test)
{
flushCache();
elapsed += -bx::getHPCounter();
simd_rsqrt_bench<bx::simd_rsqrt>(_dst, _src, _numVertices);
elapsed += bx::getHPCounter();
}
printf(" simd_rsqrt: %15f\n", double(elapsed) );
}
}
// Entry point of the reciprocal-square-root benchmark.
//
// Makes a single allocation (alignment argument 16) that holds `numVertices`
// source vectors followed by `numVertices` destination vectors, then runs
// simd_bench_pass() twice over it:
//   1. with xyz taken from bx::randUnitSphere() and w set to 1.0f,
//   2. with every component of that same data replaced by its absolute value.
// The buffer is released before returning.
void simd_bench()
{
	bx::DefaultAllocator allocator;
	bx::RngMwc rng;

	const uint32_t numVertices = 1024*1024;

	uint8_t* data = (uint8_t*)BX_ALIGNED_ALLOC(&allocator, 2*numVertices*sizeof(bx::simd128_t), 16);
	if (NULL == data)
	{
		// The loops below write through this pointer right away, so bail out
		// if the allocator reported failure by returning NULL.
		printf("\nsimd_bench: failed to allocate benchmark buffers.\n");
		return;
	}

	// First half of the allocation is the input, second half the output.
	bx::simd128_t* src = (bx::simd128_t*)data;
	bx::simd128_t* dst = &src[numVertices];

	printf("\n -- positive & negative --\n");

	for (uint32_t ii = 0; ii < numVertices; ++ii)
	{
		float* ptr = (float*)&src[ii];
		bx::store(ptr, bx::randUnitSphere(&rng) );
		ptr[3] = 1.0f;
	}

	simd_bench_pass(dst, src, numVertices);

	printf("\n -- positive only --\n");

	// Reuse the same data with the sign removed from every component.
	for (uint32_t ii = 0; ii < numVertices; ++ii)
	{
		float* ptr = (float*)&src[ii];
		ptr[0] = bx::abs(ptr[0]);
		ptr[1] = bx::abs(ptr[1]);
		ptr[2] = bx::abs(ptr[2]);
		ptr[3] = bx::abs(ptr[3]);
	}

	simd_bench_pass(dst, src, numVertices);

	BX_ALIGNED_FREE(&allocator, data, 16);
}