Introduce validity checks for RGB utilities and fix bugs uncovered [Vas Crabb]

* Added several missing functions to rgbgen
* Fixed logical shift right in rgbgen
* Fixed sra that should be sra_imm in rdptpipe
* Added some simple SSE4.1 optimisations in rgbsse
* Re-organised rgbsse, rgbvmx and rgbgen to be in more logical order
* Fixed return on some modifying operators
* Made some more reference parameters const
* Removed inline qualifier from a number of methods as it's implied when body is present at declaration
* Mark some constructors explicit
This commit is contained in:
Vas Crabb 2016-07-14 00:50:19 +10:00
parent 32873fc1dd
commit ac7c2ce2d4
8 changed files with 896 additions and 386 deletions

View File

@ -10,7 +10,10 @@
#include "emu.h" #include "emu.h"
#include "validity.h" #include "validity.h"
#include "emuopts.h" #include "emuopts.h"
#include "video/rgbutil.h"
#include <ctype.h> #include <ctype.h>
@ -49,6 +52,20 @@ inline int validity_checker::get_defstr_index(const char *string, bool suppress_
} }
//-------------------------------------------------
// random_i32
// random_u32
// random_i64
// random_u64
//-------------------------------------------------
#undef rand
// Produce a signed 32-bit test value by converting the result of random_u32().
inline INT32 validity_checker::random_i32()
{
	return static_cast<INT32>(random_u32());
}
// Produce an unsigned 32-bit test value from two rand() draws, the second
// offset by 15 bits. Each draw is widened to UINT32 before shifting: rand()
// returns a signed int, and where RAND_MAX is wider than 16 bits shifting
// the raw int left by 15 can overflow, which is undefined behaviour.
inline UINT32 validity_checker::random_u32()
{
	return UINT32(rand()) ^ (UINT32(rand()) << 15);
}
// Produce a signed 64-bit test value by converting the result of random_u64().
inline INT64 validity_checker::random_i64()
{
	return static_cast<INT64>(random_u64());
}
// Produce an unsigned 64-bit test value by combining two random_u32() draws,
// the second shifted up by 30 bits before being XORed in.
inline UINT64 validity_checker::random_u64()
{
	return static_cast<UINT64>(random_u32()) ^ (static_cast<UINT64>(random_u32()) << 30);
}
//------------------------------------------------- //-------------------------------------------------
// validate_tag - ensure that the given tag // validate_tag - ensure that the given tag
// meets the general requirements // meets the general requirements
@ -182,6 +199,7 @@ bool validity_checker::check_all_matching(const char *string)
validate_begin(); validate_begin();
validate_core(); validate_core();
validate_inlines(); validate_inlines();
validate_rgb();
// if we had warnings or errors, output // if we had warnings or errors, output
if (m_errors > 0 || m_warnings > 0 || !m_verbose_text.empty()) if (m_errors > 0 || m_warnings > 0 || !m_verbose_text.empty())
@ -362,13 +380,12 @@ void validity_checker::validate_core()
void validity_checker::validate_inlines() void validity_checker::validate_inlines()
{ {
#undef rand volatile UINT64 testu64a = random_i64();
volatile UINT64 testu64a = rand() ^ (rand() << 15) ^ ((UINT64)rand() << 30) ^ ((UINT64)rand() << 45); volatile INT64 testi64a = random_i64();
volatile INT64 testi64a = rand() ^ (rand() << 15) ^ ((INT64)rand() << 30) ^ ((INT64)rand() << 45); volatile UINT32 testu32a = random_u32();
volatile UINT32 testu32a = rand() ^ (rand() << 15); volatile UINT32 testu32b = random_u32();
volatile UINT32 testu32b = rand() ^ (rand() << 15); volatile INT32 testi32a = random_i32();
volatile INT32 testi32a = rand() ^ (rand() << 15); volatile INT32 testi32b = random_i32();
volatile INT32 testi32b = rand() ^ (rand() << 15);
INT32 resulti32, expectedi32; INT32 resulti32, expectedi32;
UINT32 resultu32, expectedu32; UINT32 resultu32, expectedu32;
INT64 resulti64, expectedi64; INT64 resulti64, expectedi64;
@ -481,6 +498,545 @@ void validity_checker::validate_inlines()
} }
//-------------------------------------------------
// validate_rgb - validate optimised RGB utility
// class
//-------------------------------------------------
void validity_checker::validate_rgb()
{
/*
This performs cursory tests of most of the vector-optimised RGB
utilities, concentrating on the low-level maths. It uses random
values most of the time for a quick go/no-go indication rather
than trying to exercise edge cases. It doesn't matter too much
if the compiler optimises out some of the operations since it's
really intended to check for logic bugs in the vector code.
The following functions are not tested yet:
rgbaint_t()
clamp_and_clear(const UINT32)
sign_extend(const UINT32, const UINT32)
min(const INT32)
max(const INT32)
blend(const rgbaint_t&, UINT8)
scale_and_clamp(const rgbaint_t&)
scale_imm_and_clamp(const INT32)
scale2_add_and_clamp(const rgbaint_t&, const rgbaint_t&, const rgbaint_t&)
scale_add_and_clamp(const rgbaint_t&, const rgbaint_t&);
scale_imm_add_and_clamp(const INT32, const rgbaint_t&);
cmpeq(const rgbaint_t&)
cmpeq_imm(const INT32)
cmpeq_imm_rgba(const INT32, const INT32, const INT32, const INT32)
cmpgt(const rgbaint_t&)
cmpgt_imm(const INT32)
cmpgt_imm_rgba(const INT32, const INT32, const INT32, const INT32)
cmplt(const rgbaint_t&)
cmplt_imm(const INT32)
cmplt_imm_rgba(const INT32, const INT32, const INT32, const INT32)
static bilinear_filter(UINT32, UINT32, UINT32, UINT32, UINT8, UINT8)
bilinear_filter_rgbaint(UINT32, UINT32, UINT32, UINT32, UINT8, UINT8)
*/
volatile INT32 expected_a, expected_r, expected_g, expected_b;
volatile INT32 actual_a, actual_r, actual_g, actual_b;
volatile INT32 imm;
rgbaint_t rgb, other;
rgb_t packed;
auto check_expected = [&] (const char *desc)
{
const volatile INT32 a = rgb.get_a32();
const volatile INT32 r = rgb.get_r32();
const volatile INT32 g = rgb.get_g32();
const volatile INT32 b = rgb.get_b32();
if (a != expected_a) osd_printf_error("Error testing %s get_a32() = %d (expected %d)\n", desc, a, expected_a);
if (r != expected_r) osd_printf_error("Error testing %s get_r32() = %d (expected %d)\n", desc, r, expected_r);
if (g != expected_g) osd_printf_error("Error testing %s get_g32() = %d (expected %d)\n", desc, g, expected_g);
if (b != expected_b) osd_printf_error("Error testing %s get_b32() = %d (expected %d)\n", desc, b, expected_b);
};
// check set/get
expected_a = random_i32();
expected_r = random_i32();
expected_g = random_i32();
expected_b = random_i32();
rgb.set(expected_a, expected_r, expected_g, expected_b);
check_expected("rgbaint_t::set(a, r, g, b)");
// check construct/set
expected_a = random_i32();
expected_r = random_i32();
expected_g = random_i32();
expected_b = random_i32();
rgb.set(rgbaint_t(expected_a, expected_r, expected_g, expected_b));
check_expected("rgbaint_t::set(rgbaint_t)");
// check construct/assign
expected_a = random_i32();
expected_r = random_i32();
expected_g = random_i32();
expected_b = random_i32();
rgb = rgbaint_t(expected_a, expected_r, expected_g, expected_b);
check_expected("rgbaint_t assignment");
// check piecewise set
rgb.set_a(expected_a = random_i32());
check_expected("rgbaint_t::set_a");
rgb.set_r(expected_r = random_i32());
check_expected("rgbaint_t::set_r");
rgb.set_g(expected_g = random_i32());
check_expected("rgbaint_t::set_g");
rgb.set_b(expected_b = random_i32());
check_expected("rgbaint_t::set_b");
// test merge_alpha
expected_a = rand();
rgb.merge_alpha(rgbaint_t(expected_a, rand(), rand(), rand()));
check_expected("rgbaint_t::merge_alpha");
// test RGB addition (method)
expected_a += actual_a = random_i32();
expected_r += actual_r = random_i32();
expected_g += actual_g = random_i32();
expected_b += actual_b = random_i32();
rgb.add(rgbaint_t(actual_a, actual_r, actual_g, actual_b));
check_expected("rgbaint_t::add");
// test RGB addition (operator)
expected_a += actual_a = random_i32();
expected_r += actual_r = random_i32();
expected_g += actual_g = random_i32();
expected_b += actual_b = random_i32();
rgb += rgbaint_t(actual_a, actual_r, actual_g, actual_b);
check_expected("rgbaint_t::operator+=");
// test offset addition (method)
imm = random_i32();
expected_a += imm;
expected_r += imm;
expected_g += imm;
expected_b += imm;
rgb.add_imm(imm);
check_expected("rgbaint_t::add_imm");
// test offset addition (operator)
imm = random_i32();
expected_a += imm;
expected_r += imm;
expected_g += imm;
expected_b += imm;
rgb += imm;
check_expected("rgbaint_t::operator+=");
// test immediate RGB addition
expected_a += actual_a = random_i32();
expected_r += actual_r = random_i32();
expected_g += actual_g = random_i32();
expected_b += actual_b = random_i32();
rgb.add_imm_rgba(actual_a, actual_r, actual_g, actual_b);
check_expected("rgbaint_t::add_imm_rgba");
// test RGB subtraction (method)
expected_a -= actual_a = random_i32();
expected_r -= actual_r = random_i32();
expected_g -= actual_g = random_i32();
expected_b -= actual_b = random_i32();
rgb.sub(rgbaint_t(actual_a, actual_r, actual_g, actual_b));
check_expected("rgbaint_t::sub");
// test RGB subtraction (operator)
expected_a -= actual_a = random_i32();
expected_r -= actual_r = random_i32();
expected_g -= actual_g = random_i32();
expected_b -= actual_b = random_i32();
rgb -= rgbaint_t(actual_a, actual_r, actual_g, actual_b);
check_expected("rgbaint_t::operator-=");
// test offset subtraction
imm = random_i32();
expected_a -= imm;
expected_r -= imm;
expected_g -= imm;
expected_b -= imm;
rgb.sub_imm(imm);
check_expected("rgbaint_t::sub_imm");
// test immediate RGB subtraction
expected_a -= actual_a = random_i32();
expected_r -= actual_r = random_i32();
expected_g -= actual_g = random_i32();
expected_b -= actual_b = random_i32();
rgb.sub_imm_rgba(actual_a, actual_r, actual_g, actual_b);
check_expected("rgbaint_t::sub_imm_rgba");
// test reversed RGB subtraction
expected_a = (actual_a = random_i32()) - expected_a;
expected_r = (actual_r = random_i32()) - expected_r;
expected_g = (actual_g = random_i32()) - expected_g;
expected_b = (actual_b = random_i32()) - expected_b;
rgb.subr(rgbaint_t(actual_a, actual_r, actual_g, actual_b));
check_expected("rgbaint_t::subr");
// test reversed offset subtraction
imm = random_i32();
expected_a = imm - expected_a;
expected_r = imm - expected_r;
expected_g = imm - expected_g;
expected_b = imm - expected_b;
rgb.subr_imm(imm);
check_expected("rgbaint_t::subr_imm");
// test reversed immediate RGB subtraction
expected_a = (actual_a = random_i32()) - expected_a;
expected_r = (actual_r = random_i32()) - expected_r;
expected_g = (actual_g = random_i32()) - expected_g;
expected_b = (actual_b = random_i32()) - expected_b;
rgb.subr_imm_rgba(actual_a, actual_r, actual_g, actual_b);
check_expected("rgbaint_t::subr_imm_rgba");
// test RGB multiplication (method)
expected_a *= actual_a = random_i32();
expected_r *= actual_r = random_i32();
expected_g *= actual_g = random_i32();
expected_b *= actual_b = random_i32();
rgb.mul(rgbaint_t(actual_a, actual_r, actual_g, actual_b));
check_expected("rgbaint_t::mul");
// test RGB multiplication (operator)
expected_a *= actual_a = random_i32();
expected_r *= actual_r = random_i32();
expected_g *= actual_g = random_i32();
expected_b *= actual_b = random_i32();
rgb *= rgbaint_t(actual_a, actual_r, actual_g, actual_b);
check_expected("rgbaint_t::operator*=");
// test factor multiplication (method)
imm = random_i32();
expected_a *= imm;
expected_r *= imm;
expected_g *= imm;
expected_b *= imm;
rgb.mul_imm(imm);
check_expected("rgbaint_t::mul_imm");
// test factor multiplication (operator)
imm = random_i32();
expected_a *= imm;
expected_r *= imm;
expected_g *= imm;
expected_b *= imm;
rgb *= imm;
check_expected("rgbaint_t::operator*=");
// test immediate RGB multiplication
expected_a *= actual_a = random_i32();
expected_r *= actual_r = random_i32();
expected_g *= actual_g = random_i32();
expected_b *= actual_b = random_i32();
rgb.mul_imm_rgba(actual_a, actual_r, actual_g, actual_b);
check_expected("rgbaint_t::mul_imm_rgba");
// test RGB and not
expected_a &= ~(actual_a = random_i32());
expected_r &= ~(actual_r = random_i32());
expected_g &= ~(actual_g = random_i32());
expected_b &= ~(actual_b = random_i32());
rgb.andnot_reg(rgbaint_t(actual_a, actual_r, actual_g, actual_b));
check_expected("rgbaint_t::andnot_reg");
// test RGB or
expected_a |= actual_a = random_i32();
expected_r |= actual_r = random_i32();
expected_g |= actual_g = random_i32();
expected_b |= actual_b = random_i32();
rgb.or_reg(rgbaint_t(actual_a, actual_r, actual_g, actual_b));
check_expected("rgbaint_t::or_reg");
// test RGB and
expected_a &= actual_a = random_i32();
expected_r &= actual_r = random_i32();
expected_g &= actual_g = random_i32();
expected_b &= actual_b = random_i32();
rgb.and_reg(rgbaint_t(actual_a, actual_r, actual_g, actual_b));
check_expected("rgbaint_t::and_reg");
// test RGB xor
expected_a ^= actual_a = random_i32();
expected_r ^= actual_r = random_i32();
expected_g ^= actual_g = random_i32();
expected_b ^= actual_b = random_i32();
rgb.xor_reg(rgbaint_t(actual_a, actual_r, actual_g, actual_b));
check_expected("rgbaint_t::xor_reg");
// test uniform or
imm = random_i32();
expected_a |= imm;
expected_r |= imm;
expected_g |= imm;
expected_b |= imm;
rgb.or_imm(imm);
check_expected("rgbaint_t::or_imm");
// test uniform and
imm = random_i32();
expected_a &= imm;
expected_r &= imm;
expected_g &= imm;
expected_b &= imm;
rgb.and_imm(imm);
check_expected("rgbaint_t::and_imm");
// test uniform xor
imm = random_i32();
expected_a ^= imm;
expected_r ^= imm;
expected_g ^= imm;
expected_b ^= imm;
rgb.xor_imm(imm);
check_expected("rgbaint_t::xor_imm");
// test immediate RGB or
expected_a |= actual_a = random_i32();
expected_r |= actual_r = random_i32();
expected_g |= actual_g = random_i32();
expected_b |= actual_b = random_i32();
rgb.or_imm_rgba(actual_a, actual_r, actual_g, actual_b);
check_expected("rgbaint_t::or_imm_rgba");
// test immediate RGB and
expected_a &= actual_a = random_i32();
expected_r &= actual_r = random_i32();
expected_g &= actual_g = random_i32();
expected_b &= actual_b = random_i32();
rgb.and_imm_rgba(actual_a, actual_r, actual_g, actual_b);
check_expected("rgbaint_t::and_imm_rgba");
// test immediate RGB xor
expected_a ^= actual_a = random_i32();
expected_r ^= actual_r = random_i32();
expected_g ^= actual_g = random_i32();
expected_b ^= actual_b = random_i32();
rgb.xor_imm_rgba(actual_a, actual_r, actual_g, actual_b);
check_expected("rgbaint_t::xor_imm_rgba");
// test 8-bit get
expected_a = INT32(UINT32(expected_a) & 0x00ff);
expected_r = INT32(UINT32(expected_r) & 0x00ff);
expected_g = INT32(UINT32(expected_g) & 0x00ff);
expected_b = INT32(UINT32(expected_b) & 0x00ff);
actual_a = INT32(UINT32(rgb.get_a()));
actual_r = INT32(UINT32(rgb.get_r()));
actual_g = INT32(UINT32(rgb.get_g()));
actual_b = INT32(UINT32(rgb.get_b()));
if (actual_a != expected_a) osd_printf_error("Error testing rgbaint_t::get_a() = %d (expected %d)\n", actual_a, expected_a);
if (actual_r != expected_r) osd_printf_error("Error testing rgbaint_t::get_r() = %d (expected %d)\n", actual_r, expected_r);
if (actual_g != expected_g) osd_printf_error("Error testing rgbaint_t::get_g() = %d (expected %d)\n", actual_g, expected_g);
if (actual_b != expected_b) osd_printf_error("Error testing rgbaint_t::get_b() = %d (expected %d)\n", actual_b, expected_b);
// test set from packed RGBA
imm = random_i32();
expected_a = INT32((UINT32(imm) >> 24) & 0x00ff);
expected_r = INT32((UINT32(imm) >> 16) & 0x00ff);
expected_g = INT32((UINT32(imm) >> 8) & 0x00ff);
expected_b = INT32((UINT32(imm) >> 0) & 0x00ff);
rgb.set(UINT32(imm));
check_expected("rgbaint_t::set(UINT32)");
// while we have a value loaded that we know doesn't exceed 8-bit range, check the non-clamping convert-to-rgba
packed = rgb.to_rgba();
if (UINT32(imm) != UINT32(packed))
osd_printf_error("Error testing rgbaint_t::to_rgba() = %08x (expected %08x)\n", UINT32(packed), UINT32(imm));
// test construct from packed RGBA and assign
imm = random_i32();
expected_a = INT32((UINT32(imm) >> 24) & 0x00ff);
expected_r = INT32((UINT32(imm) >> 16) & 0x00ff);
expected_g = INT32((UINT32(imm) >> 8) & 0x00ff);
expected_b = INT32((UINT32(imm) >> 0) & 0x00ff);
rgb = rgbaint_t(UINT32(imm));
check_expected("rgbaint_t(UINT32)");
// while we have a value loaded that we know doesn't exceed 8-bit range, check the non-clamping convert-to-rgba
packed = rgb.to_rgba();
if (UINT32(imm) != UINT32(packed))
osd_printf_error("Error testing rgbaint_t::to_rgba() = %08x (expected %08x)\n", UINT32(packed), UINT32(imm));
// test set with rgb_t
packed = random_u32();
expected_a = INT32(UINT32(packed.a()));
expected_r = INT32(UINT32(packed.r()));
expected_g = INT32(UINT32(packed.g()));
expected_b = INT32(UINT32(packed.b()));
rgb.set(packed);
check_expected("rgbaint_t::set(rgba_t)");
// test construct with rgb_t
packed = random_u32();
expected_a = INT32(UINT32(packed.a()));
expected_r = INT32(UINT32(packed.r()));
expected_g = INT32(UINT32(packed.g()));
expected_b = INT32(UINT32(packed.b()));
rgb = rgbaint_t(packed);
check_expected("rgbaint_t::set(rgba_t)");
// test clamping convert-to-rgba with hand-crafted values to catch edge cases
rgb.set(std::numeric_limits<INT32>::min(), -1, 0, 1);
packed = rgb.to_rgba_clamp();
if (UINT32(0x00000001) != UINT32(packed))
osd_printf_error("Error testing rgbaint_t::to_rgba_clamp() = %08x (expected 0x00000001)\n", UINT32(packed));
rgb.set(254, 255, 256, std::numeric_limits<INT32>::max());
packed = rgb.to_rgba_clamp();
if (UINT32(0xfeffffff) != UINT32(packed))
osd_printf_error("Error testing rgbaint_t::to_rgba_clamp() = %08x (expected 0xfeffffff)\n", UINT32(packed));
rgb.set(std::numeric_limits<INT32>::max(), std::numeric_limits<INT32>::min(), 256, -1);
packed = rgb.to_rgba_clamp();
if (UINT32(0xff00ff00) != UINT32(packed))
osd_printf_error("Error testing rgbaint_t::to_rgba_clamp() = %08x (expected 0xff00ff00)\n", UINT32(packed));
rgb.set(0, 255, 1, 254);
packed = rgb.to_rgba_clamp();
if (UINT32(0x00ff01fe) != UINT32(packed))
osd_printf_error("Error testing rgbaint_t::to_rgba_clamp() = %08x (expected 0x00ff01fe)\n", UINT32(packed));
// test in-place clamping with hand-crafted values to catch edge cases
expected_a = 0;
expected_r = 0;
expected_g = 0;
expected_b = 1;
rgb.set(std::numeric_limits<INT32>::min(), -1, 0, 1);
rgb.clamp_to_uint8();
check_expected("rgbaint_t::clamp_to_uint8");
expected_a = 254;
expected_r = 255;
expected_g = 255;
expected_b = 255;
rgb.set(254, 255, 256, std::numeric_limits<INT32>::max());
rgb.clamp_to_uint8();
check_expected("rgbaint_t::clamp_to_uint8");
expected_a = 255;
expected_r = 0;
expected_g = 255;
expected_b = 0;
rgb.set(std::numeric_limits<INT32>::max(), std::numeric_limits<INT32>::min(), 256, -1);
rgb.clamp_to_uint8();
check_expected("rgbaint_t::clamp_to_uint8");
expected_a = 0;
expected_r = 255;
expected_g = 1;
expected_b = 254;
rgb.set(0, 255, 1, 254);
rgb.clamp_to_uint8();
check_expected("rgbaint_t::clamp_to_uint8");
// test shift left
expected_a = (actual_a = random_i32()) << 19;
expected_r = (actual_r = random_i32()) << 3;
expected_g = (actual_g = random_i32()) << 21;
expected_b = (actual_b = random_i32()) << 6;
rgb.set(actual_a, actual_r, actual_g, actual_b);
rgb.shl(rgbaint_t(19, 3, 21, 6));
check_expected("rgbaint_t::shl");
// test shift left immediate
expected_a = (actual_a = random_i32()) << 7;
expected_r = (actual_r = random_i32()) << 7;
expected_g = (actual_g = random_i32()) << 7;
expected_b = (actual_b = random_i32()) << 7;
rgb.set(actual_a, actual_r, actual_g, actual_b);
rgb.shl_imm(7);
check_expected("rgbaint_t::shl_imm");
// test logical shift right
expected_a = INT32(UINT32(actual_a = random_i32()) >> 8);
expected_r = INT32(UINT32(actual_r = random_i32()) >> 18);
expected_g = INT32(UINT32(actual_g = random_i32()) >> 26);
expected_b = INT32(UINT32(actual_b = random_i32()) >> 4);
rgb.set(actual_a, actual_r, actual_g, actual_b);
rgb.shr(rgbaint_t(8, 18, 26, 4));
check_expected("rgbaint_t::shr");
// test logical shift right with opposite signs
expected_a = INT32(UINT32(actual_a = -actual_a) >> 21);
expected_r = INT32(UINT32(actual_r = -actual_r) >> 13);
expected_g = INT32(UINT32(actual_g = -actual_g) >> 11);
expected_b = INT32(UINT32(actual_b = -actual_b) >> 17);
rgb.set(actual_a, actual_r, actual_g, actual_b);
rgb.shr(rgbaint_t(21, 13, 11, 17));
check_expected("rgbaint_t::shr");
// test logical shift right immediate
expected_a = INT32(UINT32(actual_a = random_i32()) >> 5);
expected_r = INT32(UINT32(actual_r = random_i32()) >> 5);
expected_g = INT32(UINT32(actual_g = random_i32()) >> 5);
expected_b = INT32(UINT32(actual_b = random_i32()) >> 5);
rgb.set(actual_a, actual_r, actual_g, actual_b);
rgb.shr_imm(5);
check_expected("rgbaint_t::shr_imm");
// test logical shift right immediate with opposite signs
expected_a = INT32(UINT32(actual_a = -actual_a) >> 15);
expected_r = INT32(UINT32(actual_r = -actual_r) >> 15);
expected_g = INT32(UINT32(actual_g = -actual_g) >> 15);
expected_b = INT32(UINT32(actual_b = -actual_b) >> 15);
rgb.set(actual_a, actual_r, actual_g, actual_b);
rgb.shr_imm(15);
check_expected("rgbaint_t::shr_imm");
// test arithmetic shift right
expected_a = (actual_a = random_i32()) >> 16;
expected_r = (actual_r = random_i32()) >> 20;
expected_g = (actual_g = random_i32()) >> 14;
expected_b = (actual_b = random_i32()) >> 2;
rgb.set(actual_a, actual_r, actual_g, actual_b);
rgb.sra(rgbaint_t(16, 20, 14, 2));
check_expected("rgbaint_t::sra");
// test arithmetic shift right with opposite signs
expected_a = (actual_a = -actual_a) >> 1;
expected_r = (actual_r = -actual_r) >> 29;
expected_g = (actual_g = -actual_g) >> 10;
expected_b = (actual_b = -actual_b) >> 22;
rgb.set(actual_a, actual_r, actual_g, actual_b);
rgb.sra(rgbaint_t(1, 29, 10, 22));
check_expected("rgbaint_t::sra");
// test arithmetic shift right immediate (method)
expected_a = (actual_a = random_i32()) >> 12;
expected_r = (actual_r = random_i32()) >> 12;
expected_g = (actual_g = random_i32()) >> 12;
expected_b = (actual_b = random_i32()) >> 12;
rgb.set(actual_a, actual_r, actual_g, actual_b);
rgb.sra_imm(12);
check_expected("rgbaint_t::sra_imm");
// test arithmetic shift right immediate with opposite signs (method)
expected_a = (actual_a = -actual_a) >> 9;
expected_r = (actual_r = -actual_r) >> 9;
expected_g = (actual_g = -actual_g) >> 9;
expected_b = (actual_b = -actual_b) >> 9;
rgb.set(actual_a, actual_r, actual_g, actual_b);
rgb.sra_imm(9);
check_expected("rgbaint_t::sra_imm");
// test arithmetic shift right immediate (operator)
expected_a = (actual_a = random_i32()) >> 7;
expected_r = (actual_r = random_i32()) >> 7;
expected_g = (actual_g = random_i32()) >> 7;
expected_b = (actual_b = random_i32()) >> 7;
rgb.set(actual_a, actual_r, actual_g, actual_b);
rgb >>= 7;
check_expected("rgbaint_t::operator>>=");
// test arithmetic shift right immediate with opposite signs (operator)
expected_a = (actual_a = -actual_a) >> 11;
expected_r = (actual_r = -actual_r) >> 11;
expected_g = (actual_g = -actual_g) >> 11;
expected_b = (actual_b = -actual_b) >> 11;
rgb.set(actual_a, actual_r, actual_g, actual_b);
rgb >>= 11;
check_expected("rgbaint_t::operator>>=");
}
//------------------------------------------------- //-------------------------------------------------
// validate_driver - validate basic driver // validate_driver - validate basic driver
// information // information
@ -1024,42 +1580,45 @@ void validity_checker::output_callback(osd_output_channel channel, const char *m
std::string output; std::string output;
switch (channel) switch (channel)
{ {
case OSD_OUTPUT_CHANNEL_ERROR: case OSD_OUTPUT_CHANNEL_ERROR:
// count the error // count the error
m_errors++; m_errors++;
// output the source(driver) device 'tag' // output the source(driver) device 'tag'
build_output_prefix(output); build_output_prefix(output);
// generate the string // generate the string
strcatvprintf(output, msg, args); strcatvprintf(output, msg, args);
m_error_text.append(output); m_error_text.append(output);
break; break;
case OSD_OUTPUT_CHANNEL_WARNING:
// count the error
m_warnings++;
// output the source(driver) device 'tag' case OSD_OUTPUT_CHANNEL_WARNING:
build_output_prefix(output); // count the error
m_warnings++;
// generate the string and output to the original target // output the source(driver) device 'tag'
strcatvprintf(output, msg, args); build_output_prefix(output);
m_warning_text.append(output);
break;
case OSD_OUTPUT_CHANNEL_VERBOSE:
// if we're not verbose, skip it
if (!m_print_verbose) break;
// output the source(driver) device 'tag' // generate the string and output to the original target
build_output_prefix(output); strcatvprintf(output, msg, args);
m_warning_text.append(output);
break;
// generate the string and output to the original target case OSD_OUTPUT_CHANNEL_VERBOSE:
strcatvprintf(output, msg, args); // if we're not verbose, skip it
m_verbose_text.append(output); if (!m_print_verbose) break;
break;
default: // output the source(driver) device 'tag'
chain_output(channel, msg, args); build_output_prefix(output);
break;
// generate the string and output to the original target
strcatvprintf(output, msg, args);
m_verbose_text.append(output);
break;
default:
chain_output(channel, msg, args);
break;
} }
} }
@ -1075,7 +1634,7 @@ void validity_checker::output_via_delegate(osd_output_channel channel, const cha
// call through to the delegate with the proper parameters // call through to the delegate with the proper parameters
va_start(argptr, format); va_start(argptr, format);
this->chain_output(channel, format, argptr); chain_output(channel, format, argptr);
va_end(argptr); va_end(argptr);
} }

View File

@ -8,10 +8,10 @@
***************************************************************************/ ***************************************************************************/
#pragma once #ifndef MAME_EMU_VALIDITY_H
#define MAME_EMU_VALIDITY_H
#ifndef __VALIDITY_H__ #pragma once
#define __VALIDITY_H__
#include "emu.h" #include "emu.h"
#include "drivenum.h" #include "drivenum.h"
@ -28,10 +28,6 @@ class machine_config;
// core validity checker class // core validity checker class
class validity_checker : public osd_output class validity_checker : public osd_output
{ {
// internal map types
typedef std::unordered_map<std::string,const game_driver *> game_driver_map;
typedef std::unordered_map<std::string,FPTR> int_map;
public: public:
validity_checker(emu_options &options); validity_checker(emu_options &options);
~validity_checker(); ~validity_checker();
@ -63,6 +59,10 @@ protected:
virtual void output_callback(osd_output_channel channel, const char *msg, va_list args) override; virtual void output_callback(osd_output_channel channel, const char *msg, va_list args) override;
private: private:
// internal map types
typedef std::unordered_map<std::string,const game_driver *> game_driver_map;
typedef std::unordered_map<std::string,FPTR> int_map;
// internal helpers // internal helpers
const char *ioport_string_from_index(UINT32 index); const char *ioport_string_from_index(UINT32 index);
int get_defstr_index(const char *string, bool suppress_error = false); int get_defstr_index(const char *string, bool suppress_error = false);
@ -75,6 +75,7 @@ private:
// internal sub-checks // internal sub-checks
void validate_core(); void validate_core();
void validate_inlines(); void validate_inlines();
void validate_rgb();
void validate_driver(); void validate_driver();
void validate_roms(); void validate_roms();
void validate_analog_input_field(ioport_field &field); void validate_analog_input_field(ioport_field &field);
@ -88,6 +89,12 @@ private:
void output_via_delegate(osd_output_channel channel, const char *format, ...) ATTR_PRINTF(3,4); void output_via_delegate(osd_output_channel channel, const char *format, ...) ATTR_PRINTF(3,4);
void output_indented_errors(std::string &text, const char *header); void output_indented_errors(std::string &text, const char *header);
// random number generation
INT32 random_i32();
UINT32 random_u32();
INT64 random_i64();
UINT64 random_u64();
// internal driver list // internal driver list
driver_enumerator m_drivlist; driver_enumerator m_drivlist;
@ -115,4 +122,4 @@ private:
bool m_validate_all; bool m_validate_all;
}; };
#endif #endif // MAME_EMU_VALIDITY_H

View File

@ -8,8 +8,8 @@
***************************************************************************/ ***************************************************************************/
#ifndef __RGBGEN__ #ifndef MAME_EMU_VIDEO_RGBGEN_H
#define __RGBGEN__ #define MAME_EMU_VIDEO_RGBGEN_H
/*************************************************************************** /***************************************************************************
@ -19,36 +19,48 @@
class rgbaint_t class rgbaint_t
{ {
public: public:
inline rgbaint_t(): m_a(0), m_r(0), m_g(0), m_b(0) { } rgbaint_t(): m_a(0), m_r(0), m_g(0), m_b(0) { }
inline rgbaint_t(UINT32 rgba) { set(rgba); } explicit rgbaint_t(UINT32 rgba) { set(rgba); }
inline rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); } rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); }
inline rgbaint_t(const rgb_t& rgba) { set(rgba); } explicit rgbaint_t(const rgb_t& rgba) { set(rgba); }
inline void set(rgbaint_t& other) { set(other.m_a, other.m_r, other.m_g, other.m_b); } void set(const rgbaint_t& other) { set(other.m_a, other.m_r, other.m_g, other.m_b); }
inline void set(UINT32 rgba) { set((rgba >> 24) & 0xff, (rgba >> 16) & 0xff, (rgba >> 8) & 0xff, rgba & 0xff); } void set(UINT32 rgba) { set((rgba >> 24) & 0xff, (rgba >> 16) & 0xff, (rgba >> 8) & 0xff, rgba & 0xff); }
inline void set(INT32 a, INT32 r, INT32 g, INT32 b) void set(INT32 a, INT32 r, INT32 g, INT32 b)
{ {
m_a = a; m_a = a;
m_r = r; m_r = r;
m_g = g; m_g = g;
m_b = b; m_b = b;
} }
inline void set(const rgb_t& rgba) { set(rgba.a(), rgba.r(), rgba.g(), rgba.b()); } void set(const rgb_t& rgba) { set(rgba.a(), rgba.r(), rgba.g(), rgba.b()); }
inline rgb_t to_rgba() const rgb_t to_rgba() const { return rgb_t(get_a(), get_r(), get_g(), get_b()); }
{
return rgb_t(m_a, m_r, m_g, m_b);
}
inline rgb_t to_rgba_clamp() const rgb_t to_rgba_clamp() const
{ {
UINT8 a = (m_a < 0) ? 0 : (m_a > 255) ? 255 : m_a; const UINT8 a = (m_a < 0) ? 0 : (m_a > 255) ? 255 : m_a;
UINT8 r = (m_r < 0) ? 0 : (m_r > 255) ? 255 : m_r; const UINT8 r = (m_r < 0) ? 0 : (m_r > 255) ? 255 : m_r;
UINT8 g = (m_g < 0) ? 0 : (m_g > 255) ? 255 : m_g; const UINT8 g = (m_g < 0) ? 0 : (m_g > 255) ? 255 : m_g;
UINT8 b = (m_b < 0) ? 0 : (m_b > 255) ? 255 : m_b; const UINT8 b = (m_b < 0) ? 0 : (m_b > 255) ? 255 : m_b;
return rgb_t(a, r, g, b); return rgb_t(a, r, g, b);
} }
// Overwrite the alpha channel with the given 32-bit value.
void set_a(const INT32 value)
{
	m_a = value;
}
// Overwrite the red channel with the given 32-bit value.
void set_r(const INT32 value)
{
	m_r = value;
}
// Overwrite the green channel with the given 32-bit value.
void set_g(const INT32 value)
{
	m_g = value;
}
// Overwrite the blue channel with the given 32-bit value.
void set_b(const INT32 value)
{
	m_b = value;
}
// Return the alpha channel truncated to its low 8 bits (no clamping).
UINT8 get_a() const
{
	return static_cast<UINT8>(static_cast<UINT32>(m_a));
}
// Return the red channel truncated to its low 8 bits (no clamping).
UINT8 get_r() const
{
	return static_cast<UINT8>(static_cast<UINT32>(m_r));
}
// Return the green channel truncated to its low 8 bits (no clamping).
UINT8 get_g() const
{
	return static_cast<UINT8>(static_cast<UINT32>(m_g));
}
// Return the blue channel truncated to its low 8 bits (no clamping).
UINT8 get_b() const
{
	return static_cast<UINT8>(static_cast<UINT32>(m_b));
}
// Return the full 32-bit alpha channel value.
INT32 get_a32() const
{
	return m_a;
}
// Return the full 32-bit red channel value.
INT32 get_r32() const
{
	return m_r;
}
// Return the full 32-bit green channel value.
INT32 get_g32() const
{
	return m_g;
}
// Return the full 32-bit blue channel value.
INT32 get_b32() const
{
	return m_b;
}
inline void add(const rgbaint_t& color) inline void add(const rgbaint_t& color)
{ {
add_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); add_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
@ -85,7 +97,7 @@ public:
m_b -= b; m_b -= b;
} }
inline void subr(rgbaint_t& color) inline void subr(const rgbaint_t& color)
{ {
subr_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); subr_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
} }
@ -103,67 +115,7 @@ public:
m_b = b - m_b; m_b = b - m_b;
} }
inline void set_a(const INT32 value) inline void mul(const rgbaint_t& color)
{
m_a = value;
}
inline void set_r(const INT32 value)
{
m_r = value;
}
inline void set_g(const INT32 value)
{
m_g = value;
}
inline void set_b(const INT32 value)
{
m_b = value;
}
inline UINT8 get_a() const
{
return m_a;
}
inline UINT8 get_r() const
{
return m_r;
}
inline UINT8 get_g() const
{
return m_g;
}
inline UINT8 get_b() const
{
return m_b;
}
inline INT32 get_a32() const
{
return m_a;
}
inline INT32 get_r32() const
{
return m_r;
}
inline INT32 get_g32() const
{
return m_g;
}
inline INT32 get_b32() const
{
return m_b;
}
inline void mul(rgbaint_t& color)
{ {
mul_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); mul_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
} }
@ -202,10 +154,10 @@ public:
inline void shr(const rgbaint_t& shift) inline void shr(const rgbaint_t& shift)
{ {
m_a >>= shift.m_a; m_a = INT32(UINT32(m_a) >> shift.m_a);
m_r >>= shift.m_r; m_r = INT32(UINT32(m_r) >> shift.m_r);
m_g >>= shift.m_g; m_g = INT32(UINT32(m_g) >> shift.m_g);
m_b >>= shift.m_b; m_b = INT32(UINT32(m_b) >> shift.m_b);
} }
inline void shr_imm(const UINT8 shift) inline void shr_imm(const UINT8 shift)
@ -213,10 +165,10 @@ public:
if (shift == 0) if (shift == 0)
return; return;
m_a >>= shift; m_a = INT32(UINT32(m_a) >> shift);
m_r >>= shift; m_r = INT32(UINT32(m_r) >> shift);
m_g >>= shift; m_g = INT32(UINT32(m_g) >> shift);
m_b >>= shift; m_b = INT32(UINT32(m_b) >> shift);
} }
inline void sra(const rgbaint_t& shift) inline void sra(const rgbaint_t& shift)
@ -321,22 +273,12 @@ public:
inline void clamp_and_clear(const UINT32 sign) inline void clamp_and_clear(const UINT32 sign)
{ {
if (m_a & sign) if (m_a & sign) m_a = 0;
m_a = 0; if (m_r & sign) m_r = 0;
if (m_g & sign) m_g = 0;
if (m_b & sign) m_b = 0;
if (m_r & sign) clamp_to_uint8();
m_r = 0;
if (m_g & sign)
m_g = 0;
if (m_b & sign)
m_b = 0;
m_a = (m_a < 0) ? 0 : (m_a > 255) ? 255 : m_a;
m_r = (m_r < 0) ? 0 : (m_r > 255) ? 255 : m_r;
m_g = (m_g < 0) ? 0 : (m_g > 255) ? 255 : m_g;
m_b = (m_b < 0) ? 0 : (m_b > 255) ? 255 : m_b;
} }
inline void clamp_to_uint8() inline void clamp_to_uint8()
@ -370,6 +312,14 @@ public:
m_b = (m_b > value) ? value : m_b; m_b = (m_b > value) ? value : m_b;
} }
// Raise every channel to at least `value`: any of m_a/m_r/m_g/m_b that is
// less than `value` is replaced by it; channels already >= `value` are kept.
inline void max(const INT32 value)
{
m_a = (m_a < value) ? value : m_a;
m_r = (m_r < value) ? value : m_r;
m_g = (m_g < value) ? value : m_g;
m_b = (m_b < value) ? value : m_b;
}
void blend(const rgbaint_t& other, UINT8 factor); void blend(const rgbaint_t& other, UINT8 factor);
void scale_and_clamp(const rgbaint_t& scale); void scale_and_clamp(const rgbaint_t& scale);
@ -426,35 +376,50 @@ public:
m_b = (m_b < value) ? 0xffffffff : 0; m_b = (m_b < value) ? 0xffffffff : 0;
} }
inline void merge_alpha(rgbaint_t& alpha) inline void merge_alpha(const rgbaint_t& alpha)
{ {
m_a = alpha.m_a; m_a = alpha.m_a;
} }
inline rgbaint_t operator=(const rgbaint_t& other) rgbaint_t &operator=(const rgbaint_t& other)
{ {
m_a = other.m_a; set(other.m_a, other.m_r, other.m_g, other.m_b);
m_r = other.m_r;
m_g = other.m_g;
m_b = other.m_b;
return *this; return *this;
} }
inline rgbaint_t& operator+=(const rgbaint_t& other) rgbaint_t& operator+=(const rgbaint_t& other)
{ {
m_a += other.m_a; add_imm_rgba(other.m_a, other.m_r, other.m_g, other.m_b);
m_r += other.m_r;
m_g += other.m_g;
m_b += other.m_b;
return *this; return *this;
} }
inline rgbaint_t& operator+=(const INT32 other) rgbaint_t& operator+=(const INT32 other)
{ {
m_a += other; add_imm_rgba(other, other, other, other);
m_r += other; return *this;
m_g += other; }
m_b += other;
// Compound subtraction: forwards the four channels of `other` to
// sub_imm_rgba() and returns *this by reference.
rgbaint_t &operator-=(const rgbaint_t& other)
{
sub_imm_rgba(other.m_a, other.m_r, other.m_g, other.m_b);
return *this;
}
// Compound per-channel multiplication: forwards the four channels of `other`
// to mul_imm_rgba() and returns *this by reference.
rgbaint_t& operator*=(const rgbaint_t& other)
{
mul_imm_rgba(other.m_a, other.m_r, other.m_g, other.m_b);
return *this;
}
// Compound multiplication by a scalar: passes the same `other` value for all
// four channels to mul_imm_rgba() and returns *this by reference.
rgbaint_t& operator*=(const INT32 other)
{
mul_imm_rgba(other, other, other, other);
return *this;
}
rgbaint_t& operator>>=(const INT32 shift)
{
sra_imm(shift);
return *this; return *this;
} }
@ -477,7 +442,7 @@ public:
return ((ag0 << 8) & 0xff00ff00) | (rb0 & 0x00ff00ff); return ((ag0 << 8) & 0xff00ff00) | (rb0 & 0x00ff00ff);
} }
inline void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v) void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
{ {
UINT32 rb0 = (rgb00 & 0x00ff00ff) + ((((rgb01 & 0x00ff00ff) - (rgb00 & 0x00ff00ff)) * u) >> 8); UINT32 rb0 = (rgb00 & 0x00ff00ff) + ((((rgb01 & 0x00ff00ff) - (rgb00 & 0x00ff00ff)) * u) >> 8);
UINT32 rb1 = (rgb10 & 0x00ff00ff) + ((((rgb11 & 0x00ff00ff) - (rgb10 & 0x00ff00ff)) * u) >> 8); UINT32 rb1 = (rgb10 & 0x00ff00ff) + ((((rgb11 & 0x00ff00ff) - (rgb10 & 0x00ff00ff)) * u) >> 8);
@ -504,4 +469,4 @@ protected:
INT32 m_b; INT32 m_b;
}; };
#endif /* __RGBGEN__ */ #endif // MAME_EMU_VIDEO_RGBGEN_H

View File

@ -166,13 +166,13 @@ const struct rgbaint_t::_statics rgbaint_t::statics =
void rgbaint_t::blend(const rgbaint_t& other, UINT8 factor) void rgbaint_t::blend(const rgbaint_t& other, UINT8 factor)
{ {
__m128i scale1 = _mm_set1_epi32(factor); const __m128i scale1 = _mm_set1_epi32(factor);
__m128i scale2 = _mm_sub_epi32(_mm_set1_epi32(0x100), scale1); const rgbaint_t scale2(_mm_sub_epi32(_mm_set1_epi32(0x100), scale1));
rgbaint_t scaled_other(other); rgbaint_t scaled_other(other);
scaled_other.mul(scale2); scaled_other.mul(scale2);
mul(scale1); mul(rgbaint_t(scale1));
add(scaled_other); add(scaled_other);
sra_imm(8); sra_imm(8);
} }

View File

@ -10,10 +10,16 @@
***************************************************************************/ ***************************************************************************/
#ifndef __RGBSSE__ #ifndef MAME_EMU_VIDEO_RGBSSE_H
#define __RGBSSE__ #define MAME_EMU_VIDEO_RGBSSE_H
#pragma once
#include <emmintrin.h> #include <emmintrin.h>
#ifdef __SSE4_1__
#include <smmintrin.h>
#endif
/*************************************************************************** /***************************************************************************
TYPE DEFINITIONS TYPE DEFINITIONS
@ -22,16 +28,16 @@
class rgbaint_t class rgbaint_t
{ {
public: public:
inline rgbaint_t() { } rgbaint_t() { }
inline rgbaint_t(UINT32 rgba) { set(rgba); } explicit rgbaint_t(UINT32 rgba) { set(rgba); }
inline rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); } rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); }
inline rgbaint_t(const rgb_t& rgb) { set(rgb); } explicit rgbaint_t(const rgb_t& rgb) { set(rgb); }
inline rgbaint_t(__m128i rgba) { m_value = rgba; } explicit rgbaint_t(__m128i rgba) { m_value = rgba; }
inline void set(rgbaint_t& other) { m_value = other.m_value; } void set(const rgbaint_t& other) { m_value = other.m_value; }
inline void set(UINT32 rgba) { m_value = _mm_and_si128(_mm_set1_epi32(0xff), _mm_set_epi32(rgba >> 24, rgba >> 16, rgba >> 8, rgba)); } void set(UINT32 rgba) { m_value = _mm_and_si128(_mm_set1_epi32(0xff), _mm_set_epi32(rgba >> 24, rgba >> 16, rgba >> 8, rgba)); }
inline void set(INT32 a, INT32 r, INT32 g, INT32 b) { m_value = _mm_set_epi32(a, r, g, b); } void set(INT32 a, INT32 r, INT32 g, INT32 b) { m_value = _mm_set_epi32(a, r, g, b); }
inline void set(const rgb_t& rgb) { m_value = _mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(rgb), _mm_setzero_si128()), _mm_setzero_si128()); } void set(const rgb_t& rgb) { m_value = _mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(rgb), _mm_setzero_si128()), _mm_setzero_si128()); }
inline rgb_t to_rgba() const inline rgb_t to_rgba() const
{ {
@ -43,6 +49,35 @@ public:
return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(m_value, _mm_setzero_si128()), _mm_setzero_si128())); return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(m_value, _mm_setzero_si128()), _mm_setzero_si128()));
} }
// Per-channel setters. Lane numbering follows set(a, r, g, b), which builds
// the vector with _mm_set_epi32(a, r, g, b): alpha is 32-bit lane 3, red is
// lane 2, green is lane 1 and blue is lane 0.
#ifdef __SSE4_1__
// SSE4.1 path: overwrite the single 32-bit lane in place.
void set_a(const INT32 value) { m_value = _mm_insert_epi32(m_value, value, 3); }
void set_r(const INT32 value) { m_value = _mm_insert_epi32(m_value, value, 2); }
void set_g(const INT32 value) { m_value = _mm_insert_epi32(m_value, value, 1); }
void set_b(const INT32 value) { m_value = _mm_insert_epi32(m_value, value, 0); }
#else
// SSE2 fallback: AND m_value with the channel's mask, then OR in a vector that
// holds `value` in that channel's lane and zero elsewhere.
// NOTE(review): presumably each *_mask() is zero only in its own lane, so the
// AND clears the old channel value -- the mask tables are not visible here; confirm.
void set_a(const INT32 value) { m_value = _mm_or_si128(_mm_and_si128(m_value, alpha_mask()), _mm_set_epi32(value, 0, 0, 0)); }
void set_r(const INT32 value) { m_value = _mm_or_si128(_mm_and_si128(m_value, red_mask()), _mm_set_epi32(0, value, 0, 0)); }
void set_g(const INT32 value) { m_value = _mm_or_si128(_mm_and_si128(m_value, green_mask()), _mm_set_epi32(0, 0, value, 0)); }
void set_b(const INT32 value) { m_value = _mm_or_si128(_mm_and_si128(m_value, blue_mask()), _mm_set_epi32(0, 0, 0, value)); }
#endif
// 8-bit accessors: extract 16-bit word 6/4/2/0 of m_value (the low half of
// 32-bit lane 3/2/1/0 respectively) and narrow it to UINT8, keeping only the
// low 8 bits of the channel.
UINT8 get_a() const { return UINT8(unsigned(_mm_extract_epi16(m_value, 6))); }
UINT8 get_r() const { return UINT8(unsigned(_mm_extract_epi16(m_value, 4))); }
UINT8 get_g() const { return UINT8(unsigned(_mm_extract_epi16(m_value, 2))); }
UINT8 get_b() const { return UINT8(unsigned(_mm_extract_epi16(m_value, 0))); }
// Full 32-bit channel accessors (lane 3 = alpha, 2 = red, 1 = green, 0 = blue).
#ifdef __SSE4_1__
// SSE4.1 path: read the 32-bit lane directly.
INT32 get_a32() const { return _mm_extract_epi32(m_value, 3); }
INT32 get_r32() const { return _mm_extract_epi32(m_value, 2); }
INT32 get_g32() const { return _mm_extract_epi32(m_value, 1); }
INT32 get_b32() const { return _mm_extract_epi32(m_value, 0); }
#else
// SSE2 fallback: rebuild each lane from its two 16-bit words -- the odd word
// is shifted up 16 bits to form the high half, the even word is the low half.
// NOTE(review): relies on _mm_extract_epi16 yielding a zero-extended value so
// that the OR leaves the high half intact -- confirm against the intrinsic docs.
INT32 get_a32() const { return (_mm_extract_epi16(m_value, 7) << 16) | _mm_extract_epi16(m_value, 6); }
INT32 get_r32() const { return (_mm_extract_epi16(m_value, 5) << 16) | _mm_extract_epi16(m_value, 4); }
INT32 get_g32() const { return (_mm_extract_epi16(m_value, 3) << 16) | _mm_extract_epi16(m_value, 2); }
INT32 get_b32() const { return (_mm_extract_epi16(m_value, 1) << 16) | _mm_extract_epi16(m_value, 0); }
#endif
inline void add(const rgbaint_t& color2) inline void add(const rgbaint_t& color2)
{ {
m_value = _mm_add_epi32(m_value, color2.m_value); m_value = _mm_add_epi32(m_value, color2.m_value);
@ -73,7 +108,7 @@ public:
m_value = _mm_sub_epi32(m_value, _mm_set_epi32(a, r, g, b)); m_value = _mm_sub_epi32(m_value, _mm_set_epi32(a, r, g, b));
} }
inline void subr(rgbaint_t& color2) inline void subr(const rgbaint_t& color2)
{ {
m_value = _mm_sub_epi32(color2.m_value, m_value); m_value = _mm_sub_epi32(color2.m_value, m_value);
} }
@ -88,66 +123,6 @@ public:
m_value = _mm_sub_epi32(_mm_set_epi32(a, r, g, b), m_value); m_value = _mm_sub_epi32(_mm_set_epi32(a, r, g, b), m_value);
} }
inline void set_a(const INT32 value)
{
m_value = _mm_or_si128(_mm_and_si128(m_value, alpha_mask()), _mm_set_epi32(value, 0, 0, 0));
}
inline void set_r(const INT32 value)
{
m_value = _mm_or_si128(_mm_and_si128(m_value, red_mask()), _mm_set_epi32(0, value, 0, 0));
}
inline void set_g(const INT32 value)
{
m_value = _mm_or_si128(_mm_and_si128(m_value, green_mask()), _mm_set_epi32(0, 0, value, 0));
}
inline void set_b(const INT32 value)
{
m_value = _mm_or_si128(_mm_and_si128(m_value, blue_mask()), _mm_set_epi32(0, 0, 0, value));
}
inline UINT8 get_a() const
{
return _mm_extract_epi16(m_value, 6);
}
inline UINT8 get_r() const
{
return _mm_extract_epi16(m_value, 4);
}
inline UINT8 get_g() const
{
return _mm_extract_epi16(m_value, 2);
}
inline UINT8 get_b() const
{
return _mm_extract_epi16(m_value, 0);
}
inline INT32 get_a32() const
{
return (_mm_extract_epi16(m_value, 7) << 16) | _mm_extract_epi16(m_value, 6);
}
inline INT32 get_r32() const
{
return (_mm_extract_epi16(m_value, 5) << 16) | _mm_extract_epi16(m_value, 4);
}
inline INT32 get_g32() const
{
return (_mm_extract_epi16(m_value, 3) << 16) | _mm_extract_epi16(m_value, 2);
}
inline INT32 get_b32() const
{
return (_mm_extract_epi16(m_value, 1) << 16) | _mm_extract_epi16(m_value, 0);
}
inline void mul(const rgbaint_t& color) inline void mul(const rgbaint_t& color)
{ {
__m128i tmp1 = _mm_mul_epu32(m_value, color.m_value); __m128i tmp1 = _mm_mul_epu32(m_value, color.m_value);
@ -414,7 +389,7 @@ public:
m_value = _mm_cmplt_epi32(m_value, _mm_set_epi32(a, r, g, b)); m_value = _mm_cmplt_epi32(m_value, _mm_set_epi32(a, r, g, b));
} }
inline rgbaint_t operator=(const rgbaint_t& other) inline rgbaint_t &operator=(const rgbaint_t& other)
{ {
m_value = other.m_value; m_value = other.m_value;
return *this; return *this;
@ -459,8 +434,12 @@ public:
inline void merge_alpha(const rgbaint_t& alpha) inline void merge_alpha(const rgbaint_t& alpha)
{ {
#ifdef __SSE4_1__
m_value = _mm_insert_epi32(m_value, _mm_extract_epi32(alpha.m_value, 3), 3);
#else
m_value = _mm_insert_epi16(m_value, _mm_extract_epi16(alpha.m_value, 7), 7); m_value = _mm_insert_epi16(m_value, _mm_extract_epi16(alpha.m_value, 7), 7);
m_value = _mm_insert_epi16(m_value, _mm_extract_epi16(alpha.m_value, 6), 6); m_value = _mm_insert_epi16(m_value, _mm_extract_epi16(alpha.m_value, 6), 6);
#endif
} }
static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v) static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
@ -487,7 +466,7 @@ public:
return _mm_cvtsi128_si32(color01); return _mm_cvtsi128_si32(color01);
} }
inline void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v) void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
{ {
__m128i color00 = _mm_cvtsi32_si128(rgb00); __m128i color00 = _mm_cvtsi32_si128(rgb00);
__m128i color01 = _mm_cvtsi32_si128(rgb01); __m128i color01 = _mm_cvtsi32_si128(rgb01);
@ -519,11 +498,11 @@ protected:
INT16 scale_table[256][8]; INT16 scale_table[256][8];
}; };
static inline __m128i alpha_mask() { return *(__m128i *)&statics.alpha_mask[0]; } static __m128i alpha_mask() { return *(__m128i *)&statics.alpha_mask[0]; }
static inline __m128i red_mask() { return *(__m128i *)&statics.red_mask[0]; } static __m128i red_mask() { return *(__m128i *)&statics.red_mask[0]; }
static inline __m128i green_mask() { return *(__m128i *)&statics.green_mask[0]; } static __m128i green_mask() { return *(__m128i *)&statics.green_mask[0]; }
static inline __m128i blue_mask() { return *(__m128i *)&statics.blue_mask[0]; } static __m128i blue_mask() { return *(__m128i *)&statics.blue_mask[0]; }
static inline __m128i scale_factor(UINT8 index) { return *(__m128i *)&statics.scale_table[index][0]; } static __m128i scale_factor(UINT8 index) { return *(__m128i *)&statics.scale_table[index][0]; }
__m128i m_value; __m128i m_value;

View File

@ -9,10 +9,10 @@
***************************************************************************/ ***************************************************************************/
#ifndef __RGBUTIL__ #ifndef MAME_EMU_VIDEO_RGBUTIL_H
#define __RGBUTIL__ #define MAME_EMU_VIDEO_RGBUTIL_H
/* use SSE on 64-bit implementations, where it can be assumed */ // use SSE on 64-bit implementations, where it can be assumed
#if (!defined(MAME_DEBUG) || defined(__OPTIMIZE__)) && (defined(__SSE2__) || defined(_MSC_VER)) && defined(PTR64) #if (!defined(MAME_DEBUG) || defined(__OPTIMIZE__)) && (defined(__SSE2__) || defined(_MSC_VER)) && defined(PTR64)
#include "rgbsse.h" #include "rgbsse.h"
#elif defined(__ALTIVEC__) #elif defined(__ALTIVEC__)
@ -21,4 +21,4 @@
#include "rgbgen.h" #include "rgbgen.h"
#endif #endif
#endif /* __RGBUTIL__ */ #endif // MAME_EMU_VIDEO_RGBUTIL_H

View File

@ -28,15 +28,15 @@ protected:
typedef __vector unsigned int VECU32; typedef __vector unsigned int VECU32;
public: public:
inline rgbaint_t() { set(0, 0, 0, 0); } rgbaint_t() { set(0, 0, 0, 0); }
inline rgbaint_t(UINT32 rgba) { set(rgba); } explicit rgbaint_t(UINT32 rgba) { set(rgba); }
inline rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); } rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); }
inline rgbaint_t(const rgb_t& rgb) { set(rgb); } explicit rgbaint_t(const rgb_t& rgb) { set(rgb); }
inline rgbaint_t(VECS32 rgba) : m_value(rgba) { } explicit rgbaint_t(VECS32 rgba) : m_value(rgba) { }
inline void set(rgbaint_t& other) { m_value = other.m_value; } void set(const rgbaint_t& other) { m_value = other.m_value; }
inline void set(UINT32 rgba) void set(UINT32 rgba)
{ {
const VECU32 zero = { 0, 0, 0, 0 }; const VECU32 zero = { 0, 0, 0, 0 };
#ifdef __LITTLE_ENDIAN__ #ifdef __LITTLE_ENDIAN__
@ -48,7 +48,7 @@ public:
#endif #endif
} }
inline void set(INT32 a, INT32 r, INT32 g, INT32 b) void set(INT32 a, INT32 r, INT32 g, INT32 b)
{ {
#ifdef __LITTLE_ENDIAN__ #ifdef __LITTLE_ENDIAN__
const VECS32 result = { b, g, r, a }; const VECS32 result = { b, g, r, a };
@ -58,7 +58,7 @@ public:
m_value = result; m_value = result;
} }
inline void set(const rgb_t& rgb) void set(const rgb_t& rgb)
{ {
const VECU32 zero = { 0, 0, 0, 0 }; const VECU32 zero = { 0, 0, 0, 0 };
#ifdef __LITTLE_ENDIAN__ #ifdef __LITTLE_ENDIAN__
@ -88,6 +88,118 @@ public:
return result; return result;
} }
// Set the alpha channel: fill a temporary vector with `value` in every
// element, then combine m_value and the temporary through vec_perm using
// alpha_perm as the selector.
// NOTE(review): presumably alpha_perm takes the alpha element from the second
// operand and all other bytes from m_value -- its definition is not visible here.
void set_a(const INT32 value)
{
const VECS32 temp = { value, value, value, value };
m_value = vec_perm(m_value, temp, alpha_perm);
}
// Set the red channel: fill a temporary vector with `value` in every element,
// then combine m_value and the temporary through vec_perm using red_perm as
// the selector.
// NOTE(review): presumably red_perm takes the red element from the second
// operand and all other bytes from m_value -- its definition is not visible here.
void set_r(const INT32 value)
{
const VECS32 temp = { value, value, value, value };
m_value = vec_perm(m_value, temp, red_perm);
}
// Set the green channel: fill a temporary vector with `value` in every
// element, then combine m_value and the temporary through vec_perm using
// green_perm as the selector.
// NOTE(review): presumably green_perm takes the green element from the second
// operand and all other bytes from m_value -- its definition is not visible here.
void set_g(const INT32 value)
{
const VECS32 temp = { value, value, value, value };
m_value = vec_perm(m_value, temp, green_perm);
}
// Set the blue channel: fill a temporary vector with `value` in every element,
// then combine m_value and the temporary through vec_perm using blue_perm as
// the selector.
// NOTE(review): presumably blue_perm takes the blue element from the second
// operand and all other bytes from m_value -- its definition is not visible here.
void set_b(const INT32 value)
{
const VECS32 temp = { value, value, value, value };
m_value = vec_perm(m_value, temp, blue_perm);
}
// Return one byte of m_value as the 8-bit alpha value. The byte index depends
// on endianness: byte 12 when __LITTLE_ENDIAN__ is defined, byte 3 otherwise.
// vec_splat copies that byte into every element of a byte vector and vec_ste
// stores a single element of it into `result`.
// NOTE(review): presumably the splat is there so that whichever element
// vec_ste selects for &result's address holds the wanted byte -- confirm.
UINT8 get_a() const
{
UINT8 result;
#ifdef __LITTLE_ENDIAN__
vec_ste(vec_splat(VECU8(m_value), 12), 0, &result);
#else
vec_ste(vec_splat(VECU8(m_value), 3), 0, &result);
#endif
return result;
}
// Return one byte of m_value as the 8-bit red value. The byte index depends
// on endianness: byte 8 when __LITTLE_ENDIAN__ is defined, byte 7 otherwise.
// vec_splat copies that byte into every element of a byte vector and vec_ste
// stores a single element of it into `result`.
// NOTE(review): presumably the splat is there so that whichever element
// vec_ste selects for &result's address holds the wanted byte -- confirm.
UINT8 get_r() const
{
UINT8 result;
#ifdef __LITTLE_ENDIAN__
vec_ste(vec_splat(VECU8(m_value), 8), 0, &result);
#else
vec_ste(vec_splat(VECU8(m_value), 7), 0, &result);
#endif
return result;
}
// Return one byte of m_value as the 8-bit green value. The byte index depends
// on endianness: byte 4 when __LITTLE_ENDIAN__ is defined, byte 11 otherwise.
// vec_splat copies that byte into every element of a byte vector and vec_ste
// stores a single element of it into `result`.
// NOTE(review): presumably the splat is there so that whichever element
// vec_ste selects for &result's address holds the wanted byte -- confirm.
UINT8 get_g() const
{
UINT8 result;
#ifdef __LITTLE_ENDIAN__
vec_ste(vec_splat(VECU8(m_value), 4), 0, &result);
#else
vec_ste(vec_splat(VECU8(m_value), 11), 0, &result);
#endif
return result;
}
// Return one byte of m_value as the 8-bit blue value. The byte index depends
// on endianness: byte 0 when __LITTLE_ENDIAN__ is defined, byte 15 otherwise.
// vec_splat copies that byte into every element of a byte vector and vec_ste
// stores a single element of it into `result`.
// NOTE(review): presumably the splat is there so that whichever element
// vec_ste selects for &result's address holds the wanted byte -- confirm.
UINT8 get_b() const
{
UINT8 result;
#ifdef __LITTLE_ENDIAN__
vec_ste(vec_splat(VECU8(m_value), 0), 0, &result);
#else
vec_ste(vec_splat(VECU8(m_value), 15), 0, &result);
#endif
return result;
}
// Return the full 32-bit alpha element of m_value: element 3 when
// __LITTLE_ENDIAN__ is defined, element 0 otherwise. The element is splatted
// across a vector and a single element is stored to `result` with vec_ste.
INT32 get_a32() const
{
INT32 result;
#ifdef __LITTLE_ENDIAN__
vec_ste(vec_splat(m_value, 3), 0, &result);
#else
vec_ste(vec_splat(m_value, 0), 0, &result);
#endif
return result;
}
// Return the full 32-bit red element of m_value: element 2 when
// __LITTLE_ENDIAN__ is defined, element 1 otherwise. The element is splatted
// across a vector and a single element is stored to `result` with vec_ste.
INT32 get_r32() const
{
INT32 result;
#ifdef __LITTLE_ENDIAN__
vec_ste(vec_splat(m_value, 2), 0, &result);
#else
vec_ste(vec_splat(m_value, 1), 0, &result);
#endif
return result;
}
// Return the full 32-bit green element of m_value: element 1 when
// __LITTLE_ENDIAN__ is defined, element 2 otherwise. The element is splatted
// across a vector and a single element is stored to `result` with vec_ste.
INT32 get_g32() const
{
INT32 result;
#ifdef __LITTLE_ENDIAN__
vec_ste(vec_splat(m_value, 1), 0, &result);
#else
vec_ste(vec_splat(m_value, 2), 0, &result);
#endif
return result;
}
// Return the full 32-bit blue element of m_value: element 0 when
// __LITTLE_ENDIAN__ is defined, element 3 otherwise. The element is splatted
// across a vector and a single element is stored to `result` with vec_ste.
INT32 get_b32() const
{
INT32 result;
#ifdef __LITTLE_ENDIAN__
vec_ste(vec_splat(m_value, 0), 0, &result);
#else
vec_ste(vec_splat(m_value, 3), 0, &result);
#endif
return result;
}
inline void add(const rgbaint_t& color2) inline void add(const rgbaint_t& color2)
{ {
m_value = vec_add(m_value, color2.m_value); m_value = vec_add(m_value, color2.m_value);
@ -130,7 +242,7 @@ public:
m_value = vec_sub(m_value, temp); m_value = vec_sub(m_value, temp);
} }
inline void subr(rgbaint_t& color2) inline void subr(const rgbaint_t& color2)
{ {
m_value = vec_sub(color2.m_value, m_value); m_value = vec_sub(color2.m_value, m_value);
} }
@ -151,118 +263,6 @@ public:
m_value = vec_sub(temp, m_value); m_value = vec_sub(temp, m_value);
} }
inline void set_a(const INT32 value)
{
const VECS32 temp = { value, value, value, value };
m_value = vec_perm(m_value, temp, alpha_perm);
}
inline void set_r(const INT32 value)
{
const VECS32 temp = { value, value, value, value };
m_value = vec_perm(m_value, temp, red_perm);
}
inline void set_g(const INT32 value)
{
const VECS32 temp = { value, value, value, value };
m_value = vec_perm(m_value, temp, green_perm);
}
inline void set_b(const INT32 value)
{
const VECS32 temp = { value, value, value, value };
m_value = vec_perm(m_value, temp, blue_perm);
}
inline UINT8 get_a() const
{
UINT8 result;
#ifdef __LITTLE_ENDIAN__
vec_ste(vec_splat(VECU8(m_value), 12), 0, &result);
#else
vec_ste(vec_splat(VECU8(m_value), 3), 0, &result);
#endif
return result;
}
inline UINT8 get_r() const
{
UINT8 result;
#ifdef __LITTLE_ENDIAN__
vec_ste(vec_splat(VECU8(m_value), 8), 0, &result);
#else
vec_ste(vec_splat(VECU8(m_value), 7), 0, &result);
#endif
return result;
}
inline UINT8 get_g() const
{
UINT8 result;
#ifdef __LITTLE_ENDIAN__
vec_ste(vec_splat(VECU8(m_value), 4), 0, &result);
#else
vec_ste(vec_splat(VECU8(m_value), 11), 0, &result);
#endif
return result;
}
inline UINT8 get_b() const
{
UINT8 result;
#ifdef __LITTLE_ENDIAN__
vec_ste(vec_splat(VECU8(m_value), 0), 0, &result);
#else
vec_ste(vec_splat(VECU8(m_value), 15), 0, &result);
#endif
return result;
}
inline INT32 get_a32() const
{
INT32 result;
#ifdef __LITTLE_ENDIAN__
vec_ste(vec_splat(m_value, 3), 0, &result);
#else
vec_ste(vec_splat(m_value, 0), 0, &result);
#endif
return result;
}
inline INT32 get_r32() const
{
INT32 result;
#ifdef __LITTLE_ENDIAN__
vec_ste(vec_splat(m_value, 2), 0, &result);
#else
vec_ste(vec_splat(m_value, 1), 0, &result);
#endif
return result;
}
inline INT32 get_g32() const
{
INT32 result;
#ifdef __LITTLE_ENDIAN__
vec_ste(vec_splat(m_value, 1), 0, &result);
#else
vec_ste(vec_splat(m_value, 2), 0, &result);
#endif
return result;
}
inline INT32 get_b32() const
{
INT32 result;
#ifdef __LITTLE_ENDIAN__
vec_ste(vec_splat(m_value, 0), 0, &result);
#else
vec_ste(vec_splat(m_value, 3), 0, &result);
#endif
return result;
}
inline void mul(const rgbaint_t& color) inline void mul(const rgbaint_t& color)
{ {
const VECU32 shift = vec_splat_u32(-16); const VECU32 shift = vec_splat_u32(-16);
@ -545,7 +545,7 @@ public:
m_value = VECS32(vec_cmplt(m_value, temp)); m_value = VECS32(vec_cmplt(m_value, temp));
} }
inline rgbaint_t operator=(const rgbaint_t& other) inline rgbaint_t &operator=(const rgbaint_t& other)
{ {
m_value = other.m_value; m_value = other.m_value;
return *this; return *this;
@ -607,7 +607,7 @@ public:
m_value = vec_perm(m_value, alpha.m_value, alpha_perm); m_value = vec_perm(m_value, alpha.m_value, alpha_perm);
} }
static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v) static UINT32 bilinear_filter(const UINT32 &rgb00, const UINT32 &rgb01, const UINT32 &rgb10, const UINT32 &rgb11, UINT8 u, UINT8 v)
{ {
const VECS32 zero = vec_splat_s32(0); const VECS32 zero = vec_splat_s32(0);
@ -650,7 +650,7 @@ public:
return result; return result;
} }
inline void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v) void bilinear_filter_rgbaint(const UINT32 &rgb00, const UINT32 &rgb01, const UINT32 &rgb10, const UINT32 &rgb11, UINT8 u, UINT8 v)
{ {
const VECS32 zero = vec_splat_s32(0); const VECS32 zero = vec_splat_s32(0);
@ -688,13 +688,13 @@ public:
} }
protected: protected:
VECS32 m_value; VECS32 m_value;
static const VECU8 alpha_perm; static const VECU8 alpha_perm;
static const VECU8 red_perm; static const VECU8 red_perm;
static const VECU8 green_perm; static const VECU8 green_perm;
static const VECU8 blue_perm; static const VECU8 blue_perm;
static const VECS16 scale_table[256]; static const VECS16 scale_table[256];
}; };

View File

@ -342,7 +342,7 @@ void n64_texture_pipe_t::cycle_linear_lerp(color_t* TEX, color_t* prev, INT32 SS
TEX->add(t0); TEX->add(t0);
TEX->add(t2); TEX->add(t2);
TEX->add(t3); TEX->add(t3);
TEX->sra(2); TEX->sra_imm(2);
} }
} }