diff --git a/src/emu/validity.cpp b/src/emu/validity.cpp
index d732d3e892e..d9e26597f27 100644
--- a/src/emu/validity.cpp
+++ b/src/emu/validity.cpp
@@ -10,7 +10,10 @@
 
 #include "emu.h"
 #include "validity.h"
+
 #include "emuopts.h"
+#include "video/rgbutil.h"
+
 #include <ctype.h>
 
 
@@ -49,6 +52,20 @@ inline int validity_checker::get_defstr_index(const char *string, bool suppress_
 }
 
 
+//-------------------------------------------------
+//  random_i32, random_u32, random_i64, random_u64
+//  - build pseudo-random test values from rand();
+//  bits are combined as unsigned so that the left
+//  shifts cannot overflow a signed int
+//-------------------------------------------------
+#undef rand
+inline INT32 validity_checker::random_i32() { return INT32(random_u32()); }
+inline UINT32 validity_checker::random_u32() { return UINT32(rand()) ^ (UINT32(rand()) << 15); }
+inline INT64 validity_checker::random_i64() { return INT64(random_u64()); }
+inline UINT64 validity_checker::random_u64() { return UINT64(random_u32()) ^ (UINT64(random_u32()) << 30); }
+
+
+
 //-------------------------------------------------
 //  validate_tag - ensure that the given tag
 //  meets the general requirements
@@ -182,6 +199,7 @@ bool validity_checker::check_all_matching(const char *string)
 	validate_begin();
 	validate_core();
 	validate_inlines();
+	validate_rgb();
 
 	// if we had warnings or errors, output
 	if (m_errors > 0 || m_warnings > 0 || !m_verbose_text.empty())
@@ -362,13 +380,12 @@ void validity_checker::validate_core()
 
 void validity_checker::validate_inlines()
 {
-#undef rand
-	volatile UINT64 testu64a = rand() ^ (rand() << 15) ^ ((UINT64)rand() << 30) ^ ((UINT64)rand() << 45);
-	volatile INT64 testi64a = rand() ^ (rand() << 15) ^ ((INT64)rand() << 30) ^ ((INT64)rand() << 45);
-	volatile UINT32 testu32a = rand() ^ (rand() << 15);
-	volatile UINT32 testu32b = rand() ^ (rand() << 15);
-	volatile INT32 testi32a = rand() ^ (rand() << 15);
-	volatile INT32 testi32b = rand() ^ (rand() << 15);
+	volatile UINT64 testu64a = random_u64(); // unsigned operand comes from the unsigned generator
+	volatile INT64 testi64a = random_i64();
+	volatile UINT32 testu32a = random_u32();
+	volatile UINT32 testu32b = random_u32();
+	volatile INT32 testi32a = random_i32();
+	volatile INT32 testi32b = random_i32();
 	INT32 resulti32, expectedi32;
 	UINT32 resultu32, expectedu32;
 	INT64 resulti64, expectedi64;
@@ -481,6 +498,545 @@ void validity_checker::validate_inlines()
 }
 
 
+//-------------------------------------------------
+//  validate_rgb - validate optimised RGB utility
+//  class
+//-------------------------------------------------
+
+void validity_checker::validate_rgb()
+{
+	/*
+	    This performs cursory tests of most of the vector-optimised RGB
+		utilities, concentrating on the low-level maths.  It uses random
+		values most of the time for a quick go/no-go indication rather
+		than trying to exercise edge cases.  It doesn't matter too much
+		if the compiler optimises out some of the operations since it's
+		really intended to check for logic bugs in the vector code.
+
+		The following functions are not tested yet:
+		rgbaint_t()
+		clamp_and_clear(const UINT32)
+		sign_extend(const UINT32, const UINT32)
+		min(const INT32)
+		max(const INT32)
+		blend(const rgbaint_t&, UINT8)
+		scale_and_clamp(const rgbaint_t&)
+		scale_imm_and_clamp(const INT32)
+		scale2_add_and_clamp(const rgbaint_t&, const rgbaint_t&, const rgbaint_t&)
+		scale_add_and_clamp(const rgbaint_t&, const rgbaint_t&);
+		scale_imm_add_and_clamp(const INT32, const rgbaint_t&);
+		cmpeq(const rgbaint_t&)
+		cmpeq_imm(const INT32)
+		cmpeq_imm_rgba(const INT32, const INT32, const INT32, const INT32)
+		cmpgt(const rgbaint_t&)
+		cmpgt_imm(const INT32)
+		cmpgt_imm_rgba(const INT32, const INT32, const INT32, const INT32)
+		cmplt(const rgbaint_t&)
+		cmplt_imm(const INT32)
+		cmplt_imm_rgba(const INT32, const INT32, const INT32, const INT32)
+		static bilinear_filter(UINT32, UINT32, UINT32, UINT32, UINT8, UINT8)
+		bilinear_filter_rgbaint(UINT32, UINT32, UINT32, UINT32, UINT8, UINT8)
+	*/
+
+	volatile INT32 expected_a, expected_r, expected_g, expected_b;
+	volatile INT32 actual_a, actual_r, actual_g, actual_b;
+	volatile INT32 imm;
+	rgbaint_t rgb, other;
+	rgb_t packed;
+	auto check_expected = [&] (const char *desc)
+	{
+		const volatile INT32 a = rgb.get_a32();
+		const volatile INT32 r = rgb.get_r32();
+		const volatile INT32 g = rgb.get_g32();
+		const volatile INT32 b = rgb.get_b32();
+		if (a != expected_a) osd_printf_error("Error testing %s get_a32() = %d (expected %d)\n", desc, a, expected_a);
+		if (r != expected_r) osd_printf_error("Error testing %s get_r32() = %d (expected %d)\n", desc, r, expected_r);
+		if (g != expected_g) osd_printf_error("Error testing %s get_g32() = %d (expected %d)\n", desc, g, expected_g);
+		if (b != expected_b) osd_printf_error("Error testing %s get_b32() = %d (expected %d)\n", desc, b, expected_b);
+	};
+
+	// check set/get
+	expected_a = random_i32();
+	expected_r = random_i32();
+	expected_g = random_i32();
+	expected_b = random_i32();
+	rgb.set(expected_a, expected_r, expected_g, expected_b);
+	check_expected("rgbaint_t::set(a, r, g, b)");
+
+	// check construct/set
+	expected_a = random_i32();
+	expected_r = random_i32();
+	expected_g = random_i32();
+	expected_b = random_i32();
+	rgb.set(rgbaint_t(expected_a, expected_r, expected_g, expected_b));
+	check_expected("rgbaint_t::set(rgbaint_t)");
+
+	// check construct/assign
+	expected_a = random_i32();
+	expected_r = random_i32();
+	expected_g = random_i32();
+	expected_b = random_i32();
+	rgb = rgbaint_t(expected_a, expected_r, expected_g, expected_b);
+	check_expected("rgbaint_t assignment");
+
+	// check piecewise set
+	rgb.set_a(expected_a = random_i32());
+	check_expected("rgbaint_t::set_a");
+	rgb.set_r(expected_r = random_i32());
+	check_expected("rgbaint_t::set_r");
+	rgb.set_g(expected_g = random_i32());
+	check_expected("rgbaint_t::set_g");
+	rgb.set_b(expected_b = random_i32());
+	check_expected("rgbaint_t::set_b");
+
+	// test merge_alpha
+	expected_a = rand();
+	rgb.merge_alpha(rgbaint_t(expected_a, rand(), rand(), rand()));
+	check_expected("rgbaint_t::merge_alpha");
+
+	// test RGB addition (method)
+	expected_a += actual_a = random_i32();
+	expected_r += actual_r = random_i32();
+	expected_g += actual_g = random_i32();
+	expected_b += actual_b = random_i32();
+	rgb.add(rgbaint_t(actual_a, actual_r, actual_g, actual_b));
+	check_expected("rgbaint_t::add");
+
+	// test RGB addition (operator)
+	expected_a += actual_a = random_i32();
+	expected_r += actual_r = random_i32();
+	expected_g += actual_g = random_i32();
+	expected_b += actual_b = random_i32();
+	rgb += rgbaint_t(actual_a, actual_r, actual_g, actual_b);
+	check_expected("rgbaint_t::operator+=");
+
+	// test offset addition (method)
+	imm = random_i32();
+	expected_a += imm;
+	expected_r += imm;
+	expected_g += imm;
+	expected_b += imm;
+	rgb.add_imm(imm);
+	check_expected("rgbaint_t::add_imm");
+
+	// test offset addition (operator)
+	imm = random_i32();
+	expected_a += imm;
+	expected_r += imm;
+	expected_g += imm;
+	expected_b += imm;
+	rgb += imm;
+	check_expected("rgbaint_t::operator+=");
+
+	// test immediate RGB addition
+	expected_a += actual_a = random_i32();
+	expected_r += actual_r = random_i32();
+	expected_g += actual_g = random_i32();
+	expected_b += actual_b = random_i32();
+	rgb.add_imm_rgba(actual_a, actual_r, actual_g, actual_b);
+	check_expected("rgbaint_t::add_imm_rgba");
+
+	// test RGB subtraction (method)
+	expected_a -= actual_a = random_i32();
+	expected_r -= actual_r = random_i32();
+	expected_g -= actual_g = random_i32();
+	expected_b -= actual_b = random_i32();
+	rgb.sub(rgbaint_t(actual_a, actual_r, actual_g, actual_b));
+	check_expected("rgbaint_t::sub");
+
+	// test RGB subtraction (operator)
+	expected_a -= actual_a = random_i32();
+	expected_r -= actual_r = random_i32();
+	expected_g -= actual_g = random_i32();
+	expected_b -= actual_b = random_i32();
+	rgb -= rgbaint_t(actual_a, actual_r, actual_g, actual_b);
+	check_expected("rgbaint_t::operator-=");
+
+	// test offset subtraction
+	imm = random_i32();
+	expected_a -= imm;
+	expected_r -= imm;
+	expected_g -= imm;
+	expected_b -= imm;
+	rgb.sub_imm(imm);
+	check_expected("rgbaint_t::sub_imm");
+
+	// test immediate RGB subtraction
+	expected_a -= actual_a = random_i32();
+	expected_r -= actual_r = random_i32();
+	expected_g -= actual_g = random_i32();
+	expected_b -= actual_b = random_i32();
+	rgb.sub_imm_rgba(actual_a, actual_r, actual_g, actual_b);
+	check_expected("rgbaint_t::sub_imm_rgba");
+
+	// test reversed RGB subtraction
+	expected_a = (actual_a = random_i32()) - expected_a;
+	expected_r = (actual_r = random_i32()) - expected_r;
+	expected_g = (actual_g = random_i32()) - expected_g;
+	expected_b = (actual_b = random_i32()) - expected_b;
+	rgb.subr(rgbaint_t(actual_a, actual_r, actual_g, actual_b));
+	check_expected("rgbaint_t::subr");
+
+	// test reversed offset subtraction
+	imm = random_i32();
+	expected_a = imm - expected_a;
+	expected_r = imm - expected_r;
+	expected_g = imm - expected_g;
+	expected_b = imm - expected_b;
+	rgb.subr_imm(imm);
+	check_expected("rgbaint_t::subr_imm");
+
+	// test reversed immediate RGB subtraction
+	expected_a = (actual_a = random_i32()) - expected_a;
+	expected_r = (actual_r = random_i32()) - expected_r;
+	expected_g = (actual_g = random_i32()) - expected_g;
+	expected_b = (actual_b = random_i32()) - expected_b;
+	rgb.subr_imm_rgba(actual_a, actual_r, actual_g, actual_b);
+	check_expected("rgbaint_t::subr_imm_rgba");
+
+	// test RGB multiplication (method)
+	expected_a *= actual_a = random_i32();
+	expected_r *= actual_r = random_i32();
+	expected_g *= actual_g = random_i32();
+	expected_b *= actual_b = random_i32();
+	rgb.mul(rgbaint_t(actual_a, actual_r, actual_g, actual_b));
+	check_expected("rgbaint_t::mul");
+
+	// test RGB multiplication (operator)
+	expected_a *= actual_a = random_i32();
+	expected_r *= actual_r = random_i32();
+	expected_g *= actual_g = random_i32();
+	expected_b *= actual_b = random_i32();
+	rgb *= rgbaint_t(actual_a, actual_r, actual_g, actual_b);
+	check_expected("rgbaint_t::operator*=");
+
+	// test factor multiplication (method)
+	imm = random_i32();
+	expected_a *= imm;
+	expected_r *= imm;
+	expected_g *= imm;
+	expected_b *= imm;
+	rgb.mul_imm(imm);
+	check_expected("rgbaint_t::mul_imm");
+
+	// test factor multiplication (operator)
+	imm = random_i32();
+	expected_a *= imm;
+	expected_r *= imm;
+	expected_g *= imm;
+	expected_b *= imm;
+	rgb *= imm;
+	check_expected("rgbaint_t::operator*=");
+
+	// test immediate RGB multiplication
+	expected_a *= actual_a = random_i32();
+	expected_r *= actual_r = random_i32();
+	expected_g *= actual_g = random_i32();
+	expected_b *= actual_b = random_i32();
+	rgb.mul_imm_rgba(actual_a, actual_r, actual_g, actual_b);
+	check_expected("rgbaint_t::mul_imm_rgba");
+
+	// test RGB and not
+	expected_a &= ~(actual_a = random_i32());
+	expected_r &= ~(actual_r = random_i32());
+	expected_g &= ~(actual_g = random_i32());
+	expected_b &= ~(actual_b = random_i32());
+	rgb.andnot_reg(rgbaint_t(actual_a, actual_r, actual_g, actual_b));
+	check_expected("rgbaint_t::andnot_reg");
+
+	// test RGB or
+	expected_a |= actual_a = random_i32();
+	expected_r |= actual_r = random_i32();
+	expected_g |= actual_g = random_i32();
+	expected_b |= actual_b = random_i32();
+	rgb.or_reg(rgbaint_t(actual_a, actual_r, actual_g, actual_b));
+	check_expected("rgbaint_t::or_reg");
+
+	// test RGB and
+	expected_a &= actual_a = random_i32();
+	expected_r &= actual_r = random_i32();
+	expected_g &= actual_g = random_i32();
+	expected_b &= actual_b = random_i32();
+	rgb.and_reg(rgbaint_t(actual_a, actual_r, actual_g, actual_b));
+	check_expected("rgbaint_t::and_reg");
+
+	// test RGB xor
+	expected_a ^= actual_a = random_i32();
+	expected_r ^= actual_r = random_i32();
+	expected_g ^= actual_g = random_i32();
+	expected_b ^= actual_b = random_i32();
+	rgb.xor_reg(rgbaint_t(actual_a, actual_r, actual_g, actual_b));
+	check_expected("rgbaint_t::xor_reg");
+
+	// test uniform or
+	imm = random_i32();
+	expected_a |= imm;
+	expected_r |= imm;
+	expected_g |= imm;
+	expected_b |= imm;
+	rgb.or_imm(imm);
+	check_expected("rgbaint_t::or_imm");
+
+	// test uniform and
+	imm = random_i32();
+	expected_a &= imm;
+	expected_r &= imm;
+	expected_g &= imm;
+	expected_b &= imm;
+	rgb.and_imm(imm);
+	check_expected("rgbaint_t::and_imm");
+
+	// test uniform xor
+	imm = random_i32();
+	expected_a ^= imm;
+	expected_r ^= imm;
+	expected_g ^= imm;
+	expected_b ^= imm;
+	rgb.xor_imm(imm);
+	check_expected("rgbaint_t::xor_imm");
+
+	// test immediate RGB or
+	expected_a |= actual_a = random_i32();
+	expected_r |= actual_r = random_i32();
+	expected_g |= actual_g = random_i32();
+	expected_b |= actual_b = random_i32();
+	rgb.or_imm_rgba(actual_a, actual_r, actual_g, actual_b);
+	check_expected("rgbaint_t::or_imm_rgba");
+
+	// test immediate RGB and
+	expected_a &= actual_a = random_i32();
+	expected_r &= actual_r = random_i32();
+	expected_g &= actual_g = random_i32();
+	expected_b &= actual_b = random_i32();
+	rgb.and_imm_rgba(actual_a, actual_r, actual_g, actual_b);
+	check_expected("rgbaint_t::and_imm_rgba");
+
+	// test immediate RGB xor
+	expected_a ^= actual_a = random_i32();
+	expected_r ^= actual_r = random_i32();
+	expected_g ^= actual_g = random_i32();
+	expected_b ^= actual_b = random_i32();
+	rgb.xor_imm_rgba(actual_a, actual_r, actual_g, actual_b);
+	check_expected("rgbaint_t::xor_imm_rgba");
+
+	// test 8-bit get
+	expected_a = INT32(UINT32(expected_a) & 0x00ff);
+	expected_r = INT32(UINT32(expected_r) & 0x00ff);
+	expected_g = INT32(UINT32(expected_g) & 0x00ff);
+	expected_b = INT32(UINT32(expected_b) & 0x00ff);
+	actual_a = INT32(UINT32(rgb.get_a()));
+	actual_r = INT32(UINT32(rgb.get_r()));
+	actual_g = INT32(UINT32(rgb.get_g()));
+	actual_b = INT32(UINT32(rgb.get_b()));
+	if (actual_a != expected_a) osd_printf_error("Error testing rgbaint_t::get_a() = %d (expected %d)\n", actual_a, expected_a);
+	if (actual_r != expected_r) osd_printf_error("Error testing rgbaint_t::get_r() = %d (expected %d)\n", actual_r, expected_r);
+	if (actual_g != expected_g) osd_printf_error("Error testing rgbaint_t::get_g() = %d (expected %d)\n", actual_g, expected_g);
+	if (actual_b != expected_b) osd_printf_error("Error testing rgbaint_t::get_b() = %d (expected %d)\n", actual_b, expected_b);
+
+	// test set from packed RGBA
+	imm = random_i32();
+	expected_a = INT32((UINT32(imm) >> 24) & 0x00ff);
+	expected_r = INT32((UINT32(imm) >> 16) & 0x00ff);
+	expected_g = INT32((UINT32(imm) >> 8) & 0x00ff);
+	expected_b = INT32((UINT32(imm) >> 0) & 0x00ff);
+	rgb.set(UINT32(imm));
+	check_expected("rgbaint_t::set(UINT32)");
+
+	// while we have a value loaded that we know doesn't exceed 8-bit range, check the non-clamping convert-to-rgba
+	packed = rgb.to_rgba();
+	if (UINT32(imm) != UINT32(packed))
+		osd_printf_error("Error testing rgbaint_t::to_rgba() = %08x (expected %08x)\n", UINT32(packed), UINT32(imm));
+
+	// test construct from packed RGBA and assign
+	imm = random_i32();
+	expected_a = INT32((UINT32(imm) >> 24) & 0x00ff);
+	expected_r = INT32((UINT32(imm) >> 16) & 0x00ff);
+	expected_g = INT32((UINT32(imm) >> 8) & 0x00ff);
+	expected_b = INT32((UINT32(imm) >> 0) & 0x00ff);
+	rgb = rgbaint_t(UINT32(imm));
+	check_expected("rgbaint_t(UINT32)");
+
+	// while we have a value loaded that we know doesn't exceed 8-bit range, check the non-clamping convert-to-rgba
+	packed = rgb.to_rgba();
+	if (UINT32(imm) != UINT32(packed))
+		osd_printf_error("Error testing rgbaint_t::to_rgba() = %08x (expected %08x)\n", UINT32(packed), UINT32(imm));
+
+	// test set with rgb_t: each 32-bit channel must equal the packed colour's 8-bit component
+	packed = random_u32();
+	expected_a = INT32(UINT32(packed.a()));
+	expected_r = INT32(UINT32(packed.r()));
+	expected_g = INT32(UINT32(packed.g()));
+	expected_b = INT32(UINT32(packed.b()));
+	rgb.set(packed);
+	check_expected("rgbaint_t::set(rgb_t)");
+
+	// test construct with rgb_t: same expectation, but going through the converting constructor
+	packed = random_u32();
+	expected_a = INT32(UINT32(packed.a()));
+	expected_r = INT32(UINT32(packed.r()));
+	expected_g = INT32(UINT32(packed.g()));
+	expected_b = INT32(UINT32(packed.b()));
+	rgb = rgbaint_t(packed);
+	check_expected("rgbaint_t(rgb_t)");
+
+	// test clamping convert-to-rgba with hand-crafted values to catch edge cases
+	rgb.set(std::numeric_limits<INT32>::min(), -1, 0, 1);
+	packed = rgb.to_rgba_clamp();
+	if (UINT32(0x00000001) != UINT32(packed))
+		osd_printf_error("Error testing rgbaint_t::to_rgba_clamp() = %08x (expected 0x00000001)\n", UINT32(packed));
+	rgb.set(254, 255, 256, std::numeric_limits<INT32>::max());
+	packed = rgb.to_rgba_clamp();
+	if (UINT32(0xfeffffff) != UINT32(packed))
+		osd_printf_error("Error testing rgbaint_t::to_rgba_clamp() = %08x (expected 0xfeffffff)\n", UINT32(packed));
+	rgb.set(std::numeric_limits<INT32>::max(), std::numeric_limits<INT32>::min(), 256, -1);
+	packed = rgb.to_rgba_clamp();
+	if (UINT32(0xff00ff00) != UINT32(packed))
+		osd_printf_error("Error testing rgbaint_t::to_rgba_clamp() = %08x (expected 0xff00ff00)\n", UINT32(packed));
+	rgb.set(0, 255, 1, 254);
+	packed = rgb.to_rgba_clamp();
+	if (UINT32(0x00ff01fe) != UINT32(packed))
+		osd_printf_error("Error testing rgbaint_t::to_rgba_clamp() = %08x (expected 0x00ff01fe)\n", UINT32(packed));
+
+	// test in-place clamping with hand-crafted values to catch edge cases
+	expected_a = 0;
+	expected_r = 0;
+	expected_g = 0;
+	expected_b = 1;
+	rgb.set(std::numeric_limits<INT32>::min(), -1, 0, 1);
+	rgb.clamp_to_uint8();
+	check_expected("rgbaint_t::clamp_to_uint8");
+	expected_a = 254;
+	expected_r = 255;
+	expected_g = 255;
+	expected_b = 255;
+	rgb.set(254, 255, 256, std::numeric_limits<INT32>::max());
+	rgb.clamp_to_uint8();
+	check_expected("rgbaint_t::clamp_to_uint8");
+	expected_a = 255;
+	expected_r = 0;
+	expected_g = 255;
+	expected_b = 0;
+	rgb.set(std::numeric_limits<INT32>::max(), std::numeric_limits<INT32>::min(), 256, -1);
+	rgb.clamp_to_uint8();
+	check_expected("rgbaint_t::clamp_to_uint8");
+	expected_a = 0;
+	expected_r = 255;
+	expected_g = 1;
+	expected_b = 254;
+	rgb.set(0, 255, 1, 254);
+	rgb.clamp_to_uint8();
+	check_expected("rgbaint_t::clamp_to_uint8");
+
+	// test shift left
+	expected_a = (actual_a = random_i32()) << 19;
+	expected_r = (actual_r = random_i32()) << 3;
+	expected_g = (actual_g = random_i32()) << 21;
+	expected_b = (actual_b = random_i32()) << 6;
+	rgb.set(actual_a, actual_r, actual_g, actual_b);
+	rgb.shl(rgbaint_t(19, 3, 21, 6));
+	check_expected("rgbaint_t::shl");
+
+	// test shift left immediate
+	expected_a = (actual_a = random_i32()) << 7;
+	expected_r = (actual_r = random_i32()) << 7;
+	expected_g = (actual_g = random_i32()) << 7;
+	expected_b = (actual_b = random_i32()) << 7;
+	rgb.set(actual_a, actual_r, actual_g, actual_b);
+	rgb.shl_imm(7);
+	check_expected("rgbaint_t::shl_imm");
+
+	// test logical shift right
+	expected_a = INT32(UINT32(actual_a = random_i32()) >> 8);
+	expected_r = INT32(UINT32(actual_r = random_i32()) >> 18);
+	expected_g = INT32(UINT32(actual_g = random_i32()) >> 26);
+	expected_b = INT32(UINT32(actual_b = random_i32()) >> 4);
+	rgb.set(actual_a, actual_r, actual_g, actual_b);
+	rgb.shr(rgbaint_t(8, 18, 26, 4));
+	check_expected("rgbaint_t::shr");
+
+	// test logical shift right with opposite signs
+	expected_a = INT32(UINT32(actual_a = -actual_a) >> 21);
+	expected_r = INT32(UINT32(actual_r = -actual_r) >> 13);
+	expected_g = INT32(UINT32(actual_g = -actual_g) >> 11);
+	expected_b = INT32(UINT32(actual_b = -actual_b) >> 17);
+	rgb.set(actual_a, actual_r, actual_g, actual_b);
+	rgb.shr(rgbaint_t(21, 13, 11, 17));
+	check_expected("rgbaint_t::shr");
+
+	// test logical shift right immediate
+	expected_a = INT32(UINT32(actual_a = random_i32()) >> 5);
+	expected_r = INT32(UINT32(actual_r = random_i32()) >> 5);
+	expected_g = INT32(UINT32(actual_g = random_i32()) >> 5);
+	expected_b = INT32(UINT32(actual_b = random_i32()) >> 5);
+	rgb.set(actual_a, actual_r, actual_g, actual_b);
+	rgb.shr_imm(5);
+	check_expected("rgbaint_t::shr_imm");
+
+	// test logical shift right immediate with opposite signs
+	expected_a = INT32(UINT32(actual_a = -actual_a) >> 15);
+	expected_r = INT32(UINT32(actual_r = -actual_r) >> 15);
+	expected_g = INT32(UINT32(actual_g = -actual_g) >> 15);
+	expected_b = INT32(UINT32(actual_b = -actual_b) >> 15);
+	rgb.set(actual_a, actual_r, actual_g, actual_b);
+	rgb.shr_imm(15);
+	check_expected("rgbaint_t::shr_imm");
+
+	// test arithmetic shift right
+	expected_a = (actual_a = random_i32()) >> 16;
+	expected_r = (actual_r = random_i32()) >> 20;
+	expected_g = (actual_g = random_i32()) >> 14;
+	expected_b = (actual_b = random_i32()) >> 2;
+	rgb.set(actual_a, actual_r, actual_g, actual_b);
+	rgb.sra(rgbaint_t(16, 20, 14, 2));
+	check_expected("rgbaint_t::sra");
+
+	// test arithmetic shift right with opposite signs
+	expected_a = (actual_a = -actual_a) >> 1;
+	expected_r = (actual_r = -actual_r) >> 29;
+	expected_g = (actual_g = -actual_g) >> 10;
+	expected_b = (actual_b = -actual_b) >> 22;
+	rgb.set(actual_a, actual_r, actual_g, actual_b);
+	rgb.sra(rgbaint_t(1, 29, 10, 22));
+	check_expected("rgbaint_t::sra");
+
+	// test arithmetic shift right immediate (method)
+	expected_a = (actual_a = random_i32()) >> 12;
+	expected_r = (actual_r = random_i32()) >> 12;
+	expected_g = (actual_g = random_i32()) >> 12;
+	expected_b = (actual_b = random_i32()) >> 12;
+	rgb.set(actual_a, actual_r, actual_g, actual_b);
+	rgb.sra_imm(12);
+	check_expected("rgbaint_t::sra_imm");
+
+	// test arithmetic shift right immediate with opposite signs (method)
+	expected_a = (actual_a = -actual_a) >> 9;
+	expected_r = (actual_r = -actual_r) >> 9;
+	expected_g = (actual_g = -actual_g) >> 9;
+	expected_b = (actual_b = -actual_b) >> 9;
+	rgb.set(actual_a, actual_r, actual_g, actual_b);
+	rgb.sra_imm(9);
+	check_expected("rgbaint_t::sra_imm");
+
+	// test arithmetic shift right immediate (operator)
+	expected_a = (actual_a = random_i32()) >> 7;
+	expected_r = (actual_r = random_i32()) >> 7;
+	expected_g = (actual_g = random_i32()) >> 7;
+	expected_b = (actual_b = random_i32()) >> 7;
+	rgb.set(actual_a, actual_r, actual_g, actual_b);
+	rgb >>= 7;
+	check_expected("rgbaint_t::operator>>=");
+
+	// test arithmetic shift right immediate with opposite signs (operator)
+	expected_a = (actual_a = -actual_a) >> 11;
+	expected_r = (actual_r = -actual_r) >> 11;
+	expected_g = (actual_g = -actual_g) >> 11;
+	expected_b = (actual_b = -actual_b) >> 11;
+	rgb.set(actual_a, actual_r, actual_g, actual_b);
+	rgb >>= 11;
+	check_expected("rgbaint_t::operator>>=");
+}
+
+
 //-------------------------------------------------
 //  validate_driver - validate basic driver
 //  information
@@ -1024,42 +1580,45 @@ void validity_checker::output_callback(osd_output_channel channel, const char *m
 	std::string output;
 	switch (channel)
 	{
-		case OSD_OUTPUT_CHANNEL_ERROR:
-			// count the error
-			m_errors++;
+	case OSD_OUTPUT_CHANNEL_ERROR:
+		// count the error
+		m_errors++;
 
-			// output the source(driver) device 'tag'
-			build_output_prefix(output);
+		// output the source(driver) device 'tag'
+		build_output_prefix(output);
 
-			// generate the string
-			strcatvprintf(output, msg, args);
-			m_error_text.append(output);
-			break;
-		case OSD_OUTPUT_CHANNEL_WARNING:
-			// count the error
-			m_warnings++;
+		// generate the string
+		strcatvprintf(output, msg, args);
+		m_error_text.append(output);
+		break;
 
-			// output the source(driver) device 'tag'
-			build_output_prefix(output);
+	case OSD_OUTPUT_CHANNEL_WARNING:
+		// count the warning
+		m_warnings++;
 
-			// generate the string and output to the original target
-			strcatvprintf(output, msg, args);
-			m_warning_text.append(output);
-			break;
-		case OSD_OUTPUT_CHANNEL_VERBOSE:
-			// if we're not verbose, skip it
-			if (!m_print_verbose) break;
+		// output the source(driver) device 'tag'
+		build_output_prefix(output);
 
-			// output the source(driver) device 'tag'
-			build_output_prefix(output);
+		// generate the string and append it to the warning text
+		strcatvprintf(output, msg, args);
+		m_warning_text.append(output);
+		break;
 
-			// generate the string and output to the original target
-			strcatvprintf(output, msg, args);
-			m_verbose_text.append(output);
-			break;
-		default:
-			chain_output(channel, msg, args);
-			break;
+	case OSD_OUTPUT_CHANNEL_VERBOSE:
+		// if we're not verbose, skip it
+		if (!m_print_verbose) break;
+
+		// output the source(driver) device 'tag'
+		build_output_prefix(output);
+
+		// generate the string and append it to the verbose text
+		strcatvprintf(output, msg, args);
+		m_verbose_text.append(output);
+		break;
+
+	default:
+		chain_output(channel, msg, args);
+		break;
 	}
 }
 
@@ -1075,7 +1634,7 @@ void validity_checker::output_via_delegate(osd_output_channel channel, const cha
 
 	// call through to the delegate with the proper parameters
 	va_start(argptr, format);
-	this->chain_output(channel, format, argptr);
+	chain_output(channel, format, argptr);
 	va_end(argptr);
 }
 
diff --git a/src/emu/validity.h b/src/emu/validity.h
index cca0eda18da..a25a922e7b6 100644
--- a/src/emu/validity.h
+++ b/src/emu/validity.h
@@ -8,10 +8,10 @@
 
 ***************************************************************************/
 
-#pragma once
+#ifndef MAME_EMU_VALIDITY_H
+#define MAME_EMU_VALIDITY_H
 
-#ifndef __VALIDITY_H__
-#define __VALIDITY_H__
+#pragma once
 
 #include "emu.h"
 #include "drivenum.h"
@@ -28,10 +28,6 @@ class machine_config;
 // core validity checker class
 class validity_checker : public osd_output
 {
-	// internal map types
-	typedef std::unordered_map<std::string,const game_driver *> game_driver_map;
-	typedef std::unordered_map<std::string,FPTR> int_map;
-
 public:
 	validity_checker(emu_options &options);
 	~validity_checker();
@@ -63,6 +59,10 @@ protected:
 	virtual void output_callback(osd_output_channel channel, const char *msg, va_list args) override;
 
 private:
+	// internal map types
+	typedef std::unordered_map<std::string,const game_driver *> game_driver_map;
+	typedef std::unordered_map<std::string,FPTR> int_map;
+
 	// internal helpers
 	const char *ioport_string_from_index(UINT32 index);
 	int get_defstr_index(const char *string, bool suppress_error = false);
@@ -75,6 +75,7 @@ private:
 	// internal sub-checks
 	void validate_core();
 	void validate_inlines();
+	void validate_rgb();
 	void validate_driver();
 	void validate_roms();
 	void validate_analog_input_field(ioport_field &field);
@@ -88,6 +89,12 @@ private:
 	void output_via_delegate(osd_output_channel channel, const char *format, ...) ATTR_PRINTF(3,4);
 	void output_indented_errors(std::string &text, const char *header);
 
+	// random number generation
+	INT32 random_i32();
+	UINT32 random_u32();
+	INT64 random_i64();
+	UINT64 random_u64();
+
 	// internal driver list
 	driver_enumerator       m_drivlist;
 
@@ -115,4 +122,4 @@ private:
 	bool                    m_validate_all;
 };
 
-#endif
+#endif // MAME_EMU_VALIDITY_H
diff --git a/src/emu/video/rgbgen.h b/src/emu/video/rgbgen.h
index 42014d14106..22cd253b3f8 100644
--- a/src/emu/video/rgbgen.h
+++ b/src/emu/video/rgbgen.h
@@ -8,8 +8,8 @@
 
 ***************************************************************************/
 
-#ifndef __RGBGEN__
-#define __RGBGEN__
+#ifndef MAME_EMU_VIDEO_RGBGEN_H
+#define MAME_EMU_VIDEO_RGBGEN_H
 
 
 /***************************************************************************
@@ -19,36 +19,48 @@
 class rgbaint_t
 {
 public:
-	inline rgbaint_t(): m_a(0), m_r(0), m_g(0), m_b(0) { }
-	inline rgbaint_t(UINT32 rgba) { set(rgba); }
-	inline rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); }
-	inline rgbaint_t(const rgb_t& rgba) { set(rgba); }
+	rgbaint_t(): m_a(0), m_r(0), m_g(0), m_b(0) { }
+	explicit rgbaint_t(UINT32 rgba) { set(rgba); }
+	rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); }
+	explicit rgbaint_t(const rgb_t& rgba) { set(rgba); }
 
-	inline void set(rgbaint_t& other) { set(other.m_a, other.m_r, other.m_g, other.m_b); }
-	inline void set(UINT32 rgba) { set((rgba >> 24) & 0xff, (rgba >> 16) & 0xff, (rgba >> 8) & 0xff, rgba & 0xff); }
-	inline void set(INT32 a, INT32 r, INT32 g, INT32 b)
+	void set(const rgbaint_t& other) { set(other.m_a, other.m_r, other.m_g, other.m_b); }
+	void set(UINT32 rgba) { set((rgba >> 24) & 0xff, (rgba >> 16) & 0xff, (rgba >> 8) & 0xff, rgba & 0xff); }
+	void set(INT32 a, INT32 r, INT32 g, INT32 b)
 	{
 		m_a = a;
 		m_r = r;
 		m_g = g;
 		m_b = b;
 	}
-	inline void set(const rgb_t& rgba) { set(rgba.a(), rgba.r(), rgba.g(), rgba.b()); }
+	void set(const rgb_t& rgba) { set(rgba.a(), rgba.r(), rgba.g(), rgba.b()); }
 
-	inline rgb_t to_rgba() const
-	{
-		return rgb_t(m_a, m_r, m_g, m_b);
-	}
+	rgb_t to_rgba() const { return rgb_t(get_a(), get_r(), get_g(), get_b()); }
 
-	inline rgb_t to_rgba_clamp() const
+	rgb_t to_rgba_clamp() const
 	{
-		UINT8 a = (m_a < 0) ? 0 : (m_a > 255) ? 255 : m_a;
-		UINT8 r = (m_r < 0) ? 0 : (m_r > 255) ? 255 : m_r;
-		UINT8 g = (m_g < 0) ? 0 : (m_g > 255) ? 255 : m_g;
-		UINT8 b = (m_b < 0) ? 0 : (m_b > 255) ? 255 : m_b;
+		const UINT8 a = (m_a < 0) ? 0 : (m_a > 255) ? 255 : m_a;
+		const UINT8 r = (m_r < 0) ? 0 : (m_r > 255) ? 255 : m_r;
+		const UINT8 g = (m_g < 0) ? 0 : (m_g > 255) ? 255 : m_g;
+		const UINT8 b = (m_b < 0) ? 0 : (m_b > 255) ? 255 : m_b;
 		return rgb_t(a, r, g, b);
 	}
 
+	void set_a(const INT32 value) { m_a = value; }
+	void set_r(const INT32 value) { m_r = value; }
+	void set_g(const INT32 value) { m_g = value; }
+	void set_b(const INT32 value) { m_b = value; }
+
+	UINT8 get_a() const { return UINT8(UINT32(m_a)); }
+	UINT8 get_r() const { return UINT8(UINT32(m_r)); }
+	UINT8 get_g() const { return UINT8(UINT32(m_g)); }
+	UINT8 get_b() const { return UINT8(UINT32(m_b)); }
+
+	INT32 get_a32() const { return m_a; }
+	INT32 get_r32() const { return m_r; }
+	INT32 get_g32() const { return m_g; }
+	INT32 get_b32() const { return m_b; }
+
 	inline void add(const rgbaint_t& color)
 	{
 		add_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
@@ -85,7 +97,7 @@ public:
 		m_b -= b;
 	}
 
-	inline void subr(rgbaint_t& color)
+	inline void subr(const rgbaint_t& color)
 	{
 		subr_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
 	}
@@ -103,67 +115,7 @@ public:
 		m_b = b - m_b;
 	}
 
-	inline void set_a(const INT32 value)
-	{
-		m_a = value;
-	}
-
-	inline void set_r(const INT32 value)
-	{
-		m_r = value;
-	}
-
-	inline void set_g(const INT32 value)
-	{
-		m_g = value;
-	}
-
-	inline void set_b(const INT32 value)
-	{
-		m_b = value;
-	}
-
-	inline UINT8 get_a() const
-	{
-		return m_a;
-	}
-
-	inline UINT8 get_r() const
-	{
-		return m_r;
-	}
-
-	inline UINT8 get_g() const
-	{
-		return m_g;
-	}
-
-	inline UINT8 get_b() const
-	{
-		return m_b;
-	}
-
-	inline INT32 get_a32() const
-	{
-		return m_a;
-	}
-
-	inline INT32 get_r32() const
-	{
-		return m_r;
-	}
-
-	inline INT32 get_g32() const
-	{
-		return m_g;
-	}
-
-	inline INT32 get_b32() const
-	{
-		return m_b;
-	}
-
-	inline void mul(rgbaint_t& color)
+	inline void mul(const rgbaint_t& color)
 	{
 		mul_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
 	}
@@ -202,10 +154,10 @@ public:
 
 	inline void shr(const rgbaint_t& shift)
 	{
-		m_a >>= shift.m_a;
-		m_r >>= shift.m_r;
-		m_g >>= shift.m_g;
-		m_b >>= shift.m_b;
+		m_a = INT32(UINT32(m_a) >> shift.m_a);
+		m_r = INT32(UINT32(m_r) >> shift.m_r);
+		m_g = INT32(UINT32(m_g) >> shift.m_g);
+		m_b = INT32(UINT32(m_b) >> shift.m_b);
 	}
 
 	inline void shr_imm(const UINT8 shift)
@@ -213,10 +165,10 @@ public:
 		if (shift == 0)
 			return;
 
-		m_a >>= shift;
-		m_r >>= shift;
-		m_g >>= shift;
-		m_b >>= shift;
+		m_a = INT32(UINT32(m_a) >> shift);
+		m_r = INT32(UINT32(m_r) >> shift);
+		m_g = INT32(UINT32(m_g) >> shift);
+		m_b = INT32(UINT32(m_b) >> shift);
 	}
 
 	inline void sra(const rgbaint_t& shift)
@@ -321,22 +273,12 @@ public:
 
 	inline void clamp_and_clear(const UINT32 sign)
 	{
-		if (m_a & sign)
-			m_a = 0;
+		if (m_a & sign) m_a = 0;  // zero any channel that shares a set bit with sign
+		if (m_r & sign) m_r = 0;
+		if (m_g & sign) m_g = 0;
+		if (m_b & sign) m_b = 0;
 
-		if (m_r & sign)
-			m_r = 0;
-
-		if (m_g & sign)
-			m_g = 0;
-
-		if (m_b & sign)
-			m_b = 0;
-
-		m_a = (m_a < 0) ? 0 : (m_a > 255) ? 255 : m_a;
-		m_r = (m_r < 0) ? 0 : (m_r > 255) ? 255 : m_r;
-		m_g = (m_g < 0) ? 0 : (m_g > 255) ? 255 : m_g;
-		m_b = (m_b < 0) ? 0 : (m_b > 255) ? 255 : m_b;
+		clamp_to_uint8();  // the remaining range limiting is delegated to clamp_to_uint8()
 	}
 
 	inline void clamp_to_uint8()
@@ -370,6 +312,14 @@ public:
 		m_b = (m_b > value) ? value : m_b;
 	}
 
+	inline void max(const INT32 value)  // per-channel maximum: any channel below value is raised to value
+	{
+		m_a = (m_a < value) ? value : m_a;  // channels already >= value are left unchanged
+		m_r = (m_r < value) ? value : m_r;
+		m_g = (m_g < value) ? value : m_g;
+		m_b = (m_b < value) ? value : m_b;
+	}
+
 	void blend(const rgbaint_t& other, UINT8 factor);
 
 	void scale_and_clamp(const rgbaint_t& scale);
@@ -426,35 +376,50 @@ public:
 		m_b = (m_b < value) ? 0xffffffff : 0;
 	}
 
-	inline void merge_alpha(rgbaint_t& alpha)
+	inline void merge_alpha(const rgbaint_t& alpha)  // copies only alpha.m_a into m_a; r/g/b stay untouched
 	{
 		m_a = alpha.m_a;
 	}
 
-	inline rgbaint_t operator=(const rgbaint_t& other)
+	rgbaint_t &operator=(const rgbaint_t& other)  // copy-assigns all four channels and returns *this by reference
 	{
-		m_a = other.m_a;
-		m_r = other.m_r;
-		m_g = other.m_g;
-		m_b = other.m_b;
+		set(other.m_a, other.m_r, other.m_g, other.m_b);  // routed through the four-argument set()
 		return *this;
 	}
 
-	inline rgbaint_t& operator+=(const rgbaint_t& other)
+	rgbaint_t& operator+=(const rgbaint_t& other)  // channel-wise add of other; returns *this
 	{
-		m_a += other.m_a;
-		m_r += other.m_r;
-		m_g += other.m_g;
-		m_b += other.m_b;
+		add_imm_rgba(other.m_a, other.m_r, other.m_g, other.m_b);  // same helper that add() calls
 		return *this;
 	}
 
-	inline rgbaint_t& operator+=(const INT32 other)
+	rgbaint_t& operator+=(const INT32 other)  // adds the same scalar to every channel; returns *this
 	{
-		m_a += other;
-		m_r += other;
-		m_g += other;
-		m_b += other;
+		add_imm_rgba(other, other, other, other);  // scalar repeated for a, r, g and b
+		return *this;
+	}
+
+	rgbaint_t &operator-=(const rgbaint_t& other)  // channel-wise subtract of other; returns *this
+	{
+		sub_imm_rgba(other.m_a, other.m_r, other.m_g, other.m_b);  // other's channels are passed in a, r, g, b order
+		return *this;
+	}
+
+	rgbaint_t& operator*=(const rgbaint_t& other)  // channel-wise multiply by other; returns *this
+	{
+		mul_imm_rgba(other.m_a, other.m_r, other.m_g, other.m_b);  // same helper that mul() calls
+		return *this;
+	}
+
+	rgbaint_t& operator*=(const INT32 other)  // multiplies every channel by the same scalar; returns *this
+	{
+		mul_imm_rgba(other, other, other, other);  // scalar repeated for a, r, g and b
+		return *this;
+	}
+
+	rgbaint_t& operator>>=(const INT32 shift)  // delegates to sra_imm(shift); returns *this
+	{
+		sra_imm(shift);  // NOTE(review): presumably the arithmetic (sign-preserving) shift, unlike shr_imm - confirm against sra_imm
 		return *this;
 	}
 
@@ -477,7 +442,7 @@ public:
 		return ((ag0 << 8) & 0xff00ff00) | (rb0 & 0x00ff00ff);
 	}
 
-	inline void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
+	void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
 	{
 		UINT32 rb0 = (rgb00 & 0x00ff00ff) + ((((rgb01 & 0x00ff00ff) - (rgb00 & 0x00ff00ff)) * u) >> 8);
 		UINT32 rb1 = (rgb10 & 0x00ff00ff) + ((((rgb11 & 0x00ff00ff) - (rgb10 & 0x00ff00ff)) * u) >> 8);
@@ -504,4 +469,4 @@ protected:
 	INT32 m_b;
 };
 
-#endif /* __RGBGEN__ */
+#endif // MAME_EMU_VIDEO_RGBGEN_H
diff --git a/src/emu/video/rgbsse.cpp b/src/emu/video/rgbsse.cpp
index ed20479fd9e..a26abed5609 100644
--- a/src/emu/video/rgbsse.cpp
+++ b/src/emu/video/rgbsse.cpp
@@ -166,13 +166,13 @@ const struct rgbaint_t::_statics rgbaint_t::statics =
 
 void rgbaint_t::blend(const rgbaint_t& other, UINT8 factor)
 {
-	__m128i scale1 = _mm_set1_epi32(factor);
-	__m128i scale2 = _mm_sub_epi32(_mm_set1_epi32(0x100), scale1);
+	const __m128i scale1 = _mm_set1_epi32(factor);  // factor replicated into all four 32-bit lanes
+	const rgbaint_t scale2(_mm_sub_epi32(_mm_set1_epi32(0x100), scale1));  // (0x100 - factor) per lane, wrapped so mul() accepts it
 
 	rgbaint_t scaled_other(other);
 	scaled_other.mul(scale2);
 
-	mul(scale1);
+	mul(rgbaint_t(scale1));  // raw __m128i is wrapped explicitly before being passed to mul()
 	add(scaled_other);
 	sra_imm(8);
 }
diff --git a/src/emu/video/rgbsse.h b/src/emu/video/rgbsse.h
index 99e99ddcde2..6b7c1bd6043 100644
--- a/src/emu/video/rgbsse.h
+++ b/src/emu/video/rgbsse.h
@@ -10,10 +10,16 @@
 
 ***************************************************************************/
 
-#ifndef __RGBSSE__
-#define __RGBSSE__
+#ifndef MAME_EMU_VIDEO_RGBSSE_H
+#define MAME_EMU_VIDEO_RGBSSE_H
+
+#pragma once
 
 #include <emmintrin.h>
+#ifdef __SSE4_1__
+#include <smmintrin.h>
+#endif
+
 
 /***************************************************************************
     TYPE DEFINITIONS
@@ -22,16 +28,16 @@
 class rgbaint_t
 {
 public:
-	inline rgbaint_t() { }
-	inline rgbaint_t(UINT32 rgba) { set(rgba); }
-	inline rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); }
-	inline rgbaint_t(const rgb_t& rgb) { set(rgb); }
-	inline rgbaint_t(__m128i rgba) { m_value = rgba; }
+	rgbaint_t() { }  // empty body: m_value is not assigned here
+	explicit rgbaint_t(UINT32 rgba) { set(rgba); }  // explicit: a bare integer no longer converts implicitly
+	rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); }  // one INT32 per channel
+	explicit rgbaint_t(const rgb_t& rgb) { set(rgb); }  // explicit: an rgb_t no longer converts implicitly
+	explicit rgbaint_t(__m128i rgba) { m_value = rgba; }  // adopts the raw vector unchanged; explicit as well
 
-	inline void set(rgbaint_t& other) { m_value = other.m_value; }
-	inline void set(UINT32 rgba) { m_value = _mm_and_si128(_mm_set1_epi32(0xff), _mm_set_epi32(rgba >> 24, rgba >> 16, rgba >> 8, rgba)); }
-	inline void set(INT32 a, INT32 r, INT32 g, INT32 b) { m_value = _mm_set_epi32(a, r, g, b); }
-	inline void set(const rgb_t& rgb) { m_value = _mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(rgb), _mm_setzero_si128()), _mm_setzero_si128()); }
+	void set(const rgbaint_t& other) { m_value = other.m_value; }  // whole-vector copy; other is only read
+	void set(UINT32 rgba) { m_value = _mm_and_si128(_mm_set1_epi32(0xff), _mm_set_epi32(rgba >> 24, rgba >> 16, rgba >> 8, rgba)); }  // one byte of rgba per lane (bits 31-24 in the top lane), each masked to 0xff
+	void set(INT32 a, INT32 r, INT32 g, INT32 b) { m_value = _mm_set_epi32(a, r, g, b); }  // a lands in the top lane (3), b in lane 0
+	void set(const rgb_t& rgb) { m_value = _mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(rgb), _mm_setzero_si128()), _mm_setzero_si128()); }  // zero-extends the four bytes of the 32-bit value to four 32-bit lanes
 
 	inline rgb_t to_rgba() const
 	{
@@ -43,6 +49,35 @@ public:
 		return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(m_value, _mm_setzero_si128()), _mm_setzero_si128()));
 	}
 
+#ifdef __SSE4_1__  // SSE4.1 path: single-instruction lane insert
+	void set_a(const INT32 value) { m_value = _mm_insert_epi32(m_value, value, 3); }  // alpha is 32-bit lane 3
+	void set_r(const INT32 value) { m_value = _mm_insert_epi32(m_value, value, 2); }  // red is lane 2
+	void set_g(const INT32 value) { m_value = _mm_insert_epi32(m_value, value, 1); }  // green is lane 1
+	void set_b(const INT32 value) { m_value = _mm_insert_epi32(m_value, value, 0); }  // blue is lane 0
+#else  // SSE2 fallback: AND with the channel's mask, then OR in value positioned in its lane
+	void set_a(const INT32 value) { m_value = _mm_or_si128(_mm_and_si128(m_value, alpha_mask()), _mm_set_epi32(value, 0, 0, 0)); }
+	void set_r(const INT32 value) { m_value = _mm_or_si128(_mm_and_si128(m_value, red_mask()), _mm_set_epi32(0, value, 0, 0)); }
+	void set_g(const INT32 value) { m_value = _mm_or_si128(_mm_and_si128(m_value, green_mask()), _mm_set_epi32(0, 0, value, 0)); }
+	void set_b(const INT32 value) { m_value = _mm_or_si128(_mm_and_si128(m_value, blue_mask()), _mm_set_epi32(0, 0, 0, value)); }
+#endif
+
+	UINT8 get_a() const { return UINT8(unsigned(_mm_extract_epi16(m_value, 6))); }  // 16-bit lane 6 = low half of 32-bit lane 3, truncated to 8 bits
+	UINT8 get_r() const { return UINT8(unsigned(_mm_extract_epi16(m_value, 4))); }  // low half of 32-bit lane 2
+	UINT8 get_g() const { return UINT8(unsigned(_mm_extract_epi16(m_value, 2))); }  // low half of 32-bit lane 1
+	UINT8 get_b() const { return UINT8(unsigned(_mm_extract_epi16(m_value, 0))); }  // low half of 32-bit lane 0
+
+#ifdef __SSE4_1__  // SSE4.1 path: read the whole 32-bit lane directly
+	INT32 get_a32() const { return _mm_extract_epi32(m_value, 3); }  // alpha is lane 3
+	INT32 get_r32() const { return _mm_extract_epi32(m_value, 2); }  // red is lane 2
+	INT32 get_g32() const { return _mm_extract_epi32(m_value, 1); }  // green is lane 1
+	INT32 get_b32() const { return _mm_extract_epi32(m_value, 0); }  // blue is lane 0
+#else  // SSE2 fallback: rebuild each 32-bit lane from its two 16-bit halves (odd index = high half)
+	INT32 get_a32() const { return (_mm_extract_epi16(m_value, 7) << 16) | _mm_extract_epi16(m_value, 6); }
+	INT32 get_r32() const { return (_mm_extract_epi16(m_value, 5) << 16) | _mm_extract_epi16(m_value, 4); }
+	INT32 get_g32() const { return (_mm_extract_epi16(m_value, 3) << 16) | _mm_extract_epi16(m_value, 2); }
+	INT32 get_b32() const { return (_mm_extract_epi16(m_value, 1) << 16) | _mm_extract_epi16(m_value, 0); }
+#endif
+
 	inline void add(const rgbaint_t& color2)
 	{
 		m_value = _mm_add_epi32(m_value, color2.m_value);
@@ -73,7 +108,7 @@ public:
 		m_value = _mm_sub_epi32(m_value, _mm_set_epi32(a, r, g, b));
 	}
 
-	inline void subr(rgbaint_t& color2)
+	inline void subr(const rgbaint_t& color2)  // reversed subtract: m_value = color2 - m_value per 32-bit lane; color2 is only read
 	{
 		m_value = _mm_sub_epi32(color2.m_value, m_value);
 	}
@@ -88,66 +123,6 @@ public:
 		m_value = _mm_sub_epi32(_mm_set_epi32(a, r, g, b), m_value);
 	}
 
-	inline void set_a(const INT32 value)
-	{
-		m_value = _mm_or_si128(_mm_and_si128(m_value, alpha_mask()), _mm_set_epi32(value, 0, 0, 0));
-	}
-
-	inline void set_r(const INT32 value)
-	{
-		m_value = _mm_or_si128(_mm_and_si128(m_value, red_mask()), _mm_set_epi32(0, value, 0, 0));
-	}
-
-	inline void set_g(const INT32 value)
-	{
-		m_value = _mm_or_si128(_mm_and_si128(m_value, green_mask()), _mm_set_epi32(0, 0, value, 0));
-	}
-
-	inline void set_b(const INT32 value)
-	{
-		m_value = _mm_or_si128(_mm_and_si128(m_value, blue_mask()), _mm_set_epi32(0, 0, 0, value));
-	}
-
-	inline UINT8 get_a() const
-	{
-		return _mm_extract_epi16(m_value, 6);
-	}
-
-	inline UINT8 get_r() const
-	{
-		return _mm_extract_epi16(m_value, 4);
-	}
-
-	inline UINT8 get_g() const
-	{
-		return _mm_extract_epi16(m_value, 2);
-	}
-
-	inline UINT8 get_b() const
-	{
-		return _mm_extract_epi16(m_value, 0);
-	}
-
-	inline INT32 get_a32() const
-	{
-		return (_mm_extract_epi16(m_value, 7) << 16) | _mm_extract_epi16(m_value, 6);
-	}
-
-	inline INT32 get_r32() const
-	{
-		return (_mm_extract_epi16(m_value, 5) << 16) | _mm_extract_epi16(m_value, 4);
-	}
-
-	inline INT32 get_g32() const
-	{
-		return (_mm_extract_epi16(m_value, 3) << 16) | _mm_extract_epi16(m_value, 2);
-	}
-
-	inline INT32 get_b32() const
-	{
-		return (_mm_extract_epi16(m_value, 1) << 16) | _mm_extract_epi16(m_value, 0);
-	}
-
 	inline void mul(const rgbaint_t& color)
 	{
 		__m128i tmp1 = _mm_mul_epu32(m_value, color.m_value);
@@ -414,7 +389,7 @@ public:
 		m_value = _mm_cmplt_epi32(m_value, _mm_set_epi32(a, r, g, b));
 	}
 
-	inline rgbaint_t operator=(const rgbaint_t& other)
+	inline rgbaint_t &operator=(const rgbaint_t& other)
 	{
 		m_value = other.m_value;
 		return *this;
@@ -459,8 +434,12 @@ public:
 
 	inline void merge_alpha(const rgbaint_t& alpha)
 	{
+#ifdef __SSE4_1__  // SSE4.1 path: move the whole 32-bit lane in one extract/insert pair
+		m_value = _mm_insert_epi32(m_value, _mm_extract_epi32(alpha.m_value, 3), 3);  // lane 3 of alpha replaces lane 3 of this
+#else  // SSE2 fallback: copy lane 3 as its two 16-bit halves (indices 7 and 6)
 		m_value = _mm_insert_epi16(m_value, _mm_extract_epi16(alpha.m_value, 7), 7);
 		m_value = _mm_insert_epi16(m_value, _mm_extract_epi16(alpha.m_value, 6), 6);
+#endif
 	}
 
 	static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
@@ -487,7 +466,7 @@ public:
 		return _mm_cvtsi128_si32(color01);
 	}
 
-	inline void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
+	void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
 	{
 		__m128i color00 = _mm_cvtsi32_si128(rgb00);
 		__m128i color01 = _mm_cvtsi32_si128(rgb01);
@@ -519,11 +498,11 @@ protected:
 		INT16   scale_table[256][8];
 	};
 
-	static inline __m128i alpha_mask() { return *(__m128i *)&statics.alpha_mask[0]; }
-	static inline __m128i red_mask() { return *(__m128i *)&statics.red_mask[0]; }
-	static inline __m128i green_mask() { return *(__m128i *)&statics.green_mask[0]; }
-	static inline __m128i blue_mask() { return *(__m128i *)&statics.blue_mask[0]; }
-	static inline __m128i scale_factor(UINT8 index) { return *(__m128i *)&statics.scale_table[index][0]; }
+	static __m128i alpha_mask() { return *(__m128i *)&statics.alpha_mask[0]; }  // reads statics.alpha_mask reinterpreted as one __m128i
+	static __m128i red_mask() { return *(__m128i *)&statics.red_mask[0]; }  // same for statics.red_mask
+	static __m128i green_mask() { return *(__m128i *)&statics.green_mask[0]; }  // same for statics.green_mask
+	static __m128i blue_mask() { return *(__m128i *)&statics.blue_mask[0]; }  // same for statics.blue_mask
+	static __m128i scale_factor(UINT8 index) { return *(__m128i *)&statics.scale_table[index][0]; }  // row 'index' of scale_table (eight INT16) as one __m128i
 
 	__m128i m_value;
 
diff --git a/src/emu/video/rgbutil.h b/src/emu/video/rgbutil.h
index 15def4da2c0..e0ccd299a4e 100644
--- a/src/emu/video/rgbutil.h
+++ b/src/emu/video/rgbutil.h
@@ -9,10 +9,10 @@
 
 ***************************************************************************/
 
-#ifndef __RGBUTIL__
-#define __RGBUTIL__
+#ifndef MAME_EMU_VIDEO_RGBUTIL_H
+#define MAME_EMU_VIDEO_RGBUTIL_H
 
-/* use SSE on 64-bit implementations, where it can be assumed */
+// use SSE on 64-bit implementations, where it can be assumed
 #if (!defined(MAME_DEBUG) || defined(__OPTIMIZE__)) && (defined(__SSE2__) || defined(_MSC_VER)) && defined(PTR64)
 #include "rgbsse.h"
 #elif defined(__ALTIVEC__)
@@ -21,4 +21,4 @@
 #include "rgbgen.h"
 #endif
 
-#endif /* __RGBUTIL__ */
+#endif // MAME_EMU_VIDEO_RGBUTIL_H
diff --git a/src/emu/video/rgbvmx.h b/src/emu/video/rgbvmx.h
index e034261461e..4aca3ff8fa9 100644
--- a/src/emu/video/rgbvmx.h
+++ b/src/emu/video/rgbvmx.h
@@ -28,15 +28,15 @@ protected:
 	typedef __vector unsigned int   VECU32;
 
 public:
-	inline rgbaint_t() { set(0, 0, 0, 0); }
-	inline rgbaint_t(UINT32 rgba) { set(rgba); }
-	inline rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); }
-	inline rgbaint_t(const rgb_t& rgb) { set(rgb); }
-	inline rgbaint_t(VECS32 rgba) : m_value(rgba) { }
+	rgbaint_t() { set(0, 0, 0, 0); }  // default construction zeroes all four channels
+	explicit rgbaint_t(UINT32 rgba) { set(rgba); }  // explicit: a bare integer no longer converts implicitly
+	rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); }  // one INT32 per channel
+	explicit rgbaint_t(const rgb_t& rgb) { set(rgb); }  // explicit: an rgb_t no longer converts implicitly
+	explicit rgbaint_t(VECS32 rgba) : m_value(rgba) { }  // adopts the raw vector unchanged; explicit as well
 
-	inline void set(rgbaint_t& other) { m_value = other.m_value; }
+	void set(const rgbaint_t& other) { m_value = other.m_value; }  // whole-vector copy; other is only read
 
-	inline void set(UINT32 rgba)
+	void set(UINT32 rgba)
 	{
 		const VECU32 zero = { 0, 0, 0, 0 };
 #ifdef __LITTLE_ENDIAN__
@@ -48,7 +48,7 @@ public:
 #endif
 	}
 
-	inline void set(INT32 a, INT32 r, INT32 g, INT32 b)
+	void set(INT32 a, INT32 r, INT32 g, INT32 b)
 	{
 #ifdef __LITTLE_ENDIAN__
 		const VECS32 result = { b, g, r, a };
@@ -58,7 +58,7 @@ public:
 		m_value = result;
 	}
 
-	inline void set(const rgb_t& rgb)
+	void set(const rgb_t& rgb)
 	{
 		const VECU32 zero = { 0, 0, 0, 0 };
 #ifdef __LITTLE_ENDIAN__
@@ -88,6 +88,118 @@ public:
 		return result;
 	}
 
+	void set_a(const INT32 value)  // replaces the alpha channel, using the alpha_perm byte selector
+	{
+		const VECS32 temp = { value, value, value, value };  // value in every element, so the source element chosen does not matter
+		m_value = vec_perm(m_value, temp, alpha_perm);  // alpha_perm picks, byte by byte, between m_value and temp
+	}
+
+	void set_r(const INT32 value)  // replaces the red channel, using the red_perm byte selector
+	{
+		const VECS32 temp = { value, value, value, value };  // value in every element, so the source element chosen does not matter
+		m_value = vec_perm(m_value, temp, red_perm);  // red_perm picks, byte by byte, between m_value and temp
+	}
+
+	void set_g(const INT32 value)  // replaces the green channel, using the green_perm byte selector
+	{
+		const VECS32 temp = { value, value, value, value };  // value in every element, so the source element chosen does not matter
+		m_value = vec_perm(m_value, temp, green_perm);  // green_perm picks, byte by byte, between m_value and temp
+	}
+
+	void set_b(const INT32 value)  // replaces the blue channel, using the blue_perm byte selector
+	{
+		const VECS32 temp = { value, value, value, value };  // value in every element, so the source element chosen does not matter
+		m_value = vec_perm(m_value, temp, blue_perm);  // blue_perm picks, byte by byte, between m_value and temp
+	}
+
+	UINT8 get_a() const  // returns one byte of m_value; which byte depends on endianness
+	{
+		UINT8 result;
+#ifdef __LITTLE_ENDIAN__
+		vec_ste(vec_splat(VECU8(m_value), 12), 0, &result);  // splat first: vec_ste stores whichever element matches the destination address
+#else
+		vec_ste(vec_splat(VECU8(m_value), 3), 0, &result);  // big-endian: byte 3 instead of byte 12
+#endif
+		return result;
+	}
+
+	UINT8 get_r() const  // returns one byte of m_value; which byte depends on endianness
+	{
+		UINT8 result;
+#ifdef __LITTLE_ENDIAN__
+		vec_ste(vec_splat(VECU8(m_value), 8), 0, &result);  // splat first: vec_ste stores whichever element matches the destination address
+#else
+		vec_ste(vec_splat(VECU8(m_value), 7), 0, &result);  // big-endian: byte 7 instead of byte 8
+#endif
+		return result;
+	}
+
+	UINT8 get_g() const  // returns one byte of m_value; which byte depends on endianness
+	{
+		UINT8 result;
+#ifdef __LITTLE_ENDIAN__
+		vec_ste(vec_splat(VECU8(m_value), 4), 0, &result);  // splat first: vec_ste stores whichever element matches the destination address
+#else
+		vec_ste(vec_splat(VECU8(m_value), 11), 0, &result);  // big-endian: byte 11 instead of byte 4
+#endif
+		return result;
+	}
+
+	UINT8 get_b() const  // returns one byte of m_value; which byte depends on endianness
+	{
+		UINT8 result;
+#ifdef __LITTLE_ENDIAN__
+		vec_ste(vec_splat(VECU8(m_value), 0), 0, &result);  // splat first: vec_ste stores whichever element matches the destination address
+#else
+		vec_ste(vec_splat(VECU8(m_value), 15), 0, &result);  // big-endian: byte 15 instead of byte 0
+#endif
+		return result;
+	}
+
+	INT32 get_a32() const  // returns one full 32-bit element of m_value; which element depends on endianness
+	{
+		INT32 result;
+#ifdef __LITTLE_ENDIAN__
+		vec_ste(vec_splat(m_value, 3), 0, &result);  // splat first: vec_ste stores whichever element matches the destination address
+#else
+		vec_ste(vec_splat(m_value, 0), 0, &result);  // big-endian: element 0 instead of element 3
+#endif
+		return result;
+	}
+
+	INT32 get_r32() const  // returns one full 32-bit element of m_value; which element depends on endianness
+	{
+		INT32 result;
+#ifdef __LITTLE_ENDIAN__
+		vec_ste(vec_splat(m_value, 2), 0, &result);  // splat first: vec_ste stores whichever element matches the destination address
+#else
+		vec_ste(vec_splat(m_value, 1), 0, &result);  // big-endian: element 1 instead of element 2
+#endif
+		return result;
+	}
+
+	INT32 get_g32() const  // returns one full 32-bit element of m_value; which element depends on endianness
+	{
+		INT32 result;
+#ifdef __LITTLE_ENDIAN__
+		vec_ste(vec_splat(m_value, 1), 0, &result);  // splat first: vec_ste stores whichever element matches the destination address
+#else
+		vec_ste(vec_splat(m_value, 2), 0, &result);  // big-endian: element 2 instead of element 1
+#endif
+		return result;
+	}
+
+	INT32 get_b32() const  // returns one full 32-bit element of m_value; which element depends on endianness
+	{
+		INT32 result;
+#ifdef __LITTLE_ENDIAN__
+		vec_ste(vec_splat(m_value, 0), 0, &result);  // splat first: vec_ste stores whichever element matches the destination address
+#else
+		vec_ste(vec_splat(m_value, 3), 0, &result);  // big-endian: element 3 instead of element 0
+#endif
+		return result;
+	}
+
 	inline void add(const rgbaint_t& color2)
 	{
 		m_value = vec_add(m_value, color2.m_value);
@@ -130,7 +242,7 @@ public:
 		m_value = vec_sub(m_value, temp);
 	}
 
-	inline void subr(rgbaint_t& color2)
+	inline void subr(const rgbaint_t& color2)  // reversed subtract: m_value = color2 - m_value per element; color2 is only read
 	{
 		m_value = vec_sub(color2.m_value, m_value);
 	}
@@ -151,118 +263,6 @@ public:
 		m_value = vec_sub(temp, m_value);
 	}
 
-	inline void set_a(const INT32 value)
-	{
-		const VECS32 temp = { value, value, value, value };
-		m_value = vec_perm(m_value, temp, alpha_perm);
-	}
-
-	inline void set_r(const INT32 value)
-	{
-		const VECS32 temp = { value, value, value, value };
-		m_value = vec_perm(m_value, temp, red_perm);
-	}
-
-	inline void set_g(const INT32 value)
-	{
-		const VECS32 temp = { value, value, value, value };
-		m_value = vec_perm(m_value, temp, green_perm);
-	}
-
-	inline void set_b(const INT32 value)
-	{
-		const VECS32 temp = { value, value, value, value };
-		m_value = vec_perm(m_value, temp, blue_perm);
-	}
-
-	inline UINT8 get_a() const
-	{
-		UINT8 result;
-#ifdef __LITTLE_ENDIAN__
-		vec_ste(vec_splat(VECU8(m_value), 12), 0, &result);
-#else
-		vec_ste(vec_splat(VECU8(m_value), 3), 0, &result);
-#endif
-		return result;
-	}
-
-	inline UINT8 get_r() const
-	{
-		UINT8 result;
-#ifdef __LITTLE_ENDIAN__
-		vec_ste(vec_splat(VECU8(m_value), 8), 0, &result);
-#else
-		vec_ste(vec_splat(VECU8(m_value), 7), 0, &result);
-#endif
-		return result;
-	}
-
-	inline UINT8 get_g() const
-	{
-		UINT8 result;
-#ifdef __LITTLE_ENDIAN__
-		vec_ste(vec_splat(VECU8(m_value), 4), 0, &result);
-#else
-		vec_ste(vec_splat(VECU8(m_value), 11), 0, &result);
-#endif
-		return result;
-	}
-
-	inline UINT8 get_b() const
-	{
-		UINT8 result;
-#ifdef __LITTLE_ENDIAN__
-		vec_ste(vec_splat(VECU8(m_value), 0), 0, &result);
-#else
-		vec_ste(vec_splat(VECU8(m_value), 15), 0, &result);
-#endif
-		return result;
-	}
-
-	inline INT32 get_a32() const
-	{
-		INT32 result;
-#ifdef __LITTLE_ENDIAN__
-		vec_ste(vec_splat(m_value, 3), 0, &result);
-#else
-		vec_ste(vec_splat(m_value, 0), 0, &result);
-#endif
-		return result;
-	}
-
-	inline INT32 get_r32() const
-	{
-		INT32 result;
-#ifdef __LITTLE_ENDIAN__
-		vec_ste(vec_splat(m_value, 2), 0, &result);
-#else
-		vec_ste(vec_splat(m_value, 1), 0, &result);
-#endif
-		return result;
-	}
-
-	inline INT32 get_g32() const
-	{
-		INT32 result;
-#ifdef __LITTLE_ENDIAN__
-		vec_ste(vec_splat(m_value, 1), 0, &result);
-#else
-		vec_ste(vec_splat(m_value, 2), 0, &result);
-#endif
-		return result;
-	}
-
-	inline INT32 get_b32() const
-	{
-		INT32 result;
-#ifdef __LITTLE_ENDIAN__
-		vec_ste(vec_splat(m_value, 0), 0, &result);
-#else
-		vec_ste(vec_splat(m_value, 3), 0, &result);
-#endif
-		return result;
-	}
-
 	inline void mul(const rgbaint_t& color)
 	{
 		const VECU32 shift = vec_splat_u32(-16);
@@ -545,7 +545,7 @@ public:
 		m_value = VECS32(vec_cmplt(m_value, temp));
 	}
 
-	inline rgbaint_t operator=(const rgbaint_t& other)
+	inline rgbaint_t &operator=(const rgbaint_t& other)
 	{
 		m_value = other.m_value;
 		return *this;
@@ -607,7 +607,7 @@ public:
 		m_value = vec_perm(m_value, alpha.m_value, alpha_perm);
 	}
 
-	static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
+	static UINT32 bilinear_filter(const UINT32 &rgb00, const UINT32 &rgb01, const UINT32 &rgb10, const UINT32 &rgb11, UINT8 u, UINT8 v)
 	{
 		const VECS32 zero = vec_splat_s32(0);
 
@@ -650,7 +650,7 @@ public:
 		return result;
 	}
 
-	inline void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
+	void bilinear_filter_rgbaint(const UINT32 &rgb00, const UINT32 &rgb01, const UINT32 &rgb10, const UINT32 &rgb11, UINT8 u, UINT8 v)
 	{
 		const VECS32 zero = vec_splat_s32(0);
 
@@ -688,13 +688,13 @@ public:
 	}
 
 protected:
-	VECS32                          m_value;
+	VECS32              m_value;
 
-	static const VECU8              alpha_perm;
-	static const VECU8              red_perm;
-	static const VECU8              green_perm;
-	static const VECU8              blue_perm;
-	static const VECS16             scale_table[256];
+	static const VECU8  alpha_perm;
+	static const VECU8  red_perm;
+	static const VECU8  green_perm;
+	static const VECU8  blue_perm;
+	static const VECS16 scale_table[256];
 };
 
 
diff --git a/src/mame/video/rdptpipe.cpp b/src/mame/video/rdptpipe.cpp
index b8371c0c0d7..31d63bb532e 100644
--- a/src/mame/video/rdptpipe.cpp
+++ b/src/mame/video/rdptpipe.cpp
@@ -342,7 +342,7 @@ void n64_texture_pipe_t::cycle_linear_lerp(color_t* TEX, color_t* prev, INT32 SS
 		TEX->add(t0);
 		TEX->add(t2);
 		TEX->add(t3);
-		TEX->sra(2);
+		TEX->sra_imm(2);
 	}
 }