Merge branch 'master' of github.com:mamedev/mame

This commit is contained in:
MetalliC 2017-10-09 18:00:56 +03:00
commit 72e83e77fd
7 changed files with 114 additions and 56 deletions

View File

@ -1361,6 +1361,7 @@ static inline void ATTR_FORCE_INLINE applyFogging(voodoo_device *vd, uint32_t fb
/* if fog_mult is zero, we subtract the incoming color */
if (!FOGMODE_FOG_MULT(fogModeReg))
{
// Need to check this, manual states 9 bits
fogColorLocal.sub(color);
//fog.rgb -= color.rgb;
//fr -= (RR);
@ -1423,9 +1424,11 @@ static inline void ATTR_FORCE_INLINE applyFogging(voodoo_device *vd, uint32_t fb
//fg = (fg * fogblend) >> 8;
//fb = (fb * fogblend) >> 8;
/* if fog_mult is 0, we add this to the original color */
fogColorLocal.scale_imm_and_clamp((int16_t)fogblend);
if (FOGMODE_FOG_MULT(fogModeReg) == 0)
{
fogColorLocal.scale_imm_add_and_clamp(fogblend, color);
fogColorLocal.add(color);
fogColorLocal.clamp_to_uint8();
//color += fog;
//(RR) += fr;
//(GG) += fg;
@ -1435,7 +1438,6 @@ static inline void ATTR_FORCE_INLINE applyFogging(voodoo_device *vd, uint32_t fb
/* otherwise this just becomes the new color */
else
{
fogColorLocal.scale_imm_and_clamp(fogblend);
//color = fog;
//(RR) = fr;
//(GG) = fg;

View File

@ -770,6 +770,38 @@ void validity_checker::validate_rgb()
rgb.mul_imm_rgba(actual_a, actual_r, actual_g, actual_b);
check_expected("rgbaint_t::mul_imm_rgba");
// test select alpha element multiplication
expected_a *= actual_a = random_i32();
expected_r *= actual_a;
expected_g *= actual_a;
expected_b *= actual_a;
rgb.mul(rgbaint_t(actual_a, actual_r, actual_g, actual_b).select_alpha32());
check_expected("rgbaint_t::mul(select_alpha32)");
// test select red element multiplication
expected_a *= actual_r = random_i32();
expected_r *= actual_r;
expected_g *= actual_r;
expected_b *= actual_r;
rgb.mul(rgbaint_t(actual_a, actual_r, actual_g, actual_b).select_red32());
check_expected("rgbaint_t::mul(select_red32)");
// test select green element multiplication
expected_a *= actual_g = random_i32();
expected_r *= actual_g;
expected_g *= actual_g;
expected_b *= actual_g;
rgb.mul(rgbaint_t(actual_a, actual_r, actual_g, actual_b).select_green32());
check_expected("rgbaint_t::mul(select_green32)");
// test select blue element multiplication
expected_a *= actual_b = random_i32();
expected_r *= actual_b;
expected_g *= actual_b;
expected_b *= actual_b;
rgb.mul(rgbaint_t(actual_a, actual_r, actual_g, actual_b).select_blue32());
check_expected("rgbaint_t::mul(select_blue32)");
// test RGB and not
expected_a &= ~(actual_a = random_i32());
expected_r &= ~(actual_r = random_i32());

View File

@ -77,26 +77,6 @@ void rgbaint_t::scale_and_clamp(const rgbaint_t& scale)
}
// Multiply every channel by the immediate `scale`, shift the product right by
// eight bits, add the matching channel of `other`, and clamp each result to
// the 0..255 range.
void rgbaint_t::scale_imm_add_and_clamp(s32 scale, const rgbaint_t& other)
{
	// The addend is taken by reference so that it is read only after the
	// channel has been scaled, matching straight-line evaluation order.
	auto const scale_add_clamp = [scale] (s32 &channel, const s32 &addend)
	{
		channel = (channel * scale) >> 8;

		// If bit 23 of the shifted product is set, force the top byte to ones
		// (presumably a sign-extension guard for non-arithmetic shifts).
		channel |= (channel & 0x00800000) ? 0xff000000 : 0;

		channel += addend;

		// A single unsigned compare catches both negative and >255 values.
		if (u32(channel) > 255)
			channel = (channel < 0) ? 0 : 255;
	};

	scale_add_clamp(m_a, other.m_a);
	scale_add_clamp(m_r, other.m_r);
	scale_add_clamp(m_g, other.m_g);
	scale_add_clamp(m_b, other.m_b);
}
void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other)
{
m_a = (m_a * scale.m_a) >> 8;

View File

@ -64,6 +64,12 @@ public:
s32 get_g32() const { return m_g; }
s32 get_b32() const { return m_b; }
// These selects return an rgbaint_t with all fields set to the element chosen (a, r, g, or b)
// Build a new rgbaint_t whose four channels all hold this colour's alpha value.
rgbaint_t select_alpha32() const
{
	const auto alpha = get_a32();
	return rgbaint_t(alpha, alpha, alpha, alpha);
}

// Build a new rgbaint_t whose four channels all hold this colour's red value.
rgbaint_t select_red32() const
{
	const auto red = get_r32();
	return rgbaint_t(red, red, red, red);
}

// Build a new rgbaint_t whose four channels all hold this colour's green value.
rgbaint_t select_green32() const
{
	const auto green = get_g32();
	return rgbaint_t(green, green, green, green);
}

// Build a new rgbaint_t whose four channels all hold this colour's blue value.
rgbaint_t select_blue32() const
{
	const auto blue = get_b32();
	return rgbaint_t(blue, blue, blue, blue);
}
inline void add(const rgbaint_t& color)
{
add_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
@ -304,7 +310,6 @@ public:
void scale_imm_and_clamp(const s32 scale);
void scale2_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2);
void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other);
void scale_imm_add_and_clamp(const s32 scale, const rgbaint_t& other);
void cmpeq(const rgbaint_t& value) { cmpeq_imm_rgba(value.m_a, value.m_r, value.m_g, value.m_b); }
void cmpgt(const rgbaint_t& value) { cmpgt_imm_rgba(value.m_a, value.m_r, value.m_g, value.m_b); }

View File

@ -67,7 +67,7 @@ public:
// 8-bit accessors: read one 16-bit element of m_value and keep only its low byte.

// Alpha: 16-bit element 6.
u8 get_a() const
{
	const unsigned lane = static_cast<unsigned>(_mm_extract_epi16(m_value, 6));
	return static_cast<u8>(lane);
}

// Red: 16-bit element 4.
u8 get_r() const
{
	const unsigned lane = static_cast<unsigned>(_mm_extract_epi16(m_value, 4));
	return static_cast<u8>(lane);
}

// Green: 16-bit element 2.
u8 get_g() const
{
	const unsigned lane = static_cast<unsigned>(_mm_extract_epi16(m_value, 2));
	return static_cast<u8>(lane);
}
u8 get_b() const { return u8(unsigned(_mm_extract_epi16(m_value, 0))); }
u8 get_b() const { return u8(unsigned(_mm_cvtsi128_si32(m_value))); }
#ifdef __SSE4_1__
s32 get_a32() const { return _mm_extract_epi32(m_value, 3); }
@ -75,12 +75,18 @@ public:
s32 get_g32() const { return _mm_extract_epi32(m_value, 1); }
s32 get_b32() const { return _mm_extract_epi32(m_value, 0); }
#else
s32 get_a32() const { return (_mm_extract_epi16(m_value, 7) << 16) | _mm_extract_epi16(m_value, 6); }
s32 get_r32() const { return (_mm_extract_epi16(m_value, 5) << 16) | _mm_extract_epi16(m_value, 4); }
s32 get_g32() const { return (_mm_extract_epi16(m_value, 3) << 16) | _mm_extract_epi16(m_value, 2); }
s32 get_b32() const { return (_mm_extract_epi16(m_value, 1) << 16) | _mm_extract_epi16(m_value, 0); }
s32 get_a32() const { return (_mm_cvtsi128_si32(_mm_shuffle_epi32(m_value, _MM_SHUFFLE(0, 0, 0, 3)))); }
s32 get_r32() const { return (_mm_cvtsi128_si32(_mm_shuffle_epi32(m_value, _MM_SHUFFLE(0, 0, 0, 2)))); }
s32 get_g32() const { return (_mm_cvtsi128_si32(_mm_shuffle_epi32(m_value, _MM_SHUFFLE(0, 0, 0, 1)))); }
s32 get_b32() const { return (_mm_cvtsi128_si32(m_value)); }
#endif
// These selects return an rgbaint_t with all fields set to the element chosen (a, r, g, or b)
// Replicate 32-bit lane 3 (the lane get_a32 reads) into every lane of a new value.
rgbaint_t select_alpha32() const
{
	return rgbaint_t(_mm_shuffle_epi32(m_value, _MM_SHUFFLE(3, 3, 3, 3)));
}

// Replicate 32-bit lane 2 (the lane get_r32 reads) into every lane of a new value.
rgbaint_t select_red32() const
{
	return rgbaint_t(_mm_shuffle_epi32(m_value, _MM_SHUFFLE(2, 2, 2, 2)));
}

// Replicate 32-bit lane 1 (the lane get_g32 reads) into every lane of a new value.
rgbaint_t select_green32() const
{
	return rgbaint_t(_mm_shuffle_epi32(m_value, _MM_SHUFFLE(1, 1, 1, 1)));
}

// Replicate 32-bit lane 0 (the lane get_b32 reads) into every lane of a new value.
rgbaint_t select_blue32() const
{
	return rgbaint_t(_mm_shuffle_epi32(m_value, _MM_SHUFFLE(0, 0, 0, 0)));
}
inline void add(const rgbaint_t& color2)
{
m_value = _mm_add_epi32(m_value, color2.m_value);
@ -283,37 +289,71 @@ public:
void scale_and_clamp(const rgbaint_t& scale);
inline void scale_imm_and_clamp(const s32 scale)
// Leave this here in case Model3 blows up...
//inline void scale_imm_and_clamp(const s32 scale)
//{
// mul_imm(scale);
// sra_imm(8);
// clamp_to_uint8();
//}
// This version needs values to be 12 bits or less
inline void scale_imm_and_clamp(const s16 scale)
{
mul_imm(scale);
sra_imm(8);
clamp_to_uint8();
}
inline void scale_imm_add_and_clamp(const s32 scale, const rgbaint_t& other)
{
mul_imm(scale);
sra_imm(8);
add(other);
clamp_to_uint8();
// Set mult a 16 bit inputs to scale
__m128i immv = _mm_set1_epi16(scale);
// Shift up by 4
immv = _mm_slli_epi16(immv, 4);
// Pack color into mult b 16 bit inputs
m_value = _mm_packs_epi32(m_value, _mm_setzero_si128());
// Shift up by 4
m_value = _mm_slli_epi16(m_value, 4);
// Do the 16 bit multiply, bottom 64 bits will contain 16 bit truncated results
m_value = _mm_mulhi_epi16(m_value, immv);
// Clamp to u8
m_value = _mm_packus_epi16(m_value, _mm_setzero_si128());
// Unpack up to s32
m_value = _mm_unpacklo_epi8(m_value, _mm_setzero_si128());
m_value = _mm_unpacklo_epi16(m_value, _mm_setzero_si128());
}
// This function needs values to be 12 bits or less
inline void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other)
{
mul(scale);
sra_imm(8);
// Pack scale into mult a 16 bits
__m128i tmp1 = _mm_packs_epi32(scale.m_value, _mm_setzero_si128());
// Shift up by 4
tmp1 = _mm_slli_epi16(tmp1, 4);
// Pack color into mult b 16 bit inputs
m_value = _mm_packs_epi32(m_value, _mm_setzero_si128());
// Shift up by 4
m_value = _mm_slli_epi16(m_value, 4);
// Do the 16 bit multiply, bottom 64 bits will contain 16 bit truncated results
m_value = _mm_mulhi_epi16(m_value, tmp1);
// Unpack into 32 bit values
m_value = _mm_unpacklo_epi16(m_value, _mm_setzero_si128());
add(other);
clamp_to_uint8();
}
// This function needs values to be 12 bits or less
inline void scale2_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2)
{
rgbaint_t color2(other);
color2.mul(scale2);
mul(scale);
add(color2);
sra_imm(8);
// Pack both scale values into mult a 16 bits
__m128i tmp1 = _mm_packs_epi32(scale.m_value, scale2.m_value);
// Shift up by 4
tmp1 = _mm_slli_epi16(tmp1, 4);
// Pack both color values into mult b 16 bit inputs
m_value = _mm_packs_epi32(m_value, other.m_value);
// Shift up by 4
m_value = _mm_slli_epi16(m_value, 4);
// Do the 16 bit multiply, top and bottom 64 bits will contain 16 bit truncated results
tmp1 = _mm_mulhi_epi16(m_value, tmp1);
// Unpack the results
m_value = _mm_unpacklo_epi16(tmp1, _mm_setzero_si128());
tmp1 = _mm_unpackhi_epi16(tmp1, _mm_setzero_si128());
// Add the results
m_value = _mm_add_epi32(m_value, tmp1);
clamp_to_uint8();
}

View File

@ -14,6 +14,7 @@
// use SSE on 64-bit implementations, where it can be assumed
#if (!defined(MAME_DEBUG) || defined(__OPTIMIZE__)) && (defined(__SSE2__) || defined(_MSC_VER)) && defined(PTR64)
#include "rgbsse.h"
#elif defined(__ALTIVEC__)
#include "rgbvmx.h"

View File

@ -205,6 +205,12 @@ public:
return result;
}
// These selects return an rgbaint_t with all fields set to the element chosen (a, r, g, or b)
// Build a new rgbaint_t whose four channels all hold this colour's alpha value.
rgbaint_t select_alpha32() const
{
	const auto alpha = get_a32();
	return rgbaint_t(alpha, alpha, alpha, alpha);
}

// Build a new rgbaint_t whose four channels all hold this colour's red value.
rgbaint_t select_red32() const
{
	const auto red = get_r32();
	return rgbaint_t(red, red, red, red);
}

// Build a new rgbaint_t whose four channels all hold this colour's green value.
rgbaint_t select_green32() const
{
	const auto green = get_g32();
	return rgbaint_t(green, green, green, green);
}

// Build a new rgbaint_t whose four channels all hold this colour's blue value.
rgbaint_t select_blue32() const
{
	const auto blue = get_b32();
	return rgbaint_t(blue, blue, blue, blue);
}
inline void add(const rgbaint_t& color2)
{
m_value = vec_add(m_value, color2.m_value);
@ -460,14 +466,6 @@ public:
void scale_and_clamp(const rgbaint_t& scale);
void scale_imm_and_clamp(const s32 scale);
void scale_imm_add_and_clamp(const s32 scale, const rgbaint_t& other)
{
mul_imm(scale);
sra_imm(8);
add(other);
clamp_to_uint8();
}
void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other)
{
mul(scale);