mirror of
https://github.com/holub/mame
synced 2025-04-24 01:11:11 +03:00
Merge branch 'master' of github.com:mamedev/mame
This commit is contained in:
commit
72e83e77fd
@ -1361,6 +1361,7 @@ static inline void ATTR_FORCE_INLINE applyFogging(voodoo_device *vd, uint32_t fb
|
||||
/* if fog_mult is zero, we subtract the incoming color */
|
||||
if (!FOGMODE_FOG_MULT(fogModeReg))
|
||||
{
|
||||
// Need to check this, manual states 9 bits
|
||||
fogColorLocal.sub(color);
|
||||
//fog.rgb -= color.rgb;
|
||||
//fr -= (RR);
|
||||
@ -1423,9 +1424,11 @@ static inline void ATTR_FORCE_INLINE applyFogging(voodoo_device *vd, uint32_t fb
|
||||
//fg = (fg * fogblend) >> 8;
|
||||
//fb = (fb * fogblend) >> 8;
|
||||
/* if fog_mult is 0, we add this to the original color */
|
||||
fogColorLocal.scale_imm_and_clamp((int16_t)fogblend);
|
||||
if (FOGMODE_FOG_MULT(fogModeReg) == 0)
|
||||
{
|
||||
fogColorLocal.scale_imm_add_and_clamp(fogblend, color);
|
||||
fogColorLocal.add(color);
|
||||
fogColorLocal.clamp_to_uint8();
|
||||
//color += fog;
|
||||
//(RR) += fr;
|
||||
//(GG) += fg;
|
||||
@ -1435,7 +1438,6 @@ static inline void ATTR_FORCE_INLINE applyFogging(voodoo_device *vd, uint32_t fb
|
||||
/* otherwise this just becomes the new color */
|
||||
else
|
||||
{
|
||||
fogColorLocal.scale_imm_and_clamp(fogblend);
|
||||
//color = fog;
|
||||
//(RR) = fr;
|
||||
//(GG) = fg;
|
||||
|
@ -770,6 +770,38 @@ void validity_checker::validate_rgb()
|
||||
rgb.mul_imm_rgba(actual_a, actual_r, actual_g, actual_b);
|
||||
check_expected("rgbaint_t::mul_imm_rgba");
|
||||
|
||||
// test select alpha element multiplication
|
||||
expected_a *= actual_a = random_i32();
|
||||
expected_r *= actual_a;
|
||||
expected_g *= actual_a;
|
||||
expected_b *= actual_a;
|
||||
rgb.mul(rgbaint_t(actual_a, actual_r, actual_g, actual_b).select_alpha32());
|
||||
check_expected("rgbaint_t::mul(select_alpha32)");
|
||||
|
||||
// test select red element multiplication
|
||||
expected_a *= actual_r = random_i32();
|
||||
expected_r *= actual_r;
|
||||
expected_g *= actual_r;
|
||||
expected_b *= actual_r;
|
||||
rgb.mul(rgbaint_t(actual_a, actual_r, actual_g, actual_b).select_red32());
|
||||
check_expected("rgbaint_t::mul(select_red32)");
|
||||
|
||||
// test select green element multiplication
|
||||
expected_a *= actual_g = random_i32();
|
||||
expected_r *= actual_g;
|
||||
expected_g *= actual_g;
|
||||
expected_b *= actual_g;
|
||||
rgb.mul(rgbaint_t(actual_a, actual_r, actual_g, actual_b).select_green32());
|
||||
check_expected("rgbaint_t::mul(select_green32)");
|
||||
|
||||
// test select blue element multiplication
|
||||
expected_a *= actual_b = random_i32();
|
||||
expected_r *= actual_b;
|
||||
expected_g *= actual_b;
|
||||
expected_b *= actual_b;
|
||||
rgb.mul(rgbaint_t(actual_a, actual_r, actual_g, actual_b).select_blue32());
|
||||
check_expected("rgbaint_t::mul(select_blue32)");
|
||||
|
||||
// test RGB and not
|
||||
expected_a &= ~(actual_a = random_i32());
|
||||
expected_r &= ~(actual_r = random_i32());
|
||||
|
@ -77,26 +77,6 @@ void rgbaint_t::scale_and_clamp(const rgbaint_t& scale)
|
||||
}
|
||||
|
||||
|
||||
// Scales every channel by an immediate 8.8-style factor, adds another colour,
// and clamps each channel to the 0..255 range.
//
// Per channel (a, r, g, b) this computes, in order:
//   1. channel = (channel * scale) >> 8
//   2. if bit 23 of the result is set, the top eight bits are forced to 1
//   3. channel += the matching channel of `other`
//   4. channel is clamped to [0, 255]
//
// scale: multiplier applied to all four channels before the shift by 8.
// other: colour whose channels are added after scaling.
void rgbaint_t::scale_imm_add_and_clamp(s32 scale, const rgbaint_t& other)
|
||||
{
|
||||
// Multiply by the scale factor and drop the low 8 bits of the product.
m_a = (m_a * scale) >> 8;
|
||||
m_r = (m_r * scale) >> 8;
|
||||
m_g = (m_g * scale) >> 8;
|
||||
m_b = (m_b * scale) >> 8;
|
||||
// When bit 23 is set, OR in 0xff000000 so bits 24..31 are all ones.
m_a |= (m_a & 0x00800000) ? 0xff000000 : 0;
|
||||
m_r |= (m_r & 0x00800000) ? 0xff000000 : 0;
|
||||
m_g |= (m_g & 0x00800000) ? 0xff000000 : 0;
|
||||
m_b |= (m_b & 0x00800000) ? 0xff000000 : 0;
|
||||
// Add the other colour channel by channel.
m_a += other.m_a;
|
||||
m_r += other.m_r;
|
||||
m_g += other.m_g;
|
||||
m_b += other.m_b;
|
||||
// The unsigned comparison catches both negative values and values above 255;
// negatives clamp to 0, everything else out of range clamps to 255.
if (u32(m_a) > 255) { m_a = (m_a < 0) ? 0 : 255; }
|
||||
if (u32(m_r) > 255) { m_r = (m_r < 0) ? 0 : 255; }
|
||||
if (u32(m_g) > 255) { m_g = (m_g < 0) ? 0 : 255; }
|
||||
if (u32(m_b) > 255) { m_b = (m_b < 0) ? 0 : 255; }
|
||||
}
|
||||
|
||||
void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other)
|
||||
{
|
||||
m_a = (m_a * scale.m_a) >> 8;
|
||||
|
@ -64,6 +64,12 @@ public:
|
||||
s32 get_g32() const { return m_g; }
|
||||
s32 get_b32() const { return m_b; }
|
||||
|
||||
// These selects return an rgbaint_t with all fields set to the element chosen (a, r, g, or b)
|
||||
rgbaint_t select_alpha32() const { return rgbaint_t(get_a32(), get_a32(), get_a32(), get_a32()); }
|
||||
rgbaint_t select_red32() const { return rgbaint_t(get_r32(), get_r32(), get_r32(), get_r32()); }
|
||||
rgbaint_t select_green32() const { return rgbaint_t(get_g32(), get_g32(), get_g32(), get_g32()); }
|
||||
rgbaint_t select_blue32() const { return rgbaint_t(get_b32(), get_b32(), get_b32(), get_b32()); }
|
||||
|
||||
inline void add(const rgbaint_t& color)
|
||||
{
|
||||
add_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
|
||||
@ -304,7 +310,6 @@ public:
|
||||
void scale_imm_and_clamp(const s32 scale);
|
||||
void scale2_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2);
|
||||
void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other);
|
||||
void scale_imm_add_and_clamp(const s32 scale, const rgbaint_t& other);
|
||||
|
||||
void cmpeq(const rgbaint_t& value) { cmpeq_imm_rgba(value.m_a, value.m_r, value.m_g, value.m_b); }
|
||||
void cmpgt(const rgbaint_t& value) { cmpgt_imm_rgba(value.m_a, value.m_r, value.m_g, value.m_b); }
|
||||
|
@ -67,7 +67,7 @@ public:
|
||||
u8 get_a() const { return u8(unsigned(_mm_extract_epi16(m_value, 6))); }
|
||||
u8 get_r() const { return u8(unsigned(_mm_extract_epi16(m_value, 4))); }
|
||||
u8 get_g() const { return u8(unsigned(_mm_extract_epi16(m_value, 2))); }
|
||||
u8 get_b() const { return u8(unsigned(_mm_extract_epi16(m_value, 0))); }
|
||||
u8 get_b() const { return u8(unsigned(_mm_cvtsi128_si32(m_value))); }
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
s32 get_a32() const { return _mm_extract_epi32(m_value, 3); }
|
||||
@ -75,12 +75,18 @@ public:
|
||||
s32 get_g32() const { return _mm_extract_epi32(m_value, 1); }
|
||||
s32 get_b32() const { return _mm_extract_epi32(m_value, 0); }
|
||||
#else
|
||||
s32 get_a32() const { return (_mm_extract_epi16(m_value, 7) << 16) | _mm_extract_epi16(m_value, 6); }
|
||||
s32 get_r32() const { return (_mm_extract_epi16(m_value, 5) << 16) | _mm_extract_epi16(m_value, 4); }
|
||||
s32 get_g32() const { return (_mm_extract_epi16(m_value, 3) << 16) | _mm_extract_epi16(m_value, 2); }
|
||||
s32 get_b32() const { return (_mm_extract_epi16(m_value, 1) << 16) | _mm_extract_epi16(m_value, 0); }
|
||||
s32 get_a32() const { return (_mm_cvtsi128_si32(_mm_shuffle_epi32(m_value, _MM_SHUFFLE(0, 0, 0, 3)))); }
|
||||
s32 get_r32() const { return (_mm_cvtsi128_si32(_mm_shuffle_epi32(m_value, _MM_SHUFFLE(0, 0, 0, 2)))); }
|
||||
s32 get_g32() const { return (_mm_cvtsi128_si32(_mm_shuffle_epi32(m_value, _MM_SHUFFLE(0, 0, 0, 1)))); }
|
||||
s32 get_b32() const { return (_mm_cvtsi128_si32(m_value)); }
|
||||
#endif
|
||||
|
||||
// These selects return an rgbaint_t with all fields set to the element chosen (a, r, g, or b)
|
||||
rgbaint_t select_alpha32() const { return (rgbaint_t)_mm_shuffle_epi32(m_value, _MM_SHUFFLE(3, 3, 3, 3)); }
|
||||
rgbaint_t select_red32() const { return (rgbaint_t)_mm_shuffle_epi32(m_value, _MM_SHUFFLE(2, 2, 2, 2)); }
|
||||
rgbaint_t select_green32() const { return (rgbaint_t)_mm_shuffle_epi32(m_value, _MM_SHUFFLE(1, 1, 1, 1)); }
|
||||
rgbaint_t select_blue32() const { return (rgbaint_t)_mm_shuffle_epi32(m_value, _MM_SHUFFLE(0, 0, 0, 0)); }
|
||||
|
||||
inline void add(const rgbaint_t& color2)
|
||||
{
|
||||
m_value = _mm_add_epi32(m_value, color2.m_value);
|
||||
@ -283,37 +289,71 @@ public:
|
||||
|
||||
void scale_and_clamp(const rgbaint_t& scale);
|
||||
|
||||
inline void scale_imm_and_clamp(const s32 scale)
|
||||
// Leave this here in case Model3 blows up...
|
||||
//inline void scale_imm_and_clamp(const s32 scale)
|
||||
//{
|
||||
// mul_imm(scale);
|
||||
// sra_imm(8);
|
||||
// clamp_to_uint8();
|
||||
//}
|
||||
|
||||
// This version needs values to be 12 bits or less
|
||||
inline void scale_imm_and_clamp(const s16 scale)
|
||||
{
|
||||
mul_imm(scale);
|
||||
sra_imm(8);
|
||||
clamp_to_uint8();
|
||||
}
|
||||
|
||||
inline void scale_imm_add_and_clamp(const s32 scale, const rgbaint_t& other)
|
||||
{
|
||||
mul_imm(scale);
|
||||
sra_imm(8);
|
||||
add(other);
|
||||
clamp_to_uint8();
|
||||
// Set mult a 16 bit inputs to scale
|
||||
__m128i immv = _mm_set1_epi16(scale);
|
||||
// Shift up by 4
|
||||
immv = _mm_slli_epi16(immv, 4);
|
||||
// Pack color into mult b 16 bit inputs
|
||||
m_value = _mm_packs_epi32(m_value, _mm_setzero_si128());
|
||||
// Shift up by 4
|
||||
m_value = _mm_slli_epi16(m_value, 4);
|
||||
// Do the 16 bit multiply, bottom 64 bits will contain 16 bit truncated results
|
||||
m_value = _mm_mulhi_epi16(m_value, immv);
|
||||
// Clamp to u8
|
||||
m_value = _mm_packus_epi16(m_value, _mm_setzero_si128());
|
||||
// Unpack up to s32
|
||||
m_value = _mm_unpacklo_epi8(m_value, _mm_setzero_si128());
|
||||
m_value = _mm_unpacklo_epi16(m_value, _mm_setzero_si128());
|
||||
}
|
||||
|
||||
// This function needs values to be 12 bits or less
|
||||
inline void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other)
|
||||
{
|
||||
mul(scale);
|
||||
sra_imm(8);
|
||||
// Pack scale into mult a 16 bits
|
||||
__m128i tmp1 = _mm_packs_epi32(scale.m_value, _mm_setzero_si128());
|
||||
// Shift up by 4
|
||||
tmp1 = _mm_slli_epi16(tmp1, 4);
|
||||
// Pack color into mult b 16 bit inputs
|
||||
m_value = _mm_packs_epi32(m_value, _mm_setzero_si128());
|
||||
// Shift up by 4
|
||||
m_value = _mm_slli_epi16(m_value, 4);
|
||||
// Do the 16 bit multiply, bottom 64 bits will contain 16 bit truncated results
|
||||
m_value = _mm_mulhi_epi16(m_value, tmp1);
|
||||
// Unpack into 32 bit values
|
||||
m_value = _mm_unpacklo_epi16(m_value, _mm_setzero_si128());
|
||||
add(other);
|
||||
clamp_to_uint8();
|
||||
}
|
||||
|
||||
// This function needs values to be 12 bits or less
|
||||
inline void scale2_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2)
|
||||
{
|
||||
rgbaint_t color2(other);
|
||||
color2.mul(scale2);
|
||||
|
||||
mul(scale);
|
||||
add(color2);
|
||||
sra_imm(8);
|
||||
// Pack both scale values into mult a 16 bits
|
||||
__m128i tmp1 = _mm_packs_epi32(scale.m_value, scale2.m_value);
|
||||
// Shift up by 4
|
||||
tmp1 = _mm_slli_epi16(tmp1, 4);
|
||||
// Pack both color values into mult b 16 bit inputs
|
||||
m_value = _mm_packs_epi32(m_value, other.m_value);
|
||||
// Shift up by 4
|
||||
m_value = _mm_slli_epi16(m_value, 4);
|
||||
// Do the 16 bit multiply, top and bottom 64 bits will contain 16 bit truncated results
|
||||
tmp1 = _mm_mulhi_epi16(m_value, tmp1);
|
||||
// Unpack the results
|
||||
m_value = _mm_unpacklo_epi16(tmp1, _mm_setzero_si128());
|
||||
tmp1 = _mm_unpackhi_epi16(tmp1, _mm_setzero_si128());
|
||||
// Add the results
|
||||
m_value = _mm_add_epi32(m_value, tmp1);
|
||||
clamp_to_uint8();
|
||||
}
|
||||
|
||||
|
@ -14,6 +14,7 @@
|
||||
|
||||
// use SSE on 64-bit implementations, where it can be assumed
|
||||
#if (!defined(MAME_DEBUG) || defined(__OPTIMIZE__)) && (defined(__SSE2__) || defined(_MSC_VER)) && defined(PTR64)
|
||||
|
||||
#include "rgbsse.h"
|
||||
#elif defined(__ALTIVEC__)
|
||||
#include "rgbvmx.h"
|
||||
|
@ -205,6 +205,12 @@ public:
|
||||
return result;
|
||||
}
|
||||
|
||||
// These selects return an rgbaint_t with all fields set to the element chosen (a, r, g, or b)
|
||||
rgbaint_t select_alpha32() const { return rgbaint_t(get_a32(), get_a32(), get_a32(), get_a32()); }
|
||||
rgbaint_t select_red32() const { return rgbaint_t(get_r32(), get_r32(), get_r32(), get_r32()); }
|
||||
rgbaint_t select_green32() const { return rgbaint_t(get_g32(), get_g32(), get_g32(), get_g32()); }
|
||||
rgbaint_t select_blue32() const { return rgbaint_t(get_b32(), get_b32(), get_b32(), get_b32()); }
|
||||
|
||||
inline void add(const rgbaint_t& color2)
|
||||
{
|
||||
m_value = vec_add(m_value, color2.m_value);
|
||||
@ -460,14 +466,6 @@ public:
|
||||
void scale_and_clamp(const rgbaint_t& scale);
|
||||
void scale_imm_and_clamp(const s32 scale);
|
||||
|
||||
void scale_imm_add_and_clamp(const s32 scale, const rgbaint_t& other)
|
||||
{
|
||||
mul_imm(scale);
|
||||
sra_imm(8);
|
||||
add(other);
|
||||
clamp_to_uint8();
|
||||
}
|
||||
|
||||
void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other)
|
||||
{
|
||||
mul(scale);
|
||||
|
Loading…
Reference in New Issue
Block a user