Fixed MT-05968

This commit is contained in:
therealmogminer@gmail.com 2015-06-26 13:55:04 +02:00
parent 06923f50b8
commit 1e7eb19809
5 changed files with 146 additions and 116 deletions

View File

@ -8,7 +8,7 @@
***************************************************************************/
#if !defined(__ALTIVEC__)
#if !(defined(__ALTIVEC__) || ((!defined(MAME_DEBUG) || defined(__OPTIMIZE__)) && (defined(__SSE2__) || defined(_MSC_VER)) && defined(PTR64)))
#include "emu.h"
#include "rgbgen.h"
@ -31,6 +31,10 @@ void rgbaint_t::blend(const rgbaint_t& color2, UINT8 color1scale)
m_r = (m_r * scale1 + color2.m_r * scale2) >> 8;
m_g = (m_g * scale1 + color2.m_g * scale2) >> 8;
m_b = (m_b * scale1 + color2.m_b * scale2) >> 8;
m_a |= (m_a & 0x00800000) ? 0xff000000 : 0;
m_r |= (m_r & 0x00800000) ? 0xff000000 : 0;
m_g |= (m_g & 0x00800000) ? 0xff000000 : 0;
m_b |= (m_b & 0x00800000) ? 0xff000000 : 0;
}
@ -43,70 +47,90 @@ void rgbaint_t::blend(const rgbaint_t& color2, UINT8 color1scale)
// Multiplies every channel by the immediate `scale`, shifts the product right by 8,
// and clamps each channel into 0..255.
// NOTE(review): this listing appears to be a commit diff rendered without +/- markers.
// The `if (m_x > 255)` line that directly follows each multiply is presumably the removed
// pre-commit clamp, and the two trailing groups are presumably its replacement - confirm
// against the repository before reading this as straight-line code.
void rgbaint_t::scale_imm_and_clamp(INT32 scale)
{
m_a = (m_a * scale) >> 8;
// This clamp form can only ever store 255: once `m_a > 255` holds, `m_a < 0` is false.
if (m_a > 255) { m_a = (m_a < 0) ? 0 : 255; }
m_r = (m_r * scale) >> 8;
if (m_r > 255) { m_r = (m_r < 0) ? 0 : 255; }
m_g = (m_g * scale) >> 8;
if (m_g > 255) { m_g = (m_g < 0) ? 0 : 255; }
m_b = (m_b * scale) >> 8;
if (m_b > 255) { m_b = (m_b < 0) ? 0 : 255; }
// If bit 23 of the shifted result is set, fill the top byte with ones
// (extends the sign of a 24-bit value to the full 32 bits).
m_a |= (m_a & 0x00800000) ? 0xff000000 : 0;
m_r |= (m_r & 0x00800000) ? 0xff000000 : 0;
m_g |= (m_g & 0x00800000) ? 0xff000000 : 0;
m_b |= (m_b & 0x00800000) ? 0xff000000 : 0;
// The unsigned compare is true for anything outside 0..255; with signed (INT32) members a
// negative value then becomes 0 and everything else becomes 255.
if ((UINT32)m_a > 255) { m_a = (m_a < 0) ? 0 : 255; }
if ((UINT32)m_r > 255) { m_r = (m_r < 0) ? 0 : 255; }
if ((UINT32)m_g > 255) { m_g = (m_g < 0) ? 0 : 255; }
if ((UINT32)m_b > 255) { m_b = (m_b < 0) ? 0 : 255; }
}
// Multiplies every channel by the matching channel of `scale`, shifts the product right
// by 8, and clamps each channel into 0..255.
// NOTE(review): this listing appears to be a commit diff rendered without +/- markers.
// The `if (m_x > 255)` line after each multiply is presumably the removed pre-commit clamp
// and the trailing groups its replacement - confirm against the repository.
void rgbaint_t::scale_and_clamp(const rgbaint_t& scale)
{
m_a = (m_a * scale.m_a) >> 8;
// This clamp form can only ever store 255: once `m_a > 255` holds, `m_a < 0` is false.
if (m_a > 255) { m_a = (m_a < 0) ? 0 : 255; }
m_r = (m_r * scale.m_r) >> 8;
if (m_r > 255) { m_r = (m_r < 0) ? 0 : 255; }
m_g = (m_g * scale.m_g) >> 8;
if (m_g > 255) { m_g = (m_g < 0) ? 0 : 255; }
m_b = (m_b * scale.m_b) >> 8;
if (m_b > 255) { m_b = (m_b < 0) ? 0 : 255; }
// If bit 23 of the shifted result is set, fill the top byte with ones
// (extends the sign of a 24-bit value to the full 32 bits).
m_a |= (m_a & 0x00800000) ? 0xff000000 : 0;
m_r |= (m_r & 0x00800000) ? 0xff000000 : 0;
m_g |= (m_g & 0x00800000) ? 0xff000000 : 0;
m_b |= (m_b & 0x00800000) ? 0xff000000 : 0;
// The unsigned compare is true for anything outside 0..255; with signed (INT32) members a
// negative value then becomes 0 and everything else becomes 255.
if ((UINT32)m_a > 255) { m_a = (m_a < 0) ? 0 : 255; }
if ((UINT32)m_r > 255) { m_r = (m_r < 0) ? 0 : 255; }
if ((UINT32)m_g > 255) { m_g = (m_g < 0) ? 0 : 255; }
if ((UINT32)m_b > 255) { m_b = (m_b < 0) ? 0 : 255; }
}
// Computes ((channel * scale) >> 8) + other.channel per channel and clamps the sum into 0..255.
// NOTE(review): this listing appears to be a commit diff rendered without +/- markers.
// The `+= other.m_x` statements for a/r/g show up twice (once right after each multiply,
// once in the grouped block further down) and the per-channel `if (m_x > 255)` clamps
// precede the grouped `(UINT32)` clamps. Presumably the first occurrences are removed lines
// and the grouped ones are the added replacement - confirm against the repository, because
// read literally a/r/g would have `other` added twice.
void rgbaint_t::scale_imm_add_and_clamp(INT32 scale, const rgbaint_t& other)
{
m_a = (m_a * scale) >> 8;
m_a += other.m_a;
// This clamp form can only ever store 255: once `m_a > 255` holds, `m_a < 0` is false.
if (m_a > 255) { m_a = (m_a < 0) ? 0 : 255; }
m_r = (m_r * scale) >> 8;
m_r += other.m_r;
if (m_r > 255) { m_r = (m_r < 0) ? 0 : 255; }
m_g = (m_g * scale) >> 8;
m_g += other.m_g;
if (m_g > 255) { m_g = (m_g < 0) ? 0 : 255; }
m_b = (m_b * scale) >> 8;
// If bit 23 of the shifted product is set, fill the top byte with ones
// (extends the sign of a 24-bit value to the full 32 bits).
m_a |= (m_a & 0x00800000) ? 0xff000000 : 0;
m_r |= (m_r & 0x00800000) ? 0xff000000 : 0;
m_g |= (m_g & 0x00800000) ? 0xff000000 : 0;
m_b |= (m_b & 0x00800000) ? 0xff000000 : 0;
// The addend is applied after the shift, so `other` is not scaled down by 256.
m_a += other.m_a;
m_r += other.m_r;
m_g += other.m_g;
m_b += other.m_b;
if (m_b > 255) { m_b = (m_b < 0) ? 0 : 255; }
// The unsigned compare is true for anything outside 0..255; with signed (INT32) members a
// negative value then becomes 0 and everything else becomes 255.
if ((UINT32)m_a > 255) { m_a = (m_a < 0) ? 0 : 255; }
if ((UINT32)m_r > 255) { m_r = (m_r < 0) ? 0 : 255; }
if ((UINT32)m_g > 255) { m_g = (m_g < 0) ? 0 : 255; }
if ((UINT32)m_b > 255) { m_b = (m_b < 0) ? 0 : 255; }
}
// Computes ((channel * scale.channel) >> 8) + other.channel per channel and clamps the
// sum into 0..255.
// NOTE(review): this listing appears to be a commit diff rendered without +/- markers.
// The `+= other.m_x` statements for a/r/g show up twice and the per-channel
// `if (m_x > 255)` clamps precede the grouped `(UINT32)` clamps. Presumably the first
// occurrences are removed lines and the grouped ones are the added replacement - confirm
// against the repository, because read literally a/r/g would have `other` added twice.
void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other)
{
m_a = (m_a * scale.m_a) >> 8;
m_a += other.m_a;
// This clamp form can only ever store 255: once `m_a > 255` holds, `m_a < 0` is false.
if (m_a > 255) { m_a = (m_a < 0) ? 0 : 255; }
m_r = (m_r * scale.m_r) >> 8;
m_r += other.m_r;
if (m_r > 255) { m_r = (m_r < 0) ? 0 : 255; }
m_g = (m_g * scale.m_g) >> 8;
m_g += other.m_g;
if (m_g > 255) { m_g = (m_g < 0) ? 0 : 255; }
m_b = (m_b * scale.m_b) >> 8;
// If bit 23 of the shifted product is set, fill the top byte with ones
// (extends the sign of a 24-bit value to the full 32 bits).
m_a |= (m_a & 0x00800000) ? 0xff000000 : 0;
m_r |= (m_r & 0x00800000) ? 0xff000000 : 0;
m_g |= (m_g & 0x00800000) ? 0xff000000 : 0;
m_b |= (m_b & 0x00800000) ? 0xff000000 : 0;
// The addend is applied after the shift, so `other` is not scaled down by 256.
m_a += other.m_a;
m_r += other.m_r;
m_g += other.m_g;
m_b += other.m_b;
if (m_b > 255) { m_b = (m_b < 0) ? 0 : 255; }
// The unsigned compare is true for anything outside 0..255; with signed (INT32) members a
// negative value then becomes 0 and everything else becomes 255.
if ((UINT32)m_a > 255) { m_a = (m_a < 0) ? 0 : 255; }
if ((UINT32)m_r > 255) { m_r = (m_r < 0) ? 0 : 255; }
if ((UINT32)m_g > 255) { m_g = (m_g < 0) ? 0 : 255; }
if ((UINT32)m_b > 255) { m_b = (m_b < 0) ? 0 : 255; }
}
// Computes (channel * scale.channel + other.channel * scale2.channel) >> 8 per channel
// and clamps the result into 0..255.
// NOTE(review): this listing appears to be a commit diff rendered without +/- markers.
// The `(UINT16)` clamp after each multiply-add is presumably the removed pre-commit line
// and the trailing sign-extension + `(UINT32)` clamp groups its replacement - confirm
// against the repository.
void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2)
{
m_a = (m_a * scale.m_a + other.m_a * scale2.m_a) >> 8;
// The (UINT16) cast looks only at the low 16 bits, so higher bits do not take part in this test.
if ((UINT16)m_a > 255) { m_a = (m_a < 0) ? 0 : 255; }
m_r = (m_r * scale.m_r + other.m_r * scale2.m_r) >> 8;
if ((UINT16)m_r > 255) { m_r = (m_r < 0) ? 0 : 255; }
m_g = (m_g * scale.m_g + other.m_g * scale2.m_g) >> 8;
if ((UINT16)m_g > 255) { m_g = (m_g < 0) ? 0 : 255; }
m_b = (m_b * scale.m_b + other.m_b * scale2.m_b) >> 8;
if ((UINT16)m_b > 255) { m_b = (m_b < 0) ? 0 : 255; }
// If bit 23 of the shifted result is set, fill the top byte with ones
// (extends the sign of a 24-bit value to the full 32 bits).
m_a |= (m_a & 0x00800000) ? 0xff000000 : 0;
m_r |= (m_r & 0x00800000) ? 0xff000000 : 0;
m_g |= (m_g & 0x00800000) ? 0xff000000 : 0;
m_b |= (m_b & 0x00800000) ? 0xff000000 : 0;
// The unsigned compare is true for anything outside 0..255; with signed (INT32) members a
// negative value then becomes 0 and everything else becomes 255.
if ((UINT32)m_a > 255) { m_a = (m_a < 0) ? 0 : 255; }
if ((UINT32)m_r > 255) { m_r = (m_r < 0) ? 0 : 255; }
if ((UINT32)m_g > 255) { m_g = (m_g < 0) ? 0 : 255; }
if ((UINT32)m_b > 255) { m_b = (m_b < 0) ? 0 : 255; }
}
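#endif // !(defined(__ALTIVEC__) || ((!defined(MAME_DEBUG) || defined(__OPTIMIZE__)) && (defined(__SSE2__) || defined(_MSC_VER)) && defined(PTR64)))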

View File

@ -21,12 +21,12 @@ class rgbaint_t
public:
inline rgbaint_t() { }
inline rgbaint_t(UINT32 rgba) { set(rgba); }
inline rgbaint_t(UINT32 a, UINT32 r, UINT32 g, UINT32 b) { set(a, r, g, b); }
inline rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); }
inline rgbaint_t(rgb_t& rgba) { set(rgba); }
inline void set(rgbaint_t& other) { set(other.m_a, other.m_r, other.m_g, other.m_b); }
inline void set(UINT32 rgba) { set((rgba >> 24) & 0xff, (rgba >> 16) & 0xff, (rgba >> 8) & 0xff, rgba & 0xff); }
inline void set(UINT32 a, UINT32 r, UINT32 g, UINT32 b)
inline void set(INT32 a, INT32 r, INT32 g, INT32 b)
{
m_a = a;
m_r = r;
@ -54,12 +54,12 @@ public:
add_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
}
inline void add_imm(const UINT32 imm)
inline void add_imm(const INT32 imm)
{
add_imm_rgba(imm, imm, imm, imm);
}
inline void add_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void add_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
m_a += a;
m_r += r;
@ -72,12 +72,12 @@ public:
sub_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
}
inline void sub_imm(const UINT32 imm)
inline void sub_imm(const INT32 imm)
{
sub_imm_rgba(imm, imm, imm, imm);
}
inline void sub_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void sub_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
m_a -= a;
m_r -= r;
@ -90,12 +90,12 @@ public:
subr_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
}
inline void subr_imm(const UINT32 imm)
inline void subr_imm(const INT32 imm)
{
subr_imm_rgba(imm, imm, imm, imm);
}
inline void subr_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void subr_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
m_a = a - m_a;
m_r = r - m_r;
@ -103,22 +103,22 @@ public:
m_b = b - m_b;
}
inline void set_a(const UINT32 value)
inline void set_a(const INT32 value)
{
m_r = value;
}
inline void set_r(const UINT32 value)
inline void set_r(const INT32 value)
{
m_r = value;
}
inline void set_g(const UINT32 value)
inline void set_g(const INT32 value)
{
m_g = value;
}
inline void set_b(const UINT32 value)
inline void set_b(const INT32 value)
{
m_b = value;
}
@ -143,22 +143,22 @@ public:
return m_b;
}
inline UINT32 get_a32()
inline INT32 get_a32()
{
return m_a;
}
inline UINT32 get_r32()
inline INT32 get_r32()
{
return m_r;
}
inline UINT32 get_g32()
inline INT32 get_g32()
{
return m_g;
}
inline UINT32 get_b32()
inline INT32 get_b32()
{
return m_b;
}
@ -168,12 +168,12 @@ public:
mul_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
}
inline void mul_imm(const UINT32 imm)
inline void mul_imm(const INT32 imm)
{
mul_imm_rgba(imm, imm, imm, imm);
}
inline void mul_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void mul_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
m_a *= a;
m_r *= r;
@ -265,12 +265,12 @@ public:
or_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
}
inline void or_imm(const UINT32 imm)
inline void or_imm(const INT32 imm)
{
or_imm_rgba(imm, imm, imm, imm);
}
inline void or_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void or_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
m_a |= a;
m_r |= r;
@ -283,12 +283,12 @@ public:
and_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
}
inline void and_imm(const UINT32 imm)
inline void and_imm(const INT32 imm)
{
and_imm_rgba(imm, imm, imm, imm);
}
inline void and_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void and_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
m_a &= a;
m_r &= r;
@ -301,12 +301,12 @@ public:
xor_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
}
inline void xor_imm(const UINT32 imm)
inline void xor_imm(const INT32 imm)
{
xor_imm_rgba(imm, imm, imm, imm);
}
inline void xor_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void xor_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
m_a ^= a;
m_r ^= r;
@ -349,7 +349,7 @@ public:
m_b |= sign;
}
inline void min(const UINT32 value)
inline void min(const INT32 value)
{
m_a = (m_a > value) ? value : m_a;
m_r = (m_r > value) ? value : m_r;
@ -373,7 +373,7 @@ public:
m_b = (m_b == value.m_b) ? 0xffffffff : 0;
}
inline void cmpeq_imm(const UINT32 value)
inline void cmpeq_imm(const INT32 value)
{
m_a = (m_a == value) ? 0xffffffff : 0;
m_r = (m_r == value) ? 0xffffffff : 0;
@ -389,7 +389,7 @@ public:
m_b = (m_b > value.m_b) ? 0xffffffff : 0;
}
inline void cmpgt_imm(const UINT32 value)
inline void cmpgt_imm(const INT32 value)
{
m_a = (m_a > value) ? 0xffffffff : 0;
m_r = (m_r > value) ? 0xffffffff : 0;
@ -405,7 +405,7 @@ public:
m_b = (m_b < value.m_b) ? 0xffffffff : 0;
}
inline void cmplt_imm(const UINT32 value)
inline void cmplt_imm(const INT32 value)
{
m_a = (m_a < value) ? 0xffffffff : 0;
m_r = (m_r < value) ? 0xffffffff : 0;
@ -447,10 +447,10 @@ public:
}
protected:
UINT32 m_a;
UINT32 m_r;
UINT32 m_g;
UINT32 m_b;
INT32 m_a;
INT32 m_r;
INT32 m_g;
INT32 m_b;
};
#endif /* __RGBGEN__ */

View File

@ -10,7 +10,7 @@
***************************************************************************/
#if defined(__SSE2__) || defined(_MSC_VER)
#if (!defined(MAME_DEBUG) || defined(__OPTIMIZE__)) && (defined(__SSE2__) || defined(_MSC_VER)) && defined(PTR64)
#include "emu.h"
#include <emmintrin.h>
@ -30,17 +30,17 @@ void rgbaint_t::blend(const rgbaint_t& other, UINT8 factor)
// Multiplies this color by `scale`, shifts right by 8 and limits the result to 0..255,
// built from the class's own mul/shift/min/max helpers.
// NOTE(review): this listing appears to be a commit diff rendered without +/- markers.
// `shr(8); min(255);` is presumably the removed sequence and `sra(8); max(0); min(255);`
// the added one - confirm against the repository. shr/sra are defined outside this view;
// the names suggest logical vs. arithmetic right shift - TODO confirm.
void rgbaint_t::scale_and_clamp(const rgbaint_t& scale)
{
mul(scale);
shr(8);
min(255);
sra(8);
// Lower bound first, then upper bound: together they limit each channel to 0..255.
max(0);
min(255);
}
// Multiplies this color by the immediate `scale`, shifts right by 8 and limits the result
// to 0..255, built from the class's own mul_imm/shift/min/max helpers.
// NOTE(review): this listing appears to be a commit diff rendered without +/- markers.
// `shr(8); min(255);` is presumably the removed sequence and `sra(8); max(0); min(255);`
// the added one - confirm against the repository. shr/sra are defined outside this view;
// the names suggest logical vs. arithmetic right shift - TODO confirm.
void rgbaint_t::scale_imm_and_clamp(const INT32 scale)
{
mul_imm(scale);
shr(8);
min(255);
sra(8);
// Lower bound first, then upper bound: together they limit each channel to 0..255.
max(0);
min(255);
}
void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2)
@ -50,27 +50,27 @@ void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& oth
mul(scale);
add(color2);
shr(8);
min(255);
sra(8);
max(0);
min(255);
}
// Multiplies this color by the immediate `scale`, adds `other`, shifts right by 8 and
// limits the result to 0..255.
// NOTE(review): this listing appears to be a commit diff rendered without +/- markers.
// `shr(8); min(255);` is presumably the removed sequence and `sra(8); max(0); min(255);`
// the added one - confirm against the repository.
// NOTE(review): `other` is added BEFORE the shift here, i.e. (this * scale + other) >> 8.
// The two-argument scale_add_and_clamp(scale, other) just below shifts first and adds
// afterwards, and so does the non-SIMD scale_imm_add_and_clamp earlier in this listing.
// This ordering looks inconsistent with both - verify which one is intended.
void rgbaint_t::scale_imm_add_and_clamp(const INT32 scale, const rgbaint_t& other)
{
mul_imm(scale);
add(other);
shr(8);
min(255);
sra(8);
// Lower bound first, then upper bound: together they limit each channel to 0..255.
max(0);
min(255);
}
// Multiplies this color by `scale`, shifts right by 8, adds `other` and limits the
// result to 0..255.
// NOTE(review): this listing appears to be a commit diff rendered without +/- markers.
// The `shr(8);` and first `min(255);` after `add(other);` are presumably removed lines,
// with `sra(8);` (now ahead of the add) and `max(0); min(255);` as the added ones -
// confirm against the repository. shr/sra are defined outside this view.
void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other)
{
mul(scale);
// Shift before adding so that `other` is not divided by 256 along with the product.
sra(8);
add(other);
shr(8);
min(255);
// Lower bound first, then upper bound: together they limit each channel to 0..255.
max(0);
min(255);
}
#endif // (!defined(MAME_DEBUG) || defined(__OPTIMIZE__)) && (defined(__SSE2__) || defined(_MSC_VER)) && defined(PTR64)

View File

@ -39,12 +39,13 @@ class rgbaint_t
public:
inline rgbaint_t() { }
inline rgbaint_t(UINT32 rgba) { set(rgba); }
inline rgbaint_t(UINT32 a, UINT32 r, UINT32 g, UINT32 b) { set(a, r, g, b); }
inline rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); }
inline rgbaint_t(rgb_t& rgb) { set(rgb); }
inline rgbaint_t(__m128i rgba) { m_value = rgba; }
inline void set(rgbaint_t& other) { m_value = other.m_value; }
inline void set(UINT32 rgba) { m_value = _mm_and_si128(_mm_set1_epi32(0xff), _mm_set_epi32(rgba >> 24, rgba >> 16, rgba >> 8, rgba)); }
inline void set(UINT32 a, UINT32 r, UINT32 g, UINT32 b) { m_value = _mm_set_epi32(a, r, g, b); }
inline void set(INT32 a, INT32 r, INT32 g, INT32 b) { m_value = _mm_set_epi32(a, r, g, b); }
inline void set(rgb_t& rgb) { m_value = _mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(rgb), _mm_setzero_si128()), _mm_setzero_si128()); }
inline rgb_t to_rgba()
@ -52,14 +53,9 @@ public:
return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(m_value, _mm_setzero_si128()), _mm_setzero_si128()));
}
inline UINT32 to_argb8()
{
return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(m_value, _mm_setzero_si128()), _mm_setzero_si128()));
}
inline rgb_t to_rgba_clamp()
{
return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(m_value, m_value), _mm_setzero_si128()));
return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(m_value, _mm_setzero_si128()), _mm_setzero_si128()));
}
inline void add(const rgbaint_t& color2)
@ -67,12 +63,12 @@ public:
m_value = _mm_add_epi32(m_value, color2.m_value);
}
inline void add_imm(const UINT32 imm)
inline void add_imm(const INT32 imm)
{
m_value = _mm_add_epi32(m_value, _mm_set1_epi32(imm));
}
inline void add_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void add_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
m_value = _mm_add_epi32(m_value, _mm_set_epi32(a, r, g, b));
}
@ -82,12 +78,12 @@ public:
m_value = _mm_sub_epi32(m_value, color2.m_value);
}
inline void sub_imm(const UINT32 imm)
inline void sub_imm(const INT32 imm)
{
m_value = _mm_sub_epi32(m_value, _mm_set1_epi32(imm));
}
inline void sub_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void sub_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
m_value = _mm_sub_epi32(m_value, _mm_set_epi32(a, r, g, b));
}
@ -97,32 +93,32 @@ public:
m_value = _mm_sub_epi32(color2.m_value, m_value);
}
inline void subr_imm(const UINT32 imm)
inline void subr_imm(const INT32 imm)
{
m_value = _mm_sub_epi32(_mm_set1_epi32(imm), m_value);
}
inline void subr_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void subr_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
m_value = _mm_sub_epi32(_mm_set_epi32(a, r, g, b), m_value);
}
inline void set_a(const UINT32 value)
inline void set_a(const INT32 value)
{
m_value = _mm_or_si128(_mm_and_si128(m_value, *(__m128i *)&rgbsse_statics.alpha_mask), _mm_set_epi32(value, 0, 0, 0));
}
inline void set_r(const UINT32 value)
inline void set_r(const INT32 value)
{
m_value = _mm_or_si128(_mm_and_si128(m_value, *(__m128i *)&rgbsse_statics.red_mask), _mm_set_epi32(0, value, 0, 0));
}
inline void set_g(const UINT32 value)
inline void set_g(const INT32 value)
{
m_value = _mm_or_si128(_mm_and_si128(m_value, *(__m128i *)&rgbsse_statics.green_mask), _mm_set_epi32(0, 0, value, 0));
}
inline void set_b(const UINT32 value)
inline void set_b(const INT32 value)
{
m_value = _mm_or_si128(_mm_and_si128(m_value, *(__m128i *)&rgbsse_statics.blue_mask), _mm_set_epi32(0, 0, 0, value));
}
@ -147,22 +143,22 @@ public:
return _mm_extract_epi16(m_value, 0);
}
inline UINT32 get_a32()
inline INT32 get_a32()
{
return (_mm_extract_epi16(m_value, 7) << 16) | _mm_extract_epi16(m_value, 6);
}
inline UINT32 get_r32()
inline INT32 get_r32()
{
return (_mm_extract_epi16(m_value, 5) << 16) | _mm_extract_epi16(m_value, 4);
}
inline UINT32 get_g32()
inline INT32 get_g32()
{
return (_mm_extract_epi16(m_value, 3) << 16) | _mm_extract_epi16(m_value, 2);
}
inline UINT32 get_b32()
inline INT32 get_b32()
{
return (_mm_extract_epi16(m_value, 1) << 16) | _mm_extract_epi16(m_value, 0);
}
@ -174,7 +170,7 @@ public:
m_value = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0)));
}
inline void mul_imm(const UINT32 imm)
inline void mul_imm(const INT32 imm)
{
__m128i immv = _mm_set1_epi32(imm);
__m128i tmp1 = _mm_mul_epu32(m_value, immv);
@ -182,7 +178,7 @@ public:
m_value = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0)));
}
inline void mul_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void mul_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
__m128i immv = _mm_set_epi32(a, r, g, b);
__m128i tmp1 = _mm_mul_epu32(m_value, immv);
@ -225,12 +221,12 @@ public:
m_value = _mm_or_si128(m_value, color2.m_value);
}
inline void or_imm(const UINT32 value)
inline void or_imm(const INT32 value)
{
m_value = _mm_or_si128(m_value, _mm_set1_epi32(value));
}
inline void or_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void or_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
m_value = _mm_or_si128(m_value, _mm_set_epi32(a, r, g, b));
}
@ -240,12 +236,12 @@ public:
m_value = _mm_and_si128(m_value, color.m_value);
}
inline void and_imm(const UINT32 value)
inline void and_imm(const INT32 value)
{
m_value = _mm_and_si128(m_value, _mm_set1_epi32(value));
}
inline void and_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void and_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
m_value = _mm_and_si128(m_value, _mm_set_epi32(a, r, g, b));
}
@ -260,7 +256,7 @@ public:
m_value = _mm_xor_si128(m_value, _mm_set1_epi32(value));
}
inline void xor_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void xor_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
m_value = _mm_xor_si128(m_value, _mm_set_epi32(a, r, g, b));
}
@ -283,18 +279,28 @@ public:
m_value = _mm_or_si128(m_value, compared);
}
inline void min(const UINT32 value)
inline void min(const INT32 value)
{
__m128i val = _mm_set1_epi32(value);
__m128i mask = _mm_cmpgt_epi32(m_value, val);
m_value = _mm_or_si128(_mm_and_si128(val, mask), _mm_and_si128(m_value, _mm_xor_si128(mask, _mm_set1_epi32(0xffffffff))));
__m128i is_greater_than = _mm_cmpgt_epi32(m_value, val);
__m128i val_to_set = _mm_and_si128(val, is_greater_than);
__m128i keep_mask = _mm_xor_si128(is_greater_than, _mm_set1_epi32(0xffffffff));
m_value = _mm_and_si128(m_value, keep_mask);
m_value = _mm_or_si128(val_to_set, m_value);
}
inline void max(const UINT32 value)
inline void max(const INT32 value)
{
__m128i val = _mm_set1_epi32(value);
__m128i mask = _mm_cmplt_epi32(m_value, val);
m_value = _mm_or_si128(_mm_and_si128(val, mask), _mm_and_si128(m_value, _mm_xor_si128(mask, _mm_set1_epi32(0xffffffff))));
__m128i is_less_than = _mm_cmplt_epi32(m_value, val);
__m128i val_to_set = _mm_and_si128(val, is_less_than);
__m128i keep_mask = _mm_xor_si128(is_less_than, _mm_set1_epi32(0xffffffff));
m_value = _mm_and_si128(m_value, keep_mask);
m_value = _mm_or_si128(val_to_set, m_value);
}
void blend(const rgbaint_t& other, UINT8 factor);
@ -310,12 +316,12 @@ public:
m_value = _mm_cmpeq_epi32(m_value, value.m_value);
}
inline void cmpeq_imm(const UINT32 value)
inline void cmpeq_imm(const INT32 value)
{
m_value = _mm_cmpeq_epi32(m_value, _mm_set1_epi32(value));
}
inline void cmpeq_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void cmpeq_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
m_value = _mm_cmpeq_epi32(m_value, _mm_set_epi32(a, r, g, b));
}
@ -325,12 +331,12 @@ public:
m_value = _mm_cmpgt_epi32(m_value, value.m_value);
}
inline void cmpgt_imm(const UINT32 value)
inline void cmpgt_imm(const INT32 value)
{
m_value = _mm_cmpgt_epi32(m_value, _mm_set1_epi32(value));
}
inline void cmpgt_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void cmpgt_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
m_value = _mm_cmpgt_epi32(m_value, _mm_set_epi32(a, r, g, b));
}
@ -340,12 +346,12 @@ public:
m_value = _mm_cmplt_epi32(m_value, value.m_value);
}
inline void cmplt_imm(const UINT32 value)
inline void cmplt_imm(const INT32 value)
{
m_value = _mm_cmplt_epi32(m_value, _mm_set1_epi32(value));
}
inline void cmplt_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b)
inline void cmplt_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b)
{
m_value = _mm_cmplt_epi32(m_value, _mm_set_epi32(a, r, g, b));
}

View File

@ -170,14 +170,14 @@ void rgbaint_t::blend(const rgbaint_t& other, UINT8 factor)
// Multiplies this color by `scale`, shifts right by 8 and caps the result at 255.
// NOTE(review): this listing appears to be a commit diff rendered without +/- markers.
// `shr(8);` is presumably the removed line and `sra(8);` its replacement - confirm
// against the repository. shr/sra/min are defined outside this view.
// NOTE(review): unlike the __SSE2__-guarded implementation of this function earlier in
// this listing, no max(0) is applied here. Whether negative results can reach min(255)
// unclamped depends on how this file's min() treats its operands - verify.
void rgbaint_t::scale_and_clamp(const rgbaint_t& scale)
{
mul(scale);
shr(8);
sra(8);
min(255);
}
// Multiplies this color by the immediate `scale`, shifts right by 8 and caps the result at 255.
// NOTE(review): this listing appears to be a commit diff rendered without +/- markers.
// `shr(8);` is presumably the removed line and `sra(8);` its replacement - confirm
// against the repository. shr/sra/min are defined outside this view.
// NOTE(review): unlike the __SSE2__-guarded implementation of this function earlier in
// this listing, no max(0) is applied here. Whether negative results can reach min(255)
// unclamped depends on how this file's min() treats its operands - verify.
void rgbaint_t::scale_imm_and_clamp(const INT32 scale)
{
mul_imm(scale);
shr(8);
sra(8);
min(255);
}
@ -189,7 +189,7 @@ void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& oth
mul(scale);
add(color2);
shr(8);
sra(8);
min(255);
}
@ -197,7 +197,7 @@ void rgbaint_t::scale_imm_add_and_clamp(const INT32 scale, const rgbaint_t& othe
{
mul_imm(scale);
add(other);
shr(8);
sra(8);
min(255);
}
@ -205,7 +205,7 @@ void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& oth
{
mul(scale);
add(other);
shr(8);
sra(8);
min(255);
}