diff --git a/src/emu/video/rgbgen.c b/src/emu/video/rgbgen.c index 934219108f2..2586653ab54 100644 --- a/src/emu/video/rgbgen.c +++ b/src/emu/video/rgbgen.c @@ -8,7 +8,7 @@ ***************************************************************************/ -#if !defined(__ALTIVEC__) +#if !(defined(__ALTIVEC__) || ((!defined(MAME_DEBUG) || defined(__OPTIMIZE__)) && (defined(__SSE2__) || defined(_MSC_VER)) && defined(PTR64))) #include "emu.h" #include "rgbgen.h" @@ -31,6 +31,10 @@ void rgbaint_t::blend(const rgbaint_t& color2, UINT8 color1scale) m_r = (m_r * scale1 + color2.m_r * scale2) >> 8; m_g = (m_g * scale1 + color2.m_g * scale2) >> 8; m_b = (m_b * scale1 + color2.m_b * scale2) >> 8; + m_a |= (m_a & 0x00800000) ? 0xff000000 : 0; + m_r |= (m_r & 0x00800000) ? 0xff000000 : 0; + m_g |= (m_g & 0x00800000) ? 0xff000000 : 0; + m_b |= (m_b & 0x00800000) ? 0xff000000 : 0; } @@ -43,70 +47,90 @@ void rgbaint_t::blend(const rgbaint_t& color2, UINT8 color1scale) void rgbaint_t::scale_imm_and_clamp(INT32 scale) { m_a = (m_a * scale) >> 8; - if (m_a > 255) { m_a = (m_a < 0) ? 0 : 255; } m_r = (m_r * scale) >> 8; - if (m_r > 255) { m_r = (m_r < 0) ? 0 : 255; } m_g = (m_g * scale) >> 8; - if (m_g > 255) { m_g = (m_g < 0) ? 0 : 255; } m_b = (m_b * scale) >> 8; - if (m_b > 255) { m_b = (m_b < 0) ? 0 : 255; } + m_a |= (m_a & 0x00800000) ? 0xff000000 : 0; + m_r |= (m_r & 0x00800000) ? 0xff000000 : 0; + m_g |= (m_g & 0x00800000) ? 0xff000000 : 0; + m_b |= (m_b & 0x00800000) ? 0xff000000 : 0; + if ((UINT32)m_a > 255) { m_a = (m_a < 0) ? 0 : 255; } + if ((UINT32)m_r > 255) { m_r = (m_r < 0) ? 0 : 255; } + if ((UINT32)m_g > 255) { m_g = (m_g < 0) ? 0 : 255; } + if ((UINT32)m_b > 255) { m_b = (m_b < 0) ? 0 : 255; } } void rgbaint_t::scale_and_clamp(const rgbaint_t& scale) { m_a = (m_a * scale.m_a) >> 8; - if (m_a > 255) { m_a = (m_a < 0) ? 0 : 255; } m_r = (m_r * scale.m_r) >> 8; - if (m_r > 255) { m_r = (m_r < 0) ? 0 : 255; } m_g = (m_g * scale.m_g) >> 8; - if (m_g > 255) { m_g = (m_g < 0) ? 
0 : 255; } m_b = (m_b * scale.m_b) >> 8; - if (m_b > 255) { m_b = (m_b < 0) ? 0 : 255; } + m_a |= (m_a & 0x00800000) ? 0xff000000 : 0; + m_r |= (m_r & 0x00800000) ? 0xff000000 : 0; + m_g |= (m_g & 0x00800000) ? 0xff000000 : 0; + m_b |= (m_b & 0x00800000) ? 0xff000000 : 0; + if ((UINT32)m_a > 255) { m_a = (m_a < 0) ? 0 : 255; } + if ((UINT32)m_r > 255) { m_r = (m_r < 0) ? 0 : 255; } + if ((UINT32)m_g > 255) { m_g = (m_g < 0) ? 0 : 255; } + if ((UINT32)m_b > 255) { m_b = (m_b < 0) ? 0 : 255; } } void rgbaint_t::scale_imm_add_and_clamp(INT32 scale, const rgbaint_t& other) { m_a = (m_a * scale) >> 8; - m_a += other.m_a; - if (m_a > 255) { m_a = (m_a < 0) ? 0 : 255; } m_r = (m_r * scale) >> 8; - m_r += other.m_r; - if (m_r > 255) { m_r = (m_r < 0) ? 0 : 255; } m_g = (m_g * scale) >> 8; - m_g += other.m_g; - if (m_g > 255) { m_g = (m_g < 0) ? 0 : 255; } m_b = (m_b * scale) >> 8; + m_a |= (m_a & 0x00800000) ? 0xff000000 : 0; + m_r |= (m_r & 0x00800000) ? 0xff000000 : 0; + m_g |= (m_g & 0x00800000) ? 0xff000000 : 0; + m_b |= (m_b & 0x00800000) ? 0xff000000 : 0; + m_a += other.m_a; + m_r += other.m_r; + m_g += other.m_g; m_b += other.m_b; - if (m_b > 255) { m_b = (m_b < 0) ? 0 : 255; } + if ((UINT32)m_a > 255) { m_a = (m_a < 0) ? 0 : 255; } + if ((UINT32)m_r > 255) { m_r = (m_r < 0) ? 0 : 255; } + if ((UINT32)m_g > 255) { m_g = (m_g < 0) ? 0 : 255; } + if ((UINT32)m_b > 255) { m_b = (m_b < 0) ? 0 : 255; } } void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other) { m_a = (m_a * scale.m_a) >> 8; - m_a += other.m_a; - if (m_a > 255) { m_a = (m_a < 0) ? 0 : 255; } m_r = (m_r * scale.m_r) >> 8; - m_r += other.m_r; - if (m_r > 255) { m_r = (m_r < 0) ? 0 : 255; } m_g = (m_g * scale.m_g) >> 8; - m_g += other.m_g; - if (m_g > 255) { m_g = (m_g < 0) ? 0 : 255; } m_b = (m_b * scale.m_b) >> 8; + m_a |= (m_a & 0x00800000) ? 0xff000000 : 0; + m_r |= (m_r & 0x00800000) ? 0xff000000 : 0; + m_g |= (m_g & 0x00800000) ? 0xff000000 : 0; + m_b |= (m_b & 0x00800000) ? 
0xff000000 : 0; + m_a += other.m_a; + m_r += other.m_r; + m_g += other.m_g; m_b += other.m_b; - if (m_b > 255) { m_b = (m_b < 0) ? 0 : 255; } + if ((UINT32)m_a > 255) { m_a = (m_a < 0) ? 0 : 255; } + if ((UINT32)m_r > 255) { m_r = (m_r < 0) ? 0 : 255; } + if ((UINT32)m_g > 255) { m_g = (m_g < 0) ? 0 : 255; } + if ((UINT32)m_b > 255) { m_b = (m_b < 0) ? 0 : 255; } } void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2) { m_a = (m_a * scale.m_a + other.m_a * scale2.m_a) >> 8; - if ((UINT16)m_a > 255) { m_a = (m_a < 0) ? 0 : 255; } m_r = (m_r * scale.m_r + other.m_r * scale2.m_r) >> 8; - if ((UINT16)m_r > 255) { m_r = (m_r < 0) ? 0 : 255; } m_g = (m_g * scale.m_g + other.m_g * scale2.m_g) >> 8; - if ((UINT16)m_g > 255) { m_g = (m_g < 0) ? 0 : 255; } m_b = (m_b * scale.m_b + other.m_b * scale2.m_b) >> 8; - if ((UINT16)m_b > 255) { m_b = (m_b < 0) ? 0 : 255; } + m_a |= (m_a & 0x00800000) ? 0xff000000 : 0; + m_r |= (m_r & 0x00800000) ? 0xff000000 : 0; + m_g |= (m_g & 0x00800000) ? 0xff000000 : 0; + m_b |= (m_b & 0x00800000) ? 0xff000000 : 0; + if ((UINT32)m_a > 255) { m_a = (m_a < 0) ? 0 : 255; } + if ((UINT32)m_r > 255) { m_r = (m_r < 0) ? 0 : 255; } + if ((UINT32)m_g > 255) { m_g = (m_g < 0) ? 0 : 255; } + if ((UINT32)m_b > 255) { m_b = (m_b < 0) ? 
0 : 255; } } #endif // !defined(__ALTIVEC__) diff --git a/src/emu/video/rgbgen.h b/src/emu/video/rgbgen.h index 743896db92f..fe5372b243a 100644 --- a/src/emu/video/rgbgen.h +++ b/src/emu/video/rgbgen.h @@ -21,12 +21,12 @@ class rgbaint_t public: inline rgbaint_t() { } inline rgbaint_t(UINT32 rgba) { set(rgba); } - inline rgbaint_t(UINT32 a, UINT32 r, UINT32 g, UINT32 b) { set(a, r, g, b); } + inline rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); } inline rgbaint_t(rgb_t& rgba) { set(rgba); } inline void set(rgbaint_t& other) { set(other.m_a, other.m_r, other.m_g, other.m_b); } inline void set(UINT32 rgba) { set((rgba >> 24) & 0xff, (rgba >> 16) & 0xff, (rgba >> 8) & 0xff, rgba & 0xff); } - inline void set(UINT32 a, UINT32 r, UINT32 g, UINT32 b) + inline void set(INT32 a, INT32 r, INT32 g, INT32 b) { m_a = a; m_r = r; @@ -54,12 +54,12 @@ public: add_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); } - inline void add_imm(const UINT32 imm) + inline void add_imm(const INT32 imm) { add_imm_rgba(imm, imm, imm, imm); } - inline void add_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void add_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { m_a += a; m_r += r; @@ -72,12 +72,12 @@ public: sub_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); } - inline void sub_imm(const UINT32 imm) + inline void sub_imm(const INT32 imm) { sub_imm_rgba(imm, imm, imm, imm); } - inline void sub_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void sub_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { m_a -= a; m_r -= r; @@ -90,12 +90,12 @@ public: subr_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); } - inline void subr_imm(const UINT32 imm) + inline void subr_imm(const INT32 imm) { subr_imm_rgba(imm, imm, imm, imm); } - inline void subr_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void subr_imm_rgba(const INT32 
a, const INT32 r, const INT32 g, const INT32 b) { m_a = a - m_a; m_r = r - m_r; @@ -103,22 +103,22 @@ public: m_b = b - m_b; } - inline void set_a(const UINT32 value) + inline void set_a(const INT32 value) { m_r = value; } - inline void set_r(const UINT32 value) + inline void set_r(const INT32 value) { m_r = value; } - inline void set_g(const UINT32 value) + inline void set_g(const INT32 value) { m_g = value; } - inline void set_b(const UINT32 value) + inline void set_b(const INT32 value) { m_b = value; } @@ -143,22 +143,22 @@ public: return m_b; } - inline UINT32 get_a32() + inline INT32 get_a32() { return m_a; } - inline UINT32 get_r32() + inline INT32 get_r32() { return m_r; } - inline UINT32 get_g32() + inline INT32 get_g32() { return m_g; } - inline UINT32 get_b32() + inline INT32 get_b32() { return m_b; } @@ -168,12 +168,12 @@ public: mul_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); } - inline void mul_imm(const UINT32 imm) + inline void mul_imm(const INT32 imm) { mul_imm_rgba(imm, imm, imm, imm); } - inline void mul_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void mul_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { m_a *= a; m_r *= r; @@ -265,12 +265,12 @@ public: or_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); } - inline void or_imm(const UINT32 imm) + inline void or_imm(const INT32 imm) { or_imm_rgba(imm, imm, imm, imm); } - inline void or_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void or_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { m_a |= a; m_r |= r; @@ -283,12 +283,12 @@ public: and_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); } - inline void and_imm(const UINT32 imm) + inline void and_imm(const INT32 imm) { and_imm_rgba(imm, imm, imm, imm); } - inline void and_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void and_imm_rgba(const INT32 a, const INT32 r, const INT32 
g, const INT32 b) { m_a &= a; m_r &= r; @@ -301,12 +301,12 @@ public: xor_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); } - inline void xor_imm(const UINT32 imm) + inline void xor_imm(const INT32 imm) { xor_imm_rgba(imm, imm, imm, imm); } - inline void xor_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void xor_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { m_a ^= a; m_r ^= r; @@ -349,7 +349,7 @@ public: m_b |= sign; } - inline void min(const UINT32 value) + inline void min(const INT32 value) { m_a = (m_a > value) ? value : m_a; m_r = (m_r > value) ? value : m_r; @@ -373,7 +373,7 @@ public: m_b = (m_b == value.m_b) ? 0xffffffff : 0; } - inline void cmpeq_imm(const UINT32 value) + inline void cmpeq_imm(const INT32 value) { m_a = (m_a == value) ? 0xffffffff : 0; m_r = (m_r == value) ? 0xffffffff : 0; @@ -389,7 +389,7 @@ public: m_b = (m_b > value.m_b) ? 0xffffffff : 0; } - inline void cmpgt_imm(const UINT32 value) + inline void cmpgt_imm(const INT32 value) { m_a = (m_a > value) ? 0xffffffff : 0; m_r = (m_r > value) ? 0xffffffff : 0; @@ -405,7 +405,7 @@ public: m_b = (m_b < value.m_b) ? 0xffffffff : 0; } - inline void cmplt_imm(const UINT32 value) + inline void cmplt_imm(const INT32 value) { m_a = (m_a < value) ? 0xffffffff : 0; m_r = (m_r < value) ? 
0xffffffff : 0; @@ -447,10 +447,10 @@ public: } protected: - UINT32 m_a; - UINT32 m_r; - UINT32 m_g; - UINT32 m_b; + INT32 m_a; + INT32 m_r; + INT32 m_g; + INT32 m_b; }; #endif /* __RGBGEN__ */ diff --git a/src/emu/video/rgbsse.c b/src/emu/video/rgbsse.c index b4e3701a187..bdac2a583c6 100644 --- a/src/emu/video/rgbsse.c +++ b/src/emu/video/rgbsse.c @@ -10,7 +10,7 @@ ***************************************************************************/ -#if defined(__SSE2__) || defined(_MSC_VER) +#if (!defined(MAME_DEBUG) || defined(__OPTIMIZE__)) && (defined(__SSE2__) || defined(_MSC_VER)) && defined(PTR64) #include "emu.h" #include <emmintrin.h> @@ -30,17 +30,17 @@ void rgbaint_t::blend(const rgbaint_t& other, UINT8 factor) void rgbaint_t::scale_and_clamp(const rgbaint_t& scale) { mul(scale); - shr(8); - min(255); + sra(8); max(0); + min(255); } void rgbaint_t::scale_imm_and_clamp(const INT32 scale) { mul_imm(scale); - shr(8); - min(255); + sra(8); max(0); + min(255); } void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2) @@ -50,27 +50,27 @@ void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& oth mul(scale); add(color2); - shr(8); - min(255); + sra(8); max(0); + min(255); } void rgbaint_t::scale_imm_add_and_clamp(const INT32 scale, const rgbaint_t& other) { mul_imm(scale); add(other); - shr(8); - min(255); + sra(8); max(0); + min(255); } void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other) { mul(scale); + sra(8); add(other); - shr(8); - min(255); max(0); + min(255); } #endif // defined(__SSE2__) || defined(_MSC_VER) diff --git a/src/emu/video/rgbsse.h b/src/emu/video/rgbsse.h index 6482330d29f..a9f8cc79e52 100644 --- a/src/emu/video/rgbsse.h +++ b/src/emu/video/rgbsse.h @@ -39,12 +39,13 @@ class rgbaint_t public: inline rgbaint_t() { } inline rgbaint_t(UINT32 rgba) { set(rgba); } - inline rgbaint_t(UINT32 a, UINT32 r, UINT32 g, UINT32 b) { set(a, r, g, b); } + inline
rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); } inline rgbaint_t(rgb_t& rgb) { set(rgb); } + inline rgbaint_t(__m128i rgba) { m_value = rgba; } inline void set(rgbaint_t& other) { m_value = other.m_value; } inline void set(UINT32 rgba) { m_value = _mm_and_si128(_mm_set1_epi32(0xff), _mm_set_epi32(rgba >> 24, rgba >> 16, rgba >> 8, rgba)); } - inline void set(UINT32 a, UINT32 r, UINT32 g, UINT32 b) { m_value = _mm_set_epi32(a, r, g, b); } + inline void set(INT32 a, INT32 r, INT32 g, INT32 b) { m_value = _mm_set_epi32(a, r, g, b); } inline void set(rgb_t& rgb) { m_value = _mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(rgb), _mm_setzero_si128()), _mm_setzero_si128()); } inline rgb_t to_rgba() @@ -52,14 +53,9 @@ public: return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(m_value, _mm_setzero_si128()), _mm_setzero_si128())); } - inline UINT32 to_argb8() - { - return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(m_value, _mm_setzero_si128()), _mm_setzero_si128())); - } - inline rgb_t to_rgba_clamp() { - return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(m_value, m_value), _mm_setzero_si128())); + return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(m_value, _mm_setzero_si128()), _mm_setzero_si128())); } inline void add(const rgbaint_t& color2) @@ -67,12 +63,12 @@ public: m_value = _mm_add_epi32(m_value, color2.m_value); } - inline void add_imm(const UINT32 imm) + inline void add_imm(const INT32 imm) { m_value = _mm_add_epi32(m_value, _mm_set1_epi32(imm)); } - inline void add_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void add_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { m_value = _mm_add_epi32(m_value, _mm_set_epi32(a, r, g, b)); } @@ -82,12 +78,12 @@ public: m_value = _mm_sub_epi32(m_value, color2.m_value); } - inline void sub_imm(const UINT32 imm) + inline void sub_imm(const INT32 imm) { m_value = _mm_sub_epi32(m_value, _mm_set1_epi32(imm)); } - inline 
void sub_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void sub_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { m_value = _mm_sub_epi32(m_value, _mm_set_epi32(a, r, g, b)); } @@ -97,32 +93,32 @@ public: m_value = _mm_sub_epi32(color2.m_value, m_value); } - inline void subr_imm(const UINT32 imm) + inline void subr_imm(const INT32 imm) { m_value = _mm_sub_epi32(_mm_set1_epi32(imm), m_value); } - inline void subr_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void subr_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { m_value = _mm_sub_epi32(_mm_set_epi32(a, r, g, b), m_value); } - inline void set_a(const UINT32 value) + inline void set_a(const INT32 value) { m_value = _mm_or_si128(_mm_and_si128(m_value, *(__m128i *)&rgbsse_statics.alpha_mask), _mm_set_epi32(value, 0, 0, 0)); } - inline void set_r(const UINT32 value) + inline void set_r(const INT32 value) { m_value = _mm_or_si128(_mm_and_si128(m_value, *(__m128i *)&rgbsse_statics.red_mask), _mm_set_epi32(0, value, 0, 0)); } - inline void set_g(const UINT32 value) + inline void set_g(const INT32 value) { m_value = _mm_or_si128(_mm_and_si128(m_value, *(__m128i *)&rgbsse_statics.green_mask), _mm_set_epi32(0, 0, value, 0)); } - inline void set_b(const UINT32 value) + inline void set_b(const INT32 value) { m_value = _mm_or_si128(_mm_and_si128(m_value, *(__m128i *)&rgbsse_statics.blue_mask), _mm_set_epi32(0, 0, 0, value)); } @@ -147,22 +143,22 @@ public: return _mm_extract_epi16(m_value, 0); } - inline UINT32 get_a32() + inline INT32 get_a32() { return (_mm_extract_epi16(m_value, 7) << 16) | _mm_extract_epi16(m_value, 6); } - inline UINT32 get_r32() + inline INT32 get_r32() { return (_mm_extract_epi16(m_value, 5) << 16) | _mm_extract_epi16(m_value, 4); } - inline UINT32 get_g32() + inline INT32 get_g32() { return (_mm_extract_epi16(m_value, 3) << 16) | _mm_extract_epi16(m_value, 2); } - inline UINT32 get_b32() 
+ inline INT32 get_b32() { return (_mm_extract_epi16(m_value, 1) << 16) | _mm_extract_epi16(m_value, 0); } @@ -174,7 +170,7 @@ public: m_value = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0))); } - inline void mul_imm(const UINT32 imm) + inline void mul_imm(const INT32 imm) { __m128i immv = _mm_set1_epi32(imm); __m128i tmp1 = _mm_mul_epu32(m_value, immv); @@ -182,7 +178,7 @@ public: m_value = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0))); } - inline void mul_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void mul_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { __m128i immv = _mm_set_epi32(a, r, g, b); __m128i tmp1 = _mm_mul_epu32(m_value, immv); @@ -225,12 +221,12 @@ public: m_value = _mm_or_si128(m_value, color2.m_value); } - inline void or_imm(const UINT32 value) + inline void or_imm(const INT32 value) { m_value = _mm_or_si128(m_value, _mm_set1_epi32(value)); } - inline void or_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void or_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { m_value = _mm_or_si128(m_value, _mm_set_epi32(a, r, g, b)); } @@ -240,12 +236,12 @@ public: m_value = _mm_and_si128(m_value, color.m_value); } - inline void and_imm(const UINT32 value) + inline void and_imm(const INT32 value) { m_value = _mm_and_si128(m_value, _mm_set1_epi32(value)); } - inline void and_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void and_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { m_value = _mm_and_si128(m_value, _mm_set_epi32(a, r, g, b)); } @@ -260,7 +256,7 @@ public: m_value = _mm_xor_si128(m_value, _mm_set1_epi32(value)); } - inline void xor_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void xor_imm_rgba(const 
INT32 a, const INT32 r, const INT32 g, const INT32 b) { m_value = _mm_xor_si128(m_value, _mm_set_epi32(a, r, g, b)); } @@ -283,18 +279,28 @@ public: m_value = _mm_or_si128(m_value, compared); } - inline void min(const UINT32 value) + inline void min(const INT32 value) { __m128i val = _mm_set1_epi32(value); - __m128i mask = _mm_cmpgt_epi32(m_value, val); - m_value = _mm_or_si128(_mm_and_si128(val, mask), _mm_and_si128(m_value, _mm_xor_si128(mask, _mm_set1_epi32(0xffffffff)))); + __m128i is_greater_than = _mm_cmpgt_epi32(m_value, val); + + __m128i val_to_set = _mm_and_si128(val, is_greater_than); + __m128i keep_mask = _mm_xor_si128(is_greater_than, _mm_set1_epi32(0xffffffff)); + + m_value = _mm_and_si128(m_value, keep_mask); + m_value = _mm_or_si128(val_to_set, m_value); } - inline void max(const UINT32 value) + inline void max(const INT32 value) { __m128i val = _mm_set1_epi32(value); - __m128i mask = _mm_cmplt_epi32(m_value, val); - m_value = _mm_or_si128(_mm_and_si128(val, mask), _mm_and_si128(m_value, _mm_xor_si128(mask, _mm_set1_epi32(0xffffffff)))); + __m128i is_less_than = _mm_cmplt_epi32(m_value, val); + + __m128i val_to_set = _mm_and_si128(val, is_less_than); + __m128i keep_mask = _mm_xor_si128(is_less_than, _mm_set1_epi32(0xffffffff)); + + m_value = _mm_and_si128(m_value, keep_mask); + m_value = _mm_or_si128(val_to_set, m_value); } void blend(const rgbaint_t& other, UINT8 factor); @@ -310,12 +316,12 @@ public: m_value = _mm_cmpeq_epi32(m_value, value.m_value); } - inline void cmpeq_imm(const UINT32 value) + inline void cmpeq_imm(const INT32 value) { m_value = _mm_cmpeq_epi32(m_value, _mm_set1_epi32(value)); } - inline void cmpeq_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void cmpeq_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { m_value = _mm_cmpeq_epi32(m_value, _mm_set_epi32(a, r, g, b)); } @@ -325,12 +331,12 @@ public: m_value = _mm_cmpgt_epi32(m_value, value.m_value); } - inline void 
cmpgt_imm(const UINT32 value) + inline void cmpgt_imm(const INT32 value) { m_value = _mm_cmpgt_epi32(m_value, _mm_set1_epi32(value)); } - inline void cmpgt_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void cmpgt_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { m_value = _mm_cmpgt_epi32(m_value, _mm_set_epi32(a, r, g, b)); } @@ -340,12 +346,12 @@ public: m_value = _mm_cmplt_epi32(m_value, value.m_value); } - inline void cmplt_imm(const UINT32 value) + inline void cmplt_imm(const INT32 value) { m_value = _mm_cmplt_epi32(m_value, _mm_set1_epi32(value)); } - inline void cmplt_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + inline void cmplt_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { m_value = _mm_cmplt_epi32(m_value, _mm_set_epi32(a, r, g, b)); } diff --git a/src/emu/video/rgbvmx.c b/src/emu/video/rgbvmx.c index 146632aa2de..f8b6753321e 100644 --- a/src/emu/video/rgbvmx.c +++ b/src/emu/video/rgbvmx.c @@ -170,14 +170,14 @@ void rgbaint_t::blend(const rgbaint_t& other, UINT8 factor) void rgbaint_t::scale_and_clamp(const rgbaint_t& scale) { mul(scale); - shr(8); + sra(8); min(255); } void rgbaint_t::scale_imm_and_clamp(const INT32 scale) { mul_imm(scale); - shr(8); + sra(8); min(255); } @@ -189,7 +189,7 @@ void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& oth mul(scale); add(color2); - shr(8); + sra(8); min(255); } @@ -197,7 +197,7 @@ void rgbaint_t::scale_imm_add_and_clamp(const INT32 scale, const rgbaint_t& othe { mul_imm(scale); add(other); - shr(8); + sra(8); min(255); } @@ -205,7 +205,7 @@ void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& oth { mul(scale); add(other); - shr(8); + sra(8); min(255); }