mirror of
https://github.com/holub/mame
synced 2025-04-23 08:49:55 +03:00
rdp-sse (nw) Proper fix for bilinear_filter
This commit is contained in:
parent
0dcf906e4a
commit
1bef19e984
@ -109,26 +109,4 @@ void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& oth
|
||||
if ((UINT16)m_b > 255) { m_b = (m_b < 0) ? 0 : 255; }
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
bilinear_filter - bilinear filter between
|
||||
four pixel values; this code is derived from
|
||||
code provided by Michael Herf
|
||||
-------------------------------------------------*/
|
||||
|
||||
UINT32 rgbaint_t::bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
|
||||
{
|
||||
UINT32 ag0, ag1, rb0, rb1;
|
||||
|
||||
rb0 = (rgb00 & 0x00ff00ff) + ((((rgb01 & 0x00ff00ff) - (rgb00 & 0x00ff00ff)) * u) >> 8);
|
||||
rb1 = (rgb10 & 0x00ff00ff) + ((((rgb11 & 0x00ff00ff) - (rgb10 & 0x00ff00ff)) * u) >> 8);
|
||||
ag0 = (rgb00 & 0xff00ff00) + ((((rgb01 & 0xff00ff00) - (rgb00 & 0xff00ff00)) * u) >> 8);
|
||||
ag1 = (rgb10 & 0xff00ff00) + ((((rgb11 & 0xff00ff00) - (rgb10 & 0xff00ff00)) * u) >> 8);
|
||||
|
||||
rb0 = (rb0 & 0x00ff00ff) + ((((rb1 & 0x00ff00ff) - (rb0 & 0x00ff00ff)) * v) >> 8);
|
||||
ag0 = (ag0 & 0xff00ff00) + ((((ag1 & 0xff00ff00) - (ag0 & 0xff00ff00)) * v) >> 8);
|
||||
|
||||
return (ag0 & 0xff00ff00) | (rb0 & 0x00ff00ff);
|
||||
}
|
||||
|
||||
#endif // !defined(__ALTIVEC__)
|
||||
|
@ -20,6 +20,7 @@ class rgbaint_t
|
||||
{
|
||||
public:
|
||||
inline rgbaint_t() { }
|
||||
inline rgbaint_t(UINT32 rgba) { set(rgba); }
|
||||
inline rgbaint_t(UINT32 a, UINT32 r, UINT32 g, UINT32 b) { set(a, r, g, b); }
|
||||
inline rgbaint_t(rgb_t& rgba) { set(rgba); }
|
||||
|
||||
@ -454,7 +455,24 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v);
|
||||
/* Bilinear blend of four packed 32-bit pixels. rgb00/rgb01 and rgb10/rgb11
   are each blended by the 8-bit weight u, then the two results are blended
   by the 8-bit weight v. Two 8-bit channels are processed per 32-bit word,
   using the free byte above each channel as headroom for the multiply. */
static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
{
	const UINT32 pair_mask = 0x00ff00ff;

	/* channels in bits 0-7 and 16-23, taken in place */
	const UINT32 lo00 = rgb00 & pair_mask;
	const UINT32 lo01 = rgb01 & pair_mask;
	const UINT32 lo10 = rgb10 & pair_mask;
	const UINT32 lo11 = rgb11 & pair_mask;

	/* channels in bits 8-15 and 24-31, shifted down so they get the same headroom */
	const UINT32 hi00 = (rgb00 >> 8) & pair_mask;
	const UINT32 hi01 = (rgb01 >> 8) & pair_mask;
	const UINT32 hi10 = (rgb10 >> 8) & pair_mask;
	const UINT32 hi11 = (rgb11 >> 8) & pair_mask;

	/* first pass: blend each pixel pair by u */
	const UINT32 lo_row0 = lo00 + (((lo01 - lo00) * u) >> 8);
	const UINT32 lo_row1 = lo10 + (((lo11 - lo10) * u) >> 8);
	const UINT32 hi_row0 = hi00 + (((hi01 - hi00) * u) >> 8);
	const UINT32 hi_row1 = hi10 + (((hi11 - hi10) * u) >> 8);

	/* second pass: blend the two intermediate results by v; the masks drop
	   whatever the first pass left in the headroom bytes */
	const UINT32 lo_a = lo_row0 & pair_mask;
	const UINT32 lo_b = lo_row1 & pair_mask;
	const UINT32 hi_a = hi_row0 & pair_mask;
	const UINT32 hi_b = hi_row1 & pair_mask;
	const UINT32 lo_out = lo_a + (((lo_b - lo_a) * v) >> 8);
	const UINT32 hi_out = hi_a + (((hi_b - hi_a) * v) >> 8);

	/* put the shifted pair back in bits 8-15 / 24-31 and merge */
	return ((hi_out << 8) & 0xff00ff00) | (lo_out & pair_mask);
}
|
||||
|
||||
protected:
|
||||
UINT32 m_a;
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
#include "emu.h"
|
||||
#include <emmintrin.h>
|
||||
#include "rgbutil.h"
|
||||
#include "rgbsse.h"
|
||||
|
||||
/***************************************************************************
|
||||
HIGHER LEVEL OPERATIONS
|
||||
@ -32,6 +32,7 @@ void rgbaint_t::scale_and_clamp(const rgbaint_t& scale)
|
||||
mul(scale);
|
||||
shr(8);
|
||||
min(255);
|
||||
max(0);
|
||||
}
|
||||
|
||||
void rgbaint_t::scale_imm_and_clamp(const INT32 scale)
|
||||
@ -39,11 +40,11 @@ void rgbaint_t::scale_imm_and_clamp(const INT32 scale)
|
||||
mul_imm(scale);
|
||||
shr(8);
|
||||
min(255);
|
||||
max(0);
|
||||
}
|
||||
|
||||
void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2)
|
||||
{
|
||||
mul(scale);
|
||||
rgbaint_t color2(other);
|
||||
color2.mul(scale2);
|
||||
|
||||
@ -51,6 +52,7 @@ void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& oth
|
||||
add(color2);
|
||||
shr(8);
|
||||
min(255);
|
||||
max(0);
|
||||
}
|
||||
|
||||
void rgbaint_t::scale_imm_add_and_clamp(const INT32 scale, const rgbaint_t& other)
|
||||
@ -59,6 +61,7 @@ void rgbaint_t::scale_imm_add_and_clamp(const INT32 scale, const rgbaint_t& othe
|
||||
add(other);
|
||||
shr(8);
|
||||
min(255);
|
||||
max(0);
|
||||
}
|
||||
|
||||
void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other)
|
||||
@ -67,30 +70,7 @@ void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& oth
|
||||
add(other);
|
||||
shr(8);
|
||||
min(255);
|
||||
}
|
||||
|
||||
UINT32 rgbaint_t::bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
|
||||
{
|
||||
__m128i color00 = _mm_cvtsi32_si128(rgb00);
|
||||
__m128i color01 = _mm_cvtsi32_si128(rgb01);
|
||||
__m128i color10 = _mm_cvtsi32_si128(rgb10);
|
||||
__m128i color11 = _mm_cvtsi32_si128(rgb11);
|
||||
|
||||
/* interleave color01 and color00 at the byte level */
|
||||
color01 = _mm_unpacklo_epi8(color01, color00);
|
||||
color11 = _mm_unpacklo_epi8(color11, color10);
|
||||
color01 = _mm_unpacklo_epi8(color01, _mm_setzero_si128());
|
||||
color11 = _mm_unpacklo_epi8(color11, _mm_setzero_si128());
|
||||
color01 = _mm_madd_epi16(color01, *(__m128i *)&rgbsse_statics.scale_table[u][0]);
|
||||
color11 = _mm_madd_epi16(color11, *(__m128i *)&rgbsse_statics.scale_table[u][0]);
|
||||
color01 = _mm_slli_epi32(color01, 15);
|
||||
color11 = _mm_srli_epi32(color11, 1);
|
||||
color01 = _mm_max_epi16(color01, color11);
|
||||
color01 = _mm_madd_epi16(color01, *(__m128i *)&rgbsse_statics.scale_table[v][0]);
|
||||
color01 = _mm_srli_epi32(color01, 15);
|
||||
color01 = _mm_packs_epi32(color01, color01);
|
||||
color01 = _mm_packus_epi16(color01, color01);
|
||||
return _mm_cvtsi128_si32(color01);
|
||||
max(0);
|
||||
}
|
||||
|
||||
#endif // defined(__SSE2__) || defined(_MSC_VER)
|
||||
|
@ -49,8 +49,12 @@ public:
|
||||
|
||||
inline rgb_t to_rgba()
|
||||
{
|
||||
__m128i anded = _mm_and_si128(m_value, _mm_set1_epi32(0x000000ff));
|
||||
return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(anded, anded), _mm_setzero_si128()));
|
||||
return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(m_value, _mm_setzero_si128()), _mm_setzero_si128()));
|
||||
}
|
||||
|
||||
/* Narrow the four 32-bit lanes of m_value to four bytes and return them as
   one 32-bit word. No masking is done beforehand, so out-of-range lanes are
   saturated by the two pack steps. */
inline UINT32 to_argb8()
{
	const __m128i zero = _mm_setzero_si128();
	const __m128i as_words = _mm_packs_epi32(m_value, zero);   /* 32 -> 16 bits, signed saturation */
	const __m128i as_bytes = _mm_packus_epi16(as_words, zero); /* 16 -> 8 bits, unsigned saturation */
	return _mm_cvtsi128_si32(as_bytes);
}
|
||||
|
||||
inline rgb_t to_rgba_clamp()
|
||||
@ -296,6 +300,13 @@ public:
|
||||
m_value = _mm_or_si128(_mm_and_si128(val, mask), _mm_and_si128(m_value, _mm_xor_si128(mask, _mm_set1_epi32(0xffffffff))));
|
||||
}
|
||||
|
||||
inline void max(const UINT32 value)
|
||||
{
|
||||
__m128i val = _mm_set1_epi32(value);
|
||||
__m128i mask = _mm_cmplt_epi32(m_value, val);
|
||||
m_value = _mm_or_si128(_mm_and_si128(val, mask), _mm_and_si128(m_value, _mm_xor_si128(mask, _mm_set1_epi32(0xffffffff))));
|
||||
}
|
||||
|
||||
void blend(const rgbaint_t& other, UINT8 factor);
|
||||
|
||||
void scale_and_clamp(const rgbaint_t& scale);
|
||||
@ -398,7 +409,29 @@ public:
|
||||
m_value = _mm_insert_epi16(m_value, _mm_extract_epi16(alpha.m_value, 6), 6);
|
||||
}
|
||||
|
||||
static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v);
|
||||
/* SSE2 bilinear blend of four packed 32-bit pixels. The u weight is applied
   to the rgb00/rgb01 and rgb10/rgb11 pairs, the v weight to the two results.
   Weights come from rgbsse_statics.scale_table, whose layout is defined
   elsewhere (presumably pairs of 16-bit factors per entry -- confirm there). */
static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
{
	const __m128i zero = _mm_setzero_si128();

	/* interleave each pixel pair at the byte level, so matching channels sit side by side */
	__m128i pair0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(rgb01), _mm_cvtsi32_si128(rgb00));
	__m128i pair1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(rgb11), _mm_cvtsi32_si128(rgb10));

	/* widen the interleaved bytes to 16-bit lanes */
	pair0 = _mm_unpacklo_epi8(pair0, zero);
	pair1 = _mm_unpacklo_epi8(pair1, zero);

	/* multiply adjacent 16-bit lanes by the u table entry and sum each pair into 32 bits */
	pair0 = _mm_madd_epi16(pair0, *(__m128i *)&rgbsse_statics.scale_table[u][0]);
	pair1 = _mm_madd_epi16(pair1, *(__m128i *)&rgbsse_statics.scale_table[u][0]);

	/* reposition the two results within each 32-bit lane and merge them with
	   a 16-bit max (presumably one lands in each half of the lane) */
	pair0 = _mm_slli_epi32(pair0, 15);
	pair1 = _mm_srli_epi32(pair1, 1);
	__m128i blended = _mm_max_epi16(pair0, pair1);

	/* second multiply-add, this time with the v table entry */
	blended = _mm_madd_epi16(blended, *(__m128i *)&rgbsse_statics.scale_table[v][0]);
	blended = _mm_srli_epi32(blended, 15);

	/* narrow 32 -> 16 -> 8 bits with saturation and return the low 32-bit word */
	blended = _mm_packs_epi32(blended, zero);
	blended = _mm_packus_epi16(blended, zero);
	return _mm_cvtsi128_si32(blended);
}
|
||||
|
||||
protected:
|
||||
__m128i m_value;
|
||||
|
@ -209,33 +209,4 @@ void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& oth
|
||||
min(255);
|
||||
}
|
||||
|
||||
/* AltiVec bilinear blend of four packed 32-bit pixels. The u weight is
   applied to the rgb00/rgb01 and rgb10/rgb11 pairs, the v weight to the two
   results. Weights come from scale_table, which is defined elsewhere. */
UINT32 rgbaint_t::bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
{
	const VECS32 zero = vec_splat_s32(0);

	/* load each scalar pixel into a vector; vec_lvsl/vec_perm realign the
	   element that vec_lde fetched */
	VECS32 pix00 = vec_perm((VECS32)vec_lde(0, &rgb00), zero, vec_lvsl(0, &rgb00));
	VECS32 pix01 = vec_perm((VECS32)vec_lde(0, &rgb01), zero, vec_lvsl(0, &rgb01));
	VECS32 pix10 = vec_perm((VECS32)vec_lde(0, &rgb10), zero, vec_lvsl(0, &rgb10));
	VECS32 pix11 = vec_perm((VECS32)vec_lde(0, &rgb11), zero, vec_lvsl(0, &rgb11));

	/* interleave each pixel pair at the byte level */
	pix01 = vec_mergeh((VECU8)pix01, (VECU8)pix00);
	pix11 = vec_mergeh((VECU8)pix11, (VECU8)pix10);

	/* interleave with zero bytes to widen to 16-bit elements */
	pix01 = vec_mergeh((VECU8)zero, (VECU8)pix01);
	pix11 = vec_mergeh((VECU8)zero, (VECU8)pix11);

	/* multiply-sum each pair against the u table entry */
	pix01 = vec_msum((VECS16)pix01, scale_table[u], zero);
	pix11 = vec_msum((VECS16)pix11, scale_table[u], zero);

	/* reposition the two results within each 32-bit element and merge them with a 16-bit max */
	pix01 = vec_sl(pix01, vec_splat_u32(15));
	pix11 = vec_sr(pix11, vec_splat_u32(1));
	pix01 = vec_max((VECS16)pix01, (VECS16)pix11);

	/* second multiply-sum, this time with the v table entry */
	pix01 = vec_msum((VECS16)pix01, scale_table[v], zero);
	pix01 = vec_sr(pix01, vec_splat_u32(15));

	/* narrow 32 -> 16 -> 8 bits with saturation */
	pix01 = vec_packs(pix01, pix01);
	pix01 = vec_packsu((VECS16)pix01, (VECS16)pix01);

	/* store one 32-bit element back to a scalar */
	UINT32 packed;
	vec_ste((VECU32)pix01, 0, &packed);
	return packed;
}
|
||||
|
||||
#endif // defined(__ALTIVEC__)
|
||||
|
@ -457,7 +457,34 @@ public:
|
||||
m_value = vec_perm(m_value, alpha.m_value, alpha_perm);
|
||||
}
|
||||
|
||||
static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v);
|
||||
/* AltiVec bilinear blend of four packed 32-bit pixels: rgb00/rgb01 and
   rgb10/rgb11 are weighted by u, the two results by v. The weight vectors
   are read from scale_table, which is defined elsewhere. */
static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
{
	const VECS32 zero = vec_splat_s32(0);

	/* bring each scalar pixel into a vector register; vec_lde loads the
	   element and vec_lvsl/vec_perm realign it */
	VECS32 texel00 = vec_perm((VECS32)vec_lde(0, &rgb00), zero, vec_lvsl(0, &rgb00));
	VECS32 texel01 = vec_perm((VECS32)vec_lde(0, &rgb01), zero, vec_lvsl(0, &rgb01));
	VECS32 texel10 = vec_perm((VECS32)vec_lde(0, &rgb10), zero, vec_lvsl(0, &rgb10));
	VECS32 texel11 = vec_perm((VECS32)vec_lde(0, &rgb11), zero, vec_lvsl(0, &rgb11));

	/* byte-interleave each pixel pair so matching channels are adjacent */
	texel01 = vec_mergeh((VECU8)texel01, (VECU8)texel00);
	texel11 = vec_mergeh((VECU8)texel11, (VECU8)texel10);

	/* widen to 16-bit elements by interleaving with zero bytes */
	texel01 = vec_mergeh((VECU8)zero, (VECU8)texel01);
	texel11 = vec_mergeh((VECU8)zero, (VECU8)texel11);

	/* apply the u weights with a multiply-sum */
	texel01 = vec_msum((VECS16)texel01, scale_table[u], zero);
	texel11 = vec_msum((VECS16)texel11, scale_table[u], zero);

	/* shift the two results to different positions in each 32-bit element, then merge with a 16-bit max */
	texel01 = vec_sl(texel01, vec_splat_u32(15));
	texel11 = vec_sr(texel11, vec_splat_u32(1));
	texel01 = vec_max((VECS16)texel01, (VECS16)texel11);

	/* apply the v weights and scale the sum back down */
	texel01 = vec_msum((VECS16)texel01, scale_table[v], zero);
	texel01 = vec_sr(texel01, vec_splat_u32(15));

	/* saturating narrow: 32 -> 16 bits, then 16 -> 8 bits unsigned */
	texel01 = vec_packs(texel01, texel01);
	texel01 = vec_packsu((VECS16)texel01, (VECS16)texel01);

	/* write one 32-bit element out as the scalar result */
	UINT32 filtered;
	vec_ste((VECU32)texel01, 0, &filtered);
	return filtered;
}
|
||||
|
||||
protected:
|
||||
typedef vector unsigned char VECU8;
|
||||
|
@ -24,7 +24,7 @@ enum
|
||||
};
|
||||
|
||||
// Use old macro style or newer SSE2 optimized functions
|
||||
#define USE_OLD_RASTER 1
|
||||
#define USE_OLD_RASTER 0
|
||||
|
||||
/* maximum number of TMUs */
|
||||
#define MAX_TMU 2
|
||||
@ -2219,7 +2219,7 @@ INLINE UINT32 clampARGB(INT32 iterr, INT32 iterg, INT32 iterb, INT32 itera, UINT
|
||||
temp.set(colorint);
|
||||
temp.cmpeq_imm(0x100);
|
||||
// result.rgb.r = 0xff;
|
||||
|
||||
colorint.or_reg(temp);
|
||||
return colorint.to_rgba();
|
||||
}
|
||||
else
|
||||
|
Loading…
Reference in New Issue
Block a user