From b5be1c3695661257426acd60180efef252acc83e Mon Sep 17 00:00:00 2001 From: "therealmogminer@gmail.com" Date: Mon, 15 Jun 2015 21:35:14 +0200 Subject: [PATCH] Put in ICE-causing code for others to test --- src/emu/video/rgbsse.c | 218 ++++++++++++++++++----- src/emu/video/rgbsse.h | 85 +++++---- src/emu/video/rgbutil.h | 31 ---- src/emu/video/vooddefs.h | 10 +- src/mame/video/n64.c | 3 + src/mame/video/n64.h | 300 +------------------------------ src/mame/video/n64types.h | 327 ++++++++++++++++++++++++++++++++++ src/mame/video/rdpblend.h | 8 +- src/mame/video/rdpspn16.c | 154 ++++++++++------ src/mame/video/rdptpipe.c | 365 ++++++++++++++++---------------------- src/mame/video/rdptpipe.h | 70 ++++---- 11 files changed, 841 insertions(+), 730 deletions(-) create mode 100644 src/mame/video/n64types.h diff --git a/src/emu/video/rgbsse.c b/src/emu/video/rgbsse.c index 828af8eea75..596ccb42d05 100644 --- a/src/emu/video/rgbsse.c +++ b/src/emu/video/rgbsse.c @@ -24,7 +24,7 @@ rgbint_t::rgbint_t(UINT32 rgb) set_rgb(rgb); } -rgbint_t::rgbint_t(INT16 r, INT16 g, INT16 b) +rgbint_t::rgbint_t(INT32 r, INT32 g, INT32 b) { set_rgb(r, g, b); } @@ -34,6 +34,11 @@ rgbint_t::rgbint_t(rgb_t& rgb) m_value = _mm_unpacklo_epi8(_mm_cvtsi32_si128(rgb), _mm_setzero_si128()); } +rgbint_t::rgbint_t(__m128i value) +{ + m_value = value; +} + rgbaint_t::rgbaint_t() { set_rgba(0, 0, 0, 0); @@ -41,10 +46,10 @@ rgbaint_t::rgbaint_t() rgbaint_t::rgbaint_t(UINT32 argb) { - m_value = _mm_set_epi16(0, 0, 0, 0, (argb >> 24) & 0xff, (argb >> 16) & 0xff, (argb >> 8) & 0xff, argb & 0xff); + m_value = _mm_set_epi32((argb >> 24) & 0xff, (argb >> 16) & 0xff, (argb >> 8) & 0xff, argb & 0xff); } -rgbaint_t::rgbaint_t(INT16 a, INT16 r, INT16 g, INT16 b) +rgbaint_t::rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set_rgba(a, r, g, b); } @@ -54,29 +59,95 @@ rgbaint_t::rgbaint_t(rgb_t& rgba) m_value = _mm_unpacklo_epi8(_mm_cvtsi32_si128(rgba), _mm_setzero_si128()); } +void rgbint_t::set(void* value) +{ + m_value 
= *(__m128i*)value; +} + void rgbint_t::set(__m128i value) { m_value = value; } +__m128i rgbint_t::get() +{ + return m_value; +} + void rgbint_t::set_rgb(UINT32 rgb) { - m_value = _mm_set_epi16(0, 0, 0, 0, 0, (rgb >> 16) & 0xff, (rgb >> 8) & 0xff, rgb & 0xff); + m_value = _mm_set_epi32(0, rgb & 0xff, (rgb >> 8) & 0xff, (rgb >> 16) & 0xff); } void rgbint_t::set_rgb(rgb_t& rgb) { - m_value = _mm_unpacklo_epi8(_mm_cvtsi32_si128(rgb), _mm_setzero_si128()); + m_value = _mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(rgb), _mm_setzero_si128()), _mm_setzero_si128()); } -void rgbint_t::set_rgb(INT16 r, INT16 g, INT16 b) +void rgbint_t::set_rgb(INT32 r, INT32 g, INT32 b) { - m_value = _mm_set_epi16(0, 0, 0, 0, 0, r, g, b); + m_value = _mm_set_epi32(0, r, g, b); } -void rgbaint_t::set_rgba(INT16 a, INT16 r, INT16 g, INT16 b) +void rgbaint_t::set_rgba(INT32 a, INT32 r, INT32 g, INT32 b) { - m_value = _mm_set_epi16(0, 0, 0, 0, a, r, g, b); + m_value = _mm_set_epi32(a, r, g, b); +} + +/*************************************************************************** + OPERATORS +***************************************************************************/ + +rgbint_t rgbint_t::operator=(const rgbint_t& other) +{ + m_value = other.m_value; + return *this; +} + +rgbint_t& rgbint_t::operator+=(const rgbint_t& other) +{ + m_value = _mm_add_epi32(m_value, other.m_value); + return *this; +} + +rgbint_t& rgbint_t::operator-=(const rgbint_t& other) +{ + m_value = _mm_sub_epi32(m_value, other.m_value); + return *this; +} + +rgbint_t& rgbint_t::operator*=(const rgbint_t& other) +{ + m_value = _mm_unpacklo_epi32(_mm_shuffle_epi32(_mm_mul_epu32(m_value, other.m_value), _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(_mm_mul_epu32(_mm_srli_si128(m_value, 4), _mm_srli_si128(other.m_value, 4)), _MM_SHUFFLE(0, 0, 2, 0))); + return *this; +} + +rgbint_t& rgbint_t::operator*=(const INT32 other) +{ + const __m128i immv = _mm_set1_epi32(other); + m_value = 
_mm_unpacklo_epi32(_mm_shuffle_epi32(_mm_mul_epu32(m_value, immv), _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(_mm_mul_epu32(_mm_srli_si128(m_value, 4), _mm_srli_si128(immv, 4)), _MM_SHUFFLE(0, 0, 2, 0))); + return *this; +} + +rgbint_t rgbint_t::operator+(const rgbint_t& other) +{ + return _mm_add_epi32(m_value, other.m_value); +} + +rgbint_t rgbint_t::operator-(const rgbint_t& other) +{ + return _mm_sub_epi32(m_value, other.m_value); +} + +rgbint_t rgbint_t::operator*(const rgbint_t& other) +{ + return _mm_unpacklo_epi32(_mm_shuffle_epi32(_mm_mul_epu32(m_value, other.m_value), _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(_mm_mul_epu32(_mm_srli_si128(m_value, 4), _mm_srli_si128(other.m_value, 4)), _MM_SHUFFLE(0, 0, 2, 0))); +} + +rgbint_t rgbint_t::operator*(const INT32 other) +{ + const __m128i immv = _mm_set1_epi32(other); + return _mm_unpacklo_epi32(_mm_shuffle_epi32(_mm_mul_epu32(m_value, immv), _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(_mm_mul_epu32(_mm_srli_si128(m_value, 4), _mm_srli_si128(immv, 4)), _MM_SHUFFLE(0, 0, 2, 0))); } /*************************************************************************** @@ -85,12 +156,13 @@ void rgbaint_t::set_rgba(INT16 a, INT16 r, INT16 g, INT16 b) rgb_t rgbint_t::to_rgb() { - return _mm_cvtsi128_si32(_mm_packus_epi16(m_value, m_value)); + __m128i anded = _mm_and_si128(m_value, _mm_set_epi32(0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff)); + return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(anded, anded), _mm_setzero_si128())); } rgb_t rgbint_t::to_rgb_clamp() { - return _mm_cvtsi128_si32(_mm_packus_epi16(m_value, m_value)); + return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(m_value, m_value), _mm_setzero_si128())); } rgb_t rgbint_t::to_rgba() @@ -109,83 +181,137 @@ rgb_t rgbint_t::to_rgba_clamp() void rgbint_t::add(const rgbint_t& color2) { - m_value = _mm_add_epi16(m_value, color2.m_value); + m_value = _mm_add_epi32(m_value, color2.m_value); } -void rgbint_t::add_imm(const INT16 imm) +void 
rgbint_t::add_imm(const INT32 imm) { - __m128i temp = _mm_set_epi16(0, 0, 0, 0, imm, imm, imm, imm); - m_value = _mm_add_epi16(m_value, temp); + __m128i temp = _mm_set_epi32(imm, imm, imm, imm); + m_value = _mm_add_epi32(m_value, temp); } -void rgbint_t::add_imm_rgb(const INT16 r, const INT16 g, const INT16 b) +void rgbint_t::add_imm_rgb(const INT32 r, const INT32 g, const INT32 b) { - __m128i temp = _mm_set_epi16(0, 0, 0, 0, 0, r, g, b); - m_value = _mm_add_epi16(m_value, temp); + __m128i temp = _mm_set_epi32(0, r, g, b); + m_value = _mm_add_epi32(m_value, temp); } -void rgbaint_t::add_imm_rgba(const INT16 a, const INT16 r, const INT16 g, const INT16 b) +void rgbaint_t::add_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { - __m128i temp = _mm_set_epi16(0, 0, 0, 0, a, r, g, b); - m_value = _mm_add_epi16(m_value, temp); + __m128i temp = _mm_set_epi32(a, r, g, b); + m_value = _mm_add_epi32(m_value, temp); } void rgbint_t::sub(const rgbint_t& color2) { - m_value = _mm_sub_epi16(m_value, color2.m_value); + m_value = _mm_sub_epi32(m_value, color2.m_value); } -void rgbint_t::sub_imm(const INT16 imm) +void rgbint_t::sub_imm(const INT32 imm) { - __m128i temp = _mm_set_epi16(0, 0, 0, 0, imm, imm, imm, imm); - m_value = _mm_sub_epi16(m_value, temp); + __m128i temp = _mm_set_epi32(imm, imm, imm, imm); + m_value = _mm_sub_epi32(m_value, temp); } -void rgbint_t::sub_imm_rgb(const INT16 r, const INT16 g, const INT16 b) +void rgbint_t::sub_imm_rgb(const INT32 r, const INT32 g, const INT32 b) { - __m128i temp = _mm_set_epi16(0, 0, 0, 0, 0, r, g, b); - m_value = _mm_sub_epi16(m_value, temp); + __m128i temp = _mm_set_epi32(0, r, g, b); + m_value = _mm_sub_epi32(m_value, temp); } -void rgbaint_t::sub_imm_rgba(const INT16 a, const INT16 r, const INT16 g, const INT16 b) +void rgbaint_t::sub_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { - __m128i temp = _mm_set_epi16(0, 0, 0, 0, a, r, g, b); - m_value = _mm_sub_epi16(m_value, temp); + 
__m128i temp = _mm_set_epi32(a, r, g, b); + m_value = _mm_sub_epi32(m_value, temp); } void rgbint_t::subr(rgbint_t& color2) { - color2.m_value = _mm_sub_epi16(color2.m_value, m_value); + color2.m_value = _mm_sub_epi32(color2.m_value, m_value); } -void rgbint_t::subr_imm(const INT16 imm) +void rgbint_t::subr_imm(const INT32 imm) { - __m128i temp = _mm_set_epi16(0, 0, 0, 0, imm, imm, imm, imm); - m_value = _mm_sub_epi16(temp, m_value); + __m128i temp = _mm_set_epi32(imm, imm, imm, imm); + m_value = _mm_sub_epi32(temp, m_value); } -void rgbint_t::subr_imm_rgb(const INT16 r, const INT16 g, const INT16 b) +void rgbint_t::subr_imm_rgb(const INT32 r, const INT32 g, const INT32 b) { - __m128i temp = _mm_set_epi16(0, 0, 0, 0, 0, r, g, b); - m_value = _mm_sub_epi16(temp, m_value); + __m128i temp = _mm_set_epi32(0, r, g, b); + m_value = _mm_sub_epi32(temp, m_value); } -void rgbaint_t::subr_imm_rgba(const INT16 a, const INT16 r, const INT16 g, const INT16 b) +void rgbaint_t::subr_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) { - __m128i temp = _mm_set_epi16(0, 0, 0, 0, a, r, g, b); - m_value = _mm_sub_epi16(temp, m_value); + __m128i temp = _mm_set_epi32(a, r, g, b); + m_value = _mm_sub_epi32(temp, m_value); +} + +void rgbint_t::mul(rgbint_t& color) +{ + __m128i tmp1 = _mm_mul_epu32(m_value, color.m_value); + __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(m_value, 4), _mm_srli_si128(color.m_value, 4)); + m_value = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0))); +} + +void rgbint_t::print() +{ + printf("%04x ", _mm_extract_epi16(m_value, 0)); + printf("%04x ", _mm_extract_epi16(m_value, 1)); + printf("%04x ", _mm_extract_epi16(m_value, 2)); + printf("%04x ", _mm_extract_epi16(m_value, 3)); + printf("%04x ", _mm_extract_epi16(m_value, 4)); + printf("%04x ", _mm_extract_epi16(m_value, 5)); + printf("%04x ", _mm_extract_epi16(m_value, 6)); + printf("%04x\n", _mm_extract_epi16(m_value, 7)); 
+} + +void rgbint_t::mul_imm(const INT32 imm) +{ + __m128i immv = _mm_set1_epi32(imm); + __m128i tmp1 = _mm_mul_epu32(m_value, immv); + __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(m_value, 4), _mm_srli_si128(immv, 4)); + m_value = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0))); +} + +void rgbint_t::mul_imm_rgb(const INT32 r, const INT32 g, const INT32 b) +{ + __m128i immv = _mm_set_epi32(0, r, g, b); + __m128i tmp1 = _mm_mul_epu32(m_value, immv); + __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(m_value, 4), _mm_srli_si128(immv, 4)); + m_value = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0))); +} + +void rgbaint_t::mul_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b) +{ + __m128i immv = _mm_set_epi32(a, r, g, b); + __m128i tmp1 = _mm_mul_epu32(m_value, immv); + __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(m_value, 4), _mm_srli_si128(immv, 4)); + m_value = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0))); } void rgbint_t::shl(UINT8 shift) { - m_value = _mm_slli_epi16(m_value, shift); + m_value = _mm_slli_epi32(m_value, shift); } void rgbint_t::shr(UINT8 shift) { - m_value = _mm_srli_epi16(m_value, shift); + m_value = _mm_srli_epi32(m_value, shift); } +void rgbint_t::sra(UINT8 shift) +{ + m_value = _mm_srai_epi32(m_value, shift); +} + +void rgbint_t::sign_extend(const INT32 compare, const INT32 sign) +{ + __m128i compare_vec = _mm_set1_epi32(compare); + __m128i compare_mask = _mm_cmpeq_epi32(_mm_and_si128(m_value, compare_vec), compare_vec); + m_value = _mm_or_si128(m_value, _mm_and_si128(_mm_set1_epi32(sign), compare_mask)); +} /*************************************************************************** HIGHER LEVEL OPERATIONS @@ -196,7 +322,6 @@ void rgbint_t::blend(const rgbint_t& other, UINT8 factor) m_value = _mm_unpacklo_epi16(m_value, 
other.m_value); m_value = _mm_madd_epi16(m_value, *(__m128i *)&rgbsse_statics.scale_table[factor][0]); m_value = _mm_srli_epi32(m_value, 8); - m_value = _mm_packs_epi32(m_value, m_value); } void rgbint_t::scale_and_clamp(const rgbint_t& scale) @@ -205,7 +330,6 @@ void rgbint_t::scale_and_clamp(const rgbint_t& scale) m_value = _mm_unpacklo_epi16(m_value, _mm_setzero_si128()); m_value = _mm_madd_epi16(m_value, mscale); m_value = _mm_srli_epi32(m_value, 8); - m_value = _mm_packs_epi32(m_value, m_value); m_value = _mm_min_epi16(m_value, *(__m128i *)&rgbsse_statics.maxbyte); } @@ -215,7 +339,6 @@ void rgbint_t::scale_imm_and_clamp(const INT16 scale) m_value = _mm_unpacklo_epi16(m_value, _mm_setzero_si128()); m_value = _mm_madd_epi16(m_value, mscale); m_value = _mm_srli_epi32(m_value, 8); - m_value = _mm_packs_epi32(m_value, m_value); m_value = _mm_min_epi16(m_value, *(__m128i *)&rgbsse_statics.maxbyte); } @@ -225,7 +348,6 @@ void rgbint_t::scale_add_and_clamp(const rgbint_t& scale, const rgbint_t& other, m_value = _mm_unpacklo_epi16(m_value, other.m_value); m_value = _mm_madd_epi16(m_value, mscale); m_value = _mm_srli_epi32(m_value, 8); - m_value = _mm_packs_epi32(m_value, m_value); m_value = _mm_min_epi16(m_value, *(__m128i *)&rgbsse_statics.maxbyte); } @@ -236,7 +358,6 @@ void rgbint_t::scale_imm_add_and_clamp(const INT16 scale, const rgbint_t& other) m_value = _mm_unpacklo_epi16(m_value, other.m_value); m_value = _mm_madd_epi16(m_value, mscale); m_value = _mm_srli_epi32(m_value, 8); - m_value = _mm_packs_epi32(m_value, m_value); m_value = _mm_min_epi16(m_value, *(__m128i *)&rgbsse_statics.maxbyte); } @@ -247,7 +368,6 @@ void rgbint_t::scale_add_and_clamp(const rgbint_t& scale, const rgbint_t& other) m_value = _mm_unpacklo_epi16(m_value, other.m_value); m_value = _mm_madd_epi16(m_value, mscale); m_value = _mm_srli_epi32(m_value, 8); - m_value = _mm_packs_epi32(m_value, m_value); m_value = _mm_min_epi16(m_value, *(__m128i *)&rgbsse_statics.maxbyte); } diff --git 
a/src/emu/video/rgbsse.h b/src/emu/video/rgbsse.h index ebe10f0485c..5627d1d7fbb 100644 --- a/src/emu/video/rgbsse.h +++ b/src/emu/video/rgbsse.h @@ -19,54 +19,76 @@ TYPE DEFINITIONS ***************************************************************************/ -class rgbint_t : public rgbint_base_t +class rgbint_t { public: rgbint_t(); rgbint_t(UINT32 rgb); - rgbint_t(INT16 r, INT16 g, INT16 b); + rgbint_t(INT32 r, INT32 g, INT32 b); rgbint_t(rgb_t& rgb); - virtual void* get_ptr() { return &m_value; } + void set(void* value); + __m128i get(); + void set(__m128i value); + void set_rgb(UINT32 rgb); + void set_rgb(INT32 r, INT32 g, INT32 b); + void set_rgb(rgb_t& rgb); - virtual void set(__m128i value); - virtual void set_rgb(UINT32 rgb); - virtual void set_rgb(INT16 r, INT16 g, INT16 b); - virtual void set_rgb(rgb_t& rgb); + rgb_t to_rgb(); + rgb_t to_rgb_clamp(); - virtual rgb_t to_rgb(); - virtual rgb_t to_rgb_clamp(); - - virtual rgb_t to_rgba(); - virtual rgb_t to_rgba_clamp(); + rgb_t to_rgba(); + rgb_t to_rgba_clamp(); void add(const rgbint_t& color2); - virtual void add_imm(const INT16 imm); - virtual void add_imm_rgb(const INT16 imm_r, const INT16 imm_g, const INT16 imm_b); + void add_imm(const INT32 imm); + void add_imm_rgb(const INT32 r, const INT32 g, const INT32 b); - virtual void sub(const rgbint_t& color2); - virtual void sub_imm(const INT16 imm); - virtual void sub_imm_rgb(const INT16 imm_r, const INT16 imm_g, const INT16 imm_b); + void sub(const rgbint_t& color2); + void sub_imm(const INT32 imm); + void sub_imm_rgb(const INT32 r, const INT32 g, const INT32 b); void subr(rgbint_t& color); - virtual void subr_imm(const INT16 imm); - virtual void subr_imm_rgb(const INT16 imm_r, const INT16 imm_g, const INT16 imm_b); + void subr_imm(const INT32 imm); + void subr_imm_rgb(const INT32 r, const INT32 g, const INT32 b); + + void mul(rgbint_t& color); + void mul_imm(const INT32 imm); + void mul_imm_rgb(const INT32 r, const INT32 g, const INT32 b); void shl(const 
UINT8 shift); void shr(const UINT8 shift); + void sra(const UINT8 shift); - virtual void blend(const rgbint_t& other, UINT8 factor); + void blend(const rgbint_t& other, UINT8 factor); - virtual void scale_and_clamp(const rgbint_t& scale); - virtual void scale_imm_and_clamp(const INT16 scale); - virtual void scale_add_and_clamp(const rgbint_t& scale, const rgbint_t& other, const rgbint_t& scale2); - virtual void scale_add_and_clamp(const rgbint_t& scale, const rgbint_t& other); - virtual void scale_imm_add_and_clamp(const INT16 scale, const rgbint_t& other); + void scale_and_clamp(const rgbint_t& scale); + void scale_imm_and_clamp(const INT16 scale); + void scale_add_and_clamp(const rgbint_t& scale, const rgbint_t& other, const rgbint_t& scale2); + void scale_add_and_clamp(const rgbint_t& scale, const rgbint_t& other); + void scale_imm_add_and_clamp(const INT16 scale, const rgbint_t& other); + + void sign_extend(const INT32 compare, const INT32 sign); + + void print(); + + rgbint_t operator=(const rgbint_t& other); + rgbint_t& operator+=(const rgbint_t& other); + rgbint_t& operator-=(const rgbint_t& other); + rgbint_t& operator*=(const rgbint_t& other); + rgbint_t& operator*=(const INT32 other); + rgbint_t operator+(const rgbint_t& other); + rgbint_t operator-(const rgbint_t& other); + rgbint_t operator*(const rgbint_t& other); + rgbint_t operator*(const INT32 other); static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v); protected: - __m128i m_value; + volatile __m128i m_value; + +private: + rgbint_t(__m128i value); }; class rgbaint_t : public rgbint_t @@ -74,14 +96,15 @@ class rgbaint_t : public rgbint_t public: rgbaint_t(); rgbaint_t(UINT32 rgba); - rgbaint_t(INT16 a, INT16 r, INT16 g, INT16 b); + rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b); rgbaint_t(rgb_t& rgb); - virtual void set_rgba(INT16 a, INT16 r, INT16 g, INT16 b); + void set_rgba(INT32 a, INT32 r, INT32 g, INT32 b); - virtual void add_imm_rgba(const 
INT16 imm_a, const INT16 imm_r, const INT16 imm_g, const INT16 imm_b); - virtual void sub_imm_rgba(const INT16 imm_a, const INT16 imm_r, const INT16 imm_g, const INT16 imm_b); - virtual void subr_imm_rgba(const INT16 imm_a, const INT16 imm_r, const INT16 imm_g, const INT16 imm_b); + void add_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b); + void sub_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b); + void subr_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b); + void mul_imm_rgba(const INT32 a, const INT32 r, const INT32 g, const INT32 b); }; /*************************************************************************** diff --git a/src/emu/video/rgbutil.h b/src/emu/video/rgbutil.h index c40e3217512..15def4da2c0 100644 --- a/src/emu/video/rgbutil.h +++ b/src/emu/video/rgbutil.h @@ -12,37 +12,6 @@ #ifndef __RGBUTIL__ #define __RGBUTIL__ -class rgbint_base_t -{ -public: - rgbint_base_t() { } - rgbint_base_t(UINT32 rgb) { } - rgbint_base_t(INT16 r, INT16 g, INT16 b) { } - rgbint_base_t(rgb_t& rgb) { } - - virtual void set_rgb(UINT32 rgb) = 0; - virtual void set_rgb(INT16 r, INT16 g, INT16 b) = 0; - virtual void set_rgb(rgb_t& rgb) = 0; - - virtual rgb_t to_rgb() = 0; - virtual rgb_t to_rgb_clamp() = 0; - - virtual rgb_t to_rgba() = 0; - virtual rgb_t to_rgba_clamp() = 0; - - virtual void add_imm(const INT16 imm) = 0; - virtual void add_imm_rgb(const INT16 imm_r, const INT16 imm_g, const INT16 imm_b) = 0; - - virtual void sub_imm(const INT16 imm) = 0; - virtual void sub_imm_rgb(const INT16 imm_r, const INT16 imm_g, const INT16 imm_b) = 0; - - virtual void subr_imm(const INT16 imm) = 0; - virtual void subr_imm_rgb(const INT16 imm_r, const INT16 imm_g, const INT16 imm_b) = 0; - - virtual void shl(const UINT8 shift) = 0; - virtual void shr(const UINT8 shift) = 0; -}; - /* use SSE on 64-bit implementations, where it can be assumed */ #if (!defined(MAME_DEBUG) || defined(__OPTIMIZE__)) && (defined(__SSE2__) || 
defined(_MSC_VER)) && defined(PTR64) #include "rgbsse.h" diff --git a/src/emu/video/vooddefs.h b/src/emu/video/vooddefs.h index 16121999a83..369eabc4572 100644 --- a/src/emu/video/vooddefs.h +++ b/src/emu/video/vooddefs.h @@ -2208,16 +2208,16 @@ INLINE UINT32 clampARGB(INT32 iterr, INT32 iterg, INT32 iterb, INT32 itera, UINT { //r &= 0xfff; __m128i temp = _mm_set1_epi16(0xfff); - colorint.set(_mm_and_si128(*(__m128i *)colorint.get_ptr(), *(__m128i *)&temp)); + colorint.set(_mm_and_si128(colorint.get(), temp)); //if (r == 0xfff) - temp = _mm_cmpeq_epi16(*(__m128i *)colorint.get_ptr(), *(__m128i *)&temp); + temp = _mm_cmpeq_epi16(colorint.get(), temp); // result.rgb.r = 0; - colorint.set(_mm_andnot_si128(*(__m128i *)&temp, *(__m128i *)colorint.get_ptr())); + colorint.set(_mm_andnot_si128(temp, colorint.get())); //else if (r == 0x100) temp = _mm_set1_epi16(0x100); - temp = _mm_cmpeq_epi16(*(__m128i *)colorint.get_ptr(), *(__m128i *)&temp); + temp = _mm_cmpeq_epi16(colorint.get(), temp); // result.rgb.r = 0xff; - colorint.set(_mm_or_si128(*(__m128i *)colorint.get_ptr(), *(__m128i *)&temp)); + colorint.set(_mm_or_si128(colorint.get(), temp)); result = colorint.to_rgba(); } diff --git a/src/mame/video/n64.c b/src/mame/video/n64.c index 032c172fcde..7dec4fef608 100644 --- a/src/mame/video/n64.c +++ b/src/mame/video/n64.c @@ -26,6 +26,9 @@ TODO: #include "emu.h" #include "video/n64.h" +#include "video/rdpblend.h" +#include "video/rdptpipe.h" + #include #define LOG_RDP_EXECUTION 0 diff --git a/src/mame/video/n64.h b/src/mame/video/n64.h index b562db0c350..88e62cca792 100644 --- a/src/mame/video/n64.h +++ b/src/mame/video/n64.h @@ -6,8 +6,6 @@ #include "emu.h" #include "includes/n64.h" #include "video/poly.h" -#include "video/rdpblend.h" -#include "video/rdptpipe.h" /*****************************************************************************/ @@ -119,8 +117,6 @@ #define SPAN_W (6) #define SPAN_Z (7) -#define RDP_CVG_SPAN_MAX (1024) - #define EXTENT_AUX_COUNT 
(sizeof(rdp_span_aux)*(480*192)) // Screen coverage *192, more or less /*****************************************************************************/ @@ -128,299 +124,9 @@ class n64_periphs; class n64_rdp; -struct misc_state_t; -struct other_modes_t; -struct combine_mdoes_t; -struct color_inputs_t; -struct span_base_t; -struct rectangle_t; - -class color_t -{ - public: - color_t() - { - c = 0; - } - - union - { - UINT32 c; -#ifdef LSB_FIRST - struct { UINT8 a, b, g, r; } i; -#else - struct { UINT8 r, g, b, a; } i; -#endif - }; -}; - -enum -{ - BIT_DEPTH_32 = 0, - BIT_DEPTH_16, - - BIT_DEPTH_COUNT -}; - -class SpanParam -{ - public: - union - { - UINT32 w; -#ifdef LSB_FIRST - struct { UINT16 l; INT16 h; } h; -#else - struct { INT16 h; UINT16 l; } h; -#endif - }; -}; - -struct n64_tile_t -{ - INT32 format; // Image data format: RGBA, YUV, CI, IA, I - INT32 size; // Size of texel element: 4b, 8b, 16b, 32b - INT32 line; // Size of tile line in bytes - INT32 tmem; // Starting tmem address for this tile in bytes - INT32 palette; // Palette number for 4b CI texels - INT32 ct, mt, cs, ms; // Clamp / mirror enable bits for S / T direction - INT32 mask_t, shift_t, mask_s, shift_s; // Mask values / LOD shifts - INT32 lshift_s, rshift_s, lshift_t, rshift_t; - INT32 wrapped_mask_s, wrapped_mask_t; - bool clamp_s, clamp_t; - UINT16 sl, tl, sh, th; // 10.2 fixed-point, starting and ending texel row / column - INT32 num; -}; - -struct span_base_t -{ - INT32 m_span_dr; - INT32 m_span_dg; - INT32 m_span_db; - INT32 m_span_da; - INT32 m_span_ds; - INT32 m_span_dt; - INT32 m_span_dw; - INT32 m_span_dz; - INT32 m_span_dymax; - INT32 m_span_dzpix; - INT32 m_span_drdy; - INT32 m_span_dgdy; - INT32 m_span_dbdy; - INT32 m_span_dady; - INT32 m_span_dzdy; -}; - -struct misc_state_t -{ - misc_state_t() - { - m_max_level = 0; - m_min_level = 0; - } - - INT32 m_fb_format; // Framebuffer pixel format index (0 - I, 1 - IA, 2 - CI, 3 - RGBA) - INT32 m_fb_size; // Framebuffer pixel size index (0 - 
4bpp, 1 - 8bpp, 2 - 16bpp, 3 - 32bpp) - INT32 m_fb_width; // Framebuffer width, in pixels - INT32 m_fb_height; // Framebuffer height, in pixels - UINT32 m_fb_address; // Framebuffer source address offset (in bytes) from start of RDRAM - - UINT32 m_zb_address; // Z-buffer source address offset (in bytes) from start of RDRAM - - INT32 m_ti_format; // Format for Texture Interface (TI) transfers - INT32 m_ti_size; // Size (in bytes) of TI transfers - INT32 m_ti_width; // Width (in pixels) of TI transfers - UINT32 m_ti_address; // Destination address for TI transfers - - UINT8 m_random_seed; // %HACK%, adds 19 each time it's read and is more or less random - - UINT32 m_max_level; // Maximum LOD level for texture filtering - UINT32 m_min_level; // Minimum LOD level for texture filtering - - UINT16 m_primitive_z; // Forced Z value for current primitive, if applicable - UINT16 m_primitive_dz; // Forced Delta-Z value for current primitive, if applicable -}; - -struct combine_modes_t -{ - INT32 sub_a_rgb0; - INT32 sub_b_rgb0; - INT32 mul_rgb0; - INT32 add_rgb0; - INT32 sub_a_a0; - INT32 sub_b_a0; - INT32 mul_a0; - INT32 add_a0; - - INT32 sub_a_rgb1; - INT32 sub_b_rgb1; - INT32 mul_rgb1; - INT32 add_rgb1; - INT32 sub_a_a1; - INT32 sub_b_a1; - INT32 mul_a1; - INT32 add_a1; -}; - -struct other_modes_t -{ - INT32 cycle_type; - bool persp_tex_en; - bool detail_tex_en; - bool sharpen_tex_en; - bool tex_lod_en; - bool en_tlut; - bool tlut_type; - bool sample_type; - bool mid_texel; - bool bi_lerp0; - bool bi_lerp1; - bool convert_one; - bool key_en; - INT32 rgb_dither_sel; - INT32 alpha_dither_sel; - INT32 blend_m1a_0; - INT32 blend_m1a_1; - INT32 blend_m1b_0; - INT32 blend_m1b_1; - INT32 blend_m2a_0; - INT32 blend_m2a_1; - INT32 blend_m2b_0; - INT32 blend_m2b_1; - INT32 tex_edge; - INT32 force_blend; - INT32 blend_shift; - bool alpha_cvg_select; - bool cvg_times_alpha; - INT32 z_mode; - INT32 cvg_dest; - bool color_on_cvg; - UINT8 image_read_en; - bool z_update_en; - bool 
z_compare_en; - bool antialias_en; - bool z_source_sel; - INT32 dither_alpha_en; - INT32 alpha_compare_en; - INT32 alpha_dither_mode; -}; - -struct color_inputs_t -{ - // combiner inputs - UINT8* combiner_rgbsub_a_r[2]; - UINT8* combiner_rgbsub_a_g[2]; - UINT8* combiner_rgbsub_a_b[2]; - UINT8* combiner_rgbsub_b_r[2]; - UINT8* combiner_rgbsub_b_g[2]; - UINT8* combiner_rgbsub_b_b[2]; - UINT8* combiner_rgbmul_r[2]; - UINT8* combiner_rgbmul_g[2]; - UINT8* combiner_rgbmul_b[2]; - UINT8* combiner_rgbadd_r[2]; - UINT8* combiner_rgbadd_g[2]; - UINT8* combiner_rgbadd_b[2]; - - UINT8* combiner_alphasub_a[2]; - UINT8* combiner_alphasub_b[2]; - UINT8* combiner_alphamul[2]; - UINT8* combiner_alphaadd[2]; - - // blender input - color_t* blender1a_rgb[2]; - UINT8* blender1b_a[2]; - color_t* blender2a_rgb[2]; - UINT8* blender2b_a[2]; -}; - -// This is enormous and horrible -struct rdp_span_aux -{ - UINT32 m_unscissored_rx; - UINT16 m_cvg[RDP_CVG_SPAN_MAX]; - color_t m_memory_color; - color_t m_pixel_color; - color_t m_inv_pixel_color; - color_t m_blended_pixel_color; - - color_t m_combined_color; - color_t m_combined_alpha; - color_t m_texel0_color; - color_t m_texel0_alpha; - color_t m_texel1_color; - color_t m_texel1_alpha; - color_t m_next_texel_color; - color_t m_next_texel_alpha; - color_t m_blend_color; /* constant blend color */ - color_t m_blend_alpha; /* constant blend alpha */ - color_t m_prim_color; /* flat primitive color */ - color_t m_prim_alpha; /* flat primitive alpha */ - color_t m_env_color; /* generic color constant ('environment') */ - color_t m_env_alpha; /* generic color constant ('environment') */ - color_t m_fog_color; /* generic color constant ('fog') */ - color_t m_fog_alpha; /* generic color constant ('fog') */ - color_t m_shade_color; /* gouraud-shaded color */ - color_t m_shade_alpha; /* gouraud-shaded color */ - color_t m_key_scale; /* color-keying constant */ - color_t m_noise_color; /* noise */ - color_t m_lod_fraction; /* Z-based LOD fraction for 
this poly */ - color_t m_prim_lod_fraction; /* fixed LOD fraction for this poly */ - color_t m_k4; - color_t m_k5; - color_inputs_t m_color_inputs; - UINT32 m_current_pix_cvg; - UINT32 m_current_mem_cvg; - UINT32 m_current_cvg_bit; - INT32 m_shift_a; - INT32 m_shift_b; - INT32 m_precomp_s; - INT32 m_precomp_t; - INT32 m_blend_enable; - bool m_pre_wrap; - INT32 m_dzpix_enc; - UINT8* m_tmem; /* pointer to texture cache for this polygon */ - bool m_start_span; - INT32 m_clamp_s_diff[8]; - INT32 m_clamp_t_diff[8]; -}; - -struct rectangle_t -{ - UINT16 m_xl; // 10.2 fixed-point - UINT16 m_yl; // 10.2 fixed-point - UINT16 m_xh; // 10.2 fixed-point - UINT16 m_yh; // 10.2 fixed-point -}; - -struct z_decompress_entry_t -{ - UINT32 shift; - UINT32 add; -}; - -struct cv_mask_derivative_t -{ - UINT8 cvg; - UINT8 cvbit; - UINT8 xoff; - UINT8 yoff; -}; - -struct rdp_poly_state -{ - n64_rdp* m_rdp; /* pointer back to the RDP state */ - - misc_state_t m_misc_state; /* miscellaneous rasterizer bits */ - other_modes_t m_other_modes; /* miscellaneous rasterizer bits (2) */ - span_base_t m_span_base; /* span initial values for triangle rasterization */ - rectangle_t m_scissor; /* screen-space scissor bounds */ - UINT32 m_fill_color; /* poly fill color */ - n64_tile_t m_tiles[8]; /* texture tile state */ - UINT8 m_tmem[0x1000]; /* texture cache */ - INT32 tilenum; /* texture tile index */ - bool flip; /* left-major / right-major flip */ - bool rect; /* primitive is rectangle (vs. 
triangle) */ -}; +#include "video/n64types.h" +#include "video/rdpblend.h" +#include "video/rdptpipe.h" typedef void (*rdp_command_t)(UINT32 w1, UINT32 w2); diff --git a/src/mame/video/n64types.h b/src/mame/video/n64types.h new file mode 100644 index 00000000000..2fc066eb0e6 --- /dev/null +++ b/src/mame/video/n64types.h @@ -0,0 +1,327 @@ +// license:BSD-3-Clause +// copyright-holders:Ryan Holtz + +#ifndef _VIDEO_N64TYPES_H_ +#define _VIDEO_N64TYPES_H_ + +struct misc_state_t +{ + misc_state_t() + { + m_max_level = 0; + m_min_level = 0; + } + + INT32 m_fb_format; // Framebuffer pixel format index (0 - I, 1 - IA, 2 - CI, 3 - RGBA) + INT32 m_fb_size; // Framebuffer pixel size index (0 - 4bpp, 1 - 8bpp, 2 - 16bpp, 3 - 32bpp) + INT32 m_fb_width; // Framebuffer width, in pixels + INT32 m_fb_height; // Framebuffer height, in pixels + UINT32 m_fb_address; // Framebuffer source address offset (in bytes) from start of RDRAM + + UINT32 m_zb_address; // Z-buffer source address offset (in bytes) from start of RDRAM + + INT32 m_ti_format; // Format for Texture Interface (TI) transfers + INT32 m_ti_size; // Size (in bytes) of TI transfers + INT32 m_ti_width; // Width (in pixels) of TI transfers + UINT32 m_ti_address; // Destination address for TI transfers + + UINT8 m_random_seed; // %HACK%, adds 19 each time it's read and is more or less random + + UINT32 m_max_level; // Maximum LOD level for texture filtering + UINT32 m_min_level; // Minimum LOD level for texture filtering + + UINT16 m_primitive_z; // Forced Z value for current primitive, if applicable + UINT16 m_primitive_dz; // Forced Delta-Z value for current primitive, if applicable +}; + +class color_t +{ + public: + color_t() + { + c = 0; + } + + color_t(UINT32 color) + { + set(color); + } + + color_t(UINT8 a, UINT8 r, UINT8 g, UINT8 b) + { + set(a, r, g, b); + } + + void set(UINT32 color) + { + i.a = (color >> 24) & 0xff; + i.r = (color >> 16) & 0xff; + i.g = (color >> 8) & 0xff; + i.b = color & 0xff; + } + + void 
set(UINT8 a, UINT8 r, UINT8 g, UINT8 b) + { + i.a = a; + i.r = r; + i.g = g; + i.b = b; + } + + UINT32 get() + { + return i.a << 24 | i.r << 16 | i.g << 8 | i.b; + } + + union + { + UINT32 c; +#ifdef LSB_FIRST + struct { UINT8 a, b, g, r; } i; +#else + struct { UINT8 r, g, b, a; } i; +#endif + }; +}; + +enum +{ + BIT_DEPTH_32 = 0, + BIT_DEPTH_16, + + BIT_DEPTH_COUNT +}; + +struct n64_tile_t +{ + INT32 format; // Image data format: RGBA, YUV, CI, IA, I + INT32 size; // Size of texel element: 4b, 8b, 16b, 32b + INT32 line; // Size of tile line in bytes + INT32 tmem; // Starting tmem address for this tile in bytes + INT32 palette; // Palette number for 4b CI texels + INT32 ct, mt, cs, ms; // Clamp / mirror enable bits for S / T direction + INT32 mask_t, shift_t, mask_s, shift_s; // Mask values / LOD shifts + INT32 lshift_s, rshift_s, lshift_t, rshift_t; + INT32 wrapped_mask_s, wrapped_mask_t; + bool clamp_s, clamp_t; + UINT16 sl, tl, sh, th; // 10.2 fixed-point, starting and ending texel row / column + INT32 num; +}; + +struct span_base_t +{ + INT32 m_span_dr; + INT32 m_span_dg; + INT32 m_span_db; + INT32 m_span_da; + INT32 m_span_ds; + INT32 m_span_dt; + INT32 m_span_dw; + INT32 m_span_dz; + INT32 m_span_dymax; + INT32 m_span_dzpix; + INT32 m_span_drdy; + INT32 m_span_dgdy; + INT32 m_span_dbdy; + INT32 m_span_dady; + INT32 m_span_dzdy; +}; + +struct combine_modes_t +{ + INT32 sub_a_rgb0; + INT32 sub_b_rgb0; + INT32 mul_rgb0; + INT32 add_rgb0; + INT32 sub_a_a0; + INT32 sub_b_a0; + INT32 mul_a0; + INT32 add_a0; + + INT32 sub_a_rgb1; + INT32 sub_b_rgb1; + INT32 mul_rgb1; + INT32 add_rgb1; + INT32 sub_a_a1; + INT32 sub_b_a1; + INT32 mul_a1; + INT32 add_a1; +}; + +struct color_inputs_t +{ + // combiner inputs + UINT8* combiner_rgbsub_a_r[2]; + UINT8* combiner_rgbsub_a_g[2]; + UINT8* combiner_rgbsub_a_b[2]; + UINT8* combiner_rgbsub_b_r[2]; + UINT8* combiner_rgbsub_b_g[2]; + UINT8* combiner_rgbsub_b_b[2]; + UINT8* combiner_rgbmul_r[2]; + UINT8* combiner_rgbmul_g[2]; + 
UINT8* combiner_rgbmul_b[2]; + UINT8* combiner_rgbadd_r[2]; + UINT8* combiner_rgbadd_g[2]; + UINT8* combiner_rgbadd_b[2]; + + UINT8* combiner_alphasub_a[2]; + UINT8* combiner_alphasub_b[2]; + UINT8* combiner_alphamul[2]; + UINT8* combiner_alphaadd[2]; + + // blender input + color_t* blender1a_rgb[2]; + UINT8* blender1b_a[2]; + color_t* blender2a_rgb[2]; + UINT8* blender2b_a[2]; +}; + +struct other_modes_t +{ + INT32 cycle_type; + bool persp_tex_en; + bool detail_tex_en; + bool sharpen_tex_en; + bool tex_lod_en; + bool en_tlut; + bool tlut_type; + bool sample_type; + bool mid_texel; + bool bi_lerp0; + bool bi_lerp1; + bool convert_one; + bool key_en; + INT32 rgb_dither_sel; + INT32 alpha_dither_sel; + INT32 blend_m1a_0; + INT32 blend_m1a_1; + INT32 blend_m1b_0; + INT32 blend_m1b_1; + INT32 blend_m2a_0; + INT32 blend_m2a_1; + INT32 blend_m2b_0; + INT32 blend_m2b_1; + INT32 tex_edge; + INT32 force_blend; + INT32 blend_shift; + bool alpha_cvg_select; + bool cvg_times_alpha; + INT32 z_mode; + INT32 cvg_dest; + bool color_on_cvg; + UINT8 image_read_en; + bool z_update_en; + bool z_compare_en; + bool antialias_en; + bool z_source_sel; + INT32 dither_alpha_en; + INT32 alpha_compare_en; + INT32 alpha_dither_mode; +}; + +struct rectangle_t +{ + UINT16 m_xl; // 10.2 fixed-point + UINT16 m_yl; // 10.2 fixed-point + UINT16 m_xh; // 10.2 fixed-point + UINT16 m_yh; // 10.2 fixed-point +}; + +struct rdp_poly_state +{ + n64_rdp* m_rdp; /* pointer back to the RDP state */ + + misc_state_t m_misc_state; /* miscellaneous rasterizer bits */ + other_modes_t m_other_modes; /* miscellaneous rasterizer bits (2) */ + span_base_t m_span_base; /* span initial values for triangle rasterization */ + rectangle_t m_scissor; /* screen-space scissor bounds */ + UINT32 m_fill_color; /* poly fill color */ + n64_tile_t m_tiles[8]; /* texture tile state */ + UINT8 m_tmem[0x1000]; /* texture cache */ + INT32 tilenum; /* texture tile index */ + bool flip; /* left-major / right-major flip */ + bool rect; 
/* primitive is rectangle (vs. triangle) */ +}; + +#define RDP_CVG_SPAN_MAX (1024) + +// This is enormous and horrible +struct rdp_span_aux +{ + UINT32 m_unscissored_rx; + UINT16 m_cvg[RDP_CVG_SPAN_MAX]; + color_t m_memory_color; + color_t m_pixel_color; + color_t m_inv_pixel_color; + color_t m_blended_pixel_color; + + color_t m_combined_color; + color_t m_combined_alpha; + color_t m_texel0_color; + color_t m_texel0_alpha; + color_t m_texel1_color; + color_t m_texel1_alpha; + color_t m_next_texel_color; + color_t m_next_texel_alpha; + color_t m_blend_color; /* constant blend color */ + color_t m_blend_alpha; /* constant blend alpha */ + color_t m_prim_color; /* flat primitive color */ + color_t m_prim_alpha; /* flat primitive alpha */ + color_t m_env_color; /* generic color constant ('environment') */ + color_t m_env_alpha; /* generic color constant ('environment') */ + color_t m_fog_color; /* generic color constant ('fog') */ + color_t m_fog_alpha; /* generic color constant ('fog') */ + color_t m_shade_color; /* gouraud-shaded color */ + color_t m_shade_alpha; /* gouraud-shaded color */ + color_t m_key_scale; /* color-keying constant */ + color_t m_noise_color; /* noise */ + color_t m_lod_fraction; /* Z-based LOD fraction for this poly */ + color_t m_prim_lod_fraction; /* fixed LOD fraction for this poly */ + color_t m_k4; + color_t m_k5; + color_inputs_t m_color_inputs; + UINT32 m_current_pix_cvg; + UINT32 m_current_mem_cvg; + UINT32 m_current_cvg_bit; + INT32 m_shift_a; + INT32 m_shift_b; + INT32 m_precomp_s; + INT32 m_precomp_t; + INT32 m_blend_enable; + bool m_pre_wrap; + INT32 m_dzpix_enc; + UINT8* m_tmem; /* pointer to texture cache for this polygon */ + bool m_start_span; + INT32 m_clamp_s_diff[8]; + INT32 m_clamp_t_diff[8]; +}; + +struct z_decompress_entry_t +{ + UINT32 shift; + UINT32 add; +}; + +struct cv_mask_derivative_t +{ + UINT8 cvg; + UINT8 cvbit; + UINT8 xoff; + UINT8 yoff; +}; + +class span_param_t +{ + public: + union + { + UINT32 w; +#ifdef 
LSB_FIRST + struct { UINT16 l; INT16 h; } h; +#else + struct { INT16 h; UINT16 l; } h; +#endif + }; +}; + +#endif // _VIDEO_N64TYPES_H_ \ No newline at end of file diff --git a/src/mame/video/rdpblend.h b/src/mame/video/rdpblend.h index 89d97e7a5c3..27c5988e98f 100644 --- a/src/mame/video/rdpblend.h +++ b/src/mame/video/rdpblend.h @@ -17,13 +17,7 @@ #define _VIDEO_RDPBLEND_H_ #include "emu.h" - -struct other_modes_t; -struct misc_state_t; -class n64_rdp; -struct rdp_span_aux; -class color_t; -struct rdp_poly_state; +#include "video/n64.h" class n64_blender_t { diff --git a/src/mame/video/rdpspn16.c b/src/mame/video/rdpspn16.c index d36e14f7733..3022928573b 100644 --- a/src/mame/video/rdpspn16.c +++ b/src/mame/video/rdpspn16.c @@ -16,6 +16,7 @@ #include "emu.h" #include "includes/n64.h" #include "video/n64.h" +#include "video/rgbutil.h" void n64_rdp::render_spans(INT32 start, INT32 end, INT32 tilenum, bool flip, extent_t* spans, bool rect, rdp_poly_state* object) { @@ -321,14 +322,14 @@ void n64_rdp::span_draw_1cycle(INT32 scanline, const extent_t &extent, const rdp const INT32 tilenum = object.tilenum; const bool flip = object.flip; - SpanParam r; r.w = extent.param[SPAN_R].start; - SpanParam g; g.w = extent.param[SPAN_G].start; - SpanParam b; b.w = extent.param[SPAN_B].start; - SpanParam a; a.w = extent.param[SPAN_A].start; - SpanParam z; z.w = extent.param[SPAN_Z].start; - SpanParam s; s.w = extent.param[SPAN_S].start; - SpanParam t; t.w = extent.param[SPAN_T].start; - SpanParam w; w.w = extent.param[SPAN_W].start; + span_param_t r; r.w = extent.param[SPAN_R].start; + span_param_t g; g.w = extent.param[SPAN_G].start; + span_param_t b; b.w = extent.param[SPAN_B].start; + span_param_t a; a.w = extent.param[SPAN_A].start; + span_param_t z; z.w = extent.param[SPAN_Z].start; + span_param_t s; s.w = extent.param[SPAN_S].start; + span_param_t t; t.w = extent.param[SPAN_T].start; + span_param_t w; w.w = extent.param[SPAN_W].start; const UINT32 zb = 
object.m_misc_state.m_zb_address >> 1; const UINT32 zhb = object.m_misc_state.m_zb_address; @@ -440,10 +441,32 @@ void n64_rdp::span_draw_1cycle(INT32 scanline, const extent_t &extent, const rdp userdata->m_noise_color.i.r = userdata->m_noise_color.i.g = userdata->m_noise_color.i.b = rand() << 3; // Not accurate - userdata->m_pixel_color.i.r = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_r[1],*userdata->m_color_inputs.combiner_rgbsub_b_r[1],*userdata->m_color_inputs.combiner_rgbmul_r[1],*userdata->m_color_inputs.combiner_rgbadd_r[1]); - userdata->m_pixel_color.i.g = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_g[1],*userdata->m_color_inputs.combiner_rgbsub_b_g[1],*userdata->m_color_inputs.combiner_rgbmul_g[1],*userdata->m_color_inputs.combiner_rgbadd_g[1]); - userdata->m_pixel_color.i.b = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_b[1],*userdata->m_color_inputs.combiner_rgbsub_b_b[1],*userdata->m_color_inputs.combiner_rgbmul_b[1],*userdata->m_color_inputs.combiner_rgbadd_b[1]); - userdata->m_pixel_color.i.a = alpha_combiner_equation(*userdata->m_color_inputs.combiner_alphasub_a[1],*userdata->m_color_inputs.combiner_alphasub_b[1],*userdata->m_color_inputs.combiner_alphamul[1],*userdata->m_color_inputs.combiner_alphaadd[1]); + rgbaint_t rgbsub_a(*userdata->m_color_inputs.combiner_alphasub_a[1], *userdata->m_color_inputs.combiner_rgbsub_a_r[1], *userdata->m_color_inputs.combiner_rgbsub_a_g[1], *userdata->m_color_inputs.combiner_rgbsub_a_b[1]); + rgbaint_t rgbsub_b(*userdata->m_color_inputs.combiner_alphasub_b[1], *userdata->m_color_inputs.combiner_rgbsub_b_r[1], *userdata->m_color_inputs.combiner_rgbsub_b_g[1], *userdata->m_color_inputs.combiner_rgbsub_b_b[1]); + rgbaint_t rgbmul(*userdata->m_color_inputs.combiner_alphamul[1], *userdata->m_color_inputs.combiner_rgbmul_r[1], *userdata->m_color_inputs.combiner_rgbmul_g[1], *userdata->m_color_inputs.combiner_rgbmul_b[1]); + rgbaint_t 
rgbadd(*userdata->m_color_inputs.combiner_alphaadd[1], *userdata->m_color_inputs.combiner_rgbadd_r[1], *userdata->m_color_inputs.combiner_rgbadd_g[1], *userdata->m_color_inputs.combiner_rgbadd_b[1]); + + rgbsub_a.sign_extend(0x180, 0xfe00); + rgbsub_b.sign_extend(0x180, 0xfe00); + rgbadd.sign_extend(0x180, 0xfe00); + + rgbadd.shl(8); + rgbsub_a.sub(rgbsub_b); + rgbsub_a.add(rgbadd); + rgbsub_a.add_imm(0x0080); + rgbsub_a.shr(8); + rgbsub_a.sign_extend(0x100, 0xff00); + + const UINT32 unclamped = rgbsub_a.to_rgba(); + userdata->m_pixel_color.i.a = s_special_9bit_clamptable[(unclamped >> 24) & 0xff]; + userdata->m_pixel_color.i.r = s_special_9bit_clamptable[(unclamped >> 16) & 0xff]; + userdata->m_pixel_color.i.g = s_special_9bit_clamptable[(unclamped >> 8) & 0xff]; + userdata->m_pixel_color.i.b = s_special_9bit_clamptable[unclamped & 0xff]; + + //userdata->m_pixel_color.i.r = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_r[1],*userdata->m_color_inputs.combiner_rgbsub_b_r[1],*userdata->m_color_inputs.combiner_rgbmul_r[1],*userdata->m_color_inputs.combiner_rgbadd_r[1]); + //userdata->m_pixel_color.i.g = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_g[1],*userdata->m_color_inputs.combiner_rgbsub_b_g[1],*userdata->m_color_inputs.combiner_rgbmul_g[1],*userdata->m_color_inputs.combiner_rgbadd_g[1]); + //userdata->m_pixel_color.i.b = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_b[1],*userdata->m_color_inputs.combiner_rgbsub_b_b[1],*userdata->m_color_inputs.combiner_rgbmul_b[1],*userdata->m_color_inputs.combiner_rgbadd_b[1]); + //userdata->m_pixel_color.i.a = alpha_combiner_equation(*userdata->m_color_inputs.combiner_alphasub_a[1],*userdata->m_color_inputs.combiner_alphasub_b[1],*userdata->m_color_inputs.combiner_alphamul[1],*userdata->m_color_inputs.combiner_alphaadd[1]); //Alpha coverage combiner get_alpha_cvg(&userdata->m_pixel_color.i.a, userdata, object); @@ -502,14 +525,14 @@ void 
n64_rdp::span_draw_2cycle(INT32 scanline, const extent_t &extent, const rdp const INT32 tilenum = object.tilenum; const bool flip = object.flip; - SpanParam r; r.w = extent.param[SPAN_R].start; - SpanParam g; g.w = extent.param[SPAN_G].start; - SpanParam b; b.w = extent.param[SPAN_B].start; - SpanParam a; a.w = extent.param[SPAN_A].start; - SpanParam z; z.w = extent.param[SPAN_Z].start; - SpanParam s; s.w = extent.param[SPAN_S].start; - SpanParam t; t.w = extent.param[SPAN_T].start; - SpanParam w; w.w = extent.param[SPAN_W].start; + span_param_t r; r.w = extent.param[SPAN_R].start; + span_param_t g; g.w = extent.param[SPAN_G].start; + span_param_t b; b.w = extent.param[SPAN_B].start; + span_param_t a; a.w = extent.param[SPAN_A].start; + span_param_t z; z.w = extent.param[SPAN_Z].start; + span_param_t s; s.w = extent.param[SPAN_S].start; + span_param_t t; t.w = extent.param[SPAN_T].start; + span_param_t w; w.w = extent.param[SPAN_W].start; const UINT32 zb = object.m_misc_state.m_zb_address >> 1; const UINT32 zhb = object.m_misc_state.m_zb_address; @@ -645,42 +668,63 @@ void n64_rdp::span_draw_2cycle(INT32 scanline, const extent_t &extent, const rdp ((m_tex_pipe).*(m_tex_pipe.m_cycle[cycle1]))(&userdata->m_next_texel_color, &userdata->m_next_texel_color, sss, sst, tile2, 1, userdata, object); userdata->m_noise_color.i.r = userdata->m_noise_color.i.g = userdata->m_noise_color.i.b = rand() << 3; // Not accurate - userdata->m_combined_color.i.r = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_r[0], - *userdata->m_color_inputs.combiner_rgbsub_b_r[0], - *userdata->m_color_inputs.combiner_rgbmul_r[0], - *userdata->m_color_inputs.combiner_rgbadd_r[0]); - userdata->m_combined_color.i.g = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_g[0], - *userdata->m_color_inputs.combiner_rgbsub_b_g[0], - *userdata->m_color_inputs.combiner_rgbmul_g[0], - *userdata->m_color_inputs.combiner_rgbadd_g[0]); - userdata->m_combined_color.i.b = 
color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_b[0], - *userdata->m_color_inputs.combiner_rgbsub_b_b[0], - *userdata->m_color_inputs.combiner_rgbmul_b[0], - *userdata->m_color_inputs.combiner_rgbadd_b[0]); - userdata->m_combined_color.i.a = alpha_combiner_equation(*userdata->m_color_inputs.combiner_alphasub_a[0], - *userdata->m_color_inputs.combiner_alphasub_b[0], - *userdata->m_color_inputs.combiner_alphamul[0], - *userdata->m_color_inputs.combiner_alphaadd[0]); - userdata->m_texel0_color = userdata->m_texel1_color; + /*rgbaint_t rgbsub_a(*userdata->m_color_inputs.combiner_alphasub_a[0], *userdata->m_color_inputs.combiner_rgbsub_a_r[0], *userdata->m_color_inputs.combiner_rgbsub_a_g[0], *userdata->m_color_inputs.combiner_rgbsub_a_b[0]); + rgbaint_t rgbsub_b(*userdata->m_color_inputs.combiner_alphasub_b[0], *userdata->m_color_inputs.combiner_rgbsub_b_r[0], *userdata->m_color_inputs.combiner_rgbsub_b_g[0], *userdata->m_color_inputs.combiner_rgbsub_b_b[0]); + rgbaint_t rgbmul(*userdata->m_color_inputs.combiner_alphamul[0], *userdata->m_color_inputs.combiner_rgbmul_r[0], *userdata->m_color_inputs.combiner_rgbmul_g[0], *userdata->m_color_inputs.combiner_rgbmul_b[0]); + rgbaint_t rgbadd(*userdata->m_color_inputs.combiner_alphaadd[0], *userdata->m_color_inputs.combiner_rgbadd_r[0], *userdata->m_color_inputs.combiner_rgbadd_g[0], *userdata->m_color_inputs.combiner_rgbadd_b[0]); + + rgbsub_a.sign_extend(0x180, 0xfe00); + rgbsub_b.sign_extend(0x180, 0xfe00); + rgbadd.sign_extend(0x180, 0xfe00); + + rgbadd.shl(8); + rgbsub_a.sub(rgbsub_b); + rgbsub_a.add(rgbadd); + rgbsub_a.add_imm(0x0080); + rgbsub_a.shr(8); + rgbsub_a.sign_extend(0x100, 0xff00); + + const UINT32 unclamped0 = rgbsub_a.to_rgba(); + userdata->m_combined_color.i.a = s_special_9bit_clamptable[(unclamped0 >> 24) & 0xff]; + userdata->m_combined_color.i.r = s_special_9bit_clamptable[(unclamped0 >> 16) & 0xff]; + userdata->m_combined_color.i.g = s_special_9bit_clamptable[(unclamped0 >> 8) & 
0xff]; + userdata->m_combined_color.i.b = s_special_9bit_clamptable[unclamped0 & 0xff];*/ + + userdata->m_combined_color.i.r = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_r[0], *userdata->m_color_inputs.combiner_rgbsub_b_r[0], *userdata->m_color_inputs.combiner_rgbmul_r[0], *userdata->m_color_inputs.combiner_rgbadd_r[0]); + userdata->m_combined_color.i.g = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_g[0], *userdata->m_color_inputs.combiner_rgbsub_b_g[0], *userdata->m_color_inputs.combiner_rgbmul_g[0], *userdata->m_color_inputs.combiner_rgbadd_g[0]); + userdata->m_combined_color.i.b = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_b[0], *userdata->m_color_inputs.combiner_rgbsub_b_b[0], *userdata->m_color_inputs.combiner_rgbmul_b[0], *userdata->m_color_inputs.combiner_rgbadd_b[0]); + userdata->m_combined_color.i.a = alpha_combiner_equation(*userdata->m_color_inputs.combiner_alphasub_a[0], *userdata->m_color_inputs.combiner_alphasub_b[0], *userdata->m_color_inputs.combiner_alphamul[0], *userdata->m_color_inputs.combiner_alphaadd[0]); + + /*userdata->m_texel0_color = userdata->m_texel1_color; userdata->m_texel1_color = userdata->m_next_texel_color; - userdata->m_pixel_color.i.r = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_r[1], - *userdata->m_color_inputs.combiner_rgbsub_b_r[1], - *userdata->m_color_inputs.combiner_rgbmul_r[1], - *userdata->m_color_inputs.combiner_rgbadd_r[1]); - userdata->m_pixel_color.i.g = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_g[1], - *userdata->m_color_inputs.combiner_rgbsub_b_g[1], - *userdata->m_color_inputs.combiner_rgbmul_g[1], - *userdata->m_color_inputs.combiner_rgbadd_g[1]); - userdata->m_pixel_color.i.b = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_b[1], - *userdata->m_color_inputs.combiner_rgbsub_b_b[1], - *userdata->m_color_inputs.combiner_rgbmul_b[1], - 
*userdata->m_color_inputs.combiner_rgbadd_b[1]); - userdata->m_pixel_color.i.a = alpha_combiner_equation(*userdata->m_color_inputs.combiner_alphasub_a[1], - *userdata->m_color_inputs.combiner_alphasub_b[1], - *userdata->m_color_inputs.combiner_alphamul[1], - *userdata->m_color_inputs.combiner_alphaadd[1]); + rgbsub_a.set_rgba(*userdata->m_color_inputs.combiner_alphasub_a[1], *userdata->m_color_inputs.combiner_rgbsub_a_r[1], *userdata->m_color_inputs.combiner_rgbsub_a_g[1], *userdata->m_color_inputs.combiner_rgbsub_a_b[1]); + rgbsub_b.set_rgba(*userdata->m_color_inputs.combiner_alphasub_b[1], *userdata->m_color_inputs.combiner_rgbsub_b_r[1], *userdata->m_color_inputs.combiner_rgbsub_b_g[1], *userdata->m_color_inputs.combiner_rgbsub_b_b[1]); + rgbmul.set_rgba(*userdata->m_color_inputs.combiner_alphamul[1], *userdata->m_color_inputs.combiner_rgbmul_r[1], *userdata->m_color_inputs.combiner_rgbmul_g[1], *userdata->m_color_inputs.combiner_rgbmul_b[1]); + rgbadd.set(*userdata->m_color_inputs.combiner_alphaadd[1], *userdata->m_color_inputs.combiner_rgbadd_r[1], *userdata->m_color_inputs.combiner_rgbadd_g[1], *userdata->m_color_inputs.combiner_rgbadd_b[1]); + + rgbsub_a.sign_extend(0x180, 0xfe00); + rgbsub_b.sign_extend(0x180, 0xfe00); + rgbadd.sign_extend(0x180, 0xfe00); + + rgbadd.shl(8); + rgbsub_a.sub(rgbsub_b); + rgbsub_a.add(rgbadd); + rgbsub_a.add_imm(0x0080); + rgbsub_a.shr(8); + rgbsub_a.sign_extend(0x100, 0xff00); + + const UINT32 unclamped = rgbsub_a.to_rgba(); + userdata->m_pixel_color.i.a = s_special_9bit_clamptable[(unclamped >> 24) & 0xff]; + userdata->m_pixel_color.i.r = s_special_9bit_clamptable[(unclamped >> 16) & 0xff]; + userdata->m_pixel_color.i.g = s_special_9bit_clamptable[(unclamped >> 8) & 0xff]; + userdata->m_pixel_color.i.b = s_special_9bit_clamptable[unclamped & 0xff];*/ + + userdata->m_pixel_color.i.r = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_r[1], *userdata->m_color_inputs.combiner_rgbsub_b_r[1], 
*userdata->m_color_inputs.combiner_rgbmul_r[1], *userdata->m_color_inputs.combiner_rgbadd_r[1]); + userdata->m_pixel_color.i.g = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_g[1], *userdata->m_color_inputs.combiner_rgbsub_b_g[1], *userdata->m_color_inputs.combiner_rgbmul_g[1], *userdata->m_color_inputs.combiner_rgbadd_g[1]); + userdata->m_pixel_color.i.b = color_combiner_equation(*userdata->m_color_inputs.combiner_rgbsub_a_b[1], *userdata->m_color_inputs.combiner_rgbsub_b_b[1], *userdata->m_color_inputs.combiner_rgbmul_b[1], *userdata->m_color_inputs.combiner_rgbadd_b[1]); + userdata->m_pixel_color.i.a = alpha_combiner_equation(*userdata->m_color_inputs.combiner_alphasub_a[1], *userdata->m_color_inputs.combiner_alphasub_b[1], *userdata->m_color_inputs.combiner_alphamul[1], *userdata->m_color_inputs.combiner_alphaadd[1]); //Alpha coverage combiner get_alpha_cvg(&userdata->m_pixel_color.i.a, userdata, object); @@ -741,8 +785,8 @@ void n64_rdp::span_draw_copy(INT32 scanline, const extent_t &extent, const rdp_p const INT32 xinc = flip ? 1 : -1; const INT32 length = flip ? 
(xstart - xend) : (xend - xstart); - SpanParam s; s.w = extent.param[SPAN_S].start; - SpanParam t; t.w = extent.param[SPAN_T].start; + span_param_t s; s.w = extent.param[SPAN_S].start; + span_param_t t; t.w = extent.param[SPAN_T].start; const INT32 ds = object.m_span_base.m_span_ds / 4; const INT32 dt = object.m_span_base.m_span_dt / 4; diff --git a/src/mame/video/rdptpipe.c b/src/mame/video/rdptpipe.c index 9f6536ad883..f146f6c4cd5 100644 --- a/src/mame/video/rdptpipe.c +++ b/src/mame/video/rdptpipe.c @@ -15,10 +15,14 @@ #include "emu.h" #include "includes/n64.h" +#include "video/rdptpipe.h" #include "video/n64.h" +#include "video/rgbutil.h" #define RELATIVE(x, y) ((((x) >> 3) - (y)) << 3) | (x & 7); +#define USE_SIMD (1) + void n64_texture_pipe_t::set_machine(running_machine &machine) { n64_state* state = machine.driver_data(); @@ -27,12 +31,7 @@ void n64_texture_pipe_t::set_machine(running_machine &machine) for(INT32 i = 0; i < 0x10000; i++) { - color_t c; - c.i.r = m_rdp->m_replicated_rgba[(i >> 11) & 0x1f]; - c.i.g = m_rdp->m_replicated_rgba[(i >> 6) & 0x1f]; - c.i.b = m_rdp->m_replicated_rgba[(i >> 1) & 0x1f]; - c.i.a = (i & 1) ? 0xff : 0x00; - m_expand_16to32_table[i] = c.c; + m_expand_16to32_table[i] = color_t((i & 1) ? 
0xff : 0x00, m_rdp->m_replicated_rgba[(i >> 11) & 0x1f], m_rdp->m_replicated_rgba[(i >> 6) & 0x1f], m_rdp->m_replicated_rgba[(i >> 1) & 0x1f]); } for(UINT32 i = 0; i < 0x80000; i++) @@ -262,8 +261,6 @@ void n64_texture_pipe_t::cycle_nearest(color_t* TEX, color_t* prev, INT32 SSS, I const UINT32 tpal = tile.palette; const UINT32 index = (tformat << 4) | (tsize << 2) | ((UINT32) object.m_other_modes.en_tlut << 1) | (UINT32) object.m_other_modes.tlut_type; - color_t t0; - INT32 sss1 = SSS, sst1 = SST; bool maxs, maxt; shift_cycle(&sss1, &sst1, &maxs, &maxt, tile); @@ -272,7 +269,7 @@ void n64_texture_pipe_t::cycle_nearest(color_t* TEX, color_t* prev, INT32 SSS, I UINT32 tbase = tile.tmem + ((tile.line * sst1) & 0x1ff); - t0.c = ((this)->*(m_texel_fetch[index]))(sss1, sst1, tbase, tpal, userdata); + color_t t0 = ((this)->*(m_texel_fetch[index]))(sss1, sst1, tbase, tpal, userdata); const INT32 newk0 = SIGN9(m_rdp->get_k0()); const INT32 newk1 = SIGN9(m_rdp->get_k1()); @@ -318,7 +315,7 @@ void n64_texture_pipe_t::cycle_nearest_lerp(color_t* TEX, color_t* prev, INT32 S UINT32 tbase = tile.tmem + ((tile.line * sst1) & 0x1ff); - (*TEX).c = ((this)->*(m_texel_fetch[index]))(sss1, sst1, tbase, tpal, userdata); + TEX->set(((this)->*(m_texel_fetch[index]))(sss1, sst1, tbase, tpal, userdata).get()); } void n64_texture_pipe_t::cycle_linear(color_t* TEX, color_t* prev, INT32 SSS, INT32 SST, UINT32 tilenum, UINT32 cycle, rdp_span_aux* userdata, const rdp_poly_state& object) @@ -361,8 +358,7 @@ void n64_texture_pipe_t::cycle_linear(color_t* TEX, color_t* prev, INT32 SSS, IN sfrac <<= 3; tfrac <<= 3; - color_t t0; - t0.c = ((this)->*(m_texel_fetch[index]))(sss1, sst1, tbase, tpal, userdata); + color_t t0 = ((this)->*(m_texel_fetch[index]))(sss1, sst1, tbase, tpal, userdata); const INT32 newk0 = SIGN9(m_rdp->get_k0()); const INT32 newk1 = SIGN9(m_rdp->get_k1()); const INT32 newk2 = SIGN9(m_rdp->get_k2()); @@ -429,45 +425,91 @@ void n64_texture_pipe_t::cycle_linear_lerp(color_t* TEX, 
color_t* prev, INT32 SS sfrac <<= 3; tfrac <<= 3; - color_t t1; - color_t t2; - t1.c = ((this)->*(m_texel_fetch[index]))(sss2, sst1, tbase1, tpal, userdata); - t2.c = ((this)->*(m_texel_fetch[index]))(sss1, sst2, tbase2, tpal, userdata); if (!center) { if (upper) { - color_t t3; - t3.c = ((this)->*(m_texel_fetch[index]))(sss2, sst2, tbase2, tpal, userdata); +#if USE_SIMD + rgbaint_t v1_vec((rgbaint_t)((this)->*(m_texel_fetch[index]))(sss2, sst1, tbase1, tpal, userdata).get()); + rgbaint_t v2_vec((rgbaint_t)((this)->*(m_texel_fetch[index]))(sss1, sst2, tbase2, tpal, userdata).get()); + rgbaint_t v3_vec((rgbaint_t)((this)->*(m_texel_fetch[index]))(sss2, sst2, tbase2, tpal, userdata).get()); + + v1_vec.sub(v3_vec); + v2_vec.sub(v3_vec); + + v1_vec.mul_imm(invtf); + v2_vec.mul_imm(invsf); + + v1_vec.add(v2_vec); + v1_vec.add_imm(0x0080); + v1_vec.sra(8); + v1_vec.add(v3_vec); + + TEX->set((UINT32)v1_vec.to_rgba()); +#else + color_t t1 = ((this)->*(m_texel_fetch[index]))(sss2, sst1, tbase1, tpal, userdata); + color_t t2 = ((this)->*(m_texel_fetch[index]))(sss1, sst2, tbase2, tpal, userdata); + color_t t3 = ((this)->*(m_texel_fetch[index]))(sss2, sst2, tbase2, tpal, userdata); + TEX->i.r = t3.i.r + (((invsf * (t2.i.r - t3.i.r)) + (invtf * (t1.i.r - t3.i.r)) + 0x80) >> 8); TEX->i.g = t3.i.g + (((invsf * (t2.i.g - t3.i.g)) + (invtf * (t1.i.g - t3.i.g)) + 0x80) >> 8); TEX->i.b = t3.i.b + (((invsf * (t2.i.b - t3.i.b)) + (invtf * (t1.i.b - t3.i.b)) + 0x80) >> 8); TEX->i.a = t3.i.a + (((invsf * (t2.i.a - t3.i.a)) + (invtf * (t1.i.a - t3.i.a)) + 0x80) >> 8); +#endif } else { - color_t t0; - t0.c = ((this)->*(m_texel_fetch[index]))(sss1, sst1, tbase1, tpal, userdata); - TEX->i.r = t0.i.r + (((sfrac * (t1.i.r - t0.i.r)) + (tfrac * (t2.i.r - t0.i.r)) + 0x80) >> 8); - TEX->i.g = t0.i.g + (((sfrac * (t1.i.g - t0.i.g)) + (tfrac * (t2.i.g - t0.i.g)) + 0x80) >> 8); - TEX->i.b = t0.i.b + (((sfrac * (t1.i.b - t0.i.b)) + (tfrac * (t2.i.b - t0.i.b)) + 0x80) >> 8); - TEX->i.a = t0.i.a + 
(((sfrac * (t1.i.a - t0.i.a)) + (tfrac * (t2.i.a - t0.i.a)) + 0x80) >> 8); +#if USE_SIMD + rgbaint_t v0_vec((rgbaint_t)((this)->*(m_texel_fetch[index]))(sss1, sst1, tbase1, tpal, userdata).get()); + rgbaint_t v1_vec((rgbaint_t)((this)->*(m_texel_fetch[index]))(sss2, sst1, tbase1, tpal, userdata).get()); + rgbaint_t v2_vec((rgbaint_t)((this)->*(m_texel_fetch[index]))(sss1, sst2, tbase2, tpal, userdata).get()); + + v1_vec.sub(v0_vec); + v2_vec.sub(v0_vec); + + v1_vec.mul_imm(sfrac); + v2_vec.mul_imm(tfrac); + + v1_vec.add(v2_vec); + v1_vec.add_imm(0x0080); + v1_vec.sra(8); + v1_vec.add(v0_vec); + + TEX->set((UINT32)v1_vec.to_rgba()); +#else + color_t t0 = ((this)->*(m_texel_fetch[index]))(sss1, sst1, tbase1, tpal, userdata); + color_t t1 = ((this)->*(m_texel_fetch[index]))(sss2, sst1, tbase1, tpal, userdata); + color_t t2 = ((this)->*(m_texel_fetch[index]))(sss1, sst2, tbase2, tpal, userdata); + + TEX->i.r = t0.i.r + (((tfrac * (t2.i.r - t0.i.r)) + (sfrac * (t1.i.r - t0.i.r)) + 0x80) >> 8); + TEX->i.g = t0.i.g + (((tfrac * (t2.i.g - t0.i.g)) + (sfrac * (t1.i.g - t0.i.g)) + 0x80) >> 8); + TEX->i.b = t0.i.b + (((tfrac * (t2.i.b - t0.i.b)) + (sfrac * (t1.i.b - t0.i.b)) + 0x80) >> 8); + TEX->i.a = t0.i.a + (((tfrac * (t2.i.a - t0.i.a)) + (sfrac * (t1.i.a - t0.i.a)) + 0x80) >> 8); +#endif } - TEX->i.r &= 0x1ff; - TEX->i.g &= 0x1ff; - TEX->i.b &= 0x1ff; - TEX->i.a &= 0x1ff; } else { - color_t t0; - color_t t3; - t0.c = ((this)->*(m_texel_fetch[index]))(sss1, sst1, 1, tpal, userdata); - t3.c = ((this)->*(m_texel_fetch[index]))(sss2, sst2, tbase2, tpal, userdata); +#if USE_SIMD + rgbaint_t t0_vec((rgbaint_t)((this)->*(m_texel_fetch[index]))(sss1, sst1, tbase1, tpal, userdata).get()); + + t0_vec.add((rgbaint_t)((this)->*(m_texel_fetch[index]))(sss2, sst1, tbase1, tpal, userdata).get()); + t0_vec.add((rgbaint_t)((this)->*(m_texel_fetch[index]))(sss2, sst2, tbase2, tpal, userdata).get()); + t0_vec.add((rgbaint_t)((this)->*(m_texel_fetch[index]))(sss1, sst2, tbase2, tpal, 
userdata).get()); + t0_vec.shr(2); + + TEX->set((UINT32)t0_vec.to_rgba()); +#else + color_t t0 = ((this)->*(m_texel_fetch[index]))(sss1, sst1, tbase1, tpal, userdata); + color_t t1 = ((this)->*(m_texel_fetch[index]))(sss2, sst1, tbase1, tpal, userdata); + color_t t3 = ((this)->*(m_texel_fetch[index]))(sss2, sst2, tbase2, tpal, userdata); + color_t t2 = ((this)->*(m_texel_fetch[index]))(sss1, sst2, tbase2, tpal, userdata); + TEX->i.r = (t0.i.r + t1.i.r + t2.i.r + t3.i.r) >> 2; - TEX->i.g = (t0.i.g + t1.i.g + t2.i.g + t3.i.g) >> 2; - TEX->i.b = (t0.i.b + t1.i.b + t2.i.b + t3.i.b) >> 2; - TEX->i.a = (t0.i.a + t1.i.a + t2.i.a + t3.i.a) >> 2; + TEX->i.g = (t0.i.g + t1.i.g + t2.i.g + t3.i.g) >> 2; + TEX->i.b = (t0.i.b + t1.i.b + t2.i.b + t3.i.b) >> 2; + TEX->i.a = (t0.i.a + t1.i.a + t2.i.a + t3.i.a) >> 2; +#endif } } @@ -488,7 +530,7 @@ void n64_texture_pipe_t::copy(color_t* TEX, INT32 SSS, INT32 SST, UINT32 tilenum const UINT32 index = (tile.format << 4) | (tile.size << 2) | ((UINT32) object.m_other_modes.en_tlut << 1) | (UINT32) object.m_other_modes.tlut_type; const UINT32 tbase = tile.tmem + ((tile.line * sst1) & 0x1ff); - TEX->c = ((this)->*(m_texel_fetch[index]))(sss1, sst1, tbase, tile.palette, userdata); + TEX->set(((this)->*(m_texel_fetch[index]))(sss1, sst1, tbase, tile.palette, userdata).get()); } void n64_texture_pipe_t::lod_1cycle(INT32* sss, INT32* sst, const INT32 s, const INT32 t, const INT32 w, const INT32 dsinc, const INT32 dtinc, const INT32 dwinc, rdp_span_aux* userdata, const rdp_poly_state& object) @@ -783,7 +825,7 @@ void n64_texture_pipe_t::calculate_clamp_diffs(UINT32 prim_tile, rdp_span_aux* u static INT32 sTexAddrSwap16[2] = { WORD_ADDR_XOR, WORD_XOR_DWORD_SWAP }; static INT32 sTexAddrSwap8[2] = { BYTE_ADDR_XOR, BYTE_XOR_DWORD_SWAP }; -UINT32 n64_texture_pipe_t::fetch_rgba16_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_rgba16_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, 
rdp_span_aux* userdata) { INT32 taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x7ff; @@ -793,29 +835,22 @@ UINT32 n64_texture_pipe_t::fetch_rgba16_tlut0(INT32 s, INT32 t, INT32 tbase, INT #if USE_64K_LUT return m_expand_16to32_table[c]; #else - color_t color; - color.i.r = GET_HI_RGBA16_TMEM(c); - color.i.g = GET_MED_RGBA16_TMEM(c); - color.i.b = GET_LOW_RGBA16_TMEM(c); - color.i.a = (c & 1) * 0xff; - return color.c; + return color_t((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); #endif } -UINT32 n64_texture_pipe_t::fetch_rgba16_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_rgba16_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const INT32 taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x7ff; UINT16 c = ((UINT16*)userdata->m_tmem)[taddr]; c = ((UINT16*)(userdata->m_tmem + 0x800))[(c >> 8) << 2]; - color_t color; - color.i.r = color.i.g = color.i.b = (c >> 8) & 0xff; - color.i.a = c & 0xff; - return color.c; + const UINT8 k = (c >> 8) & 0xff; + return color_t(c & 0xff, k, k, k); } -UINT32 n64_texture_pipe_t::fetch_rgba16_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_rgba16_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const INT32 taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x7ff; @@ -823,16 +858,11 @@ UINT32 n64_texture_pipe_t::fetch_rgba16_raw(INT32 s, INT32 t, INT32 tbase, INT32 return m_expand_16to32_table[((UINT16*)userdata->m_tmem)[taddr]]; #else const UINT16 c = ((UINT16*)userdata->m_tmem)[taddr]; - color_t color; - color.i.r = GET_HI_RGBA16_TMEM(c); - color.i.g = GET_MED_RGBA16_TMEM(c); - color.i.b = GET_LOW_RGBA16_TMEM(c); - color.i.a = (c & 1) * 0xff; - return color.c; + return color_t((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); #endif } -UINT32 
n64_texture_pipe_t::fetch_rgba32_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_rgba32_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT32 *tc = ((UINT32*)userdata->m_tmem); const INT32 taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x3ff; @@ -843,16 +873,11 @@ UINT32 n64_texture_pipe_t::fetch_rgba32_tlut0(INT32 s, INT32 t, INT32 tbase, INT #if USE_64K_LUT return m_expand_16to32_table[c]; #else - color_t color; - color.i.r = GET_HI_RGBA16_TMEM(c); - color.i.g = GET_MED_RGBA16_TMEM(c); - color.i.b = GET_LOW_RGBA16_TMEM(c); - color.i.a = (c & 1) * 0xff; - return color.c; + return color_t((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); #endif } -UINT32 n64_texture_pipe_t::fetch_rgba32_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_rgba32_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT32 *tc = ((UINT32*)userdata->m_tmem); const INT32 taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x3ff; @@ -860,31 +885,23 @@ UINT32 n64_texture_pipe_t::fetch_rgba32_tlut1(INT32 s, INT32 t, INT32 tbase, INT UINT32 c = tc[taddr]; c = ((UINT16*)(userdata->m_tmem + 0x800))[(c >> 24) << 2]; - color_t color; - color.i.r = color.i.g = color.i.b = (c >> 8) & 0xff; - color.i.a = c & 0xff; - - return color.c; + const UINT8 k = (c >> 8) & 0xff; + return color_t(c & 0xff, k, k, k); } -UINT32 n64_texture_pipe_t::fetch_rgba32_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_rgba32_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const INT32 taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x3ff; - UINT32 c = ((UINT16*)userdata->m_tmem)[taddr]; - color_t color; - color.i.r = (c >> 8) & 0xff; - color.i.g = c & 0xff; - c = ((UINT16*)userdata->m_tmem)[taddr | 0x400]; - 
color.i.b = (c >> 8) & 0xff; - color.i.a = c & 0xff; + const UINT32 cl = ((UINT16*)userdata->m_tmem)[taddr]; + const UINT32 ch = ((UINT16*)userdata->m_tmem)[taddr | 0x400]; - return color.c; + return color_t(ch, cl >> 8, cl, ch >> 8); } -UINT32 n64_texture_pipe_t::fetch_nop(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { return 0; } +color_t n64_texture_pipe_t::fetch_nop(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { return 0; } -UINT32 n64_texture_pipe_t::fetch_yuv(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_yuv(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT16 *tc = ((UINT16*)userdata->m_tmem); @@ -901,16 +918,10 @@ UINT32 n64_texture_pipe_t::fetch_yuv(INT32 s, INT32 t, INT32 tbase, INT32 tpal, u |= ((u & 0x80) << 1); v |= ((v & 0x80) << 1); - color_t color; - color.i.r = u; - color.i.g = v; - color.i.b = y; - color.i.a = y; - - return color.c; + return color_t(y, y, u, v); } -UINT32 n64_texture_pipe_t::fetch_ci4_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_ci4_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0x7ff; @@ -921,16 +932,11 @@ UINT32 n64_texture_pipe_t::fetch_ci4_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 #if USE_64K_LUT return m_expand_16to32_table[c]; #else - color_t color; - color.i.r = GET_HI_RGBA16_TMEM(c); - color.i.g = GET_MED_RGBA16_TMEM(c); - color.i.b = GET_LOW_RGBA16_TMEM(c); - color.i.a = (c & 1) * 0xff; - return color.c; + return color_t((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); #endif } -UINT32 n64_texture_pipe_t::fetch_ci4_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_ci4_tlut1(INT32 s, INT32 t, INT32 
tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; INT32 taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0x7ff; @@ -938,14 +944,11 @@ UINT32 n64_texture_pipe_t::fetch_ci4_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 const UINT8 p = (s & 1) ? (tc[taddr] & 0xf) : (tc[taddr] >> 4); const UINT16 c = ((UINT16*)(userdata->m_tmem + 0x800))[((tpal << 4) | p) << 2]; - color_t color; - color.i.r = color.i.g = color.i.b = (c >> 8) & 0xff; - color.i.a = c & 0xff; - - return color.c; + const UINT8 k = (c >> 8) & 0xff; + return color_t(c & 0xff, k, k, k); } -UINT32 n64_texture_pipe_t::fetch_ci4_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_ci4_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0xfff; @@ -953,13 +956,10 @@ UINT32 n64_texture_pipe_t::fetch_ci4_raw(INT32 s, INT32 t, INT32 tbase, INT32 tp UINT8 p = (s & 1) ? 
(tc[taddr] & 0xf) : (tc[taddr] >> 4); p = (tpal << 4) | p; - color_t color; - color.i.r = color.i.g = color.i.b = color.i.a = p; - - return color.c; + return color_t(p, p, p, p); } -UINT32 n64_texture_pipe_t::fetch_ci8_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_ci8_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 1]) & 0x7ff; @@ -970,16 +970,11 @@ UINT32 n64_texture_pipe_t::fetch_ci8_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 #if USE_64K_LUT return m_expand_16to32_table[c]; #else - color_t color; - color.i.r = GET_HI_RGBA16_TMEM(c); - color.i.g = GET_MED_RGBA16_TMEM(c); - color.i.b = GET_LOW_RGBA16_TMEM(c); - color.i.a = (c & 1) * 0xff; - return color.c; + return color_t((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); #endif } -UINT32 n64_texture_pipe_t::fetch_ci8_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_ci8_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 1]) & 0x7ff; @@ -987,25 +982,20 @@ UINT32 n64_texture_pipe_t::fetch_ci8_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 const UINT8 p = tc[taddr]; const UINT16 c = ((UINT16*)(userdata->m_tmem + 0x800))[p << 2]; - color_t color; - color.i.r = color.i.g = color.i.b = (c >> 8) & 0xff; - color.i.a = c & 0xff; - - return color.c; + const UINT8 k = (c >> 8) & 0xff; + return color_t(c & 0xff, k, k, k); } -UINT32 n64_texture_pipe_t::fetch_ci8_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_ci8_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 
1]) & 0xfff; - color_t color; - color.i.r = color.i.g = color.i.b = color.i.a = tc[taddr]; - - return color.c; + const UINT8 p = tc[taddr]; + return color_t(p, p, p, p); } -UINT32 n64_texture_pipe_t::fetch_ia4_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_ia4_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0x7ff; @@ -1016,16 +1006,11 @@ UINT32 n64_texture_pipe_t::fetch_ia4_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 #if USE_64K_LUT return m_expand_16to32_table[c]; #else - color_t color; - color.i.r = GET_HI_RGBA16_TMEM(c); - color.i.g = GET_MED_RGBA16_TMEM(c); - color.i.b = GET_LOW_RGBA16_TMEM(c); - color.i.a = (c & 1) * 0xff; - return color.c; + return color_t((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); #endif } -UINT32 n64_texture_pipe_t::fetch_ia4_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_ia4_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0x7ff; @@ -1033,14 +1018,11 @@ UINT32 n64_texture_pipe_t::fetch_ia4_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 const UINT8 p = ((s) & 1) ? 
(tc[taddr] & 0xf) : (tc[taddr] >> 4); const UINT16 c = ((UINT16*)(userdata->m_tmem + 0x800))[((tpal << 4) | p) << 2]; - color_t color; - color.i.r = color.i.g = color.i.b = (c >> 8) & 0xff; - color.i.a = c & 0xff; - - return color.c; + const UINT8 k = (c >> 8) & 0xff; + return color_t(c & 0xff, k, k, k); } -UINT32 n64_texture_pipe_t::fetch_ia4_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_ia4_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0xfff; @@ -1055,10 +1037,10 @@ UINT32 n64_texture_pipe_t::fetch_ia4_raw(INT32 s, INT32 t, INT32 tbase, INT32 tp color.i.b = i; color.i.a = (p & 1) * 0xff; - return color.c; + return color_t((p & 1) * 0xff, i, i, i); } -UINT32 n64_texture_pipe_t::fetch_ia8_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_ia8_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 1]) & 0x7ff; @@ -1069,16 +1051,11 @@ UINT32 n64_texture_pipe_t::fetch_ia8_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 #if USE_64K_LUT return m_expand_16to32_table[c]; #else - color_t color; - color.i.r = GET_HI_RGBA16_TMEM(c); - color.i.g = GET_MED_RGBA16_TMEM(c); - color.i.b = GET_LOW_RGBA16_TMEM(c); - color.i.a = (c & 1) * 0xff; - return color.c; + return color_t((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); #endif } -UINT32 n64_texture_pipe_t::fetch_ia8_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_ia8_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 1]) & 0x7ff; @@ -1086,14 +1063,11 @@ 
UINT32 n64_texture_pipe_t::fetch_ia8_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 const UINT8 p = tc[taddr]; const UINT16 c = ((UINT16*)(userdata->m_tmem + 0x800))[p << 2]; - color_t color; - color.i.r = color.i.g = color.i.b = (c >> 8) & 0xff; - color.i.a = c & 0xff; - - return color.c; + const UINT8 k = (c >> 8) & 0xff; + return color_t(c & 0xff, k, k, k); } -UINT32 n64_texture_pipe_t::fetch_ia8_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_ia8_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 1]) & 0xfff; @@ -1106,12 +1080,12 @@ UINT32 n64_texture_pipe_t::fetch_ia8_raw(INT32 s, INT32 t, INT32 tbase, INT32 tp color.i.r = i; color.i.g = i; color.i.b = i; - color.i.a = ((p & 0xf) << 4) | (p & 0xf); + color.i.a = (p << 4) | (p & 0xf); - return color.c; + return color_t((p << 4) | (p & 0xf), i, i, i); } -UINT32 n64_texture_pipe_t::fetch_ia16_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_ia16_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT16 *tc = ((UINT16*)userdata->m_tmem); const INT32 taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x3ff; @@ -1122,16 +1096,11 @@ UINT32 n64_texture_pipe_t::fetch_ia16_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 #if USE_64K_LUT return m_expand_16to32_table[c]; #else - color_t color; - color.i.r = GET_HI_RGBA16_TMEM(c); - color.i.g = GET_MED_RGBA16_TMEM(c); - color.i.b = GET_LOW_RGBA16_TMEM(c); - color.i.a = (c & 1) * 0xff; - return color.c; + return color_t((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); #endif } -UINT32 n64_texture_pipe_t::fetch_ia16_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_ia16_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, 
rdp_span_aux* userdata) { const UINT16 *tc = ((UINT16*)userdata->m_tmem); const INT32 taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x3ff; @@ -1139,31 +1108,21 @@ UINT32 n64_texture_pipe_t::fetch_ia16_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 UINT16 c = tc[taddr]; c = ((UINT16*)(userdata->m_tmem + 0x800))[(c >> 8) << 2]; - color_t color; - color.i.r = color.i.g = color.i.b = (c >> 8) & 0xff; - color.i.a = c & 0xff; - - return color.c; + const UINT8 k = (c >> 8) & 0xff; + return color_t(c & 0xff, k, k, k); } -UINT32 n64_texture_pipe_t::fetch_ia16_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_ia16_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT16 *tc = ((UINT16*)userdata->m_tmem); const INT32 taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x7ff; const UINT16 c = tc[taddr]; const UINT8 i = (c >> 8); - - color_t color; - color.i.r = i; - color.i.g = i; - color.i.b = i; - color.i.a = c & 0xff; - - return color.c; + return color_t(c & 0xff, i, i, i); } -UINT32 n64_texture_pipe_t::fetch_i4_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_i4_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0x7ff; @@ -1175,16 +1134,11 @@ UINT32 n64_texture_pipe_t::fetch_i4_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 t #if USE_64K_LUT return m_expand_16to32_table[k]; #else - color_t color; - color.i.r = GET_HI_RGBA16_TMEM(k); - color.i.g = GET_MED_RGBA16_TMEM(k); - color.i.b = GET_LOW_RGBA16_TMEM(k); - color.i.a = (k & 1) * 0xff; - return color.c; + return color_t((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); #endif } -UINT32 n64_texture_pipe_t::fetch_i4_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t 
n64_texture_pipe_t::fetch_i4_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0x7ff; @@ -1193,14 +1147,11 @@ UINT32 n64_texture_pipe_t::fetch_i4_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 t const UINT8 c = ((s & 1)) ? (byteval & 0xf) : ((byteval >> 4) & 0xf); const UINT16 k = ((UINT16*)(userdata->m_tmem + 0x800))[((tpal << 4) | c) << 2]; - color_t color; - color.i.r = color.i.g = color.i.b = (k >> 8) & 0xff; - color.i.a = k & 0xff; - - return color.c; + const UINT8 i = (k >> 8) & 0xff; + return color_t(k & 0xff, i, i, i); } -UINT32 n64_texture_pipe_t::fetch_i4_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_i4_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0xfff; @@ -1209,16 +1160,10 @@ UINT32 n64_texture_pipe_t::fetch_i4_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpa UINT8 c = ((s & 1)) ? 
(byteval & 0xf) : ((byteval >> 4) & 0xf); c |= (c << 4); - color_t color; - color.i.r = c; - color.i.g = c; - color.i.b = c; - color.i.a = c; - - return color.c; + return color_t(c, c, c, c); } -UINT32 n64_texture_pipe_t::fetch_i8_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_i8_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 1]) & 0x7ff; @@ -1229,16 +1174,11 @@ UINT32 n64_texture_pipe_t::fetch_i8_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 t #if USE_64K_LUT return m_expand_16to32_table[k]; #else - color_t color; - color.i.r = GET_HI_RGBA16_TMEM(k); - color.i.g = GET_MED_RGBA16_TMEM(k); - color.i.b = GET_LOW_RGBA16_TMEM(k); - color.i.a = (k & 1) * 0xff; - return color.c; + return color_t((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); #endif } -UINT32 n64_texture_pipe_t::fetch_i8_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_i8_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 1]) & 0x7ff; @@ -1246,25 +1186,16 @@ UINT32 n64_texture_pipe_t::fetch_i8_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 t const UINT8 c = tc[taddr]; const UINT16 k = ((UINT16*)(userdata->m_tmem + 0x800))[c << 2]; - color_t color; - color.i.r = color.i.g = color.i.b = (k >> 8) & 0xff; - color.i.a = k & 0xff; - - return color.c; + const UINT8 i = (k >> 8) & 0xff; + return color_t(k & 0xff, i, i, i); } -UINT32 n64_texture_pipe_t::fetch_i8_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) +color_t n64_texture_pipe_t::fetch_i8_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata) { const UINT8 *tc = userdata->m_tmem; const INT32 taddr = (((tbase << 3) + s) ^ 
sTexAddrSwap8[t & 1]) & 0xfff; const UINT8 c = tc[taddr]; - color_t color; - color.i.r = c; - color.i.g = c; - color.i.b = c; - color.i.a = c; - - return color.c; + return color_t(c, c, c, c); } diff --git a/src/mame/video/rdptpipe.h b/src/mame/video/rdptpipe.h index 3142d511b01..ded943037d5 100644 --- a/src/mame/video/rdptpipe.h +++ b/src/mame/video/rdptpipe.h @@ -17,18 +17,12 @@ #define _VIDEO_RDPTEXPIPE_H_ #include "emu.h" - -struct other_modes_t; -struct misc_state_t; -class color_t; -struct rdp_span_aux; -struct rdp_poly_state; -struct n64_tile_t; +#include "video/n64types.h" class n64_texture_pipe_t { public: - typedef UINT32 (n64_texture_pipe_t::*texel_fetcher_t) (INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + typedef color_t (n64_texture_pipe_t::*texel_fetcher_t) (INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); typedef void (n64_texture_pipe_t::*texel_cycler_t) (color_t* TEX, color_t* prev, INT32 SSS, INT32 SST, UINT32 tilenum, UINT32 cycle, rdp_span_aux* userdata, const rdp_poly_state& object); n64_texture_pipe_t() @@ -123,47 +117,47 @@ class n64_texture_pipe_t void clamp_cycle(INT32* S, INT32* T, INT32* SFRAC, INT32* TFRAC, const bool maxs, const bool maxt, const INT32 tilenum, const n64_tile_t& tile, rdp_span_aux* userdata); void clamp_cycle_light(INT32* S, INT32* T, const bool maxs, const bool maxt, const INT32 tilenum, const n64_tile_t& tile, rdp_span_aux* userdata); - UINT32 fetch_nop(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_nop(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_rgba16_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_rgba16_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_rgba16_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_rgba32_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* 
userdata); - UINT32 fetch_rgba32_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_rgba32_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_rgba16_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_rgba16_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_rgba16_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_rgba32_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_rgba32_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_rgba32_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_yuv(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_yuv(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_ci4_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_ci4_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_ci4_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_ci8_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_ci8_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_ci8_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_ci4_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_ci4_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_ci4_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_ci8_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_ci8_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t 
fetch_ci8_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_ia4_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_ia4_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_ia4_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_ia8_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_ia8_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_ia8_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_ia16_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_ia16_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_ia16_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_ia4_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_ia4_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_ia4_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_ia8_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_ia8_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_ia8_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_ia16_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_ia16_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_ia16_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_i4_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_i4_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_i4_raw(INT32 s, INT32 t, INT32 tbase, 
INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_i8_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_i8_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); - UINT32 fetch_i8_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_i4_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_i4_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_i4_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_i8_tlut0(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_i8_tlut1(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); + color_t fetch_i8_raw(INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata); texel_fetcher_t m_texel_fetch[16*5]; n64_rdp* m_rdp; INT32 m_maskbits_table[16]; - UINT32 m_expand_16to32_table[0x10000]; + color_t m_expand_16to32_table[0x10000]; UINT16 m_lod_lookup[0x80000]; };