-Refactored N64 RDP blender to use function arrays in order to
 flatten inner-loop branch structures for potential performance. [MooglyGuy]

-Broke blender steps into individual #defines in order to make the
 functional differences between RDP modes more apparent. [MooglyGuy]
This commit is contained in:
Ryan Holtz 2013-09-28 05:13:54 +00:00
parent 4148ce2545
commit 53a5763f73
5 changed files with 485 additions and 213 deletions

View File

@ -16,7 +16,7 @@
#ifndef __RSP_H__
#define __RSP_H__
#define USE_SIMD (0)
#define USE_SIMD (1)
#if USE_SIMD
#include <tmmintrin.h>

View File

@ -3089,7 +3089,7 @@ INLINE void cfunc_rsp_vadd(void *param)
__m128i shuffled = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
__m128i carry = _mm_and_si128(rsp->xvflag[CARRY], vec_flagmask);
__m128i unsat = _mm_add_epi16(_mm_add_epi16(rsp->xv[VS1REG], shuffled), carry);
rsp->accum_l = _mm_add_epi16(_mm_add_epi16(rsp->xv[VS1REG], shuffled), carry);
__m128i addvec = _mm_adds_epi16(rsp->xv[VS1REG], shuffled);
@ -3098,8 +3098,6 @@ INLINE void cfunc_rsp_vadd(void *param)
rsp->xv[VDREG] = _mm_add_epi16(addvec, carry);
rsp->accum_l = unsat;
rsp->xvflag[ZERO] = _mm_setzero_si128();
rsp->xvflag[CARRY] = _mm_setzero_si128();
#else

View File

@ -2,232 +2,458 @@
#include "includes/n64.h"
#include "video/n64.h"
bool N64BlenderT::Blend1Cycle(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel, rdp_span_aux *userdata, const rdp_poly_state& object)
N64BlenderT::N64BlenderT()
{
INT32 r, g, b;
blend1[0] = &N64BlenderT::Blend1CycleNoBlendNoACVGNoDither;
blend1[1] = &N64BlenderT::Blend1CycleNoBlendNoACVGDither;
blend1[2] = &N64BlenderT::Blend1CycleNoBlendACVGNoDither;
blend1[3] = &N64BlenderT::Blend1CycleNoBlendACVGDither;
blend1[4] = &N64BlenderT::Blend1CycleBlendNoACVGNoDither;
blend1[5] = &N64BlenderT::Blend1CycleBlendNoACVGDither;
blend1[6] = &N64BlenderT::Blend1CycleBlendACVGNoDither;
blend1[7] = &N64BlenderT::Blend1CycleBlendACVGDither;
if (!object.OtherModes.alpha_cvg_select)
{
DitherA(&userdata->PixelColor.i.a, adseed);
}
blend2[0] = &N64BlenderT::Blend2CycleNoBlendNoACVGNoDither;
blend2[1] = &N64BlenderT::Blend2CycleNoBlendNoACVGDither;
blend2[2] = &N64BlenderT::Blend2CycleNoBlendACVGNoDither;
blend2[3] = &N64BlenderT::Blend2CycleNoBlendACVGDither;
blend2[4] = &N64BlenderT::Blend2CycleBlendNoACVGNoDither;
blend2[5] = &N64BlenderT::Blend2CycleBlendNoACVGDither;
blend2[6] = &N64BlenderT::Blend2CycleBlendACVGNoDither;
blend2[7] = &N64BlenderT::Blend2CycleBlendACVGDither;
DitherA(&userdata->ShadeColor.i.a, adseed);
cycle0[0] = &N64BlenderT::BlendEquationCycle0NoForceNoSpecial;
cycle0[1] = &N64BlenderT::BlendEquationCycle0NoForceSpecial;
cycle0[2] = &N64BlenderT::BlendEquationCycle0ForceNoSpecial;
cycle0[3] = &N64BlenderT::BlendEquationCycle0ForceSpecial;
if (!AlphaCompare(userdata->PixelColor.i.a, userdata, object))
{
return false;
}
if (object.OtherModes.antialias_en ? (!userdata->CurrentPixCvg) : (!userdata->CurrentCvgBit))
{
return false;
}
bool dontblend = (partialreject && userdata->PixelColor.i.a >= 0xff);
if (!userdata->BlendEnable || dontblend)
{
r = *userdata->ColorInputs.blender1a_r[0];
g = *userdata->ColorInputs.blender1a_g[0];
b = *userdata->ColorInputs.blender1a_b[0];
}
else
{
userdata->InvPixelColor.i.a = 0xff - *userdata->ColorInputs.blender1b_a[0];
BlendEquationCycle0(&r, &g, &b, special_bsel, userdata, object);
}
if (object.OtherModes.rgb_dither_sel < 3)
{
DitherRGB(&r, &g, &b, dith);
}
*fr = r;
*fg = g;
*fb = b;
return true;
cycle1[0] = &N64BlenderT::BlendEquationCycle1NoForceNoSpecial;
cycle1[1] = &N64BlenderT::BlendEquationCycle1NoForceSpecial;
cycle1[2] = &N64BlenderT::BlendEquationCycle1ForceNoSpecial;
cycle1[3] = &N64BlenderT::BlendEquationCycle1ForceSpecial;
}
bool N64BlenderT::Blend2Cycle(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, int special_bsel1, rdp_span_aux *userdata, const rdp_poly_state& object)
{
if (!object.OtherModes.alpha_cvg_select)
{
DitherA(&userdata->PixelColor.i.a, adseed);
#define ALPHA_COMPARE() \
if (!AlphaCompare(userdata->PixelColor.i.a, userdata, object)) \
{ \
return false; \
}
DitherA(&userdata->ShadeColor.i.a, adseed);
if (!AlphaCompare(userdata->PixelColor.i.a, userdata, object))
{
return false;
#define CVG_COMPARE() \
if (object.OtherModes.antialias_en ? (!userdata->CurrentPixCvg) : (!userdata->CurrentCvgBit)) \
{ \
return false; \
}
if (object.OtherModes.antialias_en ? (!userdata->CurrentPixCvg) : (!userdata->CurrentCvgBit))
{
return false;
}
#define TEST_REJECT() \
ALPHA_COMPARE() \
CVG_COMPARE()
userdata->InvPixelColor.i.a = 0xff - *userdata->ColorInputs.blender1b_a[0];
#define WRITE_OUT_NB_ND(cycle) \
*fr = *userdata->ColorInputs.blender1a_r[cycle]; \
*fg = *userdata->ColorInputs.blender1a_g[cycle]; \
*fb = *userdata->ColorInputs.blender1a_b[cycle];
INT32 r, g, b;
BlendEquationCycle0(&r, &g, &b, special_bsel0, userdata, object);
#define WRITE_OUT() \
*fr = r; \
*fg = g; \
*fb = b;
userdata->BlendedPixelColor.i.r = r;
userdata->BlendedPixelColor.i.g = g;
userdata->BlendedPixelColor.i.b = b;
#define WRITE_BLENDED_COLOR() \
userdata->BlendedPixelColor.i.r = r; \
userdata->BlendedPixelColor.i.g = g; \
userdata->BlendedPixelColor.i.b = b; \
userdata->BlendedPixelColor.i.a = userdata->PixelColor.i.a;
bool dontblend = (partialreject && userdata->PixelColor.i.a >= 0xff);
if (!userdata->BlendEnable || dontblend)
{
r = *userdata->ColorInputs.blender1a_r[1];
g = *userdata->ColorInputs.blender1a_g[1];
b = *userdata->ColorInputs.blender1a_b[1];
}
else
{
userdata->InvPixelColor.i.a = 0xff - *userdata->ColorInputs.blender1b_a[1];
BlendEquationCycle1(&r, &g, &b, special_bsel1, userdata, object);
// BLEND_CYCLE(cyc): produce the blended r/g/b for blender cycle `cyc` (0 or 1).
// Expects locals r, g, b plus partialreject, userdata, object and special_bsel<cyc>
// to be in scope at the expansion site, and must be expanded inside a member function.
//  - When partialreject is set and the pixel alpha is >= 0xff, the blender1a colour
//    for this cycle is taken unmodified (ASSIGN_OUT).
//  - Otherwise InvPixelColor alpha is refreshed and the blend equation is dispatched
//    through the cycle<cyc>[] table; index bit 1 = force_blend, bit 0 = special_bsel<cyc>.
#define BLEND_CYCLE(cyc) \
if (partialreject && userdata->PixelColor.i.a >= 0xff) \
{ \
ASSIGN_OUT(cyc); \
} \
else \
{ \
userdata->InvPixelColor.i.a = 0xff - *userdata->ColorInputs.blender1b_a[cyc]; \
((this)->*(cycle##cyc[((object.OtherModes.force_blend & 1) << 1) | (special_bsel##cyc & 1)]))(&r, &g, &b, userdata, object); \
}
if (object.OtherModes.rgb_dither_sel < 3)
{
DitherRGB(&r, &g, &b, dith);
// Building blocks for the BlendEquationCycle* functions. All of them expect
// `userdata` and the output pointers r, g, b to be in scope at the expansion site.

// Declares blend1a/blend2a: the two blend factors for `cycle`, each input alpha >> 3.
#define BLEND_FACTORS(cycle) \
UINT8 blend1a = *userdata->ColorInputs.blender1b_a[cycle] >> 3; \
UINT8 blend2a = *userdata->ColorInputs.blender2b_a[cycle] >> 3;
// As BLEND_FACTORS, and also declares `sum`, the divisor consumed by BLEND_SCALE:
// ((blend1a >> 2) + (blend2a >> 2) + 1) masked to 4 bits.
#define BLEND_FACTORS_SUM(cycle) \
UINT8 blend1a = *userdata->ColorInputs.blender1b_a[cycle] >> 3; \
UINT8 blend2a = *userdata->ColorInputs.blender2b_a[cycle] >> 3; \
UINT32 sum = ((blend1a >> 2) + (blend2a >> 2) + 1) & 0xf;
// "Special" factor variant: each alpha is shifted by an extra ShiftA/ShiftB and
// masked with 0x1c, which clears the two low bits of the factor.
#define BLEND_FACTORS_SPECIAL(cycle) \
UINT8 blend1a = (*userdata->ColorInputs.blender1b_a[cycle] >> (3 + userdata->ShiftA)) & 0x1c; \
UINT8 blend2a = (*userdata->ColorInputs.blender2b_a[cycle] >> (3 + userdata->ShiftB)) & 0x1c;
// As BLEND_FACTORS_SPECIAL, and also declares `sum` for BLEND_SCALE.
#define BLEND_FACTORS_SPECIAL_SUM(cycle) \
UINT8 blend1a = (*userdata->ColorInputs.blender1b_a[cycle] >> (3 + userdata->ShiftA)) & 0x1c; \
UINT8 blend2a = (*userdata->ColorInputs.blender2b_a[cycle] >> (3 + userdata->ShiftB)) & 0x1c; \
UINT32 sum = ((blend1a >> 2) + (blend2a >> 2) + 1) & 0xf;
// Weighted sum per channel: out = colour1 * blend1a + colour2 * blend2a.
// Overwrites *r, *g, *b; requires blend1a/blend2a from one of the BLEND_FACTORS* macros.
#define BLEND_MUL(cycle) \
*r = (((int)(*userdata->ColorInputs.blender1a_r[cycle]) * (int)(blend1a))) + \
(((int)(*userdata->ColorInputs.blender2a_r[cycle]) * (int)(blend2a))); \
*g = (((int)(*userdata->ColorInputs.blender1a_g[cycle]) * (int)(blend1a))) + \
(((int)(*userdata->ColorInputs.blender2a_g[cycle]) * (int)(blend2a))); \
*b = (((int)(*userdata->ColorInputs.blender1a_b[cycle]) * (int)(blend1a))) + \
(((int)(*userdata->ColorInputs.blender2a_b[cycle]) * (int)(blend2a)));
// Adds the second colour input scaled by 4 (<< 2); paired with the SPECIAL factors.
#define BLEND_ADD_SPECIAL(cycle) \
*r += (((int)*userdata->ColorInputs.blender2a_r[cycle]) << 2); \
*g += (((int)*userdata->ColorInputs.blender2a_g[cycle]) << 2); \
*b += (((int)*userdata->ColorInputs.blender2a_b[cycle]) << 2);
// Adds the second colour input once; paired with the non-special factors.
#define BLEND_ADD(cycle) \
*r += (int)*userdata->ColorInputs.blender2a_r[cycle]; \
*g += (int)*userdata->ColorInputs.blender2a_g[cycle]; \
*b += (int)*userdata->ColorInputs.blender2a_b[cycle];
// Right-shifts all three channels by `shift` (2 before BLEND_SCALE, 5 in the
// force-blend equations, which skip the division).
#define BLEND_SHIFT(shift) \
*r >>= shift; \
*g >>= shift; \
*b >>= shift;
// Upper clamp only: limits each channel to 255. Negative values are not touched.
#define BLEND_CLAMP() \
if (*r > 255) *r = 255; \
if (*g > 255) *g = 255; \
if (*b > 255) *b = 255;
// Divides each channel by `sum`; a zero sum yields full intensity (0xff) instead
// of dividing. Requires `sum` from one of the *_SUM factor macros.
#define BLEND_SCALE() \
if (sum) \
{ \
*r /= sum; \
*g /= sum; \
*b /= sum; \
} \
else \
{ \
*r = *g = *b = 0xff; \
}
*fr = r;
*fg = g;
*fb = b;
// ASSIGN_OUT(cycle): load the unblended blender1a colour for `cycle` into the
// locals r, g, b (which must be declared at the expansion site).
#define ASSIGN_OUT(cycle) \
r = *userdata->ColorInputs.blender1a_r[cycle]; \
g = *userdata->ColorInputs.blender1a_g[cycle]; \
b = *userdata->ColorInputs.blender1a_b[cycle];
// 1-cycle blender variant: blending off, alpha_cvg_select clear, no RGB dither
// (installed in blend1[0]).
// Applies DitherA to the pixel and shade alpha, runs the alpha-compare and
// coverage rejection tests, then copies the cycle-0 blender1a colour to fr/fg/fb.
// Returns false if the pixel is rejected (fr/fg/fb are not written), true otherwise.
// dith, partialreject and special_bsel0 are unused here; they are present because
// every variant shares the Blender1 function-pointer signature.
bool N64BlenderT::Blend1CycleNoBlendNoACVGNoDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, rdp_span_aux *userdata, const rdp_poly_state& object)
{
DitherA(&userdata->PixelColor.i.a, adseed);
DitherA(&userdata->ShadeColor.i.a, adseed);
// May return false from inside the macro.
TEST_REJECT();
WRITE_OUT_NB_ND(0);
return true;
}
void N64BlenderT::BlendEquationCycle0(int* r, int* g, int* b, int bsel_special, rdp_span_aux *userdata, const rdp_poly_state& object)
bool N64BlenderT::Blend1CycleNoBlendNoACVGDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, rdp_span_aux *userdata, const rdp_poly_state& object)
{
UINT8 blend1a = *userdata->ColorInputs.blender1b_a[0] >> 3;
UINT8 blend2a = *userdata->ColorInputs.blender2b_a[0] >> 3;
INT32 r, g, b;
if (bsel_special)
{
blend1a = (blend1a >> userdata->ShiftA) & 0x1C;
blend2a = (blend2a >> userdata->ShiftB) & 0x1C;
}
DitherA(&userdata->PixelColor.i.a, adseed);
DitherA(&userdata->ShadeColor.i.a, adseed);
UINT32 sum = ((blend1a >> 2) + (blend2a >> 2) + 1) & 0xf;
TEST_REJECT();
ASSIGN_OUT(0);
DitherRGB(&r, &g, &b, dith);
WRITE_OUT();
*r = (((int)(*userdata->ColorInputs.blender1a_r[0]) * (int)(blend1a))) +
(((int)(*userdata->ColorInputs.blender2a_r[0]) * (int)(blend2a)));
*g = (((int)(*userdata->ColorInputs.blender1a_g[0]) * (int)(blend1a))) +
(((int)(*userdata->ColorInputs.blender2a_g[0]) * (int)(blend2a)));
*b = (((int)(*userdata->ColorInputs.blender1a_b[0]) * (int)(blend1a))) +
(((int)(*userdata->ColorInputs.blender2a_b[0]) * (int)(blend2a)));
if (bsel_special)
{
*r += (((int)*userdata->ColorInputs.blender2a_r[0]) << 2);
*g += (((int)*userdata->ColorInputs.blender2a_g[0]) << 2);
*b += (((int)*userdata->ColorInputs.blender2a_b[0]) << 2);
}
else
{
*r += (int)*userdata->ColorInputs.blender2a_r[0];
*g += (int)*userdata->ColorInputs.blender2a_g[0];
*b += (int)*userdata->ColorInputs.blender2a_b[0];
}
*r >>= 2;
*g >>= 2;
*b >>= 2;
if (object.OtherModes.force_blend)
{
*r >>= 3;
*g >>= 3;
*b >>= 3;
}
else
{
if (sum)
{
*r /= sum;
*g /= sum;
*b /= sum;
}
else
{
*r = *g = *b = 0xff;
}
}
if (*r > 255) *r = 255;
if (*g > 255) *g = 255;
if (*b > 255) *b = 255;
return true;
}
void N64BlenderT::BlendEquationCycle1(INT32* r, INT32* g, INT32* b, int bsel_special, rdp_span_aux *userdata, const rdp_poly_state& object)
bool N64BlenderT::Blend1CycleNoBlendACVGNoDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, rdp_span_aux *userdata, const rdp_poly_state& object)
{
UINT8 blend1a = *userdata->ColorInputs.blender1b_a[1] >> 3;
UINT8 blend2a = *userdata->ColorInputs.blender2b_a[1] >> 3;
DitherA(&userdata->ShadeColor.i.a, adseed);
if (bsel_special)
{
blend1a = (blend1a >> userdata->ShiftA) & 0x1C;
blend2a = (blend2a >> userdata->ShiftB) & 0x1C;
}
TEST_REJECT();
WRITE_OUT_NB_ND(0);
UINT32 sum = ((blend1a >> 2) + (blend2a >> 2) + 1) & 0xf;
return true;
}
*r = (((int)(*userdata->ColorInputs.blender1a_r[1]) * (int)(blend1a))) +
(((int)(*userdata->ColorInputs.blender2a_r[1]) * (int)(blend2a)));
bool N64BlenderT::Blend1CycleNoBlendACVGDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, rdp_span_aux *userdata, const rdp_poly_state& object)
{
INT32 r, g, b;
*g = (((int)(*userdata->ColorInputs.blender1a_g[1]) * (int)(blend1a))) +
(((int)(*userdata->ColorInputs.blender2a_g[1]) * (int)(blend2a)));
DitherA(&userdata->ShadeColor.i.a, adseed);
*b = (((int)(*userdata->ColorInputs.blender1a_b[1]) * (int)(blend1a))) +
(((int)(*userdata->ColorInputs.blender2a_b[1]) * (int)(blend2a)));
TEST_REJECT();
ASSIGN_OUT(0);
DitherRGB(&r, &g, &b, dith);
WRITE_OUT();
if (bsel_special)
{
*r += (((int)*userdata->ColorInputs.blender2a_r[1]) << 2);
*g += (((int)*userdata->ColorInputs.blender2a_g[1]) << 2);
*b += (((int)*userdata->ColorInputs.blender2a_b[1]) << 2);
}
else
{
*r += (int)*userdata->ColorInputs.blender2a_r[1];
*g += (int)*userdata->ColorInputs.blender2a_g[1];
*b += (int)*userdata->ColorInputs.blender2a_b[1];
}
return true;
}
*r >>= 2;
*g >>= 2;
*b >>= 2;
bool N64BlenderT::Blend1CycleBlendNoACVGNoDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, rdp_span_aux *userdata, const rdp_poly_state& object)
{
INT32 r, g, b;
if (object.OtherModes.force_blend)
{
*r >>= 3;
*g >>= 3;
*b >>= 3;
}
else
{
if (sum)
{
*r /= sum;
*g /= sum;
*b /= sum;
}
else
{
*r = *g = *b = 0xff;
}
}
DitherA(&userdata->PixelColor.i.a, adseed);
DitherA(&userdata->ShadeColor.i.a, adseed);
if (*r > 255) *r = 255;
if (*g > 255) *g = 255;
if (*b > 255) *b = 255;
TEST_REJECT();
BLEND_CYCLE(0);
WRITE_OUT();
return true;
}
// 1-cycle blender variant: blending on, alpha_cvg_select clear, RGB dither on
// (installed in blend1[5]).
// Applies DitherA to the pixel and shade alpha, runs the rejection tests, computes
// the cycle-0 blend (or passes the input colour through when partialreject applies),
// applies DitherRGB with `dith`, and writes the result to fr/fg/fb.
// Returns false if the pixel is rejected (fr/fg/fb are not written), true otherwise.
bool N64BlenderT::Blend1CycleBlendNoACVGDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, rdp_span_aux *userdata, const rdp_poly_state& object)
{
INT32 r, g, b;
DitherA(&userdata->PixelColor.i.a, adseed);
DitherA(&userdata->ShadeColor.i.a, adseed);
// May return false from inside the macro.
TEST_REJECT();
// Fills r, g, b on both of its branches.
BLEND_CYCLE(0);
DitherRGB(&r, &g, &b, dith);
WRITE_OUT();
return true;
}
// 1-cycle blender variant: blending on, alpha_cvg_select set, no RGB dither
// (installed in blend1[6]).
// Only the shade alpha goes through DitherA; the pixel alpha is left as-is.
// Runs the rejection tests, computes the cycle-0 blend (or passes the input colour
// through when partialreject applies) and writes the result to fr/fg/fb.
// Returns false if the pixel is rejected (fr/fg/fb are not written), true otherwise.
bool N64BlenderT::Blend1CycleBlendACVGNoDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, rdp_span_aux *userdata, const rdp_poly_state& object)
{
INT32 r, g, b;
DitherA(&userdata->ShadeColor.i.a, adseed);
// May return false from inside the macro.
TEST_REJECT();
// Fills r, g, b on both of its branches.
BLEND_CYCLE(0);
WRITE_OUT();
return true;
}
// 1-cycle blender variant: blending on, alpha_cvg_select set, RGB dither on
// (installed in blend1[7]).
// Only the shade alpha goes through DitherA; the pixel alpha is left as-is.
// Runs the rejection tests, computes the cycle-0 blend (or passes the input colour
// through when partialreject applies), applies DitherRGB with `dith`, and writes
// the result to fr/fg/fb.
// Returns false if the pixel is rejected (fr/fg/fb are not written), true otherwise.
bool N64BlenderT::Blend1CycleBlendACVGDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, rdp_span_aux *userdata, const rdp_poly_state& object)
{
INT32 r, g, b;
DitherA(&userdata->ShadeColor.i.a, adseed);
// May return false from inside the macro.
TEST_REJECT();
// Fills r, g, b on both of its branches.
BLEND_CYCLE(0);
DitherRGB(&r, &g, &b, dith);
WRITE_OUT();
return true;
}
// 2-cycle blender variant: second-cycle blending off, alpha_cvg_select clear,
// no RGB dither (installed in blend2[0]).
// The cycle-0 blend equation is always evaluated and its result stored in
// userdata->BlendedPixelColor; the value written to fr/fg/fb is the unblended
// cycle-1 blender1a colour.
// Returns false if the pixel is rejected (fr/fg/fb are not written), true otherwise.
bool N64BlenderT::Blend2CycleNoBlendNoACVGNoDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, int special_bsel1, rdp_span_aux *userdata, const rdp_poly_state& object)
{
INT32 r, g, b;
DitherA(&userdata->PixelColor.i.a, adseed);
DitherA(&userdata->ShadeColor.i.a, adseed);
// May return false from inside the macro.
TEST_REJECT();
userdata->InvPixelColor.i.a = 0xff - *userdata->ColorInputs.blender1b_a[0];
// Table index: bit 1 = force_blend, bit 0 = special_bsel0.
((this)->*(cycle0[((object.OtherModes.force_blend & 1) << 1) | (special_bsel0 & 1)]))(&r, &g, &b, userdata, object);
WRITE_BLENDED_COLOR();
WRITE_OUT_NB_ND(1);
return true;
}
// 2-cycle blender variant: second-cycle blending off, alpha_cvg_select clear,
// RGB dither on (installed in blend2[1]).
// The cycle-0 blend equation is always evaluated and its result stored in
// userdata->BlendedPixelColor; the output is the unblended cycle-1 blender1a
// colour after DitherRGB.
// Returns false if the pixel is rejected (fr/fg/fb are not written), true otherwise.
bool N64BlenderT::Blend2CycleNoBlendNoACVGDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, int special_bsel1, rdp_span_aux *userdata, const rdp_poly_state& object)
{
INT32 r, g, b;
DitherA(&userdata->PixelColor.i.a, adseed);
DitherA(&userdata->ShadeColor.i.a, adseed);
// May return false from inside the macro.
TEST_REJECT();
userdata->InvPixelColor.i.a = 0xff - *userdata->ColorInputs.blender1b_a[0];
// Table index: bit 1 = force_blend, bit 0 = special_bsel0.
((this)->*(cycle0[((object.OtherModes.force_blend & 1) << 1) | (special_bsel0 & 1)]))(&r, &g, &b, userdata, object);
WRITE_BLENDED_COLOR();
// Overwrites the cycle-0 result held in r, g, b with the cycle-1 input colour.
ASSIGN_OUT(1);
DitherRGB(&r, &g, &b, dith);
WRITE_OUT();
return true;
}
// 2-cycle blender variant: second-cycle blending off, alpha_cvg_select set,
// no RGB dither (installed in blend2[2]).
// Only the shade alpha goes through DitherA. The cycle-0 blend equation is always
// evaluated and stored in userdata->BlendedPixelColor; the value written to
// fr/fg/fb is the unblended cycle-1 blender1a colour.
// Returns false if the pixel is rejected (fr/fg/fb are not written), true otherwise.
bool N64BlenderT::Blend2CycleNoBlendACVGNoDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, int special_bsel1, rdp_span_aux *userdata, const rdp_poly_state& object)
{
INT32 r, g, b;
DitherA(&userdata->ShadeColor.i.a, adseed);
// May return false from inside the macro.
TEST_REJECT();
userdata->InvPixelColor.i.a = 0xff - *userdata->ColorInputs.blender1b_a[0];
// Table index: bit 1 = force_blend, bit 0 = special_bsel0.
((this)->*(cycle0[((object.OtherModes.force_blend & 1) << 1) | (special_bsel0 & 1)]))(&r, &g, &b, userdata, object);
WRITE_BLENDED_COLOR();
WRITE_OUT_NB_ND(1);
return true;
}
// 2-cycle blender variant: second-cycle blending off, alpha_cvg_select set,
// RGB dither on (installed in blend2[3]).
// Only the shade alpha goes through DitherA. The cycle-0 blend equation is always
// evaluated and stored in userdata->BlendedPixelColor; the output is the unblended
// cycle-1 blender1a colour after DitherRGB.
// Returns false if the pixel is rejected (fr/fg/fb are not written), true otherwise.
bool N64BlenderT::Blend2CycleNoBlendACVGDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, int special_bsel1, rdp_span_aux *userdata, const rdp_poly_state& object)
{
INT32 r, g, b;
DitherA(&userdata->ShadeColor.i.a, adseed);
// May return false from inside the macro.
TEST_REJECT();
userdata->InvPixelColor.i.a = 0xff - *userdata->ColorInputs.blender1b_a[0];
// Table index: bit 1 = force_blend, bit 0 = special_bsel0.
((this)->*(cycle0[((object.OtherModes.force_blend & 1) << 1) | (special_bsel0 & 1)]))(&r, &g, &b, userdata, object);
WRITE_BLENDED_COLOR();
// Overwrites the cycle-0 result held in r, g, b with the cycle-1 input colour.
ASSIGN_OUT(1);
DitherRGB(&r, &g, &b, dith);
WRITE_OUT();
return true;
}
// 2-cycle blender variant: second-cycle blending on, alpha_cvg_select clear,
// no RGB dither (installed in blend2[4]).
// Cycle 0 is evaluated and stored in userdata->BlendedPixelColor, then cycle 1 is
// blended (or passed through when partialreject applies) and written to fr/fg/fb.
// Returns false if the pixel is rejected (fr/fg/fb are not written), true otherwise.
bool N64BlenderT::Blend2CycleBlendNoACVGNoDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, int special_bsel1, rdp_span_aux *userdata, const rdp_poly_state& object)
{
INT32 r, g, b;
DitherA(&userdata->PixelColor.i.a, adseed);
DitherA(&userdata->ShadeColor.i.a, adseed);
// May return false from inside the macro.
TEST_REJECT();
userdata->InvPixelColor.i.a = 0xff - *userdata->ColorInputs.blender1b_a[0];
// Table index: bit 1 = force_blend, bit 0 = special_bsel0.
((this)->*(cycle0[((object.OtherModes.force_blend & 1) << 1) | (special_bsel0 & 1)]))(&r, &g, &b, userdata, object);
WRITE_BLENDED_COLOR();
// Second cycle; dispatches through cycle1[] using special_bsel1.
BLEND_CYCLE(1);
WRITE_OUT();
return true;
}
// 2-cycle blender variant: second-cycle blending on, alpha_cvg_select clear,
// RGB dither on (installed in blend2[5]).
// Cycle 0 is evaluated and stored in userdata->BlendedPixelColor, then cycle 1 is
// blended (or passed through when partialreject applies), run through DitherRGB
// and written to fr/fg/fb.
// Returns false if the pixel is rejected (fr/fg/fb are not written), true otherwise.
bool N64BlenderT::Blend2CycleBlendNoACVGDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, int special_bsel1, rdp_span_aux *userdata, const rdp_poly_state& object)
{
INT32 r, g, b;
DitherA(&userdata->PixelColor.i.a, adseed);
DitherA(&userdata->ShadeColor.i.a, adseed);
// May return false from inside the macro.
TEST_REJECT();
userdata->InvPixelColor.i.a = 0xff - *userdata->ColorInputs.blender1b_a[0];
// Table index: bit 1 = force_blend, bit 0 = special_bsel0.
((this)->*(cycle0[((object.OtherModes.force_blend & 1) << 1) | (special_bsel0 & 1)]))(&r, &g, &b, userdata, object);
WRITE_BLENDED_COLOR();
// Second cycle; dispatches through cycle1[] using special_bsel1.
BLEND_CYCLE(1);
DitherRGB(&r, &g, &b, dith);
WRITE_OUT();
return true;
}
// 2-cycle blender variant: second-cycle blending on, alpha_cvg_select set,
// no RGB dither (installed in blend2[6]).
// Only the shade alpha goes through DitherA. Cycle 0 is evaluated and stored in
// userdata->BlendedPixelColor, then cycle 1 is blended (or passed through when
// partialreject applies) and written to fr/fg/fb.
// Returns false if the pixel is rejected (fr/fg/fb are not written), true otherwise.
bool N64BlenderT::Blend2CycleBlendACVGNoDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, int special_bsel1, rdp_span_aux *userdata, const rdp_poly_state& object)
{
INT32 r, g, b;
DitherA(&userdata->ShadeColor.i.a, adseed);
// May return false from inside the macro.
TEST_REJECT();
userdata->InvPixelColor.i.a = 0xff - *userdata->ColorInputs.blender1b_a[0];
// Table index: bit 1 = force_blend, bit 0 = special_bsel0.
((this)->*(cycle0[((object.OtherModes.force_blend & 1) << 1) | (special_bsel0 & 1)]))(&r, &g, &b, userdata, object);
WRITE_BLENDED_COLOR();
// Second cycle; dispatches through cycle1[] using special_bsel1.
BLEND_CYCLE(1);
WRITE_OUT();
return true;
}
// 2-cycle blender variant: second-cycle blending on, alpha_cvg_select set,
// RGB dither on (installed in blend2[7]).
// Only the shade alpha goes through DitherA. Cycle 0 is evaluated and stored in
// userdata->BlendedPixelColor, then cycle 1 is blended (or passed through when
// partialreject applies), run through DitherRGB and written to fr/fg/fb.
// Returns false if the pixel is rejected (fr/fg/fb are not written), true otherwise.
bool N64BlenderT::Blend2CycleBlendACVGDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel0, int special_bsel1, rdp_span_aux *userdata, const rdp_poly_state& object)
{
INT32 r, g, b;
DitherA(&userdata->ShadeColor.i.a, adseed);
// May return false from inside the macro.
TEST_REJECT();
userdata->InvPixelColor.i.a = 0xff - *userdata->ColorInputs.blender1b_a[0];
// Table index: bit 1 = force_blend, bit 0 = special_bsel0.
((this)->*(cycle0[((object.OtherModes.force_blend & 1) << 1) | (special_bsel0 & 1)]))(&r, &g, &b, userdata, object);
WRITE_BLENDED_COLOR();
// Second cycle; dispatches through cycle1[] using special_bsel1.
BLEND_CYCLE(1);
DitherRGB(&r, &g, &b, dith);
WRITE_OUT();
return true;
}
// Cycle-0 blend equation, force_blend off, normal (non-special) factors
// (installed in cycle0[0]).
// Per channel: (c1 * f1 + c2 * f2 + c2) >> 2, divided by the factor sum
// (0xff if the sum is zero), then limited to 255. Results go to *r, *g, *b.
// NOTE(review): the class declaration and the Cycle1 counterparts spell these
// parameters INT32*; int* here presumably names the same type -- confirm.
void N64BlenderT::BlendEquationCycle0NoForceNoSpecial(int* r, int* g, int* b, rdp_span_aux *userdata, const rdp_poly_state& object)
{
BLEND_FACTORS_SUM(0);
BLEND_MUL(0);
BLEND_ADD(0);
BLEND_SHIFT(2);
BLEND_SCALE();
BLEND_CLAMP();
}
// Cycle-0 blend equation, force_blend off, special factors (installed in cycle0[1]).
// Factors take the extra ShiftA/ShiftB shift and 0x1c mask, and the second colour
// is added scaled by 4. Per channel: (c1 * f1 + c2 * f2 + (c2 << 2)) >> 2, divided
// by the factor sum (0xff if the sum is zero), then limited to 255.
// NOTE(review): the class declaration and the Cycle1 counterparts spell these
// parameters INT32*; int* here presumably names the same type -- confirm.
void N64BlenderT::BlendEquationCycle0NoForceSpecial(int* r, int* g, int* b, rdp_span_aux *userdata, const rdp_poly_state& object)
{
BLEND_FACTORS_SPECIAL_SUM(0);
BLEND_MUL(0);
BLEND_ADD_SPECIAL(0);
BLEND_SHIFT(2);
BLEND_SCALE();
BLEND_CLAMP();
}
// Cycle-0 blend equation, force_blend on, normal (non-special) factors
// (installed in cycle0[2]).
// No division by the factor sum: per channel (c1 * f1 + c2 * f2 + c2) >> 5,
// then limited to 255. Results go to *r, *g, *b.
// NOTE(review): the class declaration and the Cycle1 counterparts spell these
// parameters INT32*; int* here presumably names the same type -- confirm.
void N64BlenderT::BlendEquationCycle0ForceNoSpecial(int* r, int* g, int* b, rdp_span_aux *userdata, const rdp_poly_state& object)
{
BLEND_FACTORS(0);
BLEND_MUL(0);
BLEND_ADD(0);
BLEND_SHIFT(5);
BLEND_CLAMP();
}
// Cycle-0 blend equation, force_blend on, special factors (installed in cycle0[3]).
// No division by the factor sum: per channel (c1 * f1 + c2 * f2 + (c2 << 2)) >> 5,
// then limited to 255. Results go to *r, *g, *b.
// NOTE(review): the class declaration and the Cycle1 counterparts spell these
// parameters INT32*; int* here presumably names the same type -- confirm.
void N64BlenderT::BlendEquationCycle0ForceSpecial(int* r, int* g, int* b, rdp_span_aux *userdata, const rdp_poly_state& object)
{
BLEND_FACTORS_SPECIAL(0);
BLEND_MUL(0);
BLEND_ADD_SPECIAL(0);
BLEND_SHIFT(5);
BLEND_CLAMP();
}
// Cycle-1 blend equation, force_blend off, normal (non-special) factors
// (installed in cycle1[0]).
// Per channel: (c1 * f1 + c2 * f2 + c2) >> 2, divided by the factor sum
// (0xff if the sum is zero), then limited to 255. Results go to *r, *g, *b.
void N64BlenderT::BlendEquationCycle1NoForceNoSpecial(INT32* r, INT32* g, INT32* b, rdp_span_aux *userdata, const rdp_poly_state& object)
{
BLEND_FACTORS_SUM(1);
BLEND_MUL(1);
BLEND_ADD(1);
BLEND_SHIFT(2);
BLEND_SCALE();
BLEND_CLAMP();
}
// Cycle-1 blend equation, force_blend off, special factors (installed in cycle1[1]).
// Factors take the extra ShiftA/ShiftB shift and 0x1c mask, and the second colour
// is added scaled by 4. Per channel: (c1 * f1 + c2 * f2 + (c2 << 2)) >> 2, divided
// by the factor sum (0xff if the sum is zero), then limited to 255.
void N64BlenderT::BlendEquationCycle1NoForceSpecial(INT32* r, INT32* g, INT32* b, rdp_span_aux *userdata, const rdp_poly_state& object)
{
BLEND_FACTORS_SPECIAL_SUM(1);
BLEND_MUL(1);
BLEND_ADD_SPECIAL(1);
BLEND_SHIFT(2);
BLEND_SCALE();
BLEND_CLAMP();
}
// Cycle-1 blend equation, force_blend on, normal (non-special) factors
// (installed in cycle1[2]).
// No division by the factor sum: per channel (c1 * f1 + c2 * f2 + c2) >> 5,
// then limited to 255. Results go to *r, *g, *b.
void N64BlenderT::BlendEquationCycle1ForceNoSpecial(INT32* r, INT32* g, INT32* b, rdp_span_aux *userdata, const rdp_poly_state& object)
{
BLEND_FACTORS(1);
BLEND_MUL(1);
BLEND_ADD(1);
BLEND_SHIFT(5);
BLEND_CLAMP();
}
// Cycle-1 blend equation, force_blend on, special factors (installed in cycle1[3]).
// No division by the factor sum: per channel (c1 * f1 + c2 * f2 + (c2 << 2)) >> 5,
// then limited to 255. Results go to *r, *g, *b.
void N64BlenderT::BlendEquationCycle1ForceSpecial(INT32* r, INT32* g, INT32* b, rdp_span_aux *userdata, const rdp_poly_state& object)
{
BLEND_FACTORS_SPECIAL(1);
BLEND_MUL(1);
BLEND_ADD_SPECIAL(1);
BLEND_SHIFT(5);
BLEND_CLAMP();
}
bool N64BlenderT::AlphaCompare(UINT8 alpha, const rdp_span_aux *userdata, const rdp_poly_state& object)

View File

@ -13,12 +13,13 @@ struct rdp_poly_state;
class N64BlenderT
{
public:
N64BlenderT()
{
}
typedef bool (N64BlenderT::*Blender1)(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel, rdp_span_aux *userdata, const rdp_poly_state& object);
typedef bool (N64BlenderT::*Blender2)(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int bsel0, int bsel1, rdp_span_aux *userdata, const rdp_poly_state& object);
typedef void (N64BlenderT::*BlendEquation)(INT32* r, INT32* g, INT32* b, rdp_span_aux *userdata, const rdp_poly_state& object);
N64BlenderT();
bool Blend2Cycle(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int bsel0, int bsel1, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend1Cycle(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel, rdp_span_aux *userdata, const rdp_poly_state& object);
Blender1 blend1[8];
Blender2 blend2[8];
void SetMachine(running_machine& machine) { m_machine = &machine; }
void SetProcessor(n64_rdp* rdp) { m_rdp = rdp; }
@ -29,8 +30,36 @@ class N64BlenderT
running_machine* m_machine;
n64_rdp* m_rdp;
void BlendEquationCycle0(INT32* r, INT32* g, INT32* b, int bsel_special, rdp_span_aux *userdata, const rdp_poly_state& object);
void BlendEquationCycle1(INT32* r, INT32* g, INT32* b, int bsel_special, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend1CycleNoBlendNoACVGNoDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend1CycleNoBlendNoACVGDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend1CycleNoBlendACVGNoDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend1CycleNoBlendACVGDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend1CycleBlendNoACVGNoDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend1CycleBlendNoACVGDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend1CycleBlendACVGNoDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend1CycleBlendACVGDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int special_bsel, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend2CycleNoBlendNoACVGNoDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int bsel0, int bsel1, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend2CycleNoBlendNoACVGDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int bsel0, int bsel1, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend2CycleNoBlendACVGNoDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int bsel0, int bsel1, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend2CycleNoBlendACVGDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int bsel0, int bsel1, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend2CycleBlendNoACVGNoDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int bsel0, int bsel1, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend2CycleBlendNoACVGDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int bsel0, int bsel1, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend2CycleBlendACVGNoDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int bsel0, int bsel1, rdp_span_aux *userdata, const rdp_poly_state& object);
bool Blend2CycleBlendACVGDither(UINT32* fr, UINT32* fg, UINT32* fb, int dith, int adseed, int partialreject, int bsel0, int bsel1, rdp_span_aux *userdata, const rdp_poly_state& object);
void BlendEquationCycle0NoForceNoSpecial(INT32* r, INT32* g, INT32* b, rdp_span_aux *userdata, const rdp_poly_state& object);
void BlendEquationCycle0NoForceSpecial(INT32* r, INT32* g, INT32* b, rdp_span_aux *userdata, const rdp_poly_state& object);
void BlendEquationCycle0ForceNoSpecial(INT32* r, INT32* g, INT32* b, rdp_span_aux *userdata, const rdp_poly_state& object);
void BlendEquationCycle0ForceSpecial(INT32* r, INT32* g, INT32* b, rdp_span_aux *userdata, const rdp_poly_state& object);
void BlendEquationCycle1NoForceNoSpecial(INT32* r, INT32* g, INT32* b, rdp_span_aux *userdata, const rdp_poly_state& object);
void BlendEquationCycle1NoForceSpecial(INT32* r, INT32* g, INT32* b, rdp_span_aux *userdata, const rdp_poly_state& object);
void BlendEquationCycle1ForceNoSpecial(INT32* r, INT32* g, INT32* b, rdp_span_aux *userdata, const rdp_poly_state& object);
void BlendEquationCycle1ForceSpecial(INT32* r, INT32* g, INT32* b, rdp_span_aux *userdata, const rdp_poly_state& object);
BlendEquation cycle0[4];
BlendEquation cycle1[4];
bool AlphaCompare(UINT8 alpha, const rdp_span_aux *userdata, const rdp_poly_state& object);

View File

@ -50,6 +50,7 @@ void n64_rdp::RenderSpans(int start, int end, int tilenum, bool flip, extent_t *
case CYCLE_TYPE_1:
render_triangle_custom(visarea, render_delegate(FUNC(n64_rdp::SpanDraw1Cycle), this), start, (end - start) + 1, Spans + offset);
break;
case CYCLE_TYPE_2:
render_triangle_custom(visarea, render_delegate(FUNC(n64_rdp::SpanDraw2Cycle), this), start, (end - start) + 1, Spans + offset);
break;
@ -148,16 +149,28 @@ void n64_rdp::SpanDraw1Cycle(INT32 scanline, const extent_t &extent, const rdp_p
bool partialreject = (userdata->ColorInputs.blender2b_a[0] == &userdata->InvPixelColor.i.a && userdata->ColorInputs.blender1b_a[0] == &userdata->PixelColor.i.a);
bool bsel0 = (userdata->ColorInputs.blender2b_a[0] == &userdata->MemoryColor.i.a);
int drinc = flip ? (object.SpanBase.m_span_dr) : -object.SpanBase.m_span_dr;
int dginc = flip ? (object.SpanBase.m_span_dg) : -object.SpanBase.m_span_dg;
int dbinc = flip ? (object.SpanBase.m_span_db) : -object.SpanBase.m_span_db;
int dainc = flip ? (object.SpanBase.m_span_da) : -object.SpanBase.m_span_da;
int dzinc = flip ? (object.SpanBase.m_span_dz) : -object.SpanBase.m_span_dz;
int dsinc = flip ? (object.SpanBase.m_span_ds) : -object.SpanBase.m_span_ds;
int dtinc = flip ? (object.SpanBase.m_span_dt) : -object.SpanBase.m_span_dt;
int dwinc = flip ? (object.SpanBase.m_span_dw) : -object.SpanBase.m_span_dw;
int drinc = object.SpanBase.m_span_dr;
int dginc = object.SpanBase.m_span_dg;
int dbinc = object.SpanBase.m_span_db;
int dainc = object.SpanBase.m_span_da;
int dzinc = object.SpanBase.m_span_dz;
int dsinc = object.SpanBase.m_span_ds;
int dtinc = object.SpanBase.m_span_dt;
int dwinc = object.SpanBase.m_span_dw;
int xinc = 1;
if (!flip)
{
drinc = -drinc;
dginc = -dginc;
dbinc = -dbinc;
dainc = -dainc;
dzinc = -dzinc;
dsinc = -dsinc;
dtinc = -dtinc;
dwinc = -dwinc;
xinc = -xinc;
}
int dzpix = object.SpanBase.m_span_dzpix;
int xinc = flip ? 1 : -1;
int fb_index = object.MiscState.FBWidth * scanline;
@ -180,6 +193,9 @@ void n64_rdp::SpanDraw1Cycle(INT32 scanline, const extent_t &extent, const rdp_p
dzinc = 0;
}
int blend_index = (object.OtherModes.alpha_cvg_select ? 2 : 0) | ((object.OtherModes.rgb_dither_sel < 3) ? 1 : 0);
int read_index = ((object.MiscState.FBSize - 2) << 1) | object.OtherModes.image_read_en;
int write_index = ((object.MiscState.FBSize - 2) << 3) | (object.OtherModes.cvg_dest << 1);
userdata->m_start_span = true;
for (int j = 0; j <= length; j++)
{
@ -233,17 +249,17 @@ void n64_rdp::SpanDraw1Cycle(INT32 scanline, const extent_t &extent, const rdp_p
UINT32 zbcur = zb + curpixel;
UINT32 zhbcur = zhb + curpixel;
((this)->*(_Read[((object.MiscState.FBSize - 2) << 1) | object.OtherModes.image_read_en]))(curpixel, userdata, object);
((this)->*(_Read[read_index]))(curpixel, userdata, object);
if(m_rdp->ZCompare(zbcur, zhbcur, sz, dzpix, userdata, object))
{
m_rdp->GetDitherValues(scanline, j, &cdith, &adith, object);
bool rendered = m_rdp->Blender.Blend1Cycle(&fir, &fig, &fib, cdith, adith, partialreject, bsel0, userdata, object);
bool rendered = ((&m_rdp->Blender)->*(m_rdp->Blender.blend1[(userdata->BlendEnable << 2) | blend_index]))(&fir, &fig, &fib, cdith, adith, partialreject, bsel0, userdata, object);
if (rendered)
{
((this)->*(_Write[((object.MiscState.FBSize - 2) << 3) | (object.OtherModes.cvg_dest << 1) | userdata->BlendEnable]))(curpixel, fir, fig, fib, userdata, object);
((this)->*(_Write[write_index | userdata->BlendEnable]))(curpixel, fir, fig, fib, userdata, object);
if (object.OtherModes.z_update_en)
{
@ -337,6 +353,9 @@ void n64_rdp::SpanDraw2Cycle(INT32 scanline, const extent_t &extent, const rdp_p
dzinc = 0;
}
int blend_index = (object.OtherModes.alpha_cvg_select ? 2 : 0) | ((object.OtherModes.rgb_dither_sel < 3) ? 1 : 0);
int read_index = ((object.MiscState.FBSize - 2) << 1) | object.OtherModes.image_read_en;
int write_index = ((object.MiscState.FBSize - 2) << 3) | (object.OtherModes.cvg_dest << 1);
userdata->m_start_span = true;
for (int j = 0; j <= length; j++)
{
@ -399,17 +418,17 @@ void n64_rdp::SpanDraw2Cycle(INT32 scanline, const extent_t &extent, const rdp_p
UINT32 zbcur = zb + curpixel;
UINT32 zhbcur = zhb + curpixel;
((this)->*(_Read[((object.MiscState.FBSize - 2) << 1) | object.OtherModes.image_read_en]))(curpixel, userdata, object);
((this)->*(_Read[read_index]))(curpixel, userdata, object);
if(m_rdp->ZCompare(zbcur, zhbcur, sz, dzpix, userdata, object))
{
m_rdp->GetDitherValues(scanline, j, &cdith, &adith, object);
bool rendered = m_rdp->Blender.Blend2Cycle(&fir, &fig, &fib, cdith, adith, partialreject, bsel0, bsel1, userdata, object);
bool rendered = ((&m_rdp->Blender)->*(m_rdp->Blender.blend2[(userdata->BlendEnable << 2) | blend_index]))(&fir, &fig, &fib, cdith, adith, partialreject, bsel0, bsel1, userdata, object);
if (rendered)
{
((this)->*(_Write[((object.MiscState.FBSize - 2) << 3) | (object.OtherModes.cvg_dest << 1) | userdata->BlendEnable]))(curpixel, fir, fig, fib, userdata, object);
((this)->*(_Write[write_index | userdata->BlendEnable]))(curpixel, fir, fig, fib, userdata, object);
if (object.OtherModes.z_update_en)
{
m_rdp->ZStore(zbcur, zhbcur, sz, userdata->m_dzpix_enc);