nw, fix most N64 visual bugs from new RDP SSE code.

This commit is contained in:
therealmogminer@gmail.com 2015-07-11 15:26:47 +02:00
parent df8a949c00
commit e815452f37
8 changed files with 109 additions and 75 deletions

View File

@ -123,42 +123,42 @@ public:
m_b = value;
}
// Channel accessors for the scalar rgbaint_t implementation. Each one simply
// returns the matching member (m_a, m_r, m_g, m_b).
//
// NOTE(review): every signature is listed twice in this excerpt, once without
// and once with a trailing `const`. Presumably the `const` form is the one
// introduced by this change, so the getters can be called on const
// references -- confirm against the repository.

// 8-bit view of the alpha channel (m_a converted to UINT8).
inline UINT8 get_a()
inline UINT8 get_a() const
{
return m_a;
}
// 8-bit view of the red channel (m_r converted to UINT8).
inline UINT8 get_r()
inline UINT8 get_r() const
{
return m_r;
}
// 8-bit view of the green channel (m_g converted to UINT8).
inline UINT8 get_g()
inline UINT8 get_g() const
{
return m_g;
}
// 8-bit view of the blue channel (m_b converted to UINT8).
inline UINT8 get_b()
inline UINT8 get_b() const
{
return m_b;
}
// 32-bit view of the alpha channel (m_a converted to INT32).
inline INT32 get_a32()
inline INT32 get_a32() const
{
return m_a;
}
// 32-bit view of the red channel (m_r converted to INT32).
inline INT32 get_r32()
inline INT32 get_r32() const
{
return m_r;
}
// 32-bit view of the green channel (m_g converted to INT32).
inline INT32 get_g32()
inline INT32 get_g32() const
{
return m_g;
}
// 32-bit view of the blue channel (m_b converted to INT32).
inline INT32 get_b32()
inline INT32 get_b32() const
{
return m_b;
}

View File

@ -108,42 +108,42 @@ public:
m_value = _mm_or_si128(_mm_and_si128(m_value, blue_mask()), _mm_set_epi32(0, 0, 0, value));
}
// Channel accessors for the SSE rgbaint_t implementation, where all four
// channels live in the single 128-bit register m_value. As read by the code
// below, the 16-bit words of m_value are laid out as:
//   words 7:6 -> alpha, words 5:4 -> red, words 3:2 -> green, words 1:0 -> blue
//
// NOTE(review): every signature is listed twice in this excerpt, once without
// and once with a trailing `const`. Presumably the `const` form is the one
// introduced by this change -- confirm against the repository.

// Alpha: low 16-bit word of the alpha lane, narrowed to UINT8 by the return type.
inline UINT8 get_a()
inline UINT8 get_a() const
{
return _mm_extract_epi16(m_value, 6);
}
// Red: low 16-bit word of the red lane, narrowed to UINT8 by the return type.
inline UINT8 get_r()
inline UINT8 get_r() const
{
return _mm_extract_epi16(m_value, 4);
}
// Green: low 16-bit word of the green lane, narrowed to UINT8 by the return type.
inline UINT8 get_g()
inline UINT8 get_g() const
{
return _mm_extract_epi16(m_value, 2);
}
// Blue: low 16-bit word of the blue lane, narrowed to UINT8 by the return type.
inline UINT8 get_b()
inline UINT8 get_b() const
{
return _mm_extract_epi16(m_value, 0);
}
// Full 32-bit alpha lane, rebuilt from its high (word 7) and low (word 6) halves.
inline INT32 get_a32()
inline INT32 get_a32() const
{
return (_mm_extract_epi16(m_value, 7) << 16) | _mm_extract_epi16(m_value, 6);
}
// Full 32-bit red lane, rebuilt from its high (word 5) and low (word 4) halves.
inline INT32 get_r32()
inline INT32 get_r32() const
{
return (_mm_extract_epi16(m_value, 5) << 16) | _mm_extract_epi16(m_value, 4);
}
// Full 32-bit green lane, rebuilt from its high (word 3) and low (word 2) halves.
inline INT32 get_g32()
inline INT32 get_g32() const
{
return (_mm_extract_epi16(m_value, 3) << 16) | _mm_extract_epi16(m_value, 2);
}
// Full 32-bit blue lane, rebuilt from its high (word 1) and low (word 0) halves.
inline INT32 get_b32()
inline INT32 get_b32() const
{
return (_mm_extract_epi16(m_value, 1) << 16) | _mm_extract_epi16(m_value, 0);
}
@ -173,7 +173,19 @@ public:
// Left-shifts the channels of this value by the counts held in `shift`.
//
// _mm_sll_epi32 applies ONE count (taken from the low 64 bits of its second
// operand) to all four 32-bit lanes, so a per-channel shift cannot be done in
// a single call. The sequence below works around that: one copy of *this per
// channel, each shifted by that channel's own count, then the result is
// reassembled by picking the matching lane out of each copy.
//
// NOTE(review): this excerpt shows the single whole-vector shift on the next
// statement together with the per-channel sequence. The hunk header records
// one line being dropped by the change, presumably that statement -- confirm
// against the repository; if both were executed the value would be shifted twice.
inline void shl(const rgbaint_t& shift)
{
m_value = _mm_sll_epi32(m_value, shift.m_value);
// Four working copies of the unshifted value, one per channel.
rgbaint_t areg(*this);
rgbaint_t rreg(*this);
rgbaint_t greg(*this);
rgbaint_t breg(*this);
// One count vector per channel, with the count in the last constructor
// argument (presumably the lowest lane, i.e. blue -- confirm constructor order).
rgbaint_t ashift(0, 0, 0, shift.get_a32());
rgbaint_t rshift(0, 0, 0, shift.get_r32());
rgbaint_t gshift(0, 0, 0, shift.get_g32());
rgbaint_t bshift(0, 0, 0, shift.get_b32());
// Shift each copy as a whole by its channel's count.
areg.m_value = _mm_sll_epi32(areg.m_value, ashift.m_value);
rreg.m_value = _mm_sll_epi32(rreg.m_value, rshift.m_value);
greg.m_value = _mm_sll_epi32(greg.m_value, gshift.m_value);
breg.m_value = _mm_sll_epi32(breg.m_value, bshift.m_value);
// Keep only the lane that was shifted by its own count.
set(areg.get_a32(), rreg.get_r32(), greg.get_g32(), breg.get_b32());
}
inline void shl_imm(const UINT8 shift)
@ -183,7 +195,19 @@ public:
// Logically right-shifts the channels of this value by the counts held in `shift`.
//
// _mm_srl_epi32 applies ONE count (taken from the low 64 bits of its second
// operand) to all four 32-bit lanes, so a per-channel shift cannot be done in
// a single call. The sequence below works around that: one copy of *this per
// channel, each shifted by that channel's own count, then the result is
// reassembled by picking the matching lane out of each copy.
//
// NOTE(review): this excerpt shows the single whole-vector shift on the next
// statement together with the per-channel sequence. The hunk header records
// one line being dropped by the change, presumably that statement -- confirm
// against the repository; if both were executed the value would be shifted twice.
inline void shr(const rgbaint_t& shift)
{
m_value = _mm_srl_epi32(m_value, shift.m_value);
// Four working copies of the unshifted value, one per channel.
rgbaint_t areg(*this);
rgbaint_t rreg(*this);
rgbaint_t greg(*this);
rgbaint_t breg(*this);
// One count vector per channel, with the count in the last constructor
// argument (presumably the lowest lane, i.e. blue -- confirm constructor order).
rgbaint_t ashift(0, 0, 0, shift.get_a32());
rgbaint_t rshift(0, 0, 0, shift.get_r32());
rgbaint_t gshift(0, 0, 0, shift.get_g32());
rgbaint_t bshift(0, 0, 0, shift.get_b32());
// Shift each copy as a whole by its channel's count (zero-filling).
areg.m_value = _mm_srl_epi32(areg.m_value, ashift.m_value);
rreg.m_value = _mm_srl_epi32(rreg.m_value, rshift.m_value);
greg.m_value = _mm_srl_epi32(greg.m_value, gshift.m_value);
breg.m_value = _mm_srl_epi32(breg.m_value, bshift.m_value);
// Keep only the lane that was shifted by its own count.
set(areg.get_a32(), rreg.get_r32(), greg.get_g32(), breg.get_b32());
}
inline void shr_imm(const UINT8 shift)
@ -193,7 +217,19 @@ public:
// Arithmetically (sign-preserving) right-shifts the channels of this value by
// the counts held in `shift`.
//
// _mm_sra_epi32 applies ONE count (taken from the low 64 bits of its second
// operand) to all four 32-bit lanes, so a per-channel shift cannot be done in
// a single call. The sequence below works around that: one copy of *this per
// channel, each shifted by that channel's own count, then the result is
// reassembled by picking the matching lane out of each copy.
//
// NOTE(review): this excerpt shows the single whole-vector shift on the next
// statement together with the per-channel sequence. The hunk header records
// one line being dropped by the change, presumably that statement -- confirm
// against the repository; if both were executed the value would be shifted twice.
inline void sra(const rgbaint_t& shift)
{
m_value = _mm_sra_epi32(m_value, shift.m_value);
// Four working copies of the unshifted value, one per channel.
rgbaint_t areg(*this);
rgbaint_t rreg(*this);
rgbaint_t greg(*this);
rgbaint_t breg(*this);
// One count vector per channel, with the count in the last constructor
// argument (presumably the lowest lane, i.e. blue -- confirm constructor order).
rgbaint_t ashift(0, 0, 0, shift.get_a32());
rgbaint_t rshift(0, 0, 0, shift.get_r32());
rgbaint_t gshift(0, 0, 0, shift.get_g32());
rgbaint_t bshift(0, 0, 0, shift.get_b32());
// Shift each copy as a whole by its channel's count (sign-filling).
areg.m_value = _mm_sra_epi32(areg.m_value, ashift.m_value);
rreg.m_value = _mm_sra_epi32(rreg.m_value, rshift.m_value);
greg.m_value = _mm_sra_epi32(greg.m_value, gshift.m_value);
breg.m_value = _mm_sra_epi32(breg.m_value, bshift.m_value);
// Keep only the lane that was shifted by its own count.
set(areg.get_a32(), rreg.get_r32(), greg.get_g32(), breg.get_b32());
}
inline void sra_imm(const UINT8 shift)

View File

@ -140,56 +140,56 @@ public:
m_value = vec_perm(m_value, temp, blue_perm);
}
// Channel accessors for the VMX/AltiVec rgbaint_t implementation, where all
// four channels live in the single vector m_value. Every getter uses the same
// two steps: vec_splat copies the wanted element into every position of a
// temporary vector, then vec_ste stores one element of it into `result`.
// As read by the code below, 32-bit elements 0/1/2/3 hold alpha/red/green/blue,
// and the 8-bit getters read bytes 3/7/11/15, i.e. the last byte of each of
// those elements (presumably the least-significant byte on a big-endian
// target -- confirm).
//
// NOTE(review): every signature is listed twice in this excerpt, once without
// and once with a trailing `const`. Presumably the `const` form is the one
// introduced by this change -- confirm against the repository.

// Alpha as 8 bits: byte 3 of the vector.
inline UINT8 get_a()
inline UINT8 get_a() const
{
UINT8 result;
vec_ste(vec_splat((VECU8)m_value, 3), 0, &result);
return result;
}
// Red as 8 bits: byte 7 of the vector.
inline UINT8 get_r()
inline UINT8 get_r() const
{
UINT8 result;
vec_ste(vec_splat((VECU8)m_value, 7), 0, &result);
return result;
}
// Green as 8 bits: byte 11 of the vector.
inline UINT8 get_g()
inline UINT8 get_g() const
{
UINT8 result;
vec_ste(vec_splat((VECU8)m_value, 11), 0, &result);
return result;
}
// Blue as 8 bits: byte 15 of the vector.
inline UINT8 get_b()
inline UINT8 get_b() const
{
UINT8 result;
vec_ste(vec_splat((VECU8)m_value, 15), 0, &result);
return result;
}
// Alpha as 32 bits: element 0 of the vector.
inline INT32 get_a32()
inline INT32 get_a32() const
{
INT32 result;
vec_ste(vec_splat(m_value, 0), 0, &result);
return result;
}
// Red as 32 bits: element 1 of the vector.
inline INT32 get_r32()
inline INT32 get_r32() const
{
INT32 result;
vec_ste(vec_splat(m_value, 1), 0, &result);
return result;
}
// Green as 32 bits: element 2 of the vector.
inline INT32 get_g32()
inline INT32 get_g32() const
{
INT32 result;
vec_ste(vec_splat(m_value, 2), 0, &result);
return result;
}
inline INT32 get_b32()
inline INT32 get_b32() const
{
INT32 result;
vec_ste(vec_splat(m_value, 3), 0, &result);

View File

@ -2660,6 +2660,7 @@ void n64_rdp::cmd_load_tile(UINT32 w1, UINT32 w2)
}
topad = 0; // ????
*/
switch (m_misc_state.m_ti_size)
{
case PIXEL_SIZE_8BIT:
@ -2755,7 +2756,6 @@ void n64_rdp::cmd_load_tile(UINT32 w1, UINT32 w2)
void n64_rdp::cmd_set_tile(UINT32 w1, UINT32 w2)
{
//wait("SetTile");
const INT32 tilenum = (w2 >> 24) & 0x7;
n64_tile_t* tex_tile = &m_tiles[tilenum];
@ -2784,7 +2784,8 @@ void n64_rdp::cmd_set_tile(UINT32 w1, UINT32 w2)
tex_tile->clamp_t = tex_tile->ct || !tex_tile->mask_t;
tex_tile->mm = rgbaint_t(tex_tile->ms ? ~0 : 0, tex_tile->ms ? ~0 : 0, tex_tile->mt ? ~0 : 0, tex_tile->mt ? ~0 : 0);
tex_tile->invmm = rgbaint_t(tex_tile->ms ? 0 : ~0, tex_tile->ms ? 0 : ~0, tex_tile->mt ? 0 : ~0, tex_tile->mt ? 0 : ~0);
tex_tile->mask = rgbaint_t(tex_tile->mask_s, tex_tile->mask_s, tex_tile->mask_t, tex_tile->mask_t);
tex_tile->mask = rgbaint_t(tex_tile->mask_s ? ~0 : 0, tex_tile->mask_s ? ~0 : 0, tex_tile->mask_t ? ~0 : 0, tex_tile->mask_t ? ~0 : 0);
tex_tile->invmask = rgbaint_t(tex_tile->mask_s ? 0 : ~0, tex_tile->mask_s ? 0 : ~0, tex_tile->mask_t ? 0 : ~0, tex_tile->mask_t ? 0 : ~0);
tex_tile->lshift = rgbaint_t(tex_tile->lshift_s, tex_tile->lshift_s, tex_tile->lshift_t, tex_tile->lshift_t);
tex_tile->rshift = rgbaint_t(tex_tile->rshift_s, tex_tile->rshift_s, tex_tile->rshift_t, tex_tile->rshift_t);
tex_tile->clamp_st = rgbaint_t(tex_tile->clamp_s ? ~0 : 0, tex_tile->clamp_s ? ~0 : 0, tex_tile->clamp_t ? ~0 : 0, tex_tile->clamp_t ? ~0 : 0);
@ -2809,7 +2810,6 @@ void n64_rdp::cmd_set_tile(UINT32 w1, UINT32 w2)
void n64_rdp::cmd_fill_rect(UINT32 w1, UINT32 w2)
{
//if(m_pending_mode_block) { wait("Block on pending mode-change"); m_pending_mode_block = false; }
const UINT32 xh = (w2 >> 12) & 0xfff;
const UINT32 xl = (w1 >> 12) & 0xfff;
const UINT32 yh = (w2 >> 0) & 0xfff;
@ -3041,6 +3041,8 @@ void n64_rdp::process_command_list()
n64_rdp::n64_rdp(n64_state &state) : poly_manager<UINT32, rdp_poly_state, 8, 32000>(state.machine())
{
ignore = false;
dolog = false;
m_aux_buf_ptr = 0;
m_aux_buf = NULL;
m_pipe_clean = true;
@ -3055,11 +3057,6 @@ n64_rdp::n64_rdp(n64_state &state) : poly_manager<UINT32, rdp_poly_state, 8, 320
m_current = 0;
m_status = 0x88;
for (INT32 i = 0; i < 8; i++)
{
m_tiles[i].num = i;
}
m_one.set(0xff, 0xff, 0xff, 0xff);
m_zero.set(0, 0, 0, 0);

View File

@ -153,6 +153,13 @@ public:
memset(m_tiles, 0, 8 * sizeof(n64_tile_t));
memset(m_cmd_data, 0, sizeof(m_cmd_data));
for (INT32 i = 0; i < 8; i++)
{
m_tiles[i].num = i;
m_tiles[i].invmm = rgbaint_t(~0, ~0, ~0, ~0);
m_tiles[i].invmask = rgbaint_t(~0, ~0, ~0, ~0);
}
}
void process_command_list();
@ -381,6 +388,10 @@ private:
static const INT32 s_rdp_command_length[];
static const char* s_image_format[];
static const char* s_image_size[];
public:
bool ignore;
bool dolog;
};
#endif // _VIDEO_N64_H_

View File

@ -121,7 +121,14 @@ struct n64_tile_t
INT32 wrapped_mask_s, wrapped_mask_t;
bool clamp_s, clamp_t;
rgbaint_t mm, invmm;
rgbaint_t wrapped_mask, mask, lshift, rshift, sth, stl, clamp_st;
rgbaint_t wrapped_mask;
rgbaint_t mask;
rgbaint_t invmask;
rgbaint_t lshift;
rgbaint_t rshift;
rgbaint_t sth;
rgbaint_t stl;
rgbaint_t clamp_st;
UINT16 sl, tl, sh, th; // 10.2 fixed-point, starting and ending texel row / column
INT32 num;
};

View File

@ -21,8 +21,6 @@
// Rebases coordinate x against origin y: the part of x above its low three
// bits has y subtracted from it, while the low three bits of x are carried
// through unchanged.
// The expansion is fully parenthesised and carries no trailing semicolon, so
// it behaves as one expression inside larger expressions, conditions and
// argument lists; an expression argument such as `a | b` is also grouped
// correctly. Note that x is evaluated twice, so do not pass an argument with
// side effects.
#define RELATIVE(x, y) (((((x) >> 3) - (y)) << 3) | ((x) & 7))
#define USE_SIMD (1)
void n64_texture_pipe_t::set_machine(running_machine &machine)
{
n64_state* state = machine.driver_data<n64_state>();
@ -65,34 +63,24 @@ void n64_texture_pipe_t::set_machine(running_machine &machine)
m_v1.set(1, 1, 1, 1);
}
// Texture-coordinate masking / mirroring for a tile.
//
// NOTE(review): this span comes from a change listing whose add/remove
// markers are missing, so lines of the earlier mask()/mask_coupled() pair and
// of the single replacement mask() are interleaved here (note the two
// signatures, the `wrap` / `do_wrap` twins and the stray closing brace).
// Read it as a before/after listing, not as compilable code, and confirm the
// final form against the repository.
//
// Steps visible in the statements below:
//   * per-axis bit masks are looked up in m_maskbits_table using tile.mask_s
//     and tile.mask_t and packed as (s, s, t, t);
//   * a copy of the coordinates is arithmetically shifted right by
//     tile.wrapped_mask, reduced to bit 0, compared with m_v1, and gated by
//     tile.mm; the result is XORed into the coordinates (presumably the
//     mirror step, with cmpeq producing an all-ones lane on equality -- confirm);
//   * the coordinates are ANDed with the looked-up bit masks;
//   * in the lines using tile.mask / tile.invmask, the masked result is kept
//     only where tile.mask is set and the untouched coordinate is kept where
//     tile.invmask is set, the two being merged with or_reg.
void n64_texture_pipe_t::mask(rgbaint_t& st, const n64_tile_t& tile)
void n64_texture_pipe_t::mask(rgbaint_t& sstt, const n64_tile_t& tile)
{
UINT32 s_mask_bits = m_maskbits_table[tile.mask_s];
UINT32 t_mask_bits = m_maskbits_table[tile.mask_t];
rgbaint_t maskbits(s_mask_bits, s_mask_bits, t_mask_bits, t_mask_bits);
rgbaint_t wrap(st);
wrap.sra(tile.wrapped_mask);
wrap.and_reg(m_v1);
wrap.cmpeq(m_v1);
wrap.and_reg(tile.mm);
st.xor_reg(wrap);
st.and_reg(maskbits);
}
rgbaint_t do_wrap(sstt);
do_wrap.sra(tile.wrapped_mask);
do_wrap.and_reg(m_v1);
do_wrap.cmpeq(m_v1);
do_wrap.and_reg(tile.mm);
void n64_texture_pipe_t::mask_coupled(rgbaint_t& sstt, const n64_tile_t& tile)
{
UINT32 s_mask_bits = m_maskbits_table[tile.mask_s];
UINT32 t_mask_bits = m_maskbits_table[tile.mask_t];
rgbaint_t maskbits(s_mask_bits, s_mask_bits, t_mask_bits, t_mask_bits);
rgbaint_t wrap(sstt);
wrap.sra(tile.wrapped_mask);
wrap.and_reg(m_v1);
wrap.cmpeq(m_v1);
wrap.and_reg(tile.mm);
sstt.xor_reg(wrap);
sstt.and_reg(maskbits);
// Work on a copy so the original coordinate is still available for the
// invmask path below.
rgbaint_t wrapped(sstt);
wrapped.xor_reg(do_wrap);
wrapped.and_reg(maskbits);
wrapped.and_reg(tile.mask);
sstt.and_reg(tile.invmask);
sstt.or_reg(wrapped);
}
rgbaint_t n64_texture_pipe_t::shift_cycle(rgbaint_t& st, const n64_tile_t& tile)
@ -250,7 +238,7 @@ void n64_texture_pipe_t::cycle_linear(color_t* TEX, color_t* prev, INT32 SSS, IN
clamp_cycle(st, stfrac, maxst, tilenum, tile, userdata);
mask_coupled(st, tile);
mask(st, tile);
const UINT32 tbase = tile.tmem + ((tile.line * st.get_b32()) & 0x1ff);
@ -307,7 +295,7 @@ void n64_texture_pipe_t::cycle_linear_lerp(color_t* TEX, color_t* prev, INT32 SS
sstt.add(m_st2_add);
mask_coupled(sstt, tile);
mask(sstt, tile);
const UINT32 tbase1 = tile.tmem + ((tile.line * sstt.get_b32()) & 0x1ff);
const UINT32 tbase2 = tile.tmem + ((tile.line * sstt.get_g32()) & 0x1ff);
@ -321,10 +309,6 @@ void n64_texture_pipe_t::cycle_linear_lerp(color_t* TEX, color_t* prev, INT32 SS
invstf.subr_imm(0x20);
invstf.shl_imm(3);
}
else
{
invstf.set(0, 0, 0, 0);
}
stfrac.shl_imm(3);
@ -670,20 +654,20 @@ void n64_texture_pipe_t::calculate_clamp_diffs(UINT32 prim_tile, rdp_span_aux* u
{
for (INT32 start = 0; start <= 7; start++)
{
userdata->m_clamp_diff[start].set(0, (tiles[start].sh >> 2) - (tiles[start].sl >> 2), 0, (tiles[start].th >> 2) - (tiles[start].tl >> 2));
userdata->m_clamp_diff[start].set((tiles[start].sh >> 2) - (tiles[start].sl >> 2), (tiles[start].sh >> 2) - (tiles[start].sl >> 2), (tiles[start].th >> 2) - (tiles[start].tl >> 2), (tiles[start].th >> 2) - (tiles[start].tl >> 2));
}
}
else
{
const INT32 start = prim_tile;
const INT32 end = (prim_tile + 1) & 7;
userdata->m_clamp_diff[start].set(0, (tiles[start].sh >> 2) - (tiles[start].sl >> 2), 0, (tiles[start].th >> 2) - (tiles[start].tl >> 2));
userdata->m_clamp_diff[end].set(0, (tiles[end].sh >> 2) - (tiles[end].sl >> 2), 0, (tiles[end].th >> 2) - (tiles[end].tl >> 2));
userdata->m_clamp_diff[start].set((tiles[start].sh >> 2) - (tiles[start].sl >> 2), (tiles[start].sh >> 2) - (tiles[start].sl >> 2), (tiles[start].th >> 2) - (tiles[start].tl >> 2), (tiles[start].th >> 2) - (tiles[start].tl >> 2));
userdata->m_clamp_diff[end].set((tiles[end].sh >> 2) - (tiles[end].sl >> 2), (tiles[end].sh >> 2) - (tiles[end].sl >> 2), (tiles[end].th >> 2) - (tiles[end].tl >> 2), (tiles[end].th >> 2) - (tiles[end].tl >> 2));
}
}
else//1-cycle or copy
{
userdata->m_clamp_diff[prim_tile].set(0, (tiles[prim_tile].sh >> 2) - (tiles[prim_tile].sl >> 2), 0, (tiles[prim_tile].th >> 2) - (tiles[prim_tile].tl >> 2));
userdata->m_clamp_diff[prim_tile].set((tiles[prim_tile].sh >> 2) - (tiles[prim_tile].sl >> 2), (tiles[prim_tile].sh >> 2) - (tiles[prim_tile].sl >> 2), (tiles[prim_tile].th >> 2) - (tiles[prim_tile].tl >> 2), (tiles[prim_tile].th >> 2) - (tiles[prim_tile].tl >> 2));
}
}
@ -786,7 +770,7 @@ void n64_texture_pipe_t::fetch_yuv(rgbaint_t& out, INT32 s, INT32 t, INT32 tbase
u |= ((u & 0x80) << 1);
v |= ((v & 0x80) << 1);
out.set(y, y, u, v);
out.set(y & 0xff, y & 0xff, u & 0xff, v & 0xff);
}
void n64_texture_pipe_t::fetch_ci4_tlut0(rgbaint_t& out, INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata)
@ -938,7 +922,7 @@ void n64_texture_pipe_t::fetch_ia8_raw(rgbaint_t& out, INT32 s, INT32 t, INT32 t
UINT8 i = p & 0xf0;
i |= (i >> 4);
out.set((p << 4) | (p & 0xf), i, i, i);
out.set(((p << 4) | (p & 0xf)) & 0xff, i, i, i);
}
void n64_texture_pipe_t::fetch_ia16_tlut0(rgbaint_t& out, INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata)

View File

@ -27,7 +27,7 @@ class n64_texture_pipe_t
n64_texture_pipe_t()
{
m_maskbits_table[0] = 0x3ff;
m_maskbits_table[0] = 0xffff;
for(int i = 1; i < 16; i++)
{
m_maskbits_table[i] = ((UINT16)(0xffff) >> (16 - i)) & 0x3ff;
@ -108,8 +108,7 @@ class n64_texture_pipe_t
bool m_start_span;
private:
void mask(rgbaint_t& st, const n64_tile_t& tile);
void mask_coupled(rgbaint_t& sstt, const n64_tile_t& tile);
void mask(rgbaint_t& sstt, const n64_tile_t& tile);
rgbaint_t shift_cycle(rgbaint_t& st, const n64_tile_t& tile);
void shift_copy(rgbaint_t& st, const n64_tile_t& tile);