nw, Fix CTC2 and VCH opcodes in RSP SIMD code.

This commit is contained in:
therealmogminer@gmail.com 2015-07-10 04:09:34 +02:00
parent 35d960d153
commit 7c78f67d66
2 changed files with 30 additions and 15 deletions

View File

@ -207,9 +207,6 @@ const rsp_cop2::vec_helpers_t rsp_cop2::m_vec_helpers = {
{ 0xffff, 0xffff, 0xffff, 0xffff } // D
},
{ // word_reverse
0x0706, 0x0504, 0x0302, 0x0100, 0x0f0e, 0x0d0c, 0x0b0a, 0x0908
},
{ // byte_reverse
0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d
}
};
@ -539,9 +536,9 @@ void rsp_cop2::vec_store_group2(UINT32 addr, UINT32 element, UINT16 *regp, rsp_v
reg = _mm_packs_epi16(reg, reg);
#if !(defined(__SSSE3__) || defined(_MSC_VER))
reg = sse2_pshufb(reg, m_vec_helpers.byte_reverse);
reg = sse2_pshufb(reg, m_vec_helpers.word_reverse);
#else
rsp_vec_t dkey = _mm_load_si128((rsp_vec_t *) (m_vec_helpers.byte_reverse));
rsp_vec_t dkey = _mm_load_si128((rsp_vec_t *) (m_vec_helpers.word_reverse));
reg = _mm_shuffle_epi8(reg, dkey);
#endif
@ -3882,9 +3879,32 @@ void rsp_cop2::handle_cop2(UINT32 op)
// | 010010 | 00110 | TTTTT | DDDDD | 00000000000 |
// ------------------------------------------------
//
//printf("CTC2 ");
switch(RDREG)
{
#if USE_SIMD
case 0:
case 1:
case 2:
UINT16 r0 = (RTVAL & (1 << 0)) ? 0xffff : 0;
UINT16 r1 = (RTVAL & (1 << 1)) ? 0xffff : 0;
UINT16 r2 = (RTVAL & (1 << 2)) ? 0xffff : 0;
UINT16 r3 = (RTVAL & (1 << 3)) ? 0xffff : 0;
UINT16 r4 = (RTVAL & (1 << 4)) ? 0xffff : 0;
UINT16 r5 = (RTVAL & (1 << 5)) ? 0xffff : 0;
UINT16 r6 = (RTVAL & (1 << 6)) ? 0xffff : 0;
UINT16 r7 = (RTVAL & (1 << 7)) ? 0xffff : 0;
m_flags[RDREG].__align[0] = _mm_set_epi16(r7, r6, r5, r4, r3, r2, r1, r0);
r0 = (RTVAL & (1 << 8)) ? 0xffff : 0;
r1 = (RTVAL & (1 << 9)) ? 0xffff : 0;
r2 = (RTVAL & (1 << 10)) ? 0xffff : 0;
r3 = (RTVAL & (1 << 11)) ? 0xffff : 0;
r4 = (RTVAL & (1 << 12)) ? 0xffff : 0;
r5 = (RTVAL & (1 << 13)) ? 0xffff : 0;
r6 = (RTVAL & (1 << 14)) ? 0xffff : 0;
r7 = (RTVAL & (1 << 15)) ? 0xffff : 0;
m_flags[RDREG].__align[1] = _mm_set_epi16(r7, r6, r5, r4, r3, r2, r1, r0);
break;
#else
case 0:
CLEAR_CARRY_FLAGS();
CLEAR_ZERO_FLAGS();
@ -3938,6 +3958,7 @@ void rsp_cop2::handle_cop2(UINT32 op)
if (RTVAL & (1 << 6)) { SET_CLIP1_FLAG(6); }
if (RTVAL & (1 << 7)) { SET_CLIP1_FLAG(7); }
break;
#endif
}
break;
}

View File

@ -252,7 +252,6 @@ protected:
const UINT16 qr_lut[16][8];
const UINT16 bdls_lut[4][4];
const UINT16 word_reverse[8];
const UINT16 byte_reverse[8];
} vec_helpers_t;
static const vec_helpers_t m_vec_helpers;
@ -299,7 +298,7 @@ protected:
}
// Reads a vector from the UINT16 storage pointed to by vce, delegating the
// actual load to vec_load_unshuffled_operand.
//
// vce: pointer to the UINT16 storage to read from.
// Returns the rsp_vec_t produced by vec_load_unshuffled_operand.
//
// NOTE(review): the body holds two consecutive return statements, so only the
// first can execute and the second is unreachable as written. They read from
// different addresses; this looks like the before/after lines of a patch shown
// together -- confirm which one is intended before relying on either.
static inline rsp_vec_t read_vce(const UINT16 *vce)
{
// Variant 1: load starting at vce itself.
return vec_load_unshuffled_operand(vce);
// Variant 2: load starting (sizeof(rsp_vec_t) >> 1) UINT16 elements past vce;
// presumably exactly one vector further on if UINT16 is two bytes -- TODO confirm.
return vec_load_unshuffled_operand(vce + (sizeof(rsp_vec_t) >> 1));
}
static inline void write_acc_lo(UINT16 *acc, rsp_vec_t acc_lo)
{
@ -331,17 +330,12 @@ protected:
}
// Stores the vector vce_r into the UINT16 storage pointed to by vce,
// delegating the actual store to vec_write_operand.
//
// vce:   pointer to the UINT16 storage to write to.
// vce_r: vector value to store.
//
// NOTE(review): the body holds two consecutive return statements, so only the
// first can execute and the second is unreachable as written. They target
// different addresses; this looks like the before/after lines of a patch shown
// together -- confirm which one is intended before relying on either.
static inline void write_vce(UINT16 *vce, rsp_vec_t vce_r)
{
// Variant 1: store starting at vce itself. Returning the call expression from
// a void function presumably relies on vec_write_operand returning void.
return vec_write_operand(vce, vce_r);
// Variant 2: store starting (sizeof(rsp_vec_t) >> 1) UINT16 elements past vce;
// presumably exactly one vector further on if UINT16 is two bytes -- TODO confirm.
return vec_write_operand(vce + (sizeof(rsp_vec_t) >> 1), vce_r);
}
// Collapses two vectors of 16-bit flag lanes into one 16-bit mask.
//
// Two vectors are loaded with _mm_load_si128: one at flags and one
// (sizeof(rsp_vec_t) >> 1) UINT16 elements past it. _mm_packs_epi16 narrows
// the 16-bit lanes to bytes with signed saturation, placing its first operand
// (the vector at the offset address) in the low eight bytes and its second
// operand (the vector at flags) in the high eight bytes. _mm_movemask_epi8
// then gathers the top bit of each byte, so result bit i is set when the
// corresponding source lane has its sign bit set.
//
// flags: pointer to the UINT16 flag storage; _mm_load_si128 requires each
//        loaded address to be 16-byte aligned.
// Returns the 16-bit mask converted to INT16.
//
// NOTE(review): the body holds two return statements, so the second is
// unreachable as written. Both evaluate the same expression; they differ only
// in the explicit (INT16) cast and line layout, and the INT16 return type
// applies the same conversion either way. This looks like the before/after
// lines of a patch shown together -- confirm and keep only one.
static inline INT16 get_flags(const UINT16 *flags)
{
// Variant 1: multi-line form with an explicit cast to INT16.
return (INT16)_mm_movemask_epi8(
_mm_packs_epi16(
_mm_load_si128((rsp_vec_t*) (flags + (sizeof(rsp_vec_t) >> 1))),
_mm_load_si128((rsp_vec_t*) flags)
)
);
// Variant 2: same expression on one line, relying on implicit conversion.
return _mm_movemask_epi8(_mm_packs_epi16(_mm_load_si128((rsp_vec_t*) (flags + (sizeof(rsp_vec_t) >> 1))), _mm_load_si128((rsp_vec_t*) flags)));
}
static inline rsp_vec_t vec_zero()