From 7c78f67d667cb2c3493106fe4c772248dcd411b9 Mon Sep 17 00:00:00 2001 From: "therealmogminer@gmail.com" Date: Fri, 10 Jul 2015 04:09:34 +0200 Subject: [PATCH] nw, Fix CTC2 and VCH opcodes in RSP SIMD code. --- /* Summary: adds a USE_SIMD path for the CTC2 flag-register writes in handle_cop2, drops the old word_reverse table and renames byte_reverse to word_reverse, and makes read_vce and write_vce address the vector one rsp_vec_t past the pointer they are given. */ src/emu/cpu/rsp/rspcp2.c | 33 +++++++++++++++++++++++++++------ src/emu/cpu/rsp/rspcp2.h | 12 +++--------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/src/emu/cpu/rsp/rspcp2.c b/src/emu/cpu/rsp/rspcp2.c index 6d44a06e3e3..cde6f3b5be8 100644 --- a/src/emu/cpu/rsp/rspcp2.c +++ b/src/emu/cpu/rsp/rspcp2.c @@ -207,9 +207,6 @@ const rsp_cop2::vec_helpers_t rsp_cop2::m_vec_helpers = { { 0xffff, 0xffff, 0xffff, 0xffff } // D }, { // word_reverse - 0x0706, 0x0504, 0x0302, 0x0100, 0x0f0e, 0x0d0c, 0x0b0a, 0x0908 - }, - { // byte_reverse 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d } }; @@ -539,9 +536,9 @@ void rsp_cop2::vec_store_group2(UINT32 addr, UINT32 element, UINT16 *regp, rsp_v reg = _mm_packs_epi16(reg, reg); #if !(defined(__SSSE3__) || defined(_MSC_VER)) - reg = sse2_pshufb(reg, m_vec_helpers.byte_reverse); + reg = sse2_pshufb(reg, m_vec_helpers.word_reverse); /* same shuffle key values as before: word_reverse now names the table formerly called byte_reverse */ #else - rsp_vec_t dkey = _mm_load_si128((rsp_vec_t *) (m_vec_helpers.byte_reverse)); + rsp_vec_t dkey = _mm_load_si128((rsp_vec_t *) (m_vec_helpers.word_reverse)); /* same key table as the non-SSSE3 branch, loaded as a vector for _mm_shuffle_epi8 */ reg = _mm_shuffle_epi8(reg, dkey); #endif @@ -3882,9 +3879,32 @@ void rsp_cop2::handle_cop2(UINT32 op) // | 010010 | 00110 | TTTTT | DDDDD | 00000000000 | // ------------------------------------------------ // - //printf("CTC2 "); switch(RDREG) { +#if USE_SIMD + case 0: + case 1: + case 2: /* All three flag registers share one path: each of the low 16 bits of RTVAL is expanded to a 16-bit lane that is 0xffff when the bit is set and 0 when it is clear. NOTE(review): r0..r7 are declared directly under the case labels without a braced scope, which is only legal because no further case label follows inside the USE_SIMD branch. */ + UINT16 r0 = (RTVAL & (1 << 0)) ? 0xffff : 0; + UINT16 r1 = (RTVAL & (1 << 1)) ? 0xffff : 0; + UINT16 r2 = (RTVAL & (1 << 2)) ? 0xffff : 0; + UINT16 r3 = (RTVAL & (1 << 3)) ? 0xffff : 0; + UINT16 r4 = (RTVAL & (1 << 4)) ? 0xffff : 0; + UINT16 r5 = (RTVAL & (1 << 5)) ? 0xffff : 0; + UINT16 r6 = (RTVAL & (1 << 6)) ? 0xffff : 0; + UINT16 r7 = (RTVAL & (1 << 7)) ? 
0xffff : 0; + m_flags[RDREG].__align[0] = _mm_set_epi16(r7, r6, r5, r4, r3, r2, r1, r0); /* RTVAL bits 0-7; the last _mm_set_epi16 argument (r0, bit 0) lands in the lowest lane */ + r0 = (RTVAL & (1 << 8)) ? 0xffff : 0; + r1 = (RTVAL & (1 << 9)) ? 0xffff : 0; + r2 = (RTVAL & (1 << 10)) ? 0xffff : 0; + r3 = (RTVAL & (1 << 11)) ? 0xffff : 0; + r4 = (RTVAL & (1 << 12)) ? 0xffff : 0; + r5 = (RTVAL & (1 << 13)) ? 0xffff : 0; + r6 = (RTVAL & (1 << 14)) ? 0xffff : 0; + r7 = (RTVAL & (1 << 15)) ? 0xffff : 0; + m_flags[RDREG].__align[1] = _mm_set_epi16(r7, r6, r5, r4, r3, r2, r1, r0); /* RTVAL bits 8-15, bit 8 in the lowest lane. NOTE(review): get_flags in rspcp2.h takes its result bits 0-7 from the vector at flags + (sizeof(rsp_vec_t) >> 1) and bits 8-15 from the vector at flags; confirm against the m_flags layout that this agrees with storing bits 0-7 in __align[0] and bits 8-15 in __align[1] here. */ + break; +#else case 0: CLEAR_CARRY_FLAGS(); CLEAR_ZERO_FLAGS(); @@ -3938,6 +3958,7 @@ void rsp_cop2::handle_cop2(UINT32 op) if (RTVAL & (1 << 6)) { SET_CLIP1_FLAG(6); } if (RTVAL & (1 << 7)) { SET_CLIP1_FLAG(7); } break; +#endif } break; } diff --git a/src/emu/cpu/rsp/rspcp2.h b/src/emu/cpu/rsp/rspcp2.h index 4f03f81132f..c5e01218def 100644 --- a/src/emu/cpu/rsp/rspcp2.h +++ b/src/emu/cpu/rsp/rspcp2.h @@ -252,7 +252,6 @@ protected: const UINT16 qr_lut[16][8]; const UINT16 bdls_lut[4][4]; const UINT16 word_reverse[8]; - const UINT16 byte_reverse[8]; } vec_helpers_t; static const vec_helpers_t m_vec_helpers; @@ -299,7 +298,7 @@ protected: } static inline rsp_vec_t read_vce(const UINT16 *vce) { - return vec_load_unshuffled_operand(vce); + return vec_load_unshuffled_operand(vce + (sizeof(rsp_vec_t) >> 1)); /* loads the vector that starts sizeof(rsp_vec_t) >> 1 UINT16 elements (one rsp_vec_t) past vce, instead of the one at vce */ } static inline void write_acc_lo(UINT16 *acc, rsp_vec_t acc_lo) { @@ -331,17 +330,12 @@ protected: } static inline void write_vce(UINT16 *vce, rsp_vec_t vce_r) { - return vec_write_operand(vce, vce_r); + return vec_write_operand(vce + (sizeof(rsp_vec_t) >> 1), vce_r); /* stores to the same offset that read_vce loads from: one rsp_vec_t past vce */ } static inline INT16 get_flags(const UINT16 *flags) { - return (INT16)_mm_movemask_epi8( - _mm_packs_epi16( - _mm_load_si128((rsp_vec_t*) (flags + (sizeof(rsp_vec_t) >> 1))), - _mm_load_si128((rsp_vec_t*) flags) - ) - ); + return _mm_movemask_epi8(_mm_packs_epi16(_mm_load_si128((rsp_vec_t*) (flags + (sizeof(rsp_vec_t) >> 1))), _mm_load_si128((rsp_vec_t*) flags))); /* same expression as the removed lines minus the explicit INT16 cast, so the int mask is now converted implicitly: the two 16-bit-lane vectors are saturated to 16 bytes and the sign bit of each byte becomes one result bit, the first pack operand giving bits 0-7 and the second giving bits 8-15 */ } static inline rsp_vec_t vec_zero()