nw, merge in most direct RSP vector opcodes from CEN64

2025-07-05 01:48:29 +03:00 · 2015-06-25 21:32:57 +02:00 · 2015-06-25 21:32:57 +02:00 · 4441fe004b
commit 4441fe004b
parent 938d96bbb2
26 changed files with 673 additions and 145 deletions
--- a/src/emu/cpu/rsp/rspcp2.c
+++ b/src/emu/cpu/rsp/rspcp2.c
@ -33,25 +33,25 @@ const rsp_cop2::vec_helpers_t rsp_cop2::m_vec_helpers = {
 		{  0,  0,  0,  0,  0,  0,  0, ~0 }
 	},
 	{ // shuffle_keys
-/* -- */{0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e},
+		{ 0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e }, /* -- */
-/* -- */{0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e},
+		{ 0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e }, /* -- */
-/* 0q */{0x0100, 0x0100, 0x0504, 0x0504, 0x0908, 0x0908, 0x0d0c, 0x0d0c},
+		{ 0x0100, 0x0100, 0x0504, 0x0504, 0x0908, 0x0908, 0x0d0c, 0x0d0c }, /* 0q */
-/* 1q */{0x0302, 0x0302, 0x0706, 0x0706, 0x0b0a, 0x0b0a, 0x0f0e, 0x0f0e},
+		{ 0x0302, 0x0302, 0x0706, 0x0706, 0x0b0a, 0x0b0a, 0x0f0e, 0x0f0e }, /* 1q */
-/* 0h */{0x0100, 0x0100, 0x0100, 0x0100, 0x0908, 0x0908, 0x0908, 0x0908},
+		{ 0x0100, 0x0100, 0x0100, 0x0100, 0x0908, 0x0908, 0x0908, 0x0908 }, /* 0h */
-/* 1h */{0x0302, 0x0302, 0x0302, 0x0302, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a},
+		{ 0x0302, 0x0302, 0x0302, 0x0302, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a }, /* 1h */
-/* 2h */{0x0504, 0x0504, 0x0504, 0x0504, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c},
+		{ 0x0504, 0x0504, 0x0504, 0x0504, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c }, /* 2h */
-/* 3h */{0x0706, 0x0706, 0x0706, 0x0706, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e},
+		{ 0x0706, 0x0706, 0x0706, 0x0706, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e }, /* 3h */
-/* 0w */{0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100},
+		{ 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100 }, /* 0w */
-/* 1w */{0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302},
+		{ 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302 }, /* 1w */
-/* 2w */{0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504},
+		{ 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504 }, /* 2w */
-/* 3w */{0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706},
+		{ 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706 }, /* 3w */
-/* 4w */{0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908},
+		{ 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908 }, /* 4w */
-/* 5w */{0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a},
+		{ 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a }, /* 5w */
-/* 6w */{0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c},
+		{ 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c }, /* 6w */
-/* 7w */{0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e}
+		{ 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e }  /* 7w */
 	},
 	{ // sll_b2l_keys
 		{ 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d },
@ -180,6 +180,60 @@ const rsp_cop2::vec_helpers_t rsp_cop2::m_vec_helpers = {
 		{ 0x030c, 0x0102, 0x0700, 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e }
 	}
 };
 #ifndef __SSSE3__
 rsp_vec_t rsp_cop2::vec_load_and_shuffle_operand(const UINT16* src, UINT32 element)
 {
 	// SSE2-only fallback used when SSSE3 is not available at compile time.
 	// Loads the eight 16-bit lanes at src and rearranges them according to
 	// the element selector:
 	//   0-1   : lanes returned as loaded
 	//   2-3   : "quarter" modes (0q ... 1q), one lane of each pair duplicated
 	//   4-7   : "half" modes (0h ... 3h), one lane per 4-lane half broadcast
 	//   8-15  : "whole" modes (0w ... 7w), a single lane broadcast everywhere
 	if (element < 2)
 	{
 		return vec_load_unshuffled_operand(src);
 	}
 	if (element < 4) // element => 0q ... 1q
 	{
 		const rsp_vec_t loaded = vec_load_unshuffled_operand(src);
 		if (element == 2)
 		{
 			// Lanes become 1,1,3,3 in the low half and 5,5,7,7 in the high half.
 			const rsp_vec_t low_done = _mm_shufflelo_epi16(loaded, _MM_SHUFFLE(3,3,1,1));
 			return _mm_shufflehi_epi16(low_done, _MM_SHUFFLE(3,3,1,1));
 		}
 		// Lanes become 0,0,2,2 in the low half and 4,4,6,6 in the high half.
 		const rsp_vec_t low_done = _mm_shufflelo_epi16(loaded, _MM_SHUFFLE(2,2,0,0));
 		return _mm_shufflehi_epi16(low_done, _MM_SHUFFLE(2,2,0,0));
 	}
 	if (element < 8) // element => 0h ... 3h
 	{
 		// Fetch the two source lanes with memcpy, then pack them into the
 		// low 32 bits of a scalar so they can be loaded as lanes 0 and 1.
 		UINT16 lane_for_low_half;
 		UINT16 lane_for_high_half;
 		memcpy(&lane_for_low_half, src + element - 4, sizeof(lane_for_low_half));
 		memcpy(&lane_for_high_half, src + element - 0, sizeof(lane_for_high_half));
 		UINT64 packed = lane_for_low_half | ((UINT32) lane_for_high_half << 16);
 		// Widen lane 0 / lane 1 to pairs, then widen each pair to four lanes.
 		const rsp_vec_t pairs = _mm_shufflelo_epi16(_mm_loadl_epi64((rsp_vec_t*) &packed), _MM_SHUFFLE(1,1,0,0));
 		return _mm_shuffle_epi32(pairs, _MM_SHUFFLE(1,1,0,0));
 	}
 	// element => 0w ... 7w
 	// Duplicate the chosen lane into a 32-bit pair and broadcast that pair
 	// to all four 32-bit positions.
 	UINT16 lane;
 	memcpy(&lane, src + (element - 8), sizeof(lane));
 	UINT64 packed = lane | ((UINT32) lane << 16);
 	return _mm_shuffle_epi32(_mm_loadl_epi64((rsp_vec_t*) &packed), _MM_SHUFFLE(0,0,0,0));
 }
 #else
 rsp_vec_t rsp_cop2::vec_load_and_shuffle_operand(const UINT16* src, UINT32 element)
 {
 	// SSSE3 path: the element selector picks a precomputed byte-shuffle key
 	// from m_vec_helpers.shuffle_keys, and one byte shuffle applies it to the
 	// eight 16-bit lanes at src. Both loads use the aligned-load intrinsic,
 	// so src and the key table must be 16-byte aligned.
 	const rsp_vec_t shuffle_key = _mm_load_si128((rsp_vec_t*) m_vec_helpers.shuffle_keys[element]);
 	const rsp_vec_t lanes = _mm_load_si128((rsp_vec_t*) src);
 	return _mm_shuffle_epi8(lanes, shuffle_key);
 }
 #endif
 #endif
 extern offs_t rsp_dasm_one(char *buffer, offs_t pc, UINT32 op);
@ -1092,6 +1146,17 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Multiplies signed integer by signed integer * 2
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t acc_lo, acc_mid, acc_hi;
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vmulf_vmulu(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
 			write_acc_lo(acc, acc_lo);
 			write_acc_mid(acc, acc_mid);
 			write_acc_hi(acc, acc_hi);
 #else
 			for (i=0; i < 8; i++)
 			{
@ -1131,6 +1196,17 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			//
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t acc_lo, acc_mid, acc_hi;
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vmulf_vmulu(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
 			write_acc_lo(acc, acc_lo);
 			write_acc_mid(acc, acc_mid);
 			write_acc_hi(acc, acc_hi);
 #else
 			for (i=0; i < 8; i++)
 			{
@ -1174,6 +1250,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// The low slice of accumulator is stored into destination element
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t acc_lo, acc_mid, acc_hi;
 			acc_lo = read_acc_lo(acc);
 			acc_mid = read_acc_mid(acc);
 			acc_hi = read_acc_hi(acc);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vmadl_vmudl(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
 			write_acc_lo(acc, acc_lo);
 			write_acc_mid(acc, acc_mid);
 			write_acc_hi(acc, acc_hi);
 #else
 			for (i=0; i < 8; i++)
 			{
@ -1204,6 +1295,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// The middle slice of accumulator is stored into destination element
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t acc_lo, acc_mid, acc_hi;
 			acc_lo = read_acc_lo(acc);
 			acc_mid = read_acc_mid(acc);
 			acc_hi = read_acc_hi(acc);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vmadm_vmudm(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
 			write_acc_lo(acc, acc_lo);
 			write_acc_mid(acc, acc_mid);
 			write_acc_hi(acc, acc_hi);
 #else
 			for (i=0; i < 8; i++)
 			{
@ -1235,6 +1341,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// The low slice of accumulator is stored into destination element
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t acc_lo, acc_mid, acc_hi;
 			acc_lo = read_acc_lo(acc);
 			acc_mid = read_acc_mid(acc);
 			acc_hi = read_acc_hi(acc);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vmadn_vmudn(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
 			write_acc_lo(acc, acc_lo);
 			write_acc_mid(acc, acc_mid);
 			write_acc_hi(acc, acc_hi);
 #else
 			for (i=0; i < 8; i++)
 			{
@ -1265,6 +1386,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// The highest 32 bits of the accumulator are saturated into the destination element
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t acc_lo, acc_mid, acc_hi;
 			acc_lo = read_acc_lo(acc);
 			acc_mid = read_acc_mid(acc);
 			acc_hi = read_acc_hi(acc);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vmadh_vmudh(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
 			write_acc_lo(acc, acc_lo);
 			write_acc_mid(acc, acc_mid);
 			write_acc_hi(acc, acc_hi);
 #else
 			for (i=0; i < 8; i++)
 			{
@ -1296,6 +1432,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// The result is added to accumulator
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t acc_lo, acc_mid, acc_hi;
 			acc_lo = read_acc_lo(acc);
 			acc_mid = read_acc_mid(acc);
 			acc_hi = read_acc_hi(acc);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vmacf_vmacu(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
 			write_acc_lo(acc, acc_lo);
 			write_acc_mid(acc, acc_mid);
 			write_acc_hi(acc, acc_hi);
 #else
 			for (i=0; i < 8; i++)
 			{
@ -1330,6 +1481,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			//
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t acc_lo, acc_mid, acc_hi;
 			acc_lo = read_acc_lo(acc);
 			acc_mid = read_acc_mid(acc);
 			acc_hi = read_acc_hi(acc);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vmacf_vmacu(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
 			write_acc_lo(acc, acc_lo);
 			write_acc_mid(acc, acc_mid);
 			write_acc_hi(acc, acc_hi);
 #else
 			for (i = 0; i < 8; i++)
 			{
@ -1383,6 +1549,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// The low slice of accumulator is stored into destination element
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t acc_lo, acc_mid, acc_hi;
 			acc_lo = read_acc_lo(acc);
 			acc_mid = read_acc_mid(acc);
 			acc_hi = read_acc_hi(acc);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vmadl_vmudl(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
 			write_acc_lo(acc, acc_lo);
 			write_acc_mid(acc, acc_mid);
 			write_acc_hi(acc, acc_hi);
 #else
 			for (i = 0; i < 8; i++)
 			{
@ -1415,6 +1596,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// The middle slice of accumulator is stored into destination element
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t acc_lo, acc_mid, acc_hi;
 			acc_lo = read_acc_lo(acc);
 			acc_mid = read_acc_mid(acc);
 			acc_hi = read_acc_hi(acc);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vmadm_vmudm(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
 			write_acc_lo(acc, acc_lo);
 			write_acc_mid(acc, acc_mid);
 			write_acc_hi(acc, acc_hi);
 #else
 			for (i=0; i < 8; i++)
 			{
@ -1449,6 +1645,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// The low slice of accumulator is stored into destination element
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t acc_lo, acc_mid, acc_hi;
 			acc_lo = read_acc_lo(acc);
 			acc_mid = read_acc_mid(acc);
 			acc_hi = read_acc_hi(acc);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vmadn_vmudn(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
 			write_acc_lo(acc, acc_lo);
 			write_acc_mid(acc, acc_mid);
 			write_acc_hi(acc, acc_hi);
 #else
 			for (i=0; i < 8; i++)
 			{
@ -1486,6 +1697,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// The highest 32 bits of the accumulator are saturated into the destination element
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t acc_lo, acc_mid, acc_hi;
 			acc_lo = read_acc_lo(acc);
 			acc_mid = read_acc_mid(acc);
 			acc_hi = read_acc_hi(acc);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vmadh_vmudh(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
 			write_acc_lo(acc, acc_lo);
 			write_acc_mid(acc, acc_mid);
 			write_acc_hi(acc, acc_hi);
 #else
 			for (i = 0; i < 8; i++)
 			{
@ -1519,6 +1745,18 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// TODO: check VS2REG == VDREG
 #if USE_SIMD
 			rsp_vec_t acc_lo;
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t carry = read_vco_lo(m_flags[RSP_VCO].s);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vadd(vs, vt_shuffle, carry, &acc_lo);
 			write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
 			write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
 			write_acc_lo(acc, acc_lo);
 #else
 			for (i=0; i < 8; i++)
 			{
@ -1551,6 +1789,18 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// TODO: check VS2REG == VDREG
 #if USE_SIMD
 			rsp_vec_t acc_lo;
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t carry = read_vco_lo(m_flags[RSP_VCO].s);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vsub(vs, vt_shuffle, carry, &acc_lo);
 			write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
 			write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
 			write_acc_lo(acc, acc_lo);
 #else
 			for (i = 0; i < 8; i++)
 			{
@ -1583,6 +1833,15 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// the result to destination register
 #if USE_SIMD
 			rsp_vec_t acc_lo;
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vabs(vs, vt_shuffle, vec_zero(), &acc_lo);
 			write_acc_lo(acc, acc_lo);
 #else
 			for (i=0; i < 8; i++)
 			{
@ -1628,6 +1887,17 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// TODO: check VS2REG = VDREG
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t sn;
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vaddc(vs, vt_shuffle, vec_zero(), &sn);
 			write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
 			write_vco_lo(m_flags[RSP_VCO].s, sn);
 			write_acc_lo(acc, m_v[VDREG].v);
 #else
 			CLEAR_ZERO_FLAGS();
 			CLEAR_CARRY_FLAGS();
@ -1663,6 +1933,17 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// TODO: check VS2REG = VDREG
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t eq, sn;
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vsubc(vs, vt_shuffle, vec_zero(), &eq, &sn);
 			write_vco_hi(m_flags[RSP_VCO].s, eq);
 			write_vco_lo(m_flags[RSP_VCO].s, sn);
 			write_acc_lo(acc, m_v[VDREG].v);
 #else
 			CLEAR_ZERO_FLAGS();
 			CLEAR_CARRY_FLAGS();
@ -1700,6 +1981,18 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Stores high, middle or low slice of accumulator to destination vector
 #if USE_SIMD
 			// The element field selects which accumulator slice is copied to
 			// the destination register: 8 = high, 9 = middle, 10 = low. Any
 			// other selector yields an all-zero vector.
 			// NOTE(review): previously every case was an empty break, so the
 			// destination was never written on the SIMD path. Confirm the
 			// selector mapping against the scalar path below.
 			switch (EL)
 			{
 				case 8:
 					m_v[VDREG].v = read_acc_hi(m_acc.s);
 					break;
 				case 9:
 					m_v[VDREG].v = read_acc_mid(m_acc.s);
 					break;
 				case 10:
 					m_v[VDREG].v = read_acc_lo(m_acc.s);
 					break;
 				default:
 					m_v[VDREG].v = vec_zero();
 					break;
 			}
 #else
 			switch (EL)
 			{
@ -1746,6 +2039,22 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Moves the element in VS2 to destination vector
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t le;
 			rsp_vec_t eq = read_vco_hi(m_flags[RSP_VCO].s);
 			rsp_vec_t sign = read_vco_lo(m_flags[RSP_VCO].s);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_veq_vge_vlt_vne(op, vs, vt_shuffle, vec_zero(), &le, eq, sign);
 			write_vcc_hi(m_flags[RSP_VCC].s, vec_zero());
 			write_vcc_lo(m_flags[RSP_VCC].s, le);
 			write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
 			write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
 			write_acc_lo(acc, m_v[VDREG].v);
 #else
 			CLEAR_COMPARE_FLAGS();
 			CLEAR_CLIP2_FLAGS();
@ -1797,6 +2106,22 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Moves the element in VS2 to destination vector
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t le;
 			rsp_vec_t eq = read_vco_hi(m_flags[RSP_VCO].s);
 			rsp_vec_t sign = read_vco_lo(m_flags[RSP_VCO].s);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_veq_vge_vlt_vne(op, vs, vt_shuffle, vec_zero(), &le, eq, sign);
 			write_vcc_hi(m_flags[RSP_VCC].s, vec_zero());
 			write_vcc_lo(m_flags[RSP_VCC].s, le);
 			write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
 			write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
 			write_acc_lo(acc, m_v[VDREG].v);
 #else
 			CLEAR_COMPARE_FLAGS();
 			CLEAR_CLIP2_FLAGS();
@ -1836,6 +2161,22 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Moves the element in VS2 to destination vector
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t le;
 			rsp_vec_t eq = read_vco_hi(m_flags[RSP_VCO].s);
 			rsp_vec_t sign = read_vco_lo(m_flags[RSP_VCO].s);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_veq_vge_vlt_vne(op, vs, vt_shuffle, vec_zero(), &le, eq, sign);
 			write_vcc_hi(m_flags[RSP_VCC].s, vec_zero());
 			write_vcc_lo(m_flags[RSP_VCC].s, le);
 			write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
 			write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
 			write_acc_lo(acc, m_v[VDREG].v);
 #else
 			CLEAR_COMPARE_FLAGS();
 			CLEAR_CLIP2_FLAGS();
@ -1876,6 +2217,22 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Moves the element in VS2 to destination vector
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t le;
 			rsp_vec_t eq = read_vco_hi(m_flags[RSP_VCO].s);
 			rsp_vec_t sign = read_vco_lo(m_flags[RSP_VCO].s);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_veq_vge_vlt_vne(op, vs, vt_shuffle, vec_zero(), &le, eq, sign);
 			write_vcc_hi(m_flags[RSP_VCC].s, vec_zero());
 			write_vcc_lo(m_flags[RSP_VCC].s, le);
 			write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
 			write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
 			write_acc_lo(acc, m_v[VDREG].v);
 #else
 			CLEAR_COMPARE_FLAGS();
 			CLEAR_CLIP2_FLAGS();
@ -1915,6 +2272,25 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Vector clip low
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t ge = read_vcc_hi(m_flags[RSP_VCC].s);
 			rsp_vec_t le = read_vcc_lo(m_flags[RSP_VCC].s);
 			rsp_vec_t eq = read_vco_hi(m_flags[RSP_VCO].s);
 			rsp_vec_t sign = read_vco_lo(m_flags[RSP_VCO].s);
 			rsp_vec_t vce = read_vce(m_flags[RSP_VCE].s);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vcl(vs, vt_shuffle, vec_zero(), &ge, &le, eq, sign, vce);
 			write_vcc_hi(m_flags[RSP_VCC].s, ge);
 			write_vcc_lo(m_flags[RSP_VCC].s, le);
 			write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
 			write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
 			write_vce(m_flags[RSP_VCE].s, vec_zero());
 			write_acc_lo(acc, m_v[VDREG].v);
 #else
 			for (i = 0; i < 8; i++)
 			{
@ -2012,6 +2388,20 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Vector clip high
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t ge, le, sign, eq, vce;
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vch(vs, vt_shuffle, vec_zero(), &ge, &le, &eq, &sign, &vce);
 			write_vcc_hi(m_flags[RSP_VCC].s, ge);
 			write_vcc_lo(m_flags[RSP_VCC].s, le);
 			write_vco_hi(m_flags[RSP_VCO].s, eq);
 			write_vco_lo(m_flags[RSP_VCO].s, sign);
 			write_vce(m_flags[RSP_VCE].s, vce);
 			write_acc_lo(acc, m_v[VDREG].v);
 #else
 			CLEAR_CARRY_FLAGS();
 			CLEAR_COMPARE_FLAGS();
@ -2099,6 +2489,20 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Vector clip reverse
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t ge, le;
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vcr(vs, vt_shuffle, vec_zero(), &ge, &le);
 			write_vcc_hi(m_flags[RSP_VCC].s, ge);
 			write_vcc_lo(m_flags[RSP_VCC].s, le);
 			write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
 			write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
 			write_vce(m_flags[RSP_VCE].s, vec_zero());
 			write_acc_lo(acc, m_v[VDREG].v);
 #else
 			CLEAR_CARRY_FLAGS();
 			CLEAR_COMPARE_FLAGS();
@ -2161,6 +2565,17 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Merges two vectors according to compare flags
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			// The merge mask is the low half of the compare flags (VCC).
 			// It was previously read from the carry flags (VCO) by mistake.
 			rsp_vec_t le = read_vcc_lo(m_flags[RSP_VCC].s);
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vmrg(vs, vt_shuffle, le);
 			// Both halves of VCO are cleared, and the merged result is
 			// mirrored into the low accumulator slice.
 			write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
 			write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
 			write_acc_lo(acc, m_v[VDREG].v);
 #else
 			for (i = 0; i < 8; i++)
 			{
@ -2189,6 +2604,14 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Bitwise AND of two vector registers
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vand_vnand(op, vs, vt_shuffle);
 			write_acc_lo(acc, m_v[VDREG].v);
 #else
 			for (i = 0; i < 8; i++)
 			{
@ -2209,6 +2632,14 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Bitwise NOT AND of two vector registers
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vand_vnand(op, vs, vt_shuffle);
 			write_acc_lo(acc, m_v[VDREG].v);
 #else
 			for (i = 0; i < 8; i++)
 			{
@ -2229,6 +2660,14 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Bitwise OR of two vector registers
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vor_vnor(op, vs, vt_shuffle);
 			write_acc_lo(acc, m_v[VDREG].v);
 #else
 			for (i = 0; i < 8; i++)
 			{
@ -2249,6 +2688,14 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Bitwise NOT OR of two vector registers
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vor_vnor(op, vs, vt_shuffle);
 			write_acc_lo(acc, m_v[VDREG].v);
 #else
 			for (i=0; i < 8; i++)
 			{
@ -2269,6 +2716,14 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Bitwise XOR of two vector registers
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vxor_vnxor(op, vs, vt_shuffle);
 			write_acc_lo(acc, m_v[VDREG].v);
 #else
 			for (i=0; i < 8; i++)
 			{
@ -2289,6 +2744,14 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Bitwise NOT XOR of two vector registers
 #if USE_SIMD
 			UINT16 *acc = m_acc.s;
 			rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
 			rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
 			m_v[VDREG].v = vec_vxor_vnxor(op, vs, vt_shuffle);
 			write_acc_lo(acc, m_v[VDREG].v);
 #else
 			for (i=0; i < 8; i++)
 			{
@ -2310,6 +2773,12 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Calculates reciprocal
 #if USE_SIMD
 			write_acc_lo(m_acc.s, vec_load_and_shuffle_operand(m_v[VS2REG].s, EL));
 			INT32 dp = op & m_dp_flag;
 			m_dp_flag = 0;
 			m_v[VDREG].v = vec_vrcp_vrsq(op, dp, VS2REG, EL, VDREG, VS1REG);
 #else
 			INT32 shifter = 0;
@ -2373,6 +2842,12 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Calculates reciprocal low part
 #if USE_SIMD
 			write_acc_lo(m_acc.s, vec_load_and_shuffle_operand(m_v[VS2REG].s, EL));
 			INT32 dp = op & m_dp_flag;
 			m_dp_flag = 0;
 			m_v[VDREG].v = vec_vrcp_vrsq(op, dp, VS2REG, EL, VDREG, VS1REG);
 #else
 			INT32 shifter = 0;
@ -2452,6 +2927,11 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Calculates reciprocal high part
 #if USE_SIMD
 			write_acc_lo(m_acc.s, vec_load_and_shuffle_operand(m_v[VS2REG].s, EL));
 			m_dp_flag = 1;
 			m_v[VDREG].v = vec_vdivh(VS2REG, EL, VDREG, VS1REG);
 #else
 			m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16;
 			m_dp_allowed = 1;
@ -2477,6 +2957,8 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Moves element from vector to destination vector
 #if USE_SIMD
 			write_acc_lo(m_acc.s, vec_load_and_shuffle_operand(m_v[VS2REG].s, EL));
 			m_v[VDREG].v = vec_vmov(VS2REG, EL, VDREG, VS1REG);
 #else
 			VREG_S(VDREG, VS1REG & 7) = VREG_S(VS2REG, EL & 7);
 			for (i = 0; i < 8; i++)
@ -2497,6 +2979,12 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Calculates reciprocal square-root
 #if USE_SIMD
 			write_acc_lo(m_acc.s, vec_load_and_shuffle_operand(m_v[VS2REG].s, EL));
 			INT32 dp = op & m_dp_flag;
 			m_dp_flag = 0;
 			m_v[VDREG].v = vec_vrcp_vrsq(op, dp, VS2REG, EL, VDREG, VS1REG);
 #else
 			INT32 shifter = 0;
@ -2561,6 +3049,12 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Calculates reciprocal square-root low part
 #if USE_SIMD
 			write_acc_lo(m_acc.s, vec_load_and_shuffle_operand(m_v[VS2REG].s, EL));
 			INT32 dp = op & m_dp_flag;
 			m_dp_flag = 0;
 			m_v[VDREG].v = vec_vrcp_vrsq(op, dp, VS2REG, EL, VDREG, VS1REG);
 #else
 			INT32 shifter = 0;
 			INT32 rec = (INT16)VREG_S(VS2REG, EL & 7);
@ -2643,6 +3137,11 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
 			// Calculates reciprocal square-root high part
 #if USE_SIMD
 			write_acc_lo(m_acc.s, vec_load_and_shuffle_operand(m_v[VS2REG].s, EL));
 			m_dp_flag = 1;
 			m_v[VDREG].v = vec_vdivh(VS2REG, EL, VDREG, VS1REG);
 #else
 			m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16;
 			m_dp_allowed = 1;
--- a/src/emu/cpu/rsp/rspcp2.h
+++ b/src/emu/cpu/rsp/rspcp2.h
@ -51,7 +51,7 @@ union VECTOR_REG
 {
 	UINT64 d[2];
 	UINT32 l[4];
-	INT16 s[8];
+	UINT16 s[8];
 	UINT8 b[16];
 #if USE_SIMD
 	rsp_vec_t v;
@ -187,6 +187,32 @@ protected:
 	INT32           m_dp_allowed;
 #if USE_SIMD
 	// Index of each vector flag register within the m_flags array.
 	enum rsp_flags_t {
 		RSP_VCO = 0,
 		RSP_VCC = 1,
 		RSP_VCE = 2
 	};
 	// NOTE(review): these look like UINT16 element offsets of the low, middle
 	// and high accumulator slices inside m_acc.s (three groups of eight
 	// lanes). Confirm against the read_acc_* / write_acc_* helpers.
 	enum rsp_acc_t {
 		RSP_ACC_LO = 16,
 		RSP_ACC_MD = 8,
 		RSP_ACC_HI = 0,
 	};
 	// Sixteen 16-bit values overlaid on two rsp_vec_t vectors. The vector
 	// member gives the UINT16 array the alignment of rsp_vec_t.
 	// Assumes rsp_vec_t holds eight 16-bit lanes - TODO confirm.
 	// NOTE(review): identifiers containing a double underscore are reserved
 	// in C++; consider renaming __align.
 	union aligned_rsp_2vect_t {
 		rsp_vec_t __align[2];
 		UINT16 s[16];
 	};
 	// Twenty-four 16-bit values overlaid on three rsp_vec_t vectors. The
 	// vector member gives the UINT16 array the alignment of rsp_vec_t.
 	// Assumes rsp_vec_t holds eight 16-bit lanes - TODO confirm.
 	// NOTE(review): identifiers containing a double underscore are reserved
 	// in C++; consider renaming __align.
 	union aligned_rsp_3vect_t {
 		rsp_vec_t __align[3];
 		UINT16 s[24];
 	};
 	// Vector flag storage, indexed by rsp_flags_t. The opcode handlers access
 	// it through the read_/write_ vco_hi/lo, vcc_hi/lo and vce helpers.
 	aligned_rsp_2vect_t m_flags[3];
 	// Vector accumulator, accessed through the read_/write_ acc_lo/mid/hi
 	// helpers via m_acc.s.
 	aligned_rsp_3vect_t m_acc;
 	// Set to 1 by the reciprocal / reciprocal-square-root "high part" opcodes.
 	// The next reciprocal or reciprocal-square-root opcode ANDs it with the
 	// opcode word and then clears it.
 	UINT32 m_dp_flag;
 	typedef struct
 	{
 		rsp_vec_t dummy_for_alignment;
@ -308,7 +334,9 @@ protected:
 #include "vcmp.h"
 #include "vcl.h"
 #include "vcr.h"
 #include "vdivh.h"
 #include "vmac.h"
 #include "vmov.h"
 #include "vmrg.h"
 #include "vmul.h"
 #include "vmulh.h"
@ -316,6 +344,8 @@ protected:
 #include "vmulm.h"
 #include "vmuln.h"
 #include "vor.h"
 #include "vrcpsq.h"
 #include "vrsq.h"
 #include "vsub.h"
 #include "vsubc.h"
 #include "vxor.h"
--- a/src/emu/cpu/rsp/vabs.h
+++ b/src/emu/cpu/rsp/vabs.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vabs(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo)
+inline rsp_vec_t vec_vabs(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo)
 {
 	rsp_vec_t vs_zero = _mm_cmpeq_epi16(vs, zero);
 	rsp_vec_t sign_lt = _mm_srai_epi16(vs, 15);
--- a/src/emu/cpu/rsp/vadd.h
+++ b/src/emu/cpu/rsp/vadd.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vadd(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t carry, rsp_vec_t *acc_lo)
+inline rsp_vec_t vec_vadd(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t carry, rsp_vec_t *acc_lo)
 {
 	// VCC uses unsaturated arithmetic.
 	rsp_vec_t vd = _mm_add_epi16(vs, vt);
--- a/src/emu/cpu/rsp/vaddc.h
+++ b/src/emu/cpu/rsp/vaddc.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vaddc(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *sn)
+inline rsp_vec_t vec_vaddc(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *sn)
 {
 	rsp_vec_t sat_sum = _mm_adds_epu16(vs, vt);
 	rsp_vec_t unsat_sum = _mm_add_epi16(vs, vt);
--- a/src/emu/cpu/rsp/vand.h
+++ b/src/emu/cpu/rsp/vand.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vand_vnand(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt) {
+inline rsp_vec_t vec_vand_vnand(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt) {
 	rsp_vec_t vmask = _mm_load_si128((rsp_vec_t *) m_vec_helpers.logic_mask[iw & 0x1]);
 	rsp_vec_t vd = _mm_and_si128(vs, vt);
--- a/src/emu/cpu/rsp/vch.h
+++ b/src/emu/cpu/rsp/vch.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vch(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *ge, rsp_vec_t *le, rsp_vec_t *eq, rsp_vec_t *sign, rsp_vec_t *vce) {
+inline rsp_vec_t vec_vch(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *ge, rsp_vec_t *le, rsp_vec_t *eq, rsp_vec_t *sign, rsp_vec_t *vce) {
 	// sign = (vs ^ vt) < 0
 	*sign = _mm_xor_si128(vs, vt);
 	*sign = _mm_cmplt_epi16(*sign, zero);
--- a/src/emu/cpu/rsp/vcl.h
+++ b/src/emu/cpu/rsp/vcl.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vcl(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *ge, rsp_vec_t *le, rsp_vec_t eq, rsp_vec_t sign, rsp_vec_t vce)
+inline rsp_vec_t vec_vcl(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *ge, rsp_vec_t *le, rsp_vec_t eq, rsp_vec_t sign, rsp_vec_t vce)
 {
 	// sign_negvt = sign ? -vt : vt
 	rsp_vec_t sign_negvt = _mm_xor_si128(vt, sign);
--- a/src/emu/cpu/rsp/vcmp.h
+++ b/src/emu/cpu/rsp/vcmp.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_veq_vge_vlt_vne(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *le, rsp_vec_t eq, rsp_vec_t sign)
+inline rsp_vec_t vec_veq_vge_vlt_vne(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *le, rsp_vec_t eq, rsp_vec_t sign)
 {
 	rsp_vec_t equal = _mm_cmpeq_epi16(vs, vt);
--- a/src/emu/cpu/rsp/vcr.h
+++ b/src/emu/cpu/rsp/vcr.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vcr(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *ge, rsp_vec_t *le) {
+inline rsp_vec_t vec_vcr(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *ge, rsp_vec_t *le) {
 	// sign = (vs ^ vt) < 0
 	rsp_vec_t sign = _mm_xor_si128(vs, vt);
 	sign = _mm_srai_epi16(sign, 15);
--- a/src/emu/cpu/rsp/vdivh.h
+++ b/src/emu/cpu/rsp/vdivh.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-rsp_vec_t vec_vdivh(UINT32 src, UINT32 e, UINT32 dest, UINT32 de)
+inline rsp_vec_t vec_vdivh(UINT32 src, UINT32 e, UINT32 dest, UINT32 de)
 {
 	// Get the element from VT.
 	m_div_in = m_v[src].s[e & 0x7];
--- a/src/emu/cpu/rsp/vmac.h
+++ b/src/emu/cpu/rsp/vmac.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vmacf_vmacu(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_mid, rsp_vec_t *acc_hi)
+inline rsp_vec_t vec_vmacf_vmacu(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_mid, rsp_vec_t *acc_hi)
 {
 	// Get the product and shift it over
 	// being sure to save the carries.
--- a/src/emu/cpu/rsp/vmov.h
+++ b/src/emu/cpu/rsp/vmov.h
@ -1,9 +1,9 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-rsp_vec_t vec_vmov(UINT32 src, UINT32 e, UINT32 dest, UINT32 de)
+inline rsp_vec_t vec_vmov(UINT32 src, UINT32 e, UINT32 dest, UINT32 de)
 {
 	// Get the element from VT and write out the upper part of the result.
 	m_v[dest].s[de & 0x7] = m_v[src].s[e & 0x7];
-	return rsp_vect_load_unshuffled_operand(m_v[dest].s);
+	return vec_load_unshuffled_operand(m_v[dest].s);
 }
--- a/src/emu/cpu/rsp/vmrg.h
+++ b/src/emu/cpu/rsp/vmrg.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vmrg(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t le)
+inline rsp_vec_t vec_vmrg(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t le)
 {
 #ifdef __SSE4_1__
 	return _mm_blendv_epi8(vt, vs, le);
--- a/src/emu/cpu/rsp/vmudh.h
+++ b/src/emu/cpu/rsp/vmudh.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t rsp_vmudh(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
+inline rsp_vec_t rsp_vmudh(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
 {
 	*acc_md = _mm_mullo_epi16(vs, vt);
 	*acc_hi = _mm_mulhi_epi16(vs, vt);
--- a/src/emu/cpu/rsp/vmul.h
+++ b/src/emu/cpu/rsp/vmul.h
@ -5,7 +5,7 @@
 // TODO: CHECK ME.
 //
-static inline rsp_vec_t vec_vmulf_vmulu(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
+inline rsp_vec_t vec_vmulf_vmulu(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
 {
 	rsp_vec_t lo = _mm_mullo_epi16(vs, vt);
 	rsp_vec_t round = _mm_cmpeq_epi16(zero, zero);
--- a/src/emu/cpu/rsp/vmulh.h
+++ b/src/emu/cpu/rsp/vmulh.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vmadh_vmudh(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
+inline rsp_vec_t vec_vmadh_vmudh(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
 {
 	rsp_vec_t lo = _mm_mullo_epi16(vs, vt);
 	rsp_vec_t hi = _mm_mulhi_epi16(vs, vt);
--- a/src/emu/cpu/rsp/vmull.h
+++ b/src/emu/cpu/rsp/vmull.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vmadl_vmudl(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
+inline rsp_vec_t vec_vmadl_vmudl(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
 {
 	rsp_vec_t hi = _mm_mulhi_epu16(vs, vt);
--- a/src/emu/cpu/rsp/vmulm.h
+++ b/src/emu/cpu/rsp/vmulm.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vmadm_vmudm(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
+inline rsp_vec_t vec_vmadm_vmudm(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
 {
 	rsp_vec_t lo = _mm_mullo_epi16(vs, vt);
 	rsp_vec_t hi = _mm_mulhi_epu16(vs, vt);
--- a/src/emu/cpu/rsp/vmuln.h
+++ b/src/emu/cpu/rsp/vmuln.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vmadn_vmudn(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
+inline rsp_vec_t vec_vmadn_vmudn(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
 {
 	rsp_vec_t lo = _mm_mullo_epi16(vs, vt);
 	rsp_vec_t hi = _mm_mulhi_epu16(vs, vt);
--- a/src/emu/cpu/rsp/vor.h
+++ b/src/emu/cpu/rsp/vor.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vor_vnor(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt)
+inline rsp_vec_t vec_vor_vnor(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt)
 {
 	rsp_vec_t vmask = _mm_load_si128((rsp_vec_t *) m_vec_helpers.logic_mask[iw & 0x1]);
--- a/src/emu/cpu/rsp/vrcpsq.h
+++ b/src/emu/cpu/rsp/vrcpsq.h
@ -1,10 +1,8 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vrcp_vrsq(UINT32 iw, INT32 dp, UINT32 src, UINT32 e, UINT32 dest, UINT32 de)
+inline rsp_vec_t vec_vrcp_vrsq(UINT32 iw, INT32 dp, UINT32 src, UINT32 e, UINT32 dest, UINT32 de)
 {
 	UINT32 shift, idx;
 	// Get the element from VT.
 	INT16 vt = m_v[src].s[e & 0x7];
@ -37,7 +35,7 @@ static inline rsp_vec_t vec_vrcp_vrsq(UINT32 iw, INT32 dp, UINT32 src, UINT32 e,
 		if (iw & 0x4) // VRSQ
 		{
-			idx = (idx | 0x200) & 0x3FE | (shift % 2);
+			idx = ((idx | 0x200) & 0x3fe) | (shift % 2);
 			result = rsp_divtable[idx];
 			result = ((0x10000 | result) << 14) >> ((31 - shift) >> 1);
--- a/src/emu/cpu/rsp/vrsq.h
+++ b/src/emu/cpu/rsp/vrsq.h
@ -19,21 +19,22 @@ rsp_vec_t vec_vrsq(INT32 dp, UINT32 src, UINT32 e, UINT32 dest, UINT32 de)
 	}
 	// Handle edge cases.
 	INT32 result;
 	if (data == 0)
 	{
-		result = 0x7fffFFFFU;
+		result = 0x7fffffff;
 	}
 	else if (input == -32768)
 	{
-    	result = 0xffff0000U;
+    	result = 0xffff0000;
 	}
 	else // Main case: compute the reciprocal.
 	{
 		UINT32 shift = count_leading_zeros(data);
-		UINT32 idx = (((UINT64) data << shift) & 0x7FC00000U) >> 22;
+		UINT32 idx = (((UINT64) data << shift) & 0x7fc00000) >> 22;
-		idx = (idx | 0x200) & 0x3FE | (shift % 2);
+		idx = ((idx | 0x200) & 0x3fe) | (shift % 2);
-		INT32 result = rsp_reciprocal_rom[idx];
+		result = rsp_divtable[idx];
 		result = ((0x10000 | result) << 14) >> ((31 - shift) >> 1);
 		result = result ^ input_mask;
--- a/src/emu/cpu/rsp/vsub.h
+++ b/src/emu/cpu/rsp/vsub.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t rsp_vsub(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t carry, rsp_vec_t *acc_lo)
+inline rsp_vec_t vec_vsub(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t carry, rsp_vec_t *acc_lo)
 {
 	// acc_lo uses saturated arithmetic.
 	rsp_vec_t unsat_diff = _mm_sub_epi16(vt, carry);
--- a/src/emu/cpu/rsp/vsubc.h
+++ b/src/emu/cpu/rsp/vsubc.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vsubc(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *eq, rsp_vec_t *sn)
+inline rsp_vec_t vec_vsubc(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *eq, rsp_vec_t *sn)
 {
 	rsp_vec_t sat_udiff = _mm_subs_epu16(vs, vt);
 	rsp_vec_t equal = _mm_cmpeq_epi16(vs, vt);
--- a/src/emu/cpu/rsp/vxor.h
+++ b/src/emu/cpu/rsp/vxor.h
@ -1,7 +1,7 @@
 // license:BSD-3-Clause
 // copyright-holders:Tyler J. Stachecki,Ryan Holtz
-static inline rsp_vec_t vec_vxor_vnxor(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt)
+inline rsp_vec_t vec_vxor_vnxor(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt)
 {
 	rsp_vec_t vmask = _mm_load_si128((rsp_vec_t *) m_vec_helpers.logic_mask[iw & 0x1]);