nw, merge in most direct RSP vector opcodes from CEN64

This commit is contained in:
therealmogminer@gmail.com 2015-06-25 21:32:57 +02:00
parent 938d96bbb2
commit 4441fe004b
26 changed files with 673 additions and 145 deletions

View File

@ -33,153 +33,207 @@ const rsp_cop2::vec_helpers_t rsp_cop2::m_vec_helpers = {
{ 0, 0, 0, 0, 0, 0, 0, ~0 }
},
{ // shuffle_keys
/* -- */{0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e},
/* -- */{0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e},
{ 0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e }, /* -- */
{ 0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e }, /* -- */
/* 0q */{0x0100, 0x0100, 0x0504, 0x0504, 0x0908, 0x0908, 0x0d0c, 0x0d0c},
/* 1q */{0x0302, 0x0302, 0x0706, 0x0706, 0x0b0a, 0x0b0a, 0x0f0e, 0x0f0e},
{ 0x0100, 0x0100, 0x0504, 0x0504, 0x0908, 0x0908, 0x0d0c, 0x0d0c }, /* 0q */
{ 0x0302, 0x0302, 0x0706, 0x0706, 0x0b0a, 0x0b0a, 0x0f0e, 0x0f0e }, /* 1q */
/* 0h */{0x0100, 0x0100, 0x0100, 0x0100, 0x0908, 0x0908, 0x0908, 0x0908},
/* 1h */{0x0302, 0x0302, 0x0302, 0x0302, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a},
/* 2h */{0x0504, 0x0504, 0x0504, 0x0504, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c},
/* 3h */{0x0706, 0x0706, 0x0706, 0x0706, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e},
{ 0x0100, 0x0100, 0x0100, 0x0100, 0x0908, 0x0908, 0x0908, 0x0908 }, /* 0h */
{ 0x0302, 0x0302, 0x0302, 0x0302, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a }, /* 1h */
{ 0x0504, 0x0504, 0x0504, 0x0504, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c }, /* 2h */
{ 0x0706, 0x0706, 0x0706, 0x0706, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e }, /* 3h */
/* 0w */{0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100},
/* 1w */{0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302},
/* 2w */{0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504},
/* 3w */{0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706},
/* 4w */{0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908},
/* 5w */{0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a},
/* 6w */{0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c},
/* 7w */{0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e}
{ 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100 }, /* 0w */
{ 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302 }, /* 1w */
{ 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504 }, /* 2w */
{ 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706 }, /* 3w */
{ 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908 }, /* 4w */
{ 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a }, /* 5w */
{ 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c }, /* 6w */
{ 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e } /* 7w */
},
{ // sll_b2l_keys
{0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d},
{0x0102, 0x8000, 0x0506, 0x0304, 0x090a, 0x0708, 0x0d0e, 0x0b0c},
{0x0001, 0x8080, 0x0405, 0x0203, 0x0809, 0x0607, 0x0c0d, 0x0a0b},
{0x8000, 0x8080, 0x0304, 0x0102, 0x0708, 0x0506, 0x0b0c, 0x090a},
{ 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d },
{ 0x0102, 0x8000, 0x0506, 0x0304, 0x090a, 0x0708, 0x0d0e, 0x0b0c },
{ 0x0001, 0x8080, 0x0405, 0x0203, 0x0809, 0x0607, 0x0c0d, 0x0a0b },
{ 0x8000, 0x8080, 0x0304, 0x0102, 0x0708, 0x0506, 0x0b0c, 0x090a },
{0x8080, 0x8080, 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809},
{0x8080, 0x8080, 0x0102, 0x8000, 0x0506, 0x0304, 0x090a, 0x0708},
{0x8080, 0x8080, 0x0001, 0x8080, 0x0405, 0x0203, 0x0809, 0x0607},
{0x8080, 0x8080, 0x8000, 0x8080, 0x0304, 0x0102, 0x0708, 0x0506},
{ 0x8080, 0x8080, 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809 },
{ 0x8080, 0x8080, 0x0102, 0x8000, 0x0506, 0x0304, 0x090a, 0x0708 },
{ 0x8080, 0x8080, 0x0001, 0x8080, 0x0405, 0x0203, 0x0809, 0x0607 },
{ 0x8080, 0x8080, 0x8000, 0x8080, 0x0304, 0x0102, 0x0708, 0x0506 },
{0x8080, 0x8080, 0x8080, 0x8080, 0x0203, 0x0001, 0x0607, 0x0405},
{0x8080, 0x8080, 0x8080, 0x8080, 0x0102, 0x8000, 0x0506, 0x0304},
{0x8080, 0x8080, 0x8080, 0x8080, 0x0001, 0x8080, 0x0405, 0x0203},
{0x8080, 0x8080, 0x8080, 0x8080, 0x8000, 0x8080, 0x0304, 0x0102},
{ 0x8080, 0x8080, 0x8080, 0x8080, 0x0203, 0x0001, 0x0607, 0x0405 },
{ 0x8080, 0x8080, 0x8080, 0x8080, 0x0102, 0x8000, 0x0506, 0x0304 },
{ 0x8080, 0x8080, 0x8080, 0x8080, 0x0001, 0x8080, 0x0405, 0x0203 },
{ 0x8080, 0x8080, 0x8080, 0x8080, 0x8000, 0x8080, 0x0304, 0x0102 },
{0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0203, 0x0001},
{0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0102, 0x8000},
{0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0001, 0x8080},
{0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8000, 0x8080}
{ 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0203, 0x0001 },
{ 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0102, 0x8000 },
{ 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0001, 0x8080 },
{ 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8000, 0x8080 }
},
{ // sll_l2b_keys
{0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d},
{0x0380, 0x0102, 0x0700, 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e},
{0x8080, 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f},
{0x8080, 0x0380, 0x0102, 0x0700, 0x0506, 0x0b04, 0x090a, 0x0f08},
{ 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d },
{ 0x0380, 0x0102, 0x0700, 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e },
{ 0x8080, 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f },
{ 0x8080, 0x0380, 0x0102, 0x0700, 0x0506, 0x0b04, 0x090a, 0x0f08 },
{0x8080, 0x8080, 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809},
{0x8080, 0x8080, 0x0380, 0x0102, 0x0700, 0x0506, 0x0b04, 0x090a},
{0x8080, 0x8080, 0x8080, 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b},
{0x8080, 0x8080, 0x8080, 0x0380, 0x0102, 0x0700, 0x0506, 0x0b04},
{ 0x8080, 0x8080, 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809 },
{ 0x8080, 0x8080, 0x0380, 0x0102, 0x0700, 0x0506, 0x0b04, 0x090a },
{ 0x8080, 0x8080, 0x8080, 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b },
{ 0x8080, 0x8080, 0x8080, 0x0380, 0x0102, 0x0700, 0x0506, 0x0b04 },
{0x8080, 0x8080, 0x8080, 0x8080, 0x0203, 0x0001, 0x0607, 0x0405},
{0x8080, 0x8080, 0x8080, 0x8080, 0x0380, 0x0102, 0x0700, 0x0506},
{0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0203, 0x0001, 0x0607},
{0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0380, 0x0102, 0x0700},
{ 0x8080, 0x8080, 0x8080, 0x8080, 0x0203, 0x0001, 0x0607, 0x0405 },
{ 0x8080, 0x8080, 0x8080, 0x8080, 0x0380, 0x0102, 0x0700, 0x0506 },
{ 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0203, 0x0001, 0x0607 },
{ 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0380, 0x0102, 0x0700 },
{0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0203, 0x0001},
{0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0380, 0x0102},
{0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0203},
{0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0380}
{ 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0203, 0x0001 },
{ 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0380, 0x0102 },
{ 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0203 },
{ 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x0380 }
},
{ // srl_b2l_keys
{0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d},
{0x0304, 0x0102, 0x0708, 0x0506, 0x0b0c, 0x090a, 0x0f80, 0x0d0e},
{0x0405, 0x0203, 0x0809, 0x0607, 0x0c0d, 0x0a0b, 0x8080, 0x0e0f},
{0x0506, 0x0304, 0x090a, 0x0708, 0x0d0e, 0x0b0c, 0x8080, 0x0f80},
{ 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d },
{ 0x0304, 0x0102, 0x0708, 0x0506, 0x0b0c, 0x090a, 0x0f80, 0x0d0e },
{ 0x0405, 0x0203, 0x0809, 0x0607, 0x0c0d, 0x0a0b, 0x8080, 0x0e0f },
{ 0x0506, 0x0304, 0x090a, 0x0708, 0x0d0e, 0x0b0c, 0x8080, 0x0f80 },
{0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d, 0x8080, 0x8080},
{0x0708, 0x0506, 0x0b0c, 0x090a, 0x0f80, 0x0d0e, 0x8080, 0x8080},
{0x0809, 0x0607, 0x0c0d, 0x0a0b, 0x8080, 0x0e0f, 0x8080, 0x8080},
{0x090a, 0x0708, 0x0d0e, 0x0b0c, 0x8080, 0x0f80, 0x8080, 0x8080},
{ 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d, 0x8080, 0x8080 },
{ 0x0708, 0x0506, 0x0b0c, 0x090a, 0x0f80, 0x0d0e, 0x8080, 0x8080 },
{ 0x0809, 0x0607, 0x0c0d, 0x0a0b, 0x8080, 0x0e0f, 0x8080, 0x8080 },
{ 0x090a, 0x0708, 0x0d0e, 0x0b0c, 0x8080, 0x0f80, 0x8080, 0x8080 },
{0x0a0b, 0x0809, 0x0e0f, 0x0c0d, 0x8080, 0x8080, 0x8080, 0x8080},
{0x0b0c, 0x090a, 0x0f80, 0x0d0e, 0x8080, 0x8080, 0x8080, 0x8080},
{0x0c0d, 0x0a0b, 0x8080, 0x0e0f, 0x8080, 0x8080, 0x8080, 0x8080},
{0x0d0e, 0x0b0c, 0x8080, 0x0f80, 0x8080, 0x8080, 0x8080, 0x8080},
{ 0x0a0b, 0x0809, 0x0e0f, 0x0c0d, 0x8080, 0x8080, 0x8080, 0x8080 },
{ 0x0b0c, 0x090a, 0x0f80, 0x0d0e, 0x8080, 0x8080, 0x8080, 0x8080 },
{ 0x0c0d, 0x0a0b, 0x8080, 0x0e0f, 0x8080, 0x8080, 0x8080, 0x8080 },
{ 0x0d0e, 0x0b0c, 0x8080, 0x0f80, 0x8080, 0x8080, 0x8080, 0x8080 },
{0x0e0f, 0x0c0d, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080},
{0x0f80, 0x0d0e, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080},
{0x8080, 0x0e0f, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080},
{0x8080, 0x0f80, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080}
{ 0x0e0f, 0x0c0d, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080 },
{ 0x0f80, 0x0d0e, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080 },
{ 0x8080, 0x0e0f, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080 },
{ 0x8080, 0x0f80, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080 }
},
{ // ror_b2l_keys
{0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d},
{0x0304, 0x0102, 0x0708, 0x0506, 0x0b0c, 0x090a, 0x0f00, 0x0d0e},
{0x0405, 0x0203, 0x0809, 0x0607, 0x0c0d, 0x0a0b, 0x0001, 0x0e0f},
{0x0506, 0x0304, 0x090a, 0x0708, 0x0d0e, 0x0b0c, 0x0102, 0x0f00},
{ 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d },
{ 0x0304, 0x0102, 0x0708, 0x0506, 0x0b0c, 0x090a, 0x0f00, 0x0d0e },
{ 0x0405, 0x0203, 0x0809, 0x0607, 0x0c0d, 0x0a0b, 0x0001, 0x0e0f },
{ 0x0506, 0x0304, 0x090a, 0x0708, 0x0d0e, 0x0b0c, 0x0102, 0x0f00 },
{0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001},
{0x0708, 0x0506, 0x0b0c, 0x090a, 0x0f00, 0x0d0e, 0x0304, 0x0102},
{0x0809, 0x0607, 0x0c0d, 0x0a0b, 0x0001, 0x0e0f, 0x0405, 0x0203},
{0x090a, 0x0708, 0x0d0e, 0x0b0c, 0x0102, 0x0f00, 0x0506, 0x0304},
{ 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001 },
{ 0x0708, 0x0506, 0x0b0c, 0x090a, 0x0f00, 0x0d0e, 0x0304, 0x0102 },
{ 0x0809, 0x0607, 0x0c0d, 0x0a0b, 0x0001, 0x0e0f, 0x0405, 0x0203 },
{ 0x090a, 0x0708, 0x0d0e, 0x0b0c, 0x0102, 0x0f00, 0x0506, 0x0304 },
{0x0a0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405},
{0x0b0c, 0x090a, 0x0f00, 0x0d0e, 0x0304, 0x0102, 0x0708, 0x0506},
{0x0c0d, 0x0a0b, 0x0001, 0x0e0f, 0x0405, 0x0203, 0x0809, 0x0607},
{0x0d0e, 0x0b0c, 0x0102, 0x0f00, 0x0506, 0x0304, 0x090a, 0x0708},
{ 0x0a0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405 },
{ 0x0b0c, 0x090a, 0x0f00, 0x0d0e, 0x0304, 0x0102, 0x0708, 0x0506 },
{ 0x0c0d, 0x0a0b, 0x0001, 0x0e0f, 0x0405, 0x0203, 0x0809, 0x0607 },
{ 0x0d0e, 0x0b0c, 0x0102, 0x0f00, 0x0506, 0x0304, 0x090a, 0x0708 },
{0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809},
{0x0f00, 0x0d0e, 0x0304, 0x0102, 0x0708, 0x0506, 0x0b0c, 0x090a},
{0x0001, 0x0e0f, 0x0405, 0x0203, 0x0809, 0x0607, 0x0c0d, 0x0a0b},
{0x0102, 0x0f00, 0x0506, 0x0304, 0x090a, 0x0708, 0x0d0e, 0x0b0c}
{ 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809 },
{ 0x0f00, 0x0d0e, 0x0304, 0x0102, 0x0708, 0x0506, 0x0b0c, 0x090a },
{ 0x0001, 0x0e0f, 0x0405, 0x0203, 0x0809, 0x0607, 0x0c0d, 0x0a0b },
{ 0x0102, 0x0f00, 0x0506, 0x0304, 0x090a, 0x0708, 0x0d0e, 0x0b0c }
},
{ // rol_l2b_keys
{0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d},
{0x030c, 0x0102, 0x0400, 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e},
{0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f},
{0x0d0e, 0x030c, 0x0102, 0x0400, 0x0506, 0x0b04, 0x090a, 0x0f08},
{ 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d },
{ 0x030c, 0x0102, 0x0400, 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e },
{ 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f },
{ 0x0d0e, 0x030c, 0x0102, 0x0400, 0x0506, 0x0b04, 0x090a, 0x0f08 },
{0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809},
{0x0f08, 0x0d0e, 0x030c, 0x0102, 0x0400, 0x0506, 0x0b04, 0x090a},
{0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b},
{0x090a, 0x0f08, 0x0d0e, 0x030c, 0x0102, 0x0400, 0x0506, 0x0b04},
{ 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809 },
{ 0x0f08, 0x0d0e, 0x030c, 0x0102, 0x0400, 0x0506, 0x0b04, 0x090a },
{ 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405, 0x0a0b },
{ 0x090a, 0x0f08, 0x0d0e, 0x030c, 0x0102, 0x0400, 0x0506, 0x0b04 },
{0x0a0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405},
{0x0b04, 0x090a, 0x0f08, 0x0d0e, 0x030c, 0x0102, 0x0400, 0x0506},
{0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607},
{0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e, 0x030c, 0x0102, 0x0400},
{ 0x0a0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405 },
{ 0x0b04, 0x090a, 0x0f08, 0x0d0e, 0x030c, 0x0102, 0x0400, 0x0506 },
{ 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607 },
{ 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e, 0x030c, 0x0102, 0x0400 },
{0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001},
{0x0400, 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e, 0x030c, 0x0102},
{0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203},
{0x0102, 0x0400, 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e, 0x030c}
{ 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001 },
{ 0x0400, 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e, 0x030c, 0x0102 },
{ 0x0001, 0x0607, 0x0405, 0x0a0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203 },
{ 0x0102, 0x0400, 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e, 0x030c }
},
{ // ror_l2b_keys
{0x0203, 0x0001, 0x0607, 0x0405, 0x0c0b, 0x0809, 0x0e0f, 0x0c0d},
{0x0102, 0x0700, 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e, 0x030c},
{0x0001, 0x0607, 0x0405, 0x0c0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203},
{0x0700, 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e, 0x030c, 0x0102},
{ 0x0203, 0x0001, 0x0607, 0x0405, 0x0c0b, 0x0809, 0x0e0f, 0x0c0d },
{ 0x0102, 0x0700, 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e, 0x030c },
{ 0x0001, 0x0607, 0x0405, 0x0c0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203 },
{ 0x0700, 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e, 0x030c, 0x0102 },
{0x0607, 0x0405, 0x0c0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001},
{0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e, 0x030c, 0x0102, 0x0700},
{0x0405, 0x0c0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607},
{0x0b04, 0x090a, 0x0f08, 0x0d0e, 0x030c, 0x0102, 0x0700, 0x0506},
{ 0x0607, 0x0405, 0x0c0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001 },
{ 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e, 0x030c, 0x0102, 0x0700 },
{ 0x0405, 0x0c0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607 },
{ 0x0b04, 0x090a, 0x0f08, 0x0d0e, 0x030c, 0x0102, 0x0700, 0x0506 },
{0x0c0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405},
{0x090a, 0x0f08, 0x0d0e, 0x030c, 0x0102, 0x0700, 0x0506, 0x0b04},
{0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405, 0x0c0b},
{0x0f08, 0x0d0e, 0x030c, 0x0102, 0x0700, 0x0506, 0x0b04, 0x090a},
{ 0x0c0b, 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405 },
{ 0x090a, 0x0f08, 0x0d0e, 0x030c, 0x0102, 0x0700, 0x0506, 0x0b04 },
{ 0x0809, 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405, 0x0c0b },
{ 0x0f08, 0x0d0e, 0x030c, 0x0102, 0x0700, 0x0506, 0x0b04, 0x090a },
{0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405, 0x0c0b, 0x0809},
{0x0d0e, 0x030c, 0x0102, 0x0700, 0x0506, 0x0b04, 0x090a, 0x0f08},
{0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405, 0x0c0b, 0x0809, 0x0e0f},
{0x030c, 0x0102, 0x0700, 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e}
{ 0x0e0f, 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405, 0x0c0b, 0x0809 },
{ 0x0d0e, 0x030c, 0x0102, 0x0700, 0x0506, 0x0b04, 0x090a, 0x0f08 },
{ 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405, 0x0c0b, 0x0809, 0x0e0f },
{ 0x030c, 0x0102, 0x0700, 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e }
}
};
#ifndef __SSSE3__
// SSE2 fallback (no SSSE3 pshufb): load the 8x16-bit RSP source operand and
// replicate/reorder its lanes according to the opcode's 4-bit "element"
// specifier, using only shuffle-immediate intrinsics.
//   element 0-1  : vector used unshuffled ("--")
//   element 2-3  : quarter broadcast (0q/1q)
//   element 4-7  : half broadcast (0h..3h)
//   element 8-15 : single-lane broadcast (0w..7w)
// NOTE(review): lane indices below are in host UINT16-array order, which is
// byte-swapped relative to RSP element order -- confirm against
// vec_load_unshuffled_operand and the shuffle_keys table.
rsp_vec_t rsp_cop2::vec_load_and_shuffle_operand(const UINT16* src, UINT32 element)
{
if (element >= 8) // element => 0w ... 7w
{
// Broadcast one lane: duplicate the word into a 32-bit pair, load it
// into the low dword, then splat that dword across all four positions.
UINT16 word_lo;
memcpy(&word_lo, src + (element - 8), sizeof(word_lo));
UINT64 dword = word_lo | ((UINT32) word_lo << 16);
return _mm_shuffle_epi32(_mm_loadl_epi64((rsp_vec_t*) &dword), _MM_SHUFFLE(0,0,0,0));
}
else if (element >= 4) // element => 0h ... 3h
{
// Half broadcast: pick the two lanes at (element-4) and element, pack
// them into the low dword, pair-duplicate each word with shufflelo
// (giving lo,lo,hi,hi in the low half), then duplicate each dword so
// the result is lo x4 followed by hi x4.
UINT16 word_lo;
UINT16 word_hi;
memcpy(&word_hi, src + element - 0, sizeof(word_hi));
memcpy(&word_lo, src + element - 4, sizeof(word_lo));
UINT64 dword = word_lo | ((UINT32) word_hi << 16);
rsp_vec_t v = _mm_loadl_epi64((rsp_vec_t*) &dword);
v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(1,1,0,0));
return _mm_shuffle_epi32(v, _MM_SHUFFLE(1,1,0,0));
}
else if (element >= 2) // element => 0q ... 1q
{
// Quarter broadcast: within each 64-bit half, duplicate either the
// odd-indexed words (element == 2) or the even-indexed words
// (element == 3) into both slots of each pair.
rsp_vec_t v = vec_load_unshuffled_operand(src);
if (element == 2) {
v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(3,3,1,1));
v = _mm_shufflehi_epi16(v, _MM_SHUFFLE(3,3,1,1));
}
else
{
v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(2,2,0,0));
v = _mm_shufflehi_epi16(v, _MM_SHUFFLE(2,2,0,0));
}
return v;
}
// element 0 or 1: no shuffle, use the operand as-is.
return vec_load_unshuffled_operand(src);
}
#else
// SSSE3 path: the entire element shuffle collapses to one pshufb using a
// precomputed per-element byte-permutation mask from m_vec_helpers.
// Both _mm_load_si128 calls are the aligned form, so src and the key table
// must be 16-byte aligned.
rsp_vec_t rsp_cop2::vec_load_and_shuffle_operand(const UINT16* src, UINT32 element)
{
const rsp_vec_t mask = _mm_load_si128((rsp_vec_t*) m_vec_helpers.shuffle_keys[element]);
return _mm_shuffle_epi8(_mm_load_si128((rsp_vec_t*) src), mask);
}
#endif
#endif
extern offs_t rsp_dasm_one(char *buffer, offs_t pc, UINT32 op);
@ -1092,6 +1146,17 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Multiplies signed integer by signed integer * 2
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t acc_lo, acc_mid, acc_hi;
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vmulf_vmulu(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
write_acc_lo(acc, acc_lo);
write_acc_mid(acc, acc_mid);
write_acc_hi(acc, acc_hi);
#else
for (i=0; i < 8; i++)
{
@ -1131,6 +1196,17 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
//
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t acc_lo, acc_mid, acc_hi;
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vmulf_vmulu(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
write_acc_lo(acc, acc_lo);
write_acc_mid(acc, acc_mid);
write_acc_hi(acc, acc_hi);
#else
for (i=0; i < 8; i++)
{
@ -1174,6 +1250,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// The low slice of accumulator is stored into destination element
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t acc_lo, acc_mid, acc_hi;
acc_lo = read_acc_lo(acc);
acc_mid = read_acc_mid(acc);
acc_hi = read_acc_hi(acc);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vmadl_vmudl(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
write_acc_lo(acc, acc_lo);
write_acc_mid(acc, acc_mid);
write_acc_hi(acc, acc_hi);
#else
for (i=0; i < 8; i++)
{
@ -1204,6 +1295,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// The middle slice of accumulator is stored into destination element
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t acc_lo, acc_mid, acc_hi;
acc_lo = read_acc_lo(acc);
acc_mid = read_acc_mid(acc);
acc_hi = read_acc_hi(acc);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vmadm_vmudm(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
write_acc_lo(acc, acc_lo);
write_acc_mid(acc, acc_mid);
write_acc_hi(acc, acc_hi);
#else
for (i=0; i < 8; i++)
{
@ -1235,6 +1341,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// The low slice of accumulator is stored into destination element
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t acc_lo, acc_mid, acc_hi;
acc_lo = read_acc_lo(acc);
acc_mid = read_acc_mid(acc);
acc_hi = read_acc_hi(acc);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vmadn_vmudn(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
write_acc_lo(acc, acc_lo);
write_acc_mid(acc, acc_mid);
write_acc_hi(acc, acc_hi);
#else
for (i=0; i < 8; i++)
{
@ -1265,6 +1386,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// The highest 32 bits of accumulator is saturated into destination element
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t acc_lo, acc_mid, acc_hi;
acc_lo = read_acc_lo(acc);
acc_mid = read_acc_mid(acc);
acc_hi = read_acc_hi(acc);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vmadh_vmudh(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
write_acc_lo(acc, acc_lo);
write_acc_mid(acc, acc_mid);
write_acc_hi(acc, acc_hi);
#else
for (i=0; i < 8; i++)
{
@ -1296,6 +1432,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// The result is added to accumulator
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t acc_lo, acc_mid, acc_hi;
acc_lo = read_acc_lo(acc);
acc_mid = read_acc_mid(acc);
acc_hi = read_acc_hi(acc);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vmacf_vmacu(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
write_acc_lo(acc, acc_lo);
write_acc_mid(acc, acc_mid);
write_acc_hi(acc, acc_hi);
#else
for (i=0; i < 8; i++)
{
@ -1330,6 +1481,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
//
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t acc_lo, acc_mid, acc_hi;
acc_lo = read_acc_lo(acc);
acc_mid = read_acc_mid(acc);
acc_hi = read_acc_hi(acc);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vmacf_vmacu(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
write_acc_lo(acc, acc_lo);
write_acc_mid(acc, acc_mid);
write_acc_hi(acc, acc_hi);
#else
for (i = 0; i < 8; i++)
{
@ -1383,6 +1549,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// The low slice of accumulator is stored into destination element
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t acc_lo, acc_mid, acc_hi;
acc_lo = read_acc_lo(acc);
acc_mid = read_acc_mid(acc);
acc_hi = read_acc_hi(acc);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vmadl_vmudl(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
write_acc_lo(acc, acc_lo);
write_acc_mid(acc, acc_mid);
write_acc_hi(acc, acc_hi);
#else
for (i = 0; i < 8; i++)
{
@ -1415,6 +1596,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// The middle slice of accumulator is stored into destination element
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t acc_lo, acc_mid, acc_hi;
acc_lo = read_acc_lo(acc);
acc_mid = read_acc_mid(acc);
acc_hi = read_acc_hi(acc);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vmadm_vmudm(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
write_acc_lo(acc, acc_lo);
write_acc_mid(acc, acc_mid);
write_acc_hi(acc, acc_hi);
#else
for (i=0; i < 8; i++)
{
@ -1449,6 +1645,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// The low slice of accumulator is stored into destination element
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t acc_lo, acc_mid, acc_hi;
acc_lo = read_acc_lo(acc);
acc_mid = read_acc_mid(acc);
acc_hi = read_acc_hi(acc);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vmadn_vmudn(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
write_acc_lo(acc, acc_lo);
write_acc_mid(acc, acc_mid);
write_acc_hi(acc, acc_hi);
#else
for (i=0; i < 8; i++)
{
@ -1486,6 +1697,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// The highest 32 bits of accumulator is saturated into destination element
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t acc_lo, acc_mid, acc_hi;
acc_lo = read_acc_lo(acc);
acc_mid = read_acc_mid(acc);
acc_hi = read_acc_hi(acc);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vmadh_vmudh(op, vs, vt_shuffle, vec_zero(), &acc_lo, &acc_mid, &acc_hi);
write_acc_lo(acc, acc_lo);
write_acc_mid(acc, acc_mid);
write_acc_hi(acc, acc_hi);
#else
for (i = 0; i < 8; i++)
{
@ -1519,6 +1745,18 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// TODO: check VS2REG == VDREG
#if USE_SIMD
rsp_vec_t acc_lo;
UINT16 *acc = m_acc.s;
rsp_vec_t carry = read_vco_lo(m_flags[RSP_VCO].s);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vadd(vs, vt_shuffle, carry, &acc_lo);
write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
write_acc_lo(acc, acc_lo);
#else
for (i=0; i < 8; i++)
{
@ -1551,6 +1789,18 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// TODO: check VS2REG == VDREG
#if USE_SIMD
rsp_vec_t acc_lo;
UINT16 *acc = m_acc.s;
rsp_vec_t carry = read_vco_lo(m_flags[RSP_VCO].s);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vsub(vs, vt_shuffle, carry, &acc_lo);
write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
write_acc_lo(acc, acc_lo);
#else
for (i = 0; i < 8; i++)
{
@ -1583,6 +1833,15 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// the result to destination register
#if USE_SIMD
rsp_vec_t acc_lo;
UINT16 *acc = m_acc.s;
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vabs(vs, vt_shuffle, vec_zero(), &acc_lo);
write_acc_lo(acc, acc_lo);
#else
for (i=0; i < 8; i++)
{
@ -1628,6 +1887,17 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// TODO: check VS2REG = VDREG
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t sn;
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vaddc(vs, vt_shuffle, vec_zero(), &sn);
write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
write_vco_lo(m_flags[RSP_VCO].s, sn);
write_acc_lo(acc, m_v[VDREG].v);
#else
CLEAR_ZERO_FLAGS();
CLEAR_CARRY_FLAGS();
@ -1663,6 +1933,17 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// TODO: check VS2REG = VDREG
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t eq, sn;
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vsubc(vs, vt_shuffle, vec_zero(), &eq, &sn);
write_vco_hi(m_flags[RSP_VCO].s, eq);
write_vco_lo(m_flags[RSP_VCO].s, sn);
write_acc_lo(acc, m_v[VDREG].v);
#else
CLEAR_ZERO_FLAGS();
CLEAR_CARRY_FLAGS();
@ -1700,6 +1981,18 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Stores high, middle or low slice of accumulator to destination vector
#if USE_SIMD
switch (EL)
{
case 8:
break;
case 9:
break;
case 10:
break;
default:
break;
}
#else
switch (EL)
{
@ -1746,6 +2039,22 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Moves the element in VS2 to destination vector
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t le;
rsp_vec_t eq = read_vco_hi(m_flags[RSP_VCO].s);
rsp_vec_t sign = read_vco_lo(m_flags[RSP_VCO].s);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_veq_vge_vlt_vne(op, vs, vt_shuffle, vec_zero(), &le, eq, sign);
write_vcc_hi(m_flags[RSP_VCC].s, vec_zero());
write_vcc_lo(m_flags[RSP_VCC].s, le);
write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
write_acc_lo(acc, m_v[VDREG].v);
#else
CLEAR_COMPARE_FLAGS();
CLEAR_CLIP2_FLAGS();
@ -1797,6 +2106,22 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Moves the element in VS2 to destination vector
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t le;
rsp_vec_t eq = read_vco_hi(m_flags[RSP_VCO].s);
rsp_vec_t sign = read_vco_lo(m_flags[RSP_VCO].s);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_veq_vge_vlt_vne(op, vs, vt_shuffle, vec_zero(), &le, eq, sign);
write_vcc_hi(m_flags[RSP_VCC].s, vec_zero());
write_vcc_lo(m_flags[RSP_VCC].s, le);
write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
write_acc_lo(acc, m_v[VDREG].v);
#else
CLEAR_COMPARE_FLAGS();
CLEAR_CLIP2_FLAGS();
@ -1836,6 +2161,22 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Moves the element in VS2 to destination vector
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t le;
rsp_vec_t eq = read_vco_hi(m_flags[RSP_VCO].s);
rsp_vec_t sign = read_vco_lo(m_flags[RSP_VCO].s);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_veq_vge_vlt_vne(op, vs, vt_shuffle, vec_zero(), &le, eq, sign);
write_vcc_hi(m_flags[RSP_VCC].s, vec_zero());
write_vcc_lo(m_flags[RSP_VCC].s, le);
write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
write_acc_lo(acc, m_v[VDREG].v);
#else
CLEAR_COMPARE_FLAGS();
CLEAR_CLIP2_FLAGS();
@ -1876,6 +2217,22 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Moves the element in VS2 to destination vector
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t le;
rsp_vec_t eq = read_vco_hi(m_flags[RSP_VCO].s);
rsp_vec_t sign = read_vco_lo(m_flags[RSP_VCO].s);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_veq_vge_vlt_vne(op, vs, vt_shuffle, vec_zero(), &le, eq, sign);
write_vcc_hi(m_flags[RSP_VCC].s, vec_zero());
write_vcc_lo(m_flags[RSP_VCC].s, le);
write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
write_acc_lo(acc, m_v[VDREG].v);
#else
CLEAR_COMPARE_FLAGS();
CLEAR_CLIP2_FLAGS();
@ -1915,6 +2272,25 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Vector clip low
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t ge = read_vcc_hi(m_flags[RSP_VCC].s);
rsp_vec_t le = read_vcc_lo(m_flags[RSP_VCC].s);
rsp_vec_t eq = read_vco_hi(m_flags[RSP_VCO].s);
rsp_vec_t sign = read_vco_lo(m_flags[RSP_VCO].s);
rsp_vec_t vce = read_vce(m_flags[RSP_VCE].s);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vcl(vs, vt_shuffle, vec_zero(), &ge, &le, eq, sign, vce);
write_vcc_hi(m_flags[RSP_VCC].s, ge);
write_vcc_lo(m_flags[RSP_VCC].s, le);
write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
write_vce(m_flags[RSP_VCE].s, vec_zero());
write_acc_lo(acc, m_v[VDREG].v);
#else
for (i = 0; i < 8; i++)
{
@ -2012,6 +2388,20 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Vector clip high
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t ge, le, sign, eq, vce;
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vch(vs, vt_shuffle, vec_zero(), &ge, &le, &eq, &sign, &vce);
write_vcc_hi(m_flags[RSP_VCC].s, ge);
write_vcc_lo(m_flags[RSP_VCC].s, le);
write_vco_hi(m_flags[RSP_VCO].s, eq);
write_vco_lo(m_flags[RSP_VCO].s, sign);
write_vce(m_flags[RSP_VCE].s, vce);
write_acc_lo(acc, m_v[VDREG].v);
#else
CLEAR_CARRY_FLAGS();
CLEAR_COMPARE_FLAGS();
@ -2099,6 +2489,20 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Vector clip reverse
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t ge, le;
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vcr(vs, vt_shuffle, vec_zero(), &ge, &le);
write_vcc_hi(m_flags[RSP_VCC].s, ge);
write_vcc_lo(m_flags[RSP_VCC].s, le);
write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
write_vce(m_flags[RSP_VCE].s, vec_zero());
write_acc_lo(acc, m_v[VDREG].v);
#else
CLEAR_CARRY_FLAGS();
CLEAR_COMPARE_FLAGS();
@ -2161,6 +2565,17 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Merges two vectors according to compare flags
#if USE_SIMD
UINT16 *acc = m_acc.s;
// VMRG selects per-element between vs and vt based on the VCC low
// compare flags, so the flag word must be read from RSP_VCC — the
// original indexed m_flags[RSP_VCO] while calling read_vcc_lo(),
// which would merge on carry flags instead of compare flags.
rsp_vec_t le = read_vcc_lo(m_flags[RSP_VCC].s);
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vmrg(vs, vt_shuffle, le);
// VMRG clears both halves of VCO; VCC is not written here.
write_vco_hi(m_flags[RSP_VCO].s, vec_zero());
write_vco_lo(m_flags[RSP_VCO].s, vec_zero());
write_acc_lo(acc, m_v[VDREG].v);
#else
for (i = 0; i < 8; i++)
{
@ -2189,6 +2604,14 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Bitwise AND of two vector registers
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vand_vnand(op, vs, vt_shuffle);
write_acc_lo(acc, m_v[VDREG].v);
#else
for (i = 0; i < 8; i++)
{
@ -2209,6 +2632,14 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Bitwise NOT AND of two vector registers
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vand_vnand(op, vs, vt_shuffle);
write_acc_lo(acc, m_v[VDREG].v);
#else
for (i = 0; i < 8; i++)
{
@ -2229,6 +2660,14 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Bitwise OR of two vector registers
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vor_vnor(op, vs, vt_shuffle);
write_acc_lo(acc, m_v[VDREG].v);
#else
for (i = 0; i < 8; i++)
{
@ -2249,6 +2688,14 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Bitwise NOT OR of two vector registers
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vor_vnor(op, vs, vt_shuffle);
write_acc_lo(acc, m_v[VDREG].v);
#else
for (i=0; i < 8; i++)
{
@ -2269,6 +2716,14 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Bitwise XOR of two vector registers
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vxor_vnxor(op, vs, vt_shuffle);
write_acc_lo(acc, m_v[VDREG].v);
#else
for (i=0; i < 8; i++)
{
@ -2289,6 +2744,14 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Bitwise NOT XOR of two vector registers
#if USE_SIMD
UINT16 *acc = m_acc.s;
rsp_vec_t vs = vec_load_unshuffled_operand(m_v[VS1REG].s);
rsp_vec_t vt_shuffle = vec_load_and_shuffle_operand(m_v[VS2REG].s, EL);
m_v[VDREG].v = vec_vxor_vnxor(op, vs, vt_shuffle);
write_acc_lo(acc, m_v[VDREG].v);
#else
for (i=0; i < 8; i++)
{
@ -2310,6 +2773,12 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Calculates reciprocal
#if USE_SIMD
write_acc_lo(m_acc.s, vec_load_and_shuffle_operand(m_v[VS2REG].s, EL));
INT32 dp = op & m_dp_flag;
m_dp_flag = 0;
m_v[VDREG].v = vec_vrcp_vrsq(op, dp, VS2REG, EL, VDREG, VS1REG);
#else
INT32 shifter = 0;
@ -2373,6 +2842,12 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Calculates reciprocal low part
#if USE_SIMD
write_acc_lo(m_acc.s, vec_load_and_shuffle_operand(m_v[VS2REG].s, EL));
INT32 dp = op & m_dp_flag;
m_dp_flag = 0;
m_v[VDREG].v = vec_vrcp_vrsq(op, dp, VS2REG, EL, VDREG, VS1REG);
#else
INT32 shifter = 0;
@ -2452,6 +2927,11 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Calculates reciprocal high part
#if USE_SIMD
write_acc_lo(m_acc.s, vec_load_and_shuffle_operand(m_v[VS2REG].s, EL));
m_dp_flag = 1;
m_v[VDREG].v = vec_vdivh(VS2REG, EL, VDREG, VS1REG);
#else
m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16;
m_dp_allowed = 1;
@ -2477,6 +2957,8 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Moves element from vector to destination vector
#if USE_SIMD
write_acc_lo(m_acc.s, vec_load_and_shuffle_operand(m_v[VS2REG].s, EL));
m_v[VDREG].v = vec_vmov(VS2REG, EL, VDREG, VS1REG);
#else
VREG_S(VDREG, VS1REG & 7) = VREG_S(VS2REG, EL & 7);
for (i = 0; i < 8; i++)
@ -2497,6 +2979,12 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Calculates reciprocal square-root
#if USE_SIMD
write_acc_lo(m_acc.s, vec_load_and_shuffle_operand(m_v[VS2REG].s, EL));
INT32 dp = op & m_dp_flag;
m_dp_flag = 0;
m_v[VDREG].v = vec_vrcp_vrsq(op, dp, VS2REG, EL, VDREG, VS1REG);
#else
INT32 shifter = 0;
@ -2561,6 +3049,12 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Calculates reciprocal square-root low part
#if USE_SIMD
write_acc_lo(m_acc.s, vec_load_and_shuffle_operand(m_v[VS2REG].s, EL));
INT32 dp = op & m_dp_flag;
m_dp_flag = 0;
m_v[VDREG].v = vec_vrcp_vrsq(op, dp, VS2REG, EL, VDREG, VS1REG);
#else
INT32 shifter = 0;
INT32 rec = (INT16)VREG_S(VS2REG, EL & 7);
@ -2643,6 +3137,11 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
// Calculates reciprocal square-root high part
#if USE_SIMD
write_acc_lo(m_acc.s, vec_load_and_shuffle_operand(m_v[VS2REG].s, EL));
m_dp_flag = 1;
m_v[VDREG].v = vec_vdivh(VS2REG, EL, VDREG, VS1REG);
#else
m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16;
m_dp_allowed = 1;

View File

@ -51,7 +51,7 @@ union VECTOR_REG
{
UINT64 d[2];
UINT32 l[4];
INT16 s[8];
UINT16 s[8];
UINT8 b[16];
#if USE_SIMD
rsp_vec_t v;
@ -187,6 +187,32 @@ protected:
INT32 m_dp_allowed;
#if USE_SIMD
enum rsp_flags_t {
RSP_VCO = 0,
RSP_VCC = 1,
RSP_VCE = 2
};
enum rsp_acc_t {
RSP_ACC_LO = 16,
RSP_ACC_MD = 8,
RSP_ACC_HI = 0,
};
union aligned_rsp_2vect_t {
rsp_vec_t __align[2];
UINT16 s[16];
};
union aligned_rsp_3vect_t {
rsp_vec_t __align[3];
UINT16 s[24];
};
aligned_rsp_2vect_t m_flags[3];
aligned_rsp_3vect_t m_acc;
UINT32 m_dp_flag;
typedef struct
{
rsp_vec_t dummy_for_alignment;
@ -308,7 +334,9 @@ protected:
#include "vcmp.h"
#include "vcl.h"
#include "vcr.h"
#include "vdivh.h"
#include "vmac.h"
#include "vmov.h"
#include "vmrg.h"
#include "vmul.h"
#include "vmulh.h"
@ -316,6 +344,8 @@ protected:
#include "vmulm.h"
#include "vmuln.h"
#include "vor.h"
#include "vrcpsq.h"
#include "vrsq.h"
#include "vsub.h"
#include "vsubc.h"
#include "vxor.h"

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vabs(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo)
inline rsp_vec_t vec_vabs(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo)
{
rsp_vec_t vs_zero = _mm_cmpeq_epi16(vs, zero);
rsp_vec_t sign_lt = _mm_srai_epi16(vs, 15);

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vadd(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t carry, rsp_vec_t *acc_lo)
inline rsp_vec_t vec_vadd(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t carry, rsp_vec_t *acc_lo)
{
// VCC uses unsaturated arithmetic.
rsp_vec_t vd = _mm_add_epi16(vs, vt);

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vaddc(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *sn)
inline rsp_vec_t vec_vaddc(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *sn)
{
rsp_vec_t sat_sum = _mm_adds_epu16(vs, vt);
rsp_vec_t unsat_sum = _mm_add_epi16(vs, vt);

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vand_vnand(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt) {
inline rsp_vec_t vec_vand_vnand(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt) {
rsp_vec_t vmask = _mm_load_si128((rsp_vec_t *) m_vec_helpers.logic_mask[iw & 0x1]);
rsp_vec_t vd = _mm_and_si128(vs, vt);

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vch(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *ge, rsp_vec_t *le, rsp_vec_t *eq, rsp_vec_t *sign, rsp_vec_t *vce) {
inline rsp_vec_t vec_vch(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *ge, rsp_vec_t *le, rsp_vec_t *eq, rsp_vec_t *sign, rsp_vec_t *vce) {
// sign = (vs ^ vt) < 0
*sign = _mm_xor_si128(vs, vt);
*sign = _mm_cmplt_epi16(*sign, zero);

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vcl(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *ge, rsp_vec_t *le, rsp_vec_t eq, rsp_vec_t sign, rsp_vec_t vce)
inline rsp_vec_t vec_vcl(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *ge, rsp_vec_t *le, rsp_vec_t eq, rsp_vec_t sign, rsp_vec_t vce)
{
// sign_negvt = sign ? -vt : vt
rsp_vec_t sign_negvt = _mm_xor_si128(vt, sign);

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_veq_vge_vlt_vne(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *le, rsp_vec_t eq, rsp_vec_t sign)
inline rsp_vec_t vec_veq_vge_vlt_vne(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *le, rsp_vec_t eq, rsp_vec_t sign)
{
rsp_vec_t equal = _mm_cmpeq_epi16(vs, vt);

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vcr(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *ge, rsp_vec_t *le) {
inline rsp_vec_t vec_vcr(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *ge, rsp_vec_t *le) {
// sign = (vs ^ vt) < 0
rsp_vec_t sign = _mm_xor_si128(vs, vt);
sign = _mm_srai_epi16(sign, 15);

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
rsp_vec_t vec_vdivh(UINT32 src, UINT32 e, UINT32 dest, UINT32 de)
inline rsp_vec_t vec_vdivh(UINT32 src, UINT32 e, UINT32 dest, UINT32 de)
{
// Get the element from VT.
m_div_in = m_v[src].s[e & 0x7];

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vmacf_vmacu(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_mid, rsp_vec_t *acc_hi)
inline rsp_vec_t vec_vmacf_vmacu(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_mid, rsp_vec_t *acc_hi)
{
// Get the product and shift it over
// being sure to save the carries.

View File

@ -1,9 +1,9 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
rsp_vec_t vec_vmov(UINT32 src, UINT32 e, UINT32 dest, UINT32 de)
inline rsp_vec_t vec_vmov(UINT32 src, UINT32 e, UINT32 dest, UINT32 de)
{
// Get the element from VT and write out the upper part of the result.
m_v[dest].s[de & 0x7] = m_v[src].s[e & 0x7];
return rsp_vect_load_unshuffled_operand(m_v[dest].s);
return vec_load_unshuffled_operand(m_v[dest].s);
}

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vmrg(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t le)
inline rsp_vec_t vec_vmrg(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t le)
{
#ifdef __SSE4_1__
return _mm_blendv_epi8(vt, vs, le);

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t rsp_vmudh(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
inline rsp_vec_t rsp_vmudh(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
{
*acc_md = _mm_mullo_epi16(vs, vt);
*acc_hi = _mm_mulhi_epi16(vs, vt);

View File

@ -5,7 +5,7 @@
// TODO: CHECK ME.
//
static inline rsp_vec_t vec_vmulf_vmulu(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
inline rsp_vec_t vec_vmulf_vmulu(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
{
rsp_vec_t lo = _mm_mullo_epi16(vs, vt);
rsp_vec_t round = _mm_cmpeq_epi16(zero, zero);

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vmadh_vmudh(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
inline rsp_vec_t vec_vmadh_vmudh(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
{
rsp_vec_t lo = _mm_mullo_epi16(vs, vt);
rsp_vec_t hi = _mm_mulhi_epi16(vs, vt);

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vmadl_vmudl(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
inline rsp_vec_t vec_vmadl_vmudl(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
{
rsp_vec_t hi = _mm_mulhi_epu16(vs, vt);

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vmadm_vmudm(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
inline rsp_vec_t vec_vmadm_vmudm(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
{
rsp_vec_t lo = _mm_mullo_epi16(vs, vt);
rsp_vec_t hi = _mm_mulhi_epu16(vs, vt);

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vmadn_vmudn(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
inline rsp_vec_t vec_vmadn_vmudn(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
{
rsp_vec_t lo = _mm_mullo_epi16(vs, vt);
rsp_vec_t hi = _mm_mulhi_epu16(vs, vt);

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vor_vnor(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt)
inline rsp_vec_t vec_vor_vnor(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt)
{
rsp_vec_t vmask = _mm_load_si128((rsp_vec_t *) m_vec_helpers.logic_mask[iw & 0x1]);

View File

@ -1,10 +1,8 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vrcp_vrsq(UINT32 iw, INT32 dp, UINT32 src, UINT32 e, UINT32 dest, UINT32 de)
inline rsp_vec_t vec_vrcp_vrsq(UINT32 iw, INT32 dp, UINT32 src, UINT32 e, UINT32 dest, UINT32 de)
{
UINT32 shift, idx;
// Get the element from VT.
INT16 vt = m_v[src].s[e & 0x7];
@ -37,7 +35,7 @@ static inline rsp_vec_t vec_vrcp_vrsq(UINT32 iw, INT32 dp, UINT32 src, UINT32 e,
if (iw & 0x4) // VRSQ
{
idx = (idx | 0x200) & 0x3FE | (shift % 2);
idx = ((idx | 0x200) & 0x3fe) | (shift % 2);
result = rsp_divtable[idx];
result = ((0x10000 | result) << 14) >> ((31 - shift) >> 1);

View File

@ -19,21 +19,22 @@ rsp_vec_t vec_vrsq(INT32 dp, UINT32 src, UINT32 e, UINT32 dest, UINT32 de)
}
// Handle edge cases.
INT32 result;
if (data == 0)
{
result = 0x7fffFFFFU;
result = 0x7fffffff;
}
else if (input == -32768)
{
result = 0xffff0000U;
result = 0xffff0000;
}
else // Main case: compute the reciprocal.
{
UINT32 shift = count_leading_zeros(data);
UINT32 idx = (((UINT64) data << shift) & 0x7FC00000U) >> 22;
idx = (idx | 0x200) & 0x3FE | (shift % 2);
INT32 result = rsp_reciprocal_rom[idx];
UINT32 idx = (((UINT64) data << shift) & 0x7fc00000) >> 22;
idx = ((idx | 0x200) & 0x3fe) | (shift % 2);
result = rsp_divtable[idx];
result = ((0x10000 | result) << 14) >> ((31 - shift) >> 1);
result = result ^ input_mask;

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t rsp_vsub(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t carry, rsp_vec_t *acc_lo)
inline rsp_vec_t vec_vsub(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t carry, rsp_vec_t *acc_lo)
{
// acc_lo uses saturated arithmetic.
rsp_vec_t unsat_diff = _mm_sub_epi16(vt, carry);

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vsubc(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *eq, rsp_vec_t *sn)
inline rsp_vec_t vec_vsubc(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *eq, rsp_vec_t *sn)
{
rsp_vec_t sat_udiff = _mm_subs_epu16(vs, vt);
rsp_vec_t equal = _mm_cmpeq_epi16(vs, vt);

View File

@ -1,7 +1,7 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
static inline rsp_vec_t vec_vxor_vnxor(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt)
inline rsp_vec_t vec_vxor_vnxor(UINT32 iw, rsp_vec_t vs, rsp_vec_t vt)
{
rsp_vec_t vmask = _mm_load_si128((rsp_vec_t *) m_vec_helpers.logic_mask[iw & 0x1]);