voodoo: In the SSE path, use a left shift by 8 instead of a floating-point multiply by 256 for the perspective-correction calculation.

2025-07-12 13:11:05 +03:00 · 2020-07-02 13:44:32 -06:00 · 2020-07-02 13:44:32 -06:00 · 677f4a9b39
commit 677f4a9b39
parent fb90cac2f2
2 changed files with 7 additions and 2 deletions
--- a/src/devices/video/vooddefs.ipp
+++ b/src/devices/video/vooddefs.ipp
@@ -2843,7 +2843,12 @@ inline rgbaint_t ATTR_FORCE_INLINE voodoo_device::tmu_state::genTexture(int32_t
 	}
 	else
 	{
+#if ((!defined(MAME_DEBUG) || defined(__OPTIMIZE__)) && (defined(__SSE2__) || defined(_MSC_VER)) && defined(PTR64))
+		// Extra shift by 8 because the SSE stw_t class stores s and t pre-shifted left by 8 bits
+		iterstw.get_st_shiftr(s, t, (14 + 10 + 8));
+#else
 		iterstw.get_st_shiftr(s, t, (14 + 10));
+#endif
 	}

 	/* clamp W */
--- a/src/devices/video/voodoo.h
+++ b/src/devices/video/voodoo.h
@@ -1416,7 +1416,7 @@ public:
 	stw_t(const stw_t& other) = default;
 	stw_t &operator=(const stw_t& other) = default;

-	void set(s64 s, s64 t, s64 w) { m_st = _mm_set_pd(s, t); m_w = _mm_set1_pd(w); }
+	void set(s64 s, s64 t, s64 w) { m_st = _mm_set_pd(s << 8, t << 8); m_w = _mm_set1_pd(w); }
 	int is_w_neg() const { return _mm_comilt_sd(m_w, _mm_set1_pd(0.0)); }
 	void get_st_shiftr(s32 &s, s32 &t, const s32 &shift) const
 	{
@@ -1434,7 +1434,7 @@ public:
 	{
 		__m128d tmp = _mm_div_pd(m_st, m_w);
 		// Allow for 8 bits of decimal in integer
-		tmp = _mm_mul_pd(tmp, _mm_set1_pd(256.0));
+		//tmp = _mm_mul_pd(tmp, _mm_set1_pd(256.0));
 		__m128i tmp2 = _mm_cvttpd_epi32(tmp);
 #ifdef __SSE4_1__
 		sow = _mm_extract_epi32(tmp2, 1);