voodoo: In the SSE path, use a left shift by 8 instead of a floating-point multiply by 256 for the perspective correction calculation.

This commit is contained in:
Ted Green 2020-07-02 13:44:32 -06:00
parent fb90cac2f2
commit 677f4a9b39
2 changed files with 7 additions and 2 deletions

View File

@@ -2843,7 +2843,12 @@ inline rgbaint_t ATTR_FORCE_INLINE voodoo_device::tmu_state::genTexture(int32_t
}
else
{
#if ((!defined(MAME_DEBUG) || defined(__OPTIMIZE__)) && (defined(__SSE2__) || defined(_MSC_VER)) && defined(PTR64))
// Extra right shift by 8 because the SSE stw_t class stores s and t pre-shifted left by 8
iterstw.get_st_shiftr(s, t, (14 + 10 + 8));
#else
iterstw.get_st_shiftr(s, t, (14 + 10));
#endif
}
/* clamp W */

View File

@@ -1416,7 +1416,7 @@ public:
stw_t(const stw_t& other) = default;
stw_t &operator=(const stw_t& other) = default;
void set(s64 s, s64 t, s64 w) { m_st = _mm_set_pd(s, t); m_w = _mm_set1_pd(w); }
void set(s64 s, s64 t, s64 w) { m_st = _mm_set_pd(s << 8, t << 8); m_w = _mm_set1_pd(w); }
int is_w_neg() const { return _mm_comilt_sd(m_w, _mm_set1_pd(0.0)); }
void get_st_shiftr(s32 &s, s32 &t, const s32 &shift) const
{
@@ -1434,7 +1434,7 @@ public:
{
__m128d tmp = _mm_div_pd(m_st, m_w);
// Keep 8 fractional bits in the converted integer result
tmp = _mm_mul_pd(tmp, _mm_set1_pd(256.0));
//tmp = _mm_mul_pd(tmp, _mm_set1_pd(256.0));
__m128i tmp2 = _mm_cvttpd_epi32(tmp);
#ifdef __SSE4_1__
sow = _mm_extract_epi32(tmp2, 1);