From 2620e569848a1ea00a3909275388cde1f4463db5 Mon Sep 17 00:00:00 2001 From: smf- Date: Wed, 17 Apr 2013 02:15:35 +0000 Subject: [PATCH] Improved FLAG calculation in NCDS, CDP, NCDT, NCCS, CC, NCS, NCT & NCCT by checking for overflow after each addition. [smf] --- src/emu/cpu/psx/gte.c | 94 ++++++++++++++++++++++--------------------- src/emu/cpu/psx/gte.h | 57 +++++++++++++++++++++++--- 2 files changed, 100 insertions(+), 51 deletions(-) diff --git a/src/emu/cpu/psx/gte.c b/src/emu/cpu/psx/gte.c index 115bebad1e7..f5d3ddaf55c 100644 --- a/src/emu/cpu/psx/gte.c +++ b/src/emu/cpu/psx/gte.c @@ -275,17 +275,33 @@ void gte::setcp2cr( UINT32 pc, int reg, UINT32 value ) m_cp2cr[ reg ].d = value; } -INT64 gte::BOUNDS( INT64 value, INT64 max, int max_flag, INT64 min, int min_flag ) +INLINE INT64 gte_shift( INT64 a, int sf ) { - if( value > max ) + if( sf > 0 ) + { + return a >> 12; + } + else if( sf < 0 ) + { + return a << 12; + } + + return a; +} + +INT32 gte::BOUNDS( acc value, int max_flag, int min_flag ) +{ + if( value.positive_overflow() ) { FLAG |= max_flag; } - else if( value < min ) + + if( value.negative_overflow() ) { FLAG |= min_flag; } - return value; + + return gte_shift( value.value(), m_sf ); } static const UINT16 reciprocals[ 32768 ]= @@ -2361,25 +2377,11 @@ INLINE UINT32 gte_divide( INT16 numerator, UINT16 denominator ) return 0xffffffff; } -INLINE INT64 gte_shift( INT64 a, int sf ) -{ - if( sf > 0 ) - { - return a >> 12; - } - else if( sf < 0 ) - { - return a << 12; - } - - return a; -} - /* Setting bits 12 & 19-22 in FLAG does not set bit 31 */ -INT64 gte::A1( INT64 a ) { m_mac1 = BOUNDS( a, U64(0x7ffffffffff), ( 1 << 31 ) | ( 1 << 30 ), U64(-0x80000000000), ( 1 << 31 ) | ( 1 << 27 ) ); return gte_shift( m_mac1, m_sf ); } -INT64 gte::A2( INT64 a ) { m_mac2 = BOUNDS( a, U64(0x7ffffffffff), ( 1 << 31 ) | ( 1 << 29 ), U64(-0x80000000000), ( 1 << 31 ) | ( 1 << 26 ) ); return gte_shift( m_mac2, m_sf ); } -INT64 gte::A3( INT64 a ) { m_mac3 = BOUNDS( a, U64(0x7ffffffffff), ( 1 << 31 ) | ( 1 << 28 ), U64(-0x80000000000), ( 1 << 31 ) | ( 1 << 25 ) ); return gte_shift( m_mac3, m_sf ); } +INT32 gte::A1( acc a ) { m_mac1 = a.value(); return BOUNDS( a, ( 1 << 31 ) | ( 1 << 30 ), ( 1 << 31 ) | ( 1 << 27 ) ); } +INT32 gte::A2( acc a ) { m_mac2 = a.value(); return BOUNDS( a, ( 1 << 31 ) | ( 1 << 29 ), ( 1 << 31 ) | ( 1 << 26 ) ); } +INT32 gte::A3( acc a ) { m_mac3 = a.value(); return BOUNDS( a, ( 1 << 31 ) | ( 1 << 28 ), ( 1 << 31 ) | ( 1 << 25 ) ); } INT32 gte::Lm_B1( INT32 a, int lm ) { return LIM( a, 0x7fff, -0x8000 * !lm, ( 1 << 31 ) | ( 1 << 24 ) ); } INT32 gte::Lm_B2( INT32 a, int lm ) { return LIM( a, 0x7fff, -0x8000 * !lm, ( 1 << 31 ) | ( 1 << 23 ) ); } INT32 gte::Lm_B3( INT32 a, int lm ) { return LIM( a, 0x7fff, -0x8000 * !lm, ( 1 << 22 ) ); } @@ -2428,7 +2430,7 @@ UINT32 gte::Lm_E( UINT32 result ) return result; } -INT64 gte::F( INT64 a ) { m_mac0 = BOUNDS( a, U64(0x7fffffff), ( 1 << 31 ) | ( 1 << 16 ), U64(-0x80000000), ( 1 << 31 ) | ( 1 << 15 ) ); return m_mac0; } +INT32 gte::F( INT64 a ) { m_mac0 = a; if( a > S64( 0x7fffffff ) ) FLAG |= ( 1 << 31 ) | ( 1 << 16 ); if( a < S64(-0x80000000) ) FLAG |= ( 1 << 31 ) | ( 1 << 15 ); return a; } INT32 gte::Lm_G1( INT32 a ) { return LIM( a, 0x3ff, -0x400, ( 1 << 31 ) | ( 1 << 14 ) ); } INT32 gte::Lm_G2( INT32 a ) { return LIM( a, 0x3ff, -0x400, ( 1 << 31 ) | ( 1 << 13 ) ); } @@ -2569,9 +2571,9 @@ int gte::docop2( UINT32 pc, int gteop ) IR1 = Lm_B1( MAC1, lm ); IR2 = Lm_B2( MAC2, lm ); IR3 = Lm_B3( MAC3, lm ); - MAC1 = A1( ( (INT64) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) ); - MAC2 = A2( ( (INT64) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) ); - MAC3 = A3( ( (INT64) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) ); + MAC1 = A1( acc( (INT64) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) ); + MAC2 = A2( acc( (INT64) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) ); + MAC3 = A3( acc( (INT64) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) ); IR1 = Lm_B1( MAC1, lm ); IR2 = Lm_B2( MAC2, lm ); IR3 = Lm_B3( MAC3, lm ); @@ -2592,9 +2594,9 @@ int gte::docop2( UINT32 pc, int gteop ) case 0x14: GTELOG( pc, "%08x CDP", gteop ); - MAC1 = A1( ( (INT64) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) ); - MAC2 = A2( ( (INT64) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) ); - MAC3 = A3( ( (INT64) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) ); + MAC1 = A1( acc( (INT64) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) ); + MAC2 = A2( acc( (INT64) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) ); + MAC3 = A3( acc( (INT64) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) ); IR1 = Lm_B1( MAC1, lm ); IR2 = Lm_B2( MAC2, lm ); IR3 = Lm_B3( MAC3, lm ); @@ -2623,9 +2625,9 @@ int gte::docop2( UINT32 pc, int gteop ) IR1 = Lm_B1( MAC1, lm ); IR2 = Lm_B2( MAC2, lm ); IR3 = Lm_B3( MAC3, lm ); - MAC1 = A1( ( (INT64) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) ); - MAC2 = A2( ( (INT64) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) ); - MAC3 = A3( ( (INT64) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) ); + MAC1 = A1( acc( (INT64) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) ); + MAC2 = A2( acc( (INT64) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) ); + MAC3 = A3( acc( (INT64) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) ); IR1 = Lm_B1( MAC1, lm ); IR2 = Lm_B2( MAC2, lm ); IR3 = Lm_B3( MAC3, lm ); @@ -2653,9 +2655,9 @@ int gte::docop2( UINT32 pc, int gteop ) IR1 = Lm_B1( MAC1, lm ); IR2 = Lm_B2( MAC2, lm ); IR3 = Lm_B3( MAC3, lm ); - MAC1 = A1( ( (INT64) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) ); - MAC2 = A2( ( (INT64) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) ); - MAC3 = A3( ( (INT64) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) ); + MAC1 = A1( acc( (INT64) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) ); + MAC2 = A2( acc( (INT64) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) ); + MAC3 = A3( acc( (INT64) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) ); IR1 = Lm_B1( MAC1, lm ); IR2 = Lm_B2( MAC2, lm ); IR3 = Lm_B3( MAC3, lm ); @@ -2676,9 +2678,9 @@ int gte::docop2( UINT32 pc, int gteop ) case 0x1c: GTELOG( pc, "%08x CC", gteop ); - MAC1 = A1( ( ( (INT64) RBK ) << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) ); - MAC2 = A2( ( ( (INT64) GBK ) << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) ); - MAC3 = A3( ( ( (INT64) BBK ) << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) ); + MAC1 = A1( acc( ( (INT64) RBK ) << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) ); + MAC2 = A2( acc( ( (INT64) GBK ) << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) ); + MAC3 = A3( acc( ( (INT64) BBK ) << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) ); IR1 = Lm_B1( MAC1, lm ); IR2 = Lm_B2( MAC2, lm ); IR3 = Lm_B3( MAC3, lm ); @@ -2705,9 +2707,9 @@ int gte::docop2( UINT32 pc, int gteop ) IR1 = Lm_B1( MAC1, lm ); IR2 = Lm_B2( MAC2, lm ); IR3 = Lm_B3( MAC3, lm ); - MAC1 = A1( ( (INT64) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) ); - MAC2 = A2( ( (INT64) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) ); - MAC3 = A3( ( (INT64) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) ); + MAC1 = A1( acc( (INT64) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) ); + MAC2 = A2( acc( (INT64) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) ); + MAC3 = A3( acc( (INT64) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) ); IR1 = Lm_B1( MAC1, lm ); IR2 = Lm_B2( MAC2, lm ); IR3 = Lm_B3( MAC3, lm ); @@ -2730,9 +2732,9 @@ int gte::docop2( UINT32 pc, int gteop ) IR1 = Lm_B1( MAC1, lm ); IR2 = Lm_B2( MAC2, lm ); IR3 = Lm_B3( MAC3, lm ); - MAC1 = A1( ( (INT64) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) ); - MAC2 = A2( ( (INT64) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) ); - MAC3 = A3( ( (INT64) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) ); + MAC1 = A1( acc( (INT64) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) ); + MAC2 = A2( acc( (INT64) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) ); + MAC3 = A3( acc( (INT64) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) ); IR1 = Lm_B1( MAC1, lm ); IR2 = Lm_B2( MAC2, lm ); IR3 = Lm_B3( MAC3, lm ); @@ -2877,9 +2879,9 @@ int gte::docop2( UINT32 pc, int gteop ) IR1 = Lm_B1( MAC1, lm ); IR2 = Lm_B2( MAC2, lm ); IR3 = Lm_B3( MAC3, lm ); - MAC1 = A1( ( (INT64) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) ); - MAC2 = A2( ( (INT64) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) ); - MAC3 = A3( ( (INT64) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) ); + MAC1 = A1( acc( (INT64) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) ); + MAC2 = A2( acc( (INT64) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) ); + MAC3 = A3( acc( (INT64) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) ); IR1 = Lm_B1( MAC1, lm ); IR2 = Lm_B2( MAC2, lm ); IR3 = Lm_B3( MAC3, lm ); diff --git a/src/emu/cpu/psx/gte.h b/src/emu/cpu/psx/gte.h index 0afc9f38e1a..500c1a00bf9 100644 --- a/src/emu/cpu/psx/gte.h +++ b/src/emu/cpu/psx/gte.h @@ -34,11 +34,58 @@ public: int docop2( UINT32 pc, int gteop ); protected: + class acc + { + public: + acc( INT64 value ) : + m_value( value ), + m_positive_overflow( value > S64( 0x7ffffffffff ) ), + m_negative_overflow( value < S64( -0x80000000000 ) ) + { + } + + acc( INT64 value, bool positive_overflow, bool negative_overflow ) : + m_value( value ), + m_positive_overflow( positive_overflow ), + m_negative_overflow( negative_overflow ) + { + } + + acc operator+( INT64 add ) + { + INT64 value = ( ( m_value + add ) << 20 ) >> 20; + + return acc( value, + m_positive_overflow || ( value < 0 && m_value >= 0 && add >= 0 ), + m_negative_overflow || ( value >= 0 && m_value < 0 && add < 0 ) ); + } + + bool positive_overflow() + { + return m_positive_overflow; + } + + bool negative_overflow() + { + return m_negative_overflow; + } + + INT64 value() + { + return m_value; + } + + private: + INT64 m_value; + bool m_positive_overflow; + bool m_negative_overflow; + }; + INT32 LIM( INT32 value, INT32 max, INT32 min, UINT32 flag ); - INT64 BOUNDS( INT64 n_value, INT64 n_max, int n_maxflag, INT64 n_min, int n_minflag ); - INT64 A1( INT64 a ); - INT64 A2( INT64 a ); - INT64 A3( INT64 a ); + INT32 BOUNDS( acc a, int max_flag, int min_flag ); + INT32 A1( acc a ); + INT32 A2( acc a ); + INT32 A3( acc a ); INT32 Lm_B1( INT32 a, int lm ); INT32 Lm_B2( INT32 a, int lm ); INT32 Lm_B3( INT32 a, int lm ); @@ -48,7 +95,7 @@ protected: INT32 Lm_C3( INT32 a ); INT32 Lm_D( INT64 a, int sf ); UINT32 Lm_E( UINT32 result ); - INT64 F( INT64 a ); + INT32 F( INT64 a ); INT32 Lm_G1( INT32 a ); INT32 Lm_G2( INT32 a ); INT32 Lm_H( INT64 value, int sf );