mame/src/devices/cpu/psx/gte.cpp

929 lines
27 KiB
C++

// license:BSD-3-Clause
// copyright-holders:smf
/*
* PlayStation Geometry Transformation Engine emulator
*
* Copyright 2003-2013 smf
*
*/
#include "emu.h"
#include "gte.h"
// GTELOG: optional trace logging for GTE register accesses and operations.
// Flip the "#if 0" to "#if 1" to route messages to logerror(); otherwise the
// call compiles down to an empty inline function.
#if 0
void ATTR_PRINTF(2,3) GTELOG( uint32_t pc, const char *a, ...)
{
	va_list va;
	char s_text[ 1024 ];
	va_start( va, a );
	// Bounded formatting: an over-long message is truncated instead of
	// overrunning s_text.
	vsnprintf( s_text, sizeof( s_text ), a, va );
	va_end( va );
	logerror( "%08x: GTE: %s\n", pc, s_text );
}
#else
static inline void ATTR_PRINTF(2,3) GTELOG( uint32_t pc, const char *a, ...) {}
#endif
// Named views onto the CP2 data register file m_cp2dr[ 0..31 ].
// Each macro expands to an lvalue, so it can be both read and assigned.
// The member selectors (.d/.sd, .w/.sw, .b) pick views of the register union;
// presumably 32-bit, 16-bit half and 8-bit byte views, with the "s" forms
// signed -- confirm against the union declaration.

// Vectors 0-2: x and y share one register (low/high half), z has the next one.
// NOTE(review): vector 0 uses the .sw view while vectors 1 and 2 use .w; the
// code visible in this file only goes through the indexed VX()/VY()/VZ()
// accessors for vectors 1 and 2.
#define VX0 ( m_cp2dr[ 0 ].sw.l )
#define VY0 ( m_cp2dr[ 0 ].sw.h )
#define VZ0 ( m_cp2dr[ 1 ].sw.l )
#define VX1 ( m_cp2dr[ 2 ].w.l )
#define VY1 ( m_cp2dr[ 2 ].w.h )
#define VZ1 ( m_cp2dr[ 3 ].w.l )
#define VX2 ( m_cp2dr[ 4 ].w.l )
#define VY2 ( m_cp2dr[ 4 ].w.h )
#define VZ2 ( m_cp2dr[ 5 ].w.l )
// Register 6: four byte lanes R, G, B and CODE.
#define R ( m_cp2dr[ 6 ].b.l )
#define G ( m_cp2dr[ 6 ].b.h )
#define B ( m_cp2dr[ 6 ].b.h2 )
#define CODE ( m_cp2dr[ 6 ].b.h3 )
#define OTZ ( m_cp2dr[ 7 ].w.l )
// Registers 8-11: IR0..IR3, accessed through the low half.
#define IR0 ( m_cp2dr[ 8 ].sw.l )
#define IR1 ( m_cp2dr[ 9 ].sw.l )
#define IR2 ( m_cp2dr[ 10 ].sw.l )
#define IR3 ( m_cp2dr[ 11 ].sw.l )
// Registers 12-15: SXY0..SXY2 and SXYP, each available whole (.d) or as
// separate x (low half) and y (high half) components.
#define SXY0 ( m_cp2dr[ 12 ].d )
#define SX0 ( m_cp2dr[ 12 ].sw.l )
#define SY0 ( m_cp2dr[ 12 ].sw.h )
#define SXY1 ( m_cp2dr[ 13 ].d )
#define SX1 ( m_cp2dr[ 13 ].sw.l )
#define SY1 ( m_cp2dr[ 13 ].sw.h )
#define SXY2 ( m_cp2dr[ 14 ].d )
#define SX2 ( m_cp2dr[ 14 ].sw.l )
#define SY2 ( m_cp2dr[ 14 ].sw.h )
#define SXYP ( m_cp2dr[ 15 ].d )
#define SXP ( m_cp2dr[ 15 ].sw.l )
#define SYP ( m_cp2dr[ 15 ].sw.h )
// Registers 16-19: SZ0..SZ3, accessed through the low half.
#define SZ0 ( m_cp2dr[ 16 ].w.l )
#define SZ1 ( m_cp2dr[ 17 ].w.l )
#define SZ2 ( m_cp2dr[ 18 ].w.l )
#define SZ3 ( m_cp2dr[ 19 ].w.l )
// Registers 20-22: RGB0..RGB2, each available whole or as four byte lanes.
#define RGB0 ( m_cp2dr[ 20 ].d )
#define R0 ( m_cp2dr[ 20 ].b.l )
#define G0 ( m_cp2dr[ 20 ].b.h )
#define B0 ( m_cp2dr[ 20 ].b.h2 )
#define CD0 ( m_cp2dr[ 20 ].b.h3 )
#define RGB1 ( m_cp2dr[ 21 ].d )
#define R1 ( m_cp2dr[ 21 ].b.l )
#define G1 ( m_cp2dr[ 21 ].b.h )
#define B1 ( m_cp2dr[ 21 ].b.h2 )
#define CD1 ( m_cp2dr[ 21 ].b.h3 )
#define RGB2 ( m_cp2dr[ 22 ].d )
#define R2 ( m_cp2dr[ 22 ].b.l )
#define G2 ( m_cp2dr[ 22 ].b.h )
#define B2 ( m_cp2dr[ 22 ].b.h2 )
#define CD2 ( m_cp2dr[ 22 ].b.h3 )
#define RES1 ( m_cp2dr[ 23 ].d )
// Registers 24-27: MAC0..MAC3 through the .sd view.
#define MAC0 ( m_cp2dr[ 24 ].sd )
#define MAC1 ( m_cp2dr[ 25 ].sd )
#define MAC2 ( m_cp2dr[ 26 ].sd )
#define MAC3 ( m_cp2dr[ 27 ].sd )
// Registers 28-31: IRGB/ORGB (5-bit packed IR1..IR3, see getcp2dr/setcp2dr)
// and LZCS/LZCR (leading-bit count source and result, see setcp2dr).
#define IRGB ( m_cp2dr[ 28 ].d )
#define ORGB ( m_cp2dr[ 29 ].d )
#define LZCS ( m_cp2dr[ 30 ].d )
#define LZCR ( m_cp2dr[ 31 ].d )
// Named views onto the CP2 control register file m_cp2cr[ 0..31 ].
// Each macro expands to an lvalue.

// Registers 0-4: nine R coefficients (R11..R33) packed two per register,
// followed by TRX/TRY/TRZ in registers 5-7.
#define R11 ( m_cp2cr[ 0 ].sw.l )
#define R12 ( m_cp2cr[ 0 ].sw.h )
#define R13 ( m_cp2cr[ 1 ].sw.l )
#define R21 ( m_cp2cr[ 1 ].sw.h )
#define R22 ( m_cp2cr[ 2 ].sw.l )
#define R23 ( m_cp2cr[ 2 ].sw.h )
#define R31 ( m_cp2cr[ 3 ].sw.l )
#define R32 ( m_cp2cr[ 3 ].sw.h )
#define R33 ( m_cp2cr[ 4 ].sw.l )
#define TRX ( m_cp2cr[ 5 ].sd )
#define TRY ( m_cp2cr[ 6 ].sd )
#define TRZ ( m_cp2cr[ 7 ].sd )
// Registers 8-12: nine L coefficients (L11..L33) in the same packing,
// followed by RBK/GBK/BBK in registers 13-15.
#define L11 ( m_cp2cr[ 8 ].sw.l )
#define L12 ( m_cp2cr[ 8 ].sw.h )
#define L13 ( m_cp2cr[ 9 ].sw.l )
#define L21 ( m_cp2cr[ 9 ].sw.h )
#define L22 ( m_cp2cr[ 10 ].sw.l )
#define L23 ( m_cp2cr[ 10 ].sw.h )
#define L31 ( m_cp2cr[ 11 ].sw.l )
#define L32 ( m_cp2cr[ 11 ].sw.h )
#define L33 ( m_cp2cr[ 12 ].sw.l )
#define RBK ( m_cp2cr[ 13 ].sd )
#define GBK ( m_cp2cr[ 14 ].sd )
#define BBK ( m_cp2cr[ 15 ].sd )
// Registers 16-20: nine LR/LG/LB coefficients in the same packing,
// followed by RFC/GFC/BFC in registers 21-23.
#define LR1 ( m_cp2cr[ 16 ].sw.l )
#define LR2 ( m_cp2cr[ 16 ].sw.h )
#define LR3 ( m_cp2cr[ 17 ].sw.l )
#define LG1 ( m_cp2cr[ 17 ].sw.h )
#define LG2 ( m_cp2cr[ 18 ].sw.l )
#define LG3 ( m_cp2cr[ 18 ].sw.h )
#define LB1 ( m_cp2cr[ 19 ].sw.l )
#define LB2 ( m_cp2cr[ 19 ].sw.h )
#define LB3 ( m_cp2cr[ 20 ].sw.l )
#define RFC ( m_cp2cr[ 21 ].sd )
#define GFC ( m_cp2cr[ 22 ].sd )
#define BFC ( m_cp2cr[ 23 ].sd )
// Registers 24-30: OFX/OFY, H, DQA/DQB and ZSF3/ZSF4, used by the projection
// and averaging operations in docop2.
// NOTE(review): H is read through the .sw view here although gte_divide()
// takes it as uint16_t -- the conversion happens at the call.
#define OFX ( m_cp2cr[ 24 ].sd )
#define OFY ( m_cp2cr[ 25 ].sd )
#define H ( m_cp2cr[ 26 ].sw.l )
#define DQA ( m_cp2cr[ 27 ].sw.l )
#define DQB ( m_cp2cr[ 28 ].sd )
#define ZSF3 ( m_cp2cr[ 29 ].sw.l )
#define ZSF4 ( m_cp2cr[ 30 ].sw.l )
// Register 31: FLAG, cleared at the start of every operation in docop2 and
// OR-ed into by the limiter/overflow helpers.
#define FLAG ( m_cp2cr[ 31 ].d )
// Run-time indexed operand selectors (used by MVMVA and the three-vector loops).
// NOTE: the macro argument is not parenthesised in the expansions, so only
// pass a plain variable or constant as n.

// VX/VY/VZ: n = 0..2 selects vector n (data registers 2n and 2n+1);
// any other n selects IR1/IR2/IR3 instead.
#define VX( n ) ( n < 3 ? m_cp2dr[ n << 1 ].sw.l : IR1 )
#define VY( n ) ( n < 3 ? m_cp2dr[ n << 1 ].sw.h : IR2 )
#define VZ( n ) ( n < 3 ? m_cp2dr[ ( n << 1 ) + 1 ].sw.l : IR3 )
// MXij: n = 0..2 selects the coefficient block starting at control register
// 8n (the R, L and LR/LG/LB blocks respectively). Any other n yields the
// fixed substitute values on the right of each ':'.
// Note that "-R << 4" parses as "( -R ) << 4" (unary minus binds tighter).
#define MX11( n ) ( n < 3 ? m_cp2cr[ ( n << 3 ) ].sw.l : -R << 4 )
#define MX12( n ) ( n < 3 ? m_cp2cr[ ( n << 3 ) ].sw.h : R << 4 )
#define MX13( n ) ( n < 3 ? m_cp2cr[ ( n << 3 ) + 1 ].sw.l : IR0 )
#define MX21( n ) ( n < 3 ? m_cp2cr[ ( n << 3 ) + 1 ].sw.h : R13 )
#define MX22( n ) ( n < 3 ? m_cp2cr[ ( n << 3 ) + 2 ].sw.l : R13 )
#define MX23( n ) ( n < 3 ? m_cp2cr[ ( n << 3 ) + 2 ].sw.h : R13 )
#define MX31( n ) ( n < 3 ? m_cp2cr[ ( n << 3 ) + 3 ].sw.l : R22 )
#define MX32( n ) ( n < 3 ? m_cp2cr[ ( n << 3 ) + 3 ].sw.h : R22 )
#define MX33( n ) ( n < 3 ? m_cp2cr[ ( n << 3 ) + 4 ].sw.l : R22 )
// CV1..CV3: n = 0..2 selects the three registers following coefficient block
// n (TR, BK or FC); any other n yields 0.
#define CV1( n ) ( n < 3 ? m_cp2cr[ ( n << 3 ) + 5 ].sd : 0 )
#define CV2( n ) ( n < 3 ? m_cp2cr[ ( n << 3 ) + 6 ].sd : 0 )
#define CV3( n ) ( n < 3 ? m_cp2cr[ ( n << 3 ) + 7 ].sd : 0 )
// Saturates value to the inclusive range [min, max].
// When saturation happens the bits in flag are OR-ed into FLAG; an in-range
// value is returned untouched and FLAG is left alone.
// The upper bound is tested first, so it wins if both tests would match.
int32_t gte::LIM( int32_t value, int32_t max, int32_t min, uint32_t flag )
{
	const bool above = value > max;
	const bool below = !above && value < min;

	if( !above && !below )
	{
		return value;
	}

	FLAG |= flag;
	return above ? max : min;
}
// Reads CP2 data register reg.
// Several registers are normalised in place before being returned:
//  - 1, 3, 5, 8-11: the signed low half is widened to fill the register
//  - 7, 16-19:      the unsigned low half is widened to fill the register
//  - 15:            mirrors SXY2
//  - 28, 29:        rebuilt from IR1..IR3 as three clamped 5-bit fields
// pc is only used for logging.
uint32_t gte::getcp2dr( uint32_t pc, int reg )
{
	auto &slot = m_cp2dr[ reg ];

	switch( reg )
	{
	case 1: case 3: case 5:
	case 8: case 9: case 10: case 11:
		slot.d = (int32_t)slot.sw.l;
		break;

	case 7:
	case 16: case 17: case 18: case 19:
		slot.d = (uint32_t)slot.w.l;
		break;

	case 15:
		slot.d = SXY2;
		break;

	case 28:
	case 29:
		{
			// Each IR value is scaled down by 7 bits and clamped to 0..0x1f.
			// A flag mask of 0 means clamping here never touches FLAG.
			const int32_t field1 = LIM( IR1 >> 7, 0x1f, 0, 0 );
			const int32_t field2 = LIM( IR2 >> 7, 0x1f, 0, 0 );
			const int32_t field3 = LIM( IR3 >> 7, 0x1f, 0, 0 );
			slot.d = field1 | ( field2 << 5 ) | ( field3 << 10 );
		}
		break;
	}

	GTELOG( pc, "get CP2DR%u=%08x", reg, slot.d );
	return slot.d;
}
// Writes value to CP2 data register reg, applying per-register side effects:
//  - 15: pushes value through the SXY0 <- SXY1 <- SXY2 chain
//  - 28: unpacks three 5-bit fields into IR1..IR3 (each ends up scaled by 128)
//  - 30: also updates LZCR with the count of leading bits equal to the top bit
//  - 31: ignored, the write is dropped
// pc is only used for logging.
void gte::setcp2dr( uint32_t pc, int reg, uint32_t value )
{
	GTELOG( pc, "set CP2DR%u=%08x", reg, value );

	if( reg == 31 )
	{
		// LZCR is only ever produced by a write to register 30.
		return;
	}

	if( reg == 15 )
	{
		SXY0 = SXY1;
		SXY1 = SXY2;
		SXY2 = value;
	}
	else if( reg == 28 )
	{
		IR1 = ( value & 0x1f ) << 7;
		IR2 = ( value & 0x3e0 ) << 2;
		IR3 = ( value & 0x7c00 ) >> 3;
	}
	else if( reg == 30 )
	{
		if( ( value & 0x80000000 ) == 0 )
		{
			LZCR = count_leading_zeros(value);
		}
		else
		{
			LZCR = count_leading_ones(value);
		}
	}

	// The raw value is always stored in the addressed register as well.
	m_cp2dr[ reg ].d = value;
}
// Reads CP2 control register reg unchanged; pc is only used for logging.
uint32_t gte::getcp2cr( uint32_t pc, int reg )
{
	const uint32_t contents = m_cp2cr[ reg ].d;
	GTELOG( pc, "get CP2CR%u=%08x", reg, contents );
	return contents;
}
// Writes value to CP2 control register reg.
//  - 4, 12, 20, 26, 27, 29, 30: only the low 16 bits are kept, sign-extended
//    to 32 bits
//  - 31 (FLAG): bits outside 0x7ffff000 are discarded, and bit 31 is then
//    derived from the bits in mask 0x7f87e000
// All other registers store value as given. pc is only used for logging.
void gte::setcp2cr( uint32_t pc, int reg, uint32_t value )
{
	GTELOG( pc, "set CP2CR%u=%08x", reg, value );

	if( reg == 31 )
	{
		value &= 0x7ffff000;
		if( ( value & 0x7f87e000 ) != 0 )
		{
			value |= 0x80000000;
		}
	}
	else if( reg == 4 || reg == 12 || reg == 20 || reg == 26 || reg == 27 || reg == 29 || reg == 30 )
	{
		value = (int32_t)(int16_t) value;
	}

	m_cp2cr[ reg ].d = value;
}
// Applies the 12-bit scaling selected by the sign of sf:
//   sf > 0  -> a shifted right by 12
//   sf < 0  -> a shifted left by 12
//   sf == 0 -> a unchanged
// Only the sign of sf matters, not its magnitude.
static inline int64_t gte_shift( int64_t a, int sf )
{
	if( sf == 0 )
	{
		return a;
	}

	return ( sf > 0 ) ? ( a >> 12 ) : ( a << 12 );
}
// Records overflow of a 44-bit accumulator in FLAG and returns its value
// scaled according to the current m_sf setting.
// max_flag is OR-ed into FLAG on positive overflow, min_flag on negative
// overflow; the returned value itself is not clamped here.
int32_t gte::BOUNDS( int44 value, int max_flag, int min_flag )
{
	const bool overflowed_high = value.positive_overflow();
	const bool overflowed_low = value.negative_overflow();

	if( overflowed_high )
	{
		FLAG |= max_flag;
	}
	if( overflowed_low )
	{
		FLAG |= min_flag;
	}

	return gte_shift( value.value(), m_sf );
}
// Table-driven division used for the perspective term (called with H and SZ3).
// Returns 0xffffffff as an overflow marker when numerator >= 2 * denominator,
// which also covers a zero denominator. Otherwise the denominator is
// normalised so its top bit is set, a reciprocal is estimated from the lookup
// table and refined once, and the result is numerator * reciprocal rounded
// to a 16.16 fixed-point quotient.
static inline uint32_t gte_divide( uint16_t numerator, uint16_t denominator )
{
	if( numerator >= ( denominator * 2 ) )
	{
		return 0xffffffff;
	}

	// 257 entries: indexed by the rounded top bits of the normalised
	// denominator (0x000..0x100).
	static const uint8_t table[] =
	{
		0xff, 0xfd, 0xfb, 0xf9, 0xf7, 0xf5, 0xf3, 0xf1, 0xef, 0xee, 0xec, 0xea, 0xe8, 0xe6, 0xe4, 0xe3,
		0xe1, 0xdf, 0xdd, 0xdc, 0xda, 0xd8, 0xd6, 0xd5, 0xd3, 0xd1, 0xd0, 0xce, 0xcd, 0xcb, 0xc9, 0xc8,
		0xc6, 0xc5, 0xc3, 0xc1, 0xc0, 0xbe, 0xbd, 0xbb, 0xba, 0xb8, 0xb7, 0xb5, 0xb4, 0xb2, 0xb1, 0xb0,
		0xae, 0xad, 0xab, 0xaa, 0xa9, 0xa7, 0xa6, 0xa4, 0xa3, 0xa2, 0xa0, 0x9f, 0x9e, 0x9c, 0x9b, 0x9a,
		0x99, 0x97, 0x96, 0x95, 0x94, 0x92, 0x91, 0x90, 0x8f, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x87, 0x86,
		0x85, 0x84, 0x83, 0x82, 0x81, 0x7f, 0x7e, 0x7d, 0x7c, 0x7b, 0x7a, 0x79, 0x78, 0x77, 0x75, 0x74,
		0x73, 0x72, 0x71, 0x70, 0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68, 0x67, 0x66, 0x65, 0x64,
		0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58, 0x57, 0x56, 0x55,
		0x54, 0x53, 0x53, 0x52, 0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, 0x48, 0x48,
		0x47, 0x46, 0x45, 0x44, 0x43, 0x43, 0x42, 0x41, 0x40, 0x3f, 0x3f, 0x3e, 0x3d, 0x3c, 0x3c, 0x3b,
		0x3a, 0x39, 0x39, 0x38, 0x37, 0x36, 0x36, 0x35, 0x34, 0x33, 0x33, 0x32, 0x31, 0x31, 0x30, 0x2f,
		0x2e, 0x2e, 0x2d, 0x2c, 0x2c, 0x2b, 0x2a, 0x2a, 0x29, 0x28, 0x28, 0x27, 0x26, 0x26, 0x25, 0x24,
		0x24, 0x23, 0x22, 0x22, 0x21, 0x20, 0x20, 0x1f, 0x1e, 0x1e, 0x1d, 0x1d, 0x1c, 0x1b, 0x1b, 0x1a,
		0x19, 0x19, 0x18, 0x18, 0x17, 0x16, 0x16, 0x15, 0x15, 0x14, 0x14, 0x13, 0x12, 0x12, 0x11, 0x11,
		0x10, 0x0f, 0x0f, 0x0e, 0x0e, 0x0d, 0x0d, 0x0c, 0x0c, 0x0b, 0x0a, 0x0a, 0x09, 0x09, 0x08, 0x08,
		0x07, 0x07, 0x06, 0x06, 0x05, 0x05, 0x04, 0x04, 0x03, 0x03, 0x02, 0x02, 0x01, 0x01, 0x00, 0x00,
		0x00
	};

	// Number of bits needed to move the denominator's top set bit to bit 15.
	const int normalise = count_leading_zeros( denominator ) - 16;

	const int mantissa = ( denominator << normalise ) & 0x7fff;
	const int estimate = table[ ( ( mantissa + 0x40 ) >> 7 ) ] + 0x101;
	const int refinement = ( ( 0x80 - ( estimate * ( mantissa + 0x8000 ) ) ) >> 8 ) & 0x1ffff;
	const uint32_t reciprocal = ( ( estimate * refinement ) + 0x80 ) >> 8;

	return (uint32_t)( ( ( (uint64_t) reciprocal * ( numerator << normalise ) ) + 0x8000 ) >> 16 );
}
/* Setting bits 12 & 19-22 in FLAG does not set bit 31 */
// A1/A2/A3: latch the full-width accumulator into m_mac1/m_mac2/m_mac3, then
// return the m_sf-scaled value via BOUNDS(). Positive overflow raises FLAG
// bit 30/29/28, negative overflow raises bit 27/26/25; both also raise bit 31.
int32_t gte::A1( int44 a )
{
	m_mac1 = a.value();
	const int on_positive = ( 1 << 31 ) | ( 1 << 30 );
	const int on_negative = ( 1 << 31 ) | ( 1 << 27 );
	return BOUNDS( a, on_positive, on_negative );
}
int32_t gte::A2( int44 a )
{
	m_mac2 = a.value();
	const int on_positive = ( 1 << 31 ) | ( 1 << 29 );
	const int on_negative = ( 1 << 31 ) | ( 1 << 26 );
	return BOUNDS( a, on_positive, on_negative );
}
int32_t gte::A3( int44 a )
{
	m_mac3 = a.value();
	const int on_positive = ( 1 << 31 ) | ( 1 << 28 );
	const int on_negative = ( 1 << 31 ) | ( 1 << 25 );
	return BOUNDS( a, on_positive, on_negative );
}
// Lm_B1/Lm_B2/Lm_B3: saturate a to the 16-bit range used for IR1/IR2/IR3.
// The upper bound is always 0x7fff; the lower bound is -0x8000 when lm is 0
// and 0 otherwise. Saturation raises FLAG bit 24/23/22 respectively; only
// the first two also raise bit 31.
int32_t gte::Lm_B1( int32_t a, int lm )
{
	const int32_t lower = lm ? 0 : -0x8000;
	return LIM( a, 0x7fff, lower, ( 1 << 31 ) | ( 1 << 24 ) );
}
int32_t gte::Lm_B2( int32_t a, int lm )
{
	const int32_t lower = lm ? 0 : -0x8000;
	return LIM( a, 0x7fff, lower, ( 1 << 31 ) | ( 1 << 23 ) );
}
int32_t gte::Lm_B3( int32_t a, int lm )
{
	const int32_t lower = lm ? 0 : -0x8000;
	return LIM( a, 0x7fff, lower, ( 1 << 22 ) );
}
// IR3 limiter variant used by RTPS/RTPT, working from the unscaled value.
// The returned value is value scaled by sf and saturated to
// [lm ? 0 : -0x8000, 0x7fff], but FLAG bit 22 is decided separately: it is
// raised when the value shifted right by 12 (regardless of sf) falls outside
// -0x8000..0x7fff.
int32_t gte::Lm_B3_sf( int64_t value, int sf, int lm )
{
	const int32_t scaled = gte_shift( value, sf );
	const int32_t shifted_12 = gte_shift( value, 1 );
	const int upper = 0x7fff;
	const int lower = ( lm == 0 ) ? -0x8000 : 0;

	if( shifted_12 > 0x7fff || shifted_12 < -0x8000 )
	{
		FLAG |= ( 1 << 22 );
	}

	if( scaled > upper )
	{
		return upper;
	}
	return ( scaled < lower ) ? lower : scaled;
}
// Lm_C1/Lm_C2/Lm_C3: saturate a colour component to 0x00..0xff, raising
// FLAG bit 21/20/19 respectively when it had to be clipped.
int32_t gte::Lm_C1( int32_t a )
{
	const uint32_t clip_flag = ( 1 << 21 );
	return LIM( a, 0x00ff, 0x0000, clip_flag );
}
int32_t gte::Lm_C2( int32_t a )
{
	const uint32_t clip_flag = ( 1 << 20 );
	return LIM( a, 0x00ff, 0x0000, clip_flag );
}
int32_t gte::Lm_C3( int32_t a )
{
	const uint32_t clip_flag = ( 1 << 19 );
	return LIM( a, 0x00ff, 0x0000, clip_flag );
}
// Scales a according to sf and saturates the result to 0x0000..0xffff,
// raising FLAG bits 31 and 18 when it had to be clipped.
int32_t gte::Lm_D( int64_t a, int sf )
{
	const int32_t scaled = gte_shift( a, sf );
	return LIM( scaled, 0xffff, 0x0000, ( 1 << 31 ) | ( 1 << 18 ) );
}
// Post-processes a gte_divide() result: caps it at 0x1ffff.
// The 0xffffffff overflow marker additionally raises FLAG bits 31 and 17;
// any other over-range value is capped silently.
uint32_t gte::Lm_E( uint32_t result )
{
	const uint32_t ceiling = 0x1ffff;

	if( result == 0xffffffff )
	{
		FLAG |= ( 1 << 31 ) | ( 1 << 17 );
		return ceiling;
	}

	return ( result > ceiling ) ? ceiling : result;
}
// Latches a into m_mac0 and flags it if it does not fit in a signed 32-bit
// value: FLAG bits 31 and 16 for too large, bits 31 and 15 for too small.
// The value is returned unchanged (no clamping).
int64_t gte::F( int64_t a )
{
	m_mac0 = a;

	const bool too_large = a > 0x7fffffffLL;
	const bool too_small = a < -0x80000000LL;

	if( too_large )
	{
		FLAG |= ( 1 << 31 ) | ( 1 << 16 );
	}
	if( too_small )
	{
		FLAG |= ( 1 << 31 ) | ( 1 << 15 );
	}

	return a;
}
// Saturates a to -0x400..0x3ff (used for SX2), raising FLAG bits 31 and 14
// when it had to be clipped.
int32_t gte::Lm_G1( int64_t a )
{
	const bool above = a > 0x3ff;
	const bool below = a < -0x400;

	if( !above && !below )
	{
		return a;
	}

	FLAG |= ( 1 << 31 ) | ( 1 << 14 );
	return above ? 0x3ff : -0x400;
}
// Saturates a to -0x400..0x3ff (used for SY2), raising FLAG bits 31 and 13
// when it had to be clipped.
int32_t gte::Lm_G2( int64_t a )
{
	const bool above = a > 0x3ff;
	const bool below = a < -0x400;

	if( !above && !below )
	{
		return a;
	}

	FLAG |= ( 1 << 31 ) | ( 1 << 13 );
	return above ? 0x3ff : -0x400;
}
// Limiter used for IR0: the result is saturated to 0x0000..0x1000.
// Note the asymmetry: FLAG bit 12 is decided from the value scaled by sf,
// whereas the returned value is always the input shifted right by 12 and
// then clamped.
int32_t gte::Lm_H( int64_t value, int sf )
{
	const int64_t scaled = gte_shift( value, sf );
	const int32_t shifted_12 = gte_shift( value, 1 );
	const int upper = 0x1000;
	const int lower = 0x0000;

	if( scaled > upper || scaled < lower )
	{
		FLAG |= ( 1 << 12 );
	}

	if( shifted_12 > upper )
	{
		return upper;
	}
	return ( shifted_12 < lower ) ? lower : shifted_12;
}
// Executes a single GTE operation.
//   pc    - address of the instruction, used only for logging
//   gteop - the operation word; GTE_FUNCT() selects the operation, GTE_LM()
//           and GTE_SF() supply the limiter and scaling options, and for
//           MVMVA GTE_MX()/GTE_V()/GTE_CV() select the operands
// FLAG is cleared before every operation and then accumulated by the
// A*/F/Lm_* helpers as the operation runs.
// Returns 1 when the function code was recognised and executed, 0 otherwise.
//
// Most colour operations finish with the same tail: IR1..IR3 are re-limited
// from MAC1..MAC3, RGB0 <- RGB1 <- RGB2 is shifted, and the new RGB2 is built
// from CODE plus MAC1..MAC3 >> 4 clamped to a byte.
int gte::docop2( uint32_t pc, int gteop )
{
int v;
int lm;
int cv;
int mx;
int32_t h_over_sz3 = 0;
lm = GTE_LM( gteop );
m_sf = GTE_SF( gteop );
FLAG = 0;
switch( GTE_FUNCT( gteop ) )
{
case 0x00: // drop through to RTPS
case 0x01:
// RTPS: MAC1..3 = TR + R * vector 0, then push the SZ and SXY chains with
// the projected result and derive MAC0/IR0 from DQA/DQB.
GTELOG( pc, "%08x RTPS", gteop );
MAC1 = A1( int44( (int64_t) TRX << 12 ) + ( R11 * VX0 ) + ( R12 * VY0 ) + ( R13 * VZ0 ) );
MAC2 = A2( int44( (int64_t) TRY << 12 ) + ( R21 * VX0 ) + ( R22 * VY0 ) + ( R23 * VZ0 ) );
MAC3 = A3( int44( (int64_t) TRZ << 12 ) + ( R31 * VX0 ) + ( R32 * VY0 ) + ( R33 * VZ0 ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
// IR3 and SZ3 are computed from the unscaled accumulator kept in m_mac3.
IR3 = Lm_B3_sf( m_mac3, m_sf, lm );
SZ0 = SZ1;
SZ1 = SZ2;
SZ2 = SZ3;
SZ3 = Lm_D( m_mac3, 1 );
h_over_sz3 = Lm_E( gte_divide( H, SZ3 ) );
SXY0 = SXY1;
SXY1 = SXY2;
SX2 = Lm_G1( F( (int64_t) OFX + ( (int64_t) IR1 * h_over_sz3 ) ) >> 16 );
SY2 = Lm_G2( F( (int64_t) OFY + ( (int64_t) IR2 * h_over_sz3 ) ) >> 16 );
MAC0 = F( (int64_t) DQB + ( (int64_t) DQA * h_over_sz3 ) );
IR0 = Lm_H( m_mac0, 1 );
return 1;
case 0x06:
// NCLIP: MAC0 = determinant-style sum of products of the three screen
// coordinates SXY0..SXY2.
GTELOG( pc, "%08x NCLIP", gteop );
MAC0 = F( (int64_t) ( SX0 * SY1 ) + ( SX1 * SY2 ) + ( SX2 * SY0 ) - ( SX0 * SY2 ) - ( SX1 * SY0 ) - ( SX2 * SY1 ) );
return 1;
case 0x0c:
// OP: cross-product style combination of R11/R22/R33 with IR1..IR3.
GTELOG( pc, "%08x OP", gteop );
MAC1 = A1( (int64_t) ( R22 * IR3 ) - ( R33 * IR2 ) );
MAC2 = A2( (int64_t) ( R33 * IR1 ) - ( R11 * IR3 ) );
MAC3 = A3( (int64_t) ( R11 * IR2 ) - ( R22 * IR1 ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
return 1;
case 0x10:
// DPCS: blend R/G/B towards RFC/GFC/BFC weighted by IR0, then colour tail.
// The inner A*/Lm_B* calls always use lm = 0 for the difference term.
GTELOG( pc, "%08x DPCS", gteop );
MAC1 = A1( ( R << 16 ) + ( IR0 * Lm_B1( A1( ( (int64_t) RFC << 12 ) - ( R << 16 ) ), 0 ) ) );
MAC2 = A2( ( G << 16 ) + ( IR0 * Lm_B2( A2( ( (int64_t) GFC << 12 ) - ( G << 16 ) ), 0 ) ) );
MAC3 = A3( ( B << 16 ) + ( IR0 * Lm_B3( A3( ( (int64_t) BFC << 12 ) - ( B << 16 ) ), 0 ) ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
RGB0 = RGB1;
RGB1 = RGB2;
CD2 = CODE;
R2 = Lm_C1( MAC1 >> 4 );
G2 = Lm_C2( MAC2 >> 4 );
B2 = Lm_C3( MAC3 >> 4 );
return 1;
case 0x11:
// INTPL: same blend as DPCS but starting from IR1..IR3 instead of R/G/B.
GTELOG( pc, "%08x INTPL", gteop );
MAC1 = A1( ( IR1 << 12 ) + ( IR0 * Lm_B1( A1( ( (int64_t) RFC << 12 ) - ( IR1 << 12 ) ), 0 ) ) );
MAC2 = A2( ( IR2 << 12 ) + ( IR0 * Lm_B2( A2( ( (int64_t) GFC << 12 ) - ( IR2 << 12 ) ), 0 ) ) );
MAC3 = A3( ( IR3 << 12 ) + ( IR0 * Lm_B3( A3( ( (int64_t) BFC << 12 ) - ( IR3 << 12 ) ), 0 ) ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
RGB0 = RGB1;
RGB1 = RGB2;
CD2 = CODE;
R2 = Lm_C1( MAC1 >> 4 );
G2 = Lm_C2( MAC2 >> 4 );
B2 = Lm_C3( MAC3 >> 4 );
return 1;
case 0x12:
// MVMVA: general "constant vector + matrix * vector" with the matrix (mx),
// vector (v) and constant vector (cv) chosen by fields of the opcode.
GTELOG( pc, "%08x MVMVA", gteop );
mx = GTE_MX( gteop );
v = GTE_V( gteop );
cv = GTE_CV( gteop );
switch( cv )
{
case 2:
// cv == 2 is special-cased: the stored result only contains the second and
// third column products. The constant vector plus first column product is
// still evaluated, but only for its effect on FLAG and m_mac1..3 -- the
// returned values are discarded.
MAC1 = A1( (int64_t) ( MX12( mx ) * VY( v ) ) + ( MX13( mx ) * VZ( v ) ) );
MAC2 = A2( (int64_t) ( MX22( mx ) * VY( v ) ) + ( MX23( mx ) * VZ( v ) ) );
MAC3 = A3( (int64_t) ( MX32( mx ) * VY( v ) ) + ( MX33( mx ) * VZ( v ) ) );
Lm_B1( A1( ( (int64_t) CV1( cv ) << 12 ) + ( MX11( mx ) * VX( v ) ) ), 0 );
Lm_B2( A2( ( (int64_t) CV2( cv ) << 12 ) + ( MX21( mx ) * VX( v ) ) ), 0 );
Lm_B3( A3( ( (int64_t) CV3( cv ) << 12 ) + ( MX31( mx ) * VX( v ) ) ), 0 );
break;
default:
MAC1 = A1( int44( (int64_t) CV1( cv ) << 12 ) + ( MX11( mx ) * VX( v ) ) + ( MX12( mx ) * VY( v ) ) + ( MX13( mx ) * VZ( v ) ) );
MAC2 = A2( int44( (int64_t) CV2( cv ) << 12 ) + ( MX21( mx ) * VX( v ) ) + ( MX22( mx ) * VY( v ) ) + ( MX23( mx ) * VZ( v ) ) );
MAC3 = A3( int44( (int64_t) CV3( cv ) << 12 ) + ( MX31( mx ) * VX( v ) ) + ( MX32( mx ) * VY( v ) ) + ( MX33( mx ) * VZ( v ) ) );
break;
}
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
return 1;
case 0x13:
// NCDS: three stages on vector 0 --
//   1) IR = L * vector
//   2) IR = BK + (LR/LG/LB) * IR
//   3) scale by R/G/B and blend towards RFC/GFC/BFC weighted by IR0
// followed by the colour tail.
GTELOG( pc, "%08x NCDS", gteop );
MAC1 = A1( (int64_t) ( L11 * VX0 ) + ( L12 * VY0 ) + ( L13 * VZ0 ) );
MAC2 = A2( (int64_t) ( L21 * VX0 ) + ( L22 * VY0 ) + ( L23 * VZ0 ) );
MAC3 = A3( (int64_t) ( L31 * VX0 ) + ( L32 * VY0 ) + ( L33 * VZ0 ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
MAC1 = A1( int44( (int64_t) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) );
MAC2 = A2( int44( (int64_t) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) );
MAC3 = A3( int44( (int64_t) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
MAC1 = A1( ( ( R << 4 ) * IR1 ) + ( IR0 * Lm_B1( A1( ( (int64_t) RFC << 12 ) - ( ( R << 4 ) * IR1 ) ), 0 ) ) );
MAC2 = A2( ( ( G << 4 ) * IR2 ) + ( IR0 * Lm_B2( A2( ( (int64_t) GFC << 12 ) - ( ( G << 4 ) * IR2 ) ), 0 ) ) );
MAC3 = A3( ( ( B << 4 ) * IR3 ) + ( IR0 * Lm_B3( A3( ( (int64_t) BFC << 12 ) - ( ( B << 4 ) * IR3 ) ), 0 ) ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
RGB0 = RGB1;
RGB1 = RGB2;
CD2 = CODE;
R2 = Lm_C1( MAC1 >> 4 );
G2 = Lm_C2( MAC2 >> 4 );
B2 = Lm_C3( MAC3 >> 4 );
return 1;
case 0x14:
// CDP: stages 2 and 3 of NCDS, starting from the current IR1..IR3.
GTELOG( pc, "%08x CDP", gteop );
MAC1 = A1( int44( (int64_t) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) );
MAC2 = A2( int44( (int64_t) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) );
MAC3 = A3( int44( (int64_t) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
MAC1 = A1( ( ( R << 4 ) * IR1 ) + ( IR0 * Lm_B1( A1( ( (int64_t) RFC << 12 ) - ( ( R << 4 ) * IR1 ) ), 0 ) ) );
MAC2 = A2( ( ( G << 4 ) * IR2 ) + ( IR0 * Lm_B2( A2( ( (int64_t) GFC << 12 ) - ( ( G << 4 ) * IR2 ) ), 0 ) ) );
MAC3 = A3( ( ( B << 4 ) * IR3 ) + ( IR0 * Lm_B3( A3( ( (int64_t) BFC << 12 ) - ( ( B << 4 ) * IR3 ) ), 0 ) ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
RGB0 = RGB1;
RGB1 = RGB2;
CD2 = CODE;
R2 = Lm_C1( MAC1 >> 4 );
G2 = Lm_C2( MAC2 >> 4 );
B2 = Lm_C3( MAC3 >> 4 );
return 1;
case 0x16:
// NCDT: the NCDS sequence repeated for vectors 0, 1 and 2.
GTELOG( pc, "%08x NCDT", gteop );
for( v = 0; v < 3; v++ )
{
MAC1 = A1( (int64_t) ( L11 * VX( v ) ) + ( L12 * VY( v ) ) + ( L13 * VZ( v ) ) );
MAC2 = A2( (int64_t) ( L21 * VX( v ) ) + ( L22 * VY( v ) ) + ( L23 * VZ( v ) ) );
MAC3 = A3( (int64_t) ( L31 * VX( v ) ) + ( L32 * VY( v ) ) + ( L33 * VZ( v ) ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
MAC1 = A1( int44( (int64_t) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) );
MAC2 = A2( int44( (int64_t) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) );
MAC3 = A3( int44( (int64_t) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
MAC1 = A1( ( ( R << 4 ) * IR1 ) + ( IR0 * Lm_B1( A1( ( (int64_t) RFC << 12 ) - ( ( R << 4 ) * IR1 ) ), 0 ) ) );
MAC2 = A2( ( ( G << 4 ) * IR2 ) + ( IR0 * Lm_B2( A2( ( (int64_t) GFC << 12 ) - ( ( G << 4 ) * IR2 ) ), 0 ) ) );
MAC3 = A3( ( ( B << 4 ) * IR3 ) + ( IR0 * Lm_B3( A3( ( (int64_t) BFC << 12 ) - ( ( B << 4 ) * IR3 ) ), 0 ) ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
RGB0 = RGB1;
RGB1 = RGB2;
CD2 = CODE;
R2 = Lm_C1( MAC1 >> 4 );
G2 = Lm_C2( MAC2 >> 4 );
B2 = Lm_C3( MAC3 >> 4 );
}
return 1;
case 0x1b:
// NCCS: stages 1 and 2 of NCDS on vector 0, then a plain multiply by
// R/G/B (no blend towards RFC/GFC/BFC), followed by the colour tail.
GTELOG( pc, "%08x NCCS", gteop );
MAC1 = A1( (int64_t) ( L11 * VX0 ) + ( L12 * VY0 ) + ( L13 * VZ0 ) );
MAC2 = A2( (int64_t) ( L21 * VX0 ) + ( L22 * VY0 ) + ( L23 * VZ0 ) );
MAC3 = A3( (int64_t) ( L31 * VX0 ) + ( L32 * VY0 ) + ( L33 * VZ0 ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
MAC1 = A1( int44( (int64_t) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) );
MAC2 = A2( int44( (int64_t) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) );
MAC3 = A3( int44( (int64_t) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
MAC1 = A1( ( R << 4 ) * IR1 );
MAC2 = A2( ( G << 4 ) * IR2 );
MAC3 = A3( ( B << 4 ) * IR3 );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
RGB0 = RGB1;
RGB1 = RGB2;
CD2 = CODE;
R2 = Lm_C1( MAC1 >> 4 );
G2 = Lm_C2( MAC2 >> 4 );
B2 = Lm_C3( MAC3 >> 4 );
return 1;
case 0x1c:
// CC: NCCS without its first stage, starting from the current IR1..IR3.
GTELOG( pc, "%08x CC", gteop );
MAC1 = A1( int44( ( (int64_t) RBK ) << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) );
MAC2 = A2( int44( ( (int64_t) GBK ) << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) );
MAC3 = A3( int44( ( (int64_t) BBK ) << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
MAC1 = A1( ( R << 4 ) * IR1 );
MAC2 = A2( ( G << 4 ) * IR2 );
MAC3 = A3( ( B << 4 ) * IR3 );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
RGB0 = RGB1;
RGB1 = RGB2;
CD2 = CODE;
R2 = Lm_C1( MAC1 >> 4 );
G2 = Lm_C2( MAC2 >> 4 );
B2 = Lm_C3( MAC3 >> 4 );
return 1;
case 0x1e:
// NCS: stages 1 and 2 of NCDS on vector 0 only, then the colour tail.
GTELOG( pc, "%08x NCS", gteop );
MAC1 = A1( (int64_t) ( L11 * VX0 ) + ( L12 * VY0 ) + ( L13 * VZ0 ) );
MAC2 = A2( (int64_t) ( L21 * VX0 ) + ( L22 * VY0 ) + ( L23 * VZ0 ) );
MAC3 = A3( (int64_t) ( L31 * VX0 ) + ( L32 * VY0 ) + ( L33 * VZ0 ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
MAC1 = A1( int44( (int64_t) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) );
MAC2 = A2( int44( (int64_t) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) );
MAC3 = A3( int44( (int64_t) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
RGB0 = RGB1;
RGB1 = RGB2;
CD2 = CODE;
R2 = Lm_C1( MAC1 >> 4 );
G2 = Lm_C2( MAC2 >> 4 );
B2 = Lm_C3( MAC3 >> 4 );
return 1;
case 0x20:
// NCT: the NCS sequence repeated for vectors 0, 1 and 2.
GTELOG( pc, "%08x NCT", gteop );
for( v = 0; v < 3; v++ )
{
MAC1 = A1( (int64_t) ( L11 * VX( v ) ) + ( L12 * VY( v ) ) + ( L13 * VZ( v ) ) );
MAC2 = A2( (int64_t) ( L21 * VX( v ) ) + ( L22 * VY( v ) ) + ( L23 * VZ( v ) ) );
MAC3 = A3( (int64_t) ( L31 * VX( v ) ) + ( L32 * VY( v ) ) + ( L33 * VZ( v ) ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
MAC1 = A1( int44( (int64_t) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) );
MAC2 = A2( int44( (int64_t) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) );
MAC3 = A3( int44( (int64_t) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
RGB0 = RGB1;
RGB1 = RGB2;
CD2 = CODE;
R2 = Lm_C1( MAC1 >> 4 );
G2 = Lm_C2( MAC2 >> 4 );
B2 = Lm_C3( MAC3 >> 4 );
}
return 1;
case 0x28:
// SQR: squares IR1..IR3 component-wise.
GTELOG( pc, "%08x SQR", gteop );
MAC1 = A1( IR1 * IR1 );
MAC2 = A2( IR2 * IR2 );
MAC3 = A3( IR3 * IR3 );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
return 1;
case 0x1a: // end of NCDT
case 0x29:
// DPCL: stage 3 of NCDS on its own -- scale the current IR1..IR3 by R/G/B
// and blend towards RFC/GFC/BFC weighted by IR0. Function code 0x1a is
// handled identically.
GTELOG( pc, "%08x DPCL", gteop );
MAC1 = A1( ( ( R << 4 ) * IR1 ) + ( IR0 * Lm_B1( A1( ( (int64_t) RFC << 12 ) - ( ( R << 4 ) * IR1 ) ), 0 ) ) );
MAC2 = A2( ( ( G << 4 ) * IR2 ) + ( IR0 * Lm_B2( A2( ( (int64_t) GFC << 12 ) - ( ( G << 4 ) * IR2 ) ), 0 ) ) );
MAC3 = A3( ( ( B << 4 ) * IR3 ) + ( IR0 * Lm_B3( A3( ( (int64_t) BFC << 12 ) - ( ( B << 4 ) * IR3 ) ), 0 ) ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
RGB0 = RGB1;
RGB1 = RGB2;
CD2 = CODE;
R2 = Lm_C1( MAC1 >> 4 );
G2 = Lm_C2( MAC2 >> 4 );
B2 = Lm_C3( MAC3 >> 4 );
return 1;
case 0x2a:
// DPCT: the DPCS blend applied three times. Each pass reads R0/G0/B0 and
// then shifts the RGB chain, so every original entry is processed once.
GTELOG( pc, "%08x DPCT", gteop );
for( v = 0; v < 3; v++ )
{
MAC1 = A1( ( R0 << 16 ) + ( IR0 * Lm_B1( A1( ( (int64_t) RFC << 12 ) - ( R0 << 16 ) ), 0 ) ) );
MAC2 = A2( ( G0 << 16 ) + ( IR0 * Lm_B2( A2( ( (int64_t) GFC << 12 ) - ( G0 << 16 ) ), 0 ) ) );
MAC3 = A3( ( B0 << 16 ) + ( IR0 * Lm_B3( A3( ( (int64_t) BFC << 12 ) - ( B0 << 16 ) ), 0 ) ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
RGB0 = RGB1;
RGB1 = RGB2;
CD2 = CODE;
R2 = Lm_C1( MAC1 >> 4 );
G2 = Lm_C2( MAC2 >> 4 );
B2 = Lm_C3( MAC3 >> 4 );
}
return 1;
case 0x2d:
// AVSZ3: MAC0 = ZSF3 * ( SZ1 + SZ2 + SZ3 ); OTZ is the limited, scaled result.
GTELOG( pc, "%08x AVSZ3", gteop );
MAC0 = F( (int64_t) ( ZSF3 * SZ1 ) + ( ZSF3 * SZ2 ) + ( ZSF3 * SZ3 ) );
OTZ = Lm_D( m_mac0, 1 );
return 1;
case 0x2e:
// AVSZ4: as AVSZ3 but with ZSF4 over all four SZ entries.
GTELOG( pc, "%08x AVSZ4", gteop );
MAC0 = F( (int64_t) ( ZSF4 * SZ0 ) + ( ZSF4 * SZ1 ) + ( ZSF4 * SZ2 ) + ( ZSF4 * SZ3 ) );
OTZ = Lm_D( m_mac0, 1 );
return 1;
case 0x30:
// RTPT: the RTPS transform and projection repeated for vectors 0, 1 and 2.
// MAC0/IR0 are computed once after the loop, from the last h_over_sz3.
GTELOG( pc, "%08x RTPT", gteop );
for( v = 0; v < 3; v++ )
{
MAC1 = A1( int44( (int64_t) TRX << 12 ) + ( R11 * VX( v ) ) + ( R12 * VY( v ) ) + ( R13 * VZ( v ) ) );
MAC2 = A2( int44( (int64_t) TRY << 12 ) + ( R21 * VX( v ) ) + ( R22 * VY( v ) ) + ( R23 * VZ( v ) ) );
MAC3 = A3( int44( (int64_t) TRZ << 12 ) + ( R31 * VX( v ) ) + ( R32 * VY( v ) ) + ( R33 * VZ( v ) ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3_sf( m_mac3, m_sf, lm );
SZ0 = SZ1;
SZ1 = SZ2;
SZ2 = SZ3;
SZ3 = Lm_D( m_mac3, 1 );
h_over_sz3 = Lm_E( gte_divide( H, SZ3 ) );
SXY0 = SXY1;
SXY1 = SXY2;
SX2 = Lm_G1( F( (int64_t) OFX + ( (int64_t) IR1 * h_over_sz3 ) ) >> 16 );
SY2 = Lm_G2( F( (int64_t) OFY + ( (int64_t) IR2 * h_over_sz3 ) ) >> 16 );
}
MAC0 = F( (int64_t) DQB + ( (int64_t) DQA * h_over_sz3 ) );
IR0 = Lm_H( m_mac0, 1 );
return 1;
case 0x3d:
// GPF: MAC = IR0 * IR1..IR3, followed by the colour tail.
GTELOG( pc, "%08x GPF", gteop );
MAC1 = A1( IR0 * IR1 );
MAC2 = A2( IR0 * IR2 );
MAC3 = A3( IR0 * IR3 );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
RGB0 = RGB1;
RGB1 = RGB2;
CD2 = CODE;
R2 = Lm_C1( MAC1 >> 4 );
G2 = Lm_C2( MAC2 >> 4 );
B2 = Lm_C3( MAC3 >> 4 );
return 1;
case 0x3e:
// GPL: as GPF, but added to the existing MAC1..MAC3. Passing -m_sf to
// gte_shift() shifts the old value left by 12 when m_sf is positive, undoing
// the right shift that BOUNDS() is about to apply.
GTELOG( pc, "%08x GPL", gteop );
MAC1 = A1( gte_shift( MAC1, -m_sf ) + ( IR0 * IR1 ) );
MAC2 = A2( gte_shift( MAC2, -m_sf ) + ( IR0 * IR2 ) );
MAC3 = A3( gte_shift( MAC3, -m_sf ) + ( IR0 * IR3 ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
RGB0 = RGB1;
RGB1 = RGB2;
CD2 = CODE;
R2 = Lm_C1( MAC1 >> 4 );
G2 = Lm_C2( MAC2 >> 4 );
B2 = Lm_C3( MAC3 >> 4 );
return 1;
case 0x3f:
// NCCT: the NCCS sequence repeated for vectors 0, 1 and 2.
GTELOG( pc, "%08x NCCT", gteop );
for( v = 0; v < 3; v++ )
{
MAC1 = A1( (int64_t) ( L11 * VX( v ) ) + ( L12 * VY( v ) ) + ( L13 * VZ( v ) ) );
MAC2 = A2( (int64_t) ( L21 * VX( v ) ) + ( L22 * VY( v ) ) + ( L23 * VZ( v ) ) );
MAC3 = A3( (int64_t) ( L31 * VX( v ) ) + ( L32 * VY( v ) ) + ( L33 * VZ( v ) ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
MAC1 = A1( int44( (int64_t) RBK << 12 ) + ( LR1 * IR1 ) + ( LR2 * IR2 ) + ( LR3 * IR3 ) );
MAC2 = A2( int44( (int64_t) GBK << 12 ) + ( LG1 * IR1 ) + ( LG2 * IR2 ) + ( LG3 * IR3 ) );
MAC3 = A3( int44( (int64_t) BBK << 12 ) + ( LB1 * IR1 ) + ( LB2 * IR2 ) + ( LB3 * IR3 ) );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
MAC1 = A1( ( R << 4 ) * IR1 );
MAC2 = A2( ( G << 4 ) * IR2 );
MAC3 = A3( ( B << 4 ) * IR3 );
IR1 = Lm_B1( MAC1, lm );
IR2 = Lm_B2( MAC2, lm );
IR3 = Lm_B3( MAC3, lm );
RGB0 = RGB1;
RGB1 = RGB2;
CD2 = CODE;
R2 = Lm_C1( MAC1 >> 4 );
G2 = Lm_C2( MAC2 >> 4 );
B2 = Lm_C3( MAC3 >> 4 );
}
return 1;
}
// Unrecognised function code: nothing is executed (FLAG has still been
// cleared above) and the caller is told via the 0 return value.
//popmessage( "unknown GTE op %08x", gteop );
//logerror( "%08x: unknown GTE op %08x\n", pc, gteop );
return 0;
}