Assorted N64 fixes (#8415)

* -rdp: Fixed incorrect channel swapping on 32-bit resampled framebuffers. [Ryan Holtz]
* -rsp: Fixed LWV and VMOV behavior. Added reserved instructions V056, V057, V073, and VNULL. [Ryan Holtz, krom]
* -rdp: Temporarily adjusted framebuffer resampling to not exceed screen bounds in some games. [Ryan Holtz]
* -n64: Fixed SP DMA behavior based on hardware tests. [Ryan Holtz]
* -rsp: Removed unused DRC and SIMD support. General code cleanup. [Ryan Holtz]
* -n64: Pass K4 and K5 factors to threaded drawing code. Fixes black geometry in Conker's Bad Fur Day. [Ryan Holtz]
* -aleck64: Fixed compile issue with previous commits. [Ryan Holtz]
This commit is contained in:
MooglyGuy 2021-08-10 03:06:08 +02:00 committed by GitHub
parent 1fc8bf7fcd
commit 7fe8f9f74c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
44 changed files with 3549 additions and 11869 deletions

View File

@ -13,7 +13,7 @@
-- Dynamic recompiler objects
--------------------------------------------------
DRC_CPUS = { "E1", "SH", "MIPS3", "POWERPC", "RSP", "ARM7", "ADSP21062", "MB86235", "DSP16", "UNSP" }
DRC_CPUS = { "E1", "SH", "MIPS3", "POWERPC", "ARM7", "ADSP21062", "MB86235", "DSP16", "UNSP" }
CPU_INCLUDE_DRC = false
for i, v in ipairs(DRC_CPUS) do
if (CPUS[v]~=null) then
@ -2143,39 +2143,6 @@ if CPUS["RSP"] then
MAME_DIR .. "src/devices/cpu/rsp/rsp.cpp",
MAME_DIR .. "src/devices/cpu/rsp/rsp.h",
MAME_DIR .. "src/devices/cpu/rsp/rspdefs.h",
MAME_DIR .. "src/devices/cpu/rsp/rspdrc.cpp",
MAME_DIR .. "src/devices/cpu/rsp/rspfe.cpp",
MAME_DIR .. "src/devices/cpu/rsp/rspfe.h",
MAME_DIR .. "src/devices/cpu/rsp/rspcp2.cpp",
MAME_DIR .. "src/devices/cpu/rsp/rspcp2.h",
MAME_DIR .. "src/devices/cpu/rsp/rspcp2d.cpp",
MAME_DIR .. "src/devices/cpu/rsp/rspcp2d.h",
MAME_DIR .. "src/devices/cpu/rsp/clamp.h",
MAME_DIR .. "src/devices/cpu/rsp/vabs.h",
MAME_DIR .. "src/devices/cpu/rsp/vadd.h",
MAME_DIR .. "src/devices/cpu/rsp/vaddc.h",
MAME_DIR .. "src/devices/cpu/rsp/vand.h",
MAME_DIR .. "src/devices/cpu/rsp/vch.h",
MAME_DIR .. "src/devices/cpu/rsp/vcl.h",
MAME_DIR .. "src/devices/cpu/rsp/vcmp.h",
MAME_DIR .. "src/devices/cpu/rsp/vcr.h",
MAME_DIR .. "src/devices/cpu/rsp/vdivh.h",
MAME_DIR .. "src/devices/cpu/rsp/vldst.h",
MAME_DIR .. "src/devices/cpu/rsp/vmac.h",
MAME_DIR .. "src/devices/cpu/rsp/vmov.h",
MAME_DIR .. "src/devices/cpu/rsp/vmrg.h",
MAME_DIR .. "src/devices/cpu/rsp/vmudh.h",
MAME_DIR .. "src/devices/cpu/rsp/vmul.h",
MAME_DIR .. "src/devices/cpu/rsp/vmulh.h",
MAME_DIR .. "src/devices/cpu/rsp/vmull.h",
MAME_DIR .. "src/devices/cpu/rsp/vmulm.h",
MAME_DIR .. "src/devices/cpu/rsp/vmuln.h",
MAME_DIR .. "src/devices/cpu/rsp/vor.h",
MAME_DIR .. "src/devices/cpu/rsp/vrcpsq.h",
MAME_DIR .. "src/devices/cpu/rsp/vrsq.h",
MAME_DIR .. "src/devices/cpu/rsp/vsub.h",
MAME_DIR .. "src/devices/cpu/rsp/vsubc.h",
MAME_DIR .. "src/devices/cpu/rsp/vxor.h",
MAME_DIR .. "src/devices/cpu/rsp/rspdiv.h",
}
end

View File

@ -1,37 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Signed clamp of the accumulator onto its middle slice: every lane's
// (acc_hi:acc_mid) pair is treated as one 32-bit value and packed back down
// to 16 bits with signed saturation.
static inline rsp_vec_t sclamp_acc_to_mid(rsp_vec_t acc_mid, rsp_vec_t acc_hi)
{
	// Interleaving puts acc_hi in the upper half and acc_mid in the lower
	// half of each 32-bit lane.
	const rsp_vec_t lanes_0_to_3 = _mm_unpacklo_epi16(acc_mid, acc_hi);
	const rsp_vec_t lanes_4_to_7 = _mm_unpackhi_epi16(acc_mid, acc_hi);

	return _mm_packs_epi32(lanes_0_to_3, lanes_4_to_7);
}
// Unsigned clamp helper.
//
// Returns `val` in every lane where the HI slice of the accumulator is just
// the sign extension of the MD slice, and a saturated value in every other
// lane (0x0000 when HI is negative, 0xffff when it is not).
//
//   val     - unclamped result lanes
//   acc_mid - middle accumulator slice
//   acc_hi  - high accumulator slice
//   zero    - all-zero vector supplied by the caller
static inline rsp_vec_t uclamp_acc(rsp_vec_t val, rsp_vec_t acc_mid, rsp_vec_t acc_hi, rsp_vec_t zero)
{
	// Broadcast the sign bit of each 16-bit lane across the whole lane.
	const rsp_vec_t hi_negative = _mm_srai_epi16(acc_hi, 15);
	const rsp_vec_t mid_negative = _mm_srai_epi16(acc_mid, 15);

	// We don't have to clamp if the HI part of the
	// accumulator is sign-extended down to the MD part.
	const rsp_vec_t hi_sign_check = _mm_cmpeq_epi16(hi_negative, acc_hi);
	const rsp_vec_t mid_sign_check = _mm_cmpeq_epi16(hi_negative, mid_negative);
	const rsp_vec_t clamp_mask = _mm_and_si128(mid_sign_check, hi_sign_check);

	// Generate the value in the event we need to clamp.
	// * hi_negative, mid_sign => xxxx
	// * hi_negative, !mid_sign => 0000
	// * !hi_negative, mid_sign => FFFF
	// * !hi_negative, !mid_sign => xxxx
	const rsp_vec_t clamped_val = _mm_cmpeq_epi16(hi_negative, zero);

#if (defined(__SSE4_1__) || defined(_MSC_VER))
	return _mm_blendv_epi8(clamped_val, val, clamp_mask);
#else
	// Manual blend: (clamp_mask & val) | (~clamp_mask & clamped_val).
	// The masked copy of val needs its own temporary; reusing clamped_val for
	// it would destroy the saturated value before it is merged in.
	const rsp_vec_t passthrough = _mm_and_si128(clamp_mask, val);
	const rsp_vec_t saturated = _mm_andnot_si128(clamp_mask, clamped_val);
	return _mm_or_si128(passthrough, saturated);
#endif
}

File diff suppressed because it is too large Load Diff

View File

@ -14,9 +14,6 @@
#pragma once
#include "cpu/drcfe.h"
#include "cpu/drcuml.h"
/***************************************************************************
REGISTER ENUMERATION
***************************************************************************/
@ -81,14 +78,9 @@ enum
#define RSP_STATUS_SIGNAL6 0x2000
#define RSP_STATUS_SIGNAL7 0x4000
#define RSPDRC_STRICT_VERIFY 0x0001 /* verify all instructions */
class rsp_device : public cpu_device
{
class frontend;
class cop2;
class cop2_drc;
public:
// construction/destruction
@ -102,24 +94,6 @@ public:
auto sp_reg_w() { return m_sp_reg_w_func.bind(); }
auto status_set() { return m_sp_set_status_func.bind(); }
void rspdrc_flush_drc_cache();
void rspdrc_set_options(uint32_t options);
void rsp_add_dmem(uint32_t *base);
void rsp_add_imem(uint32_t *base);
void ccfunc_read8();
void ccfunc_read16();
void ccfunc_read32();
void ccfunc_write8();
void ccfunc_write16();
void ccfunc_write32();
void ccfunc_get_cop0_reg();
void ccfunc_set_cop0_reg();
void ccfunc_sp_set_status_cb();
void ccfunc_unimplemented();
uint8_t* get_dmem() { return m_dmem8; }
protected:
// device-level overrides
virtual void device_start() override;
@ -146,95 +120,56 @@ protected:
void unimplemented_opcode(uint32_t op);
/* internal compiler state */
// Bookkeeping passed by reference to the generate_* recompiler routines
// declared in this class.
struct compiler_state
{
// Copy-assignment is deleted.
compiler_state &operator=(compiler_state &) = delete;
uint32_t cycles; /* accumulated cycles */
uint8_t checkints; /* need to check interrupts before next instruction */
uint8_t checksoftints; /* need to check software interrupts before next instruction */
uml::code_label labelnum; /* index for local labels */
};
private:
address_space_config m_program_config;
address_space_config m_imem_config;
address_space_config m_dmem_config;
/* fast RAM info */
// Descriptor for one RAM block: its address range, a read-only flag and the
// host pointer where the block lives.
// NOTE(review): whether `end` is inclusive is not visible here — confirm at the use site.
struct fast_ram_info
{
offs_t start; /* start of the RAM block */
offs_t end; /* end of the RAM block */
bool readonly; /* true if read-only */
void * base; /* base in memory where the RAM lives */
};
uint16_t m_pc;
uint32_t m_r[35];
int m_icount;
int m_ideduct;
bool m_scalar_busy;
bool m_vector_busy;
bool m_paired_busy;
/* core state */
drc_cache m_cache; /* pointer to the DRC code cache */
std::unique_ptr<drcuml_state> m_drcuml; /* DRC UML generator state */
std::unique_ptr<frontend> m_drcfe; /* pointer to the DRC front-end state */
uint32_t m_drcoptions; /* configurable DRC options */
/* internal stuff */
uint8_t m_cache_dirty; /* true if we need to flush the cache */
/* parameters for subroutines */
uint64_t m_numcycles; /* return value from gettotalcycles */
const char * m_format; /* format string for print_debug */
uint32_t m_arg2; /* print_debug argument 3 */
uint32_t m_arg3; /* print_debug argument 4 */
/* register mappings */
uml::parameter m_regmap[34]; /* parameter to register mappings for all 32 integer registers */
/* subroutines */
uml::code_handle * m_entry; /* entry point */
uml::code_handle * m_nocode; /* nocode exception handler */
uml::code_handle * m_out_of_cycles; /* out of cycles exception handler */
uml::code_handle * m_read8; /* read byte */
uml::code_handle * m_write8; /* write byte */
uml::code_handle * m_read16; /* read half */
uml::code_handle * m_write16; /* write half */
uml::code_handle * m_read32; /* read word */
uml::code_handle * m_write32; /* write word */
// Plain-data bundle of scalar core state, reached through m_rsp_state.
// NOTE(review): presumably the state block shared with generated DRC code — confirm.
struct internal_rsp_state
{
// program counter
uint32_t pc;
// register file storage (35 32-bit slots)
uint32_t r[35];
// NOTE(review): arg0/arg1 look like scratch arguments for helper calls — confirm
uint32_t arg0;
uint32_t arg1;
// NOTE(review): presumably the pending branch target — confirm
uint32_t jmpdest;
// NOTE(review): presumably the remaining cycle count — confirm
int icount;
};
internal_rsp_state *m_rsp_state;
void update_scalar_op_deduction();
void update_vector_op_deduction();
FILE *m_exec_output;
uint32_t m_sr;
uint32_t m_step_count;
uint32_t m_ppc;
uint32_t m_nextpc;
uint16_t m_ppc;
uint16_t m_nextpc;
protected:
memory_access<32, 2, 0, ENDIANNESS_BIG>::cache m_pcache;
memory_access<32, 2, 0, ENDIANNESS_BIG>::specific m_program;
memory_access<12, 2, 0, ENDIANNESS_BIG>::cache m_icache;
memory_access<12, 2, 0, ENDIANNESS_BIG>::specific m_imem;
memory_access<12, 2, 0, ENDIANNESS_BIG>::cache m_dcache;
memory_access<12, 2, 0, ENDIANNESS_BIG>::specific m_dmem;
private:
std::unique_ptr<cop2> m_cop2;
// One 128-bit vector register, addressable through overlapping views.
union VECTOR_REG
{
// two 64-bit lanes
uint64_t d[2];
// four 32-bit lanes
uint32_t l[4];
// eight unsigned 16-bit lanes
uint16_t w[8];
// the same eight 16-bit lanes, viewed as signed
int16_t s[8];
// sixteen bytes
uint8_t b[16];
};
uint32_t *m_dmem32;
uint16_t *m_dmem16;
uint8_t *m_dmem8;
uint32_t *m_imem32;
uint16_t *m_imem16;
uint8_t *m_imem8;
// One 64-bit accumulator element, addressable whole or in slices.
union ACCUMULATOR_REG
{
// the full 64-bit value
uint64_t q;
// two 32-bit halves
uint32_t l[2];
// four 16-bit slices
uint16_t w[4];
};
uint32_t m_debugger_temp;
bool m_isdrc;
uint16_t m_pc_temp;
uint16_t m_ppc_temp;
uint16_t m_nextpc_temp;
devcb_read32 m_dp_reg_r_func;
devcb_write32 m_dp_reg_w_func;
@ -242,42 +177,40 @@ private:
devcb_write32 m_sp_reg_w_func;
devcb_write32 m_sp_set_status_func;
uint8_t READ8(uint32_t address);
uint16_t READ16(uint32_t address);
uint32_t READ32(uint32_t address);
void WRITE8(uint32_t address, uint8_t data);
void WRITE16(uint32_t address, uint16_t data);
void WRITE32(uint32_t address, uint32_t data);
uint8_t read_dmem_byte(uint32_t address);
uint16_t read_dmem_word(uint32_t address);
uint32_t read_dmem_dword(uint32_t address);
void write_dmem_byte(uint32_t address, uint8_t data);
void write_dmem_word(uint32_t address, uint16_t data);
void write_dmem_dword(uint32_t address, uint32_t data);
uint32_t get_cop0_reg(int reg);
void set_cop0_reg(int reg, uint32_t data);
void load_fast_iregs(drcuml_block &block);
void save_fast_iregs(drcuml_block &block);
uint8_t DM_READ8(uint32_t address);
uint16_t DM_READ16(uint32_t address);
uint32_t DM_READ32(uint32_t address);
void DM_WRITE8(uint32_t address, uint8_t data);
void DM_WRITE16(uint32_t address, uint16_t data);
void DM_WRITE32(uint32_t address, uint32_t data);
void rspcom_init();
void execute_run_drc();
void code_flush_cache();
void code_compile_block(offs_t pc);
void static_generate_entry_point();
void static_generate_nocode_handler();
void static_generate_out_of_cycles();
void static_generate_memory_accessor(int size, int iswrite, const char *name, uml::code_handle *&handleptr);
void generate_update_cycles(drcuml_block &block, compiler_state &compiler, uml::parameter param, bool allow_exception);
void generate_checksum_block(drcuml_block &block, compiler_state &compiler, const opcode_desc *seqhead, const opcode_desc *seqlast);
void generate_sequence_instruction(drcuml_block &block, compiler_state &compiler, const opcode_desc *desc);
void generate_delay_slot_and_branch(drcuml_block &block, compiler_state &compiler, const opcode_desc *desc, uint8_t linkreg);
void generate_branch(drcuml_block &block, compiler_state &compiler, const opcode_desc *desc);
bool generate_opcode(drcuml_block &block, compiler_state &compiler, const opcode_desc *desc);
bool generate_special(drcuml_block &block, compiler_state &compiler, const opcode_desc *desc);
bool generate_regimm(drcuml_block &block, compiler_state &compiler, const opcode_desc *desc);
bool generate_cop0(drcuml_block &block, compiler_state &compiler, const opcode_desc *desc);
void log_add_disasm_comment(drcuml_block &block, uint32_t pc, uint32_t op);
};
// COP2 (vectors)
uint16_t SATURATE_ACCUM(int accum, int slice, uint16_t negative, uint16_t positive);
uint16_t m_vres[8];
VECTOR_REG m_v[32];
ACCUMULATOR_REG m_accum[8];
uint8_t m_vcarry;
uint8_t m_vcompare;
uint8_t m_vclip1;
uint8_t m_vzero;
uint8_t m_vclip2;
int32_t m_reciprocal_res;
uint32_t m_reciprocal_high;
int32_t m_dp_allowed;
void handle_cop2(uint32_t op);
void handle_lwc2(uint32_t op);
void handle_swc2(uint32_t op);
void handle_vector_ops(uint32_t op);
uint32_t m_div_in;
uint32_t m_div_out;
};
DECLARE_DEVICE_TYPE(RSP, rsp_device)

View File

@ -157,6 +157,8 @@ void rsp_disassembler::disasm_cop2(std::ostream &stream, uint32_t op)
case 0x2b: util::stream_format(stream, "vnor %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break;
case 0x2c: util::stream_format(stream, "vxor %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break;
case 0x2d: util::stream_format(stream, "vnxor %s, %s, %s%s", vreg[dest], vreg[s1], vreg[s2], element[el]); break;
case 0x2e: util::stream_format(stream, "v056 %s, %s[%c]", vreg[dest], vreg[s2], element2[el][s1 & 7]); break;
case 0x2f: util::stream_format(stream, "v057 %s, %s[%c]", vreg[dest], vreg[s2], element2[el][s1 & 7]); break;
case 0x30: util::stream_format(stream, "vrcp %s[%d], %s[%c]", vreg[dest], s1 & 7, vreg[s2], element2[el][7-(s1 & 7)]); break;
case 0x31: util::stream_format(stream, "vrcpl %s[%d], %s[%c]", vreg[dest], s1 & 7, vreg[s2], element2[el][7-(s1 & 7)]); break;
case 0x32: util::stream_format(stream, "vrcph %s[%d], %s[%c]", vreg[dest], s1 & 7, vreg[s2], element2[el][7-(s1 & 7)]); break;
@ -165,6 +167,8 @@ void rsp_disassembler::disasm_cop2(std::ostream &stream, uint32_t op)
case 0x35: util::stream_format(stream, "vrsql %s[%d], %s[%c]", vreg[dest], s1 & 7, vreg[s2], element2[el][7-(s1 & 7)]); break;
case 0x36: util::stream_format(stream, "vrsqh %s[%d], %s[%c]", vreg[dest], s1 & 7, vreg[s2], element2[el][7-(s1 & 7)]); break;
case 0x37: util::stream_format(stream, "vnop"); break;
case 0x3b: util::stream_format(stream, "v073 %s, %s[%c]", vreg[dest], vreg[s2], element2[el][s1 & 7]); break;
case 0x3f: util::stream_format(stream, "vnull"); break;
default: util::stream_format(stream, "??? (VECTOR OP)"); break;
}
break;

File diff suppressed because it is too large Load Diff

View File

@ -1,395 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Ryan Holtz,Tyler J. Stachecki
/***************************************************************************
rspcp2.h
Interface file for Reality Signal Processor (RSP) vector extensions.
***************************************************************************/
#ifndef MAME_CPU_RSP_RSPCP2_H
#define MAME_CPU_RSP_RSPCP2_H
#pragma once
#include "rsp.h"
#include "rspdiv.h"
#include "cpu/drcuml.h"
#define SIMD_OFF (1)
#if (defined(__SSE2__) || defined(__SSE3__) || defined(__SSSE3__) || defined(__SSE4_1__) || defined(__SSE4_2__))
#define SSE_AVAILABLE (1)
#else
#define SSE_AVAILABLE (0)
#endif
#if (SSE_AVAILABLE || defined(_MSC_VER)) && defined(PTR64) && !SIMD_OFF
#define USE_SIMD (1)
#else
#define USE_SIMD (0)
#endif
#if USE_SIMD
#ifdef _MSC_VER
#define __align16 __declspec(align(16))
#else
#define __align16 __attribute__((aligned(16)))
#endif
#if (defined(__SSE4_2__) || defined(_MSC_VER))
#include <nmmintrin.h>
#elif (defined(__SSE4_1__) || defined(_MSC_VER))
#include <smmintrin.h>
#elif (defined(__SSSE3__) || defined(_MSC_VER))
#include <tmmintrin.h>
#elif (defined(__SSE3__ ) || defined(_MSC_VER))
#include <pmmintrin.h>
#else
#include <emmintrin.h>
#endif
typedef __m128i rsp_vec_t;
#endif
// One 128-bit vector register, addressable through overlapping views.
union VECTOR_REG
{
// two 64-bit lanes
uint64_t d[2];
// four 32-bit lanes
uint32_t l[4];
// eight unsigned 16-bit lanes
uint16_t s[8];
// sixteen bytes
uint8_t b[16];
// Native SIMD view of the same 128 bits; only present in USE_SIMD builds.
#if USE_SIMD
rsp_vec_t v;
#endif
};
// One 64-bit accumulator element, addressable whole or in slices.
union ACCUMULATOR_REG
{
// the full 64-bit value
uint64_t q;
// two 32-bit halves
uint32_t l[2];
// four 16-bit slices
uint16_t w[4];
};
struct compiler_state;
// Base class for the RSP vector extension (COP2) support.
//
// Every per-instruction hook declared here is a virtual with an empty body,
// and the generate_* hooks simply report success; rsp_device::cop2_drc
// derives from this class and overrides them.
class rsp_device::cop2
{
friend class rsp_device;
public:
cop2(rsp_device &rsp, running_machine &machine);
protected:
virtual void init();
virtual void start();
// Code-generation hooks; the base versions emit nothing and return true.
virtual bool generate_cop2(drcuml_block &block, rsp_device::compiler_state &compiler, const opcode_desc *desc) { return true; }
virtual bool generate_lwc2(drcuml_block &block, rsp_device::compiler_state &compiler, const opcode_desc *desc) { return true; }
virtual bool generate_swc2(drcuml_block &block, rsp_device::compiler_state &compiler, const opcode_desc *desc) { return true; }
virtual void state_string_export(const int index, std::string &str) const;
public:
virtual ~cop2();
// l*v hooks (presumably the LWC2 vector loads — confirm); empty here.
virtual void lbv() { }
virtual void lsv() { }
virtual void llv() { }
virtual void ldv() { }
virtual void lqv() { }
virtual void lrv() { }
virtual void lpv() { }
virtual void luv() { }
virtual void lhv() { }
virtual void lfv() { }
virtual void lwv() { }
virtual void ltv() { }
// s*v hooks (presumably the SWC2 vector stores — confirm); empty here.
virtual void sbv() { }
virtual void ssv() { }
virtual void slv() { }
virtual void sdv() { }
virtual void sqv() { }
virtual void srv() { }
virtual void spv() { }
virtual void suv() { }
virtual void shv() { }
virtual void sfv() { }
virtual void swv() { }
virtual void stv() { }
// Vector operation hooks; empty here.
virtual void vmulf() { }
virtual void vmulu() { }
virtual void vmudl() { }
virtual void vmudm() { }
virtual void vmudn() { }
virtual void vmudh() { }
virtual void vmacf() { }
virtual void vmacu() { }
virtual void vmadl() { }
virtual void vmadm() { }
virtual void vmadn() { }
virtual void vmadh() { }
virtual void vadd() { }
virtual void vsub() { }
virtual void vabs() { }
virtual void vaddc() { }
virtual void vsubc() { }
virtual void vaddb() { }
virtual void vsaw() { }
virtual void vlt() { }
virtual void veq() { }
virtual void vne() { }
virtual void vge() { }
virtual void vcl() { }
virtual void vch() { }
virtual void vcr() { }
virtual void vmrg() { }
virtual void vand() { }
virtual void vnand() { }
virtual void vor() { }
virtual void vnor() { }
virtual void vxor() { }
virtual void vnxor() { }
virtual void vrcp() { }
virtual void vrcpl() { }
virtual void vrcph() { }
virtual void vmov() { }
virtual void vrsql() { }
virtual void vrsqh() { }
virtual void vrsq() { }
// Unlike the hooks above, these four are only declared here; their
// definitions are not inline in this header.
virtual void mfc2();
virtual void cfc2();
virtual void mtc2();
virtual void ctc2();
virtual void handle_cop2(uint32_t op);
void log_instruction_execution();
virtual void cfunc_unimplemented_opcode() { }
void dump(uint32_t op);
void dump_dmem();
protected:
virtual bool generate_vector_opcode(drcuml_block &block, rsp_device::compiler_state &compiler, const opcode_desc *desc) { return true; }
uint16_t SATURATE_ACCUM(int accum, int slice, uint16_t negative, uint16_t positive);
// Data that needs to be stored close to the generated DRC code
struct internal_rspcop2_state
{
uint32_t op;
};
internal_rspcop2_state *m_rspcop2_state;
rsp_device& m_rsp;
running_machine& m_machine;
uint32_t m_vres[8]; /* used for temporary vector results */
// The register file is declared with 16-byte alignment only in SIMD builds.
#if USE_SIMD
__align16 VECTOR_REG m_v[32];
#else
VECTOR_REG m_v[32];
#endif
ACCUMULATOR_REG m_accum[8];
uint16_t m_vflag[6][8];
int32_t m_reciprocal_res;
uint32_t m_reciprocal_high;
int32_t m_dp_allowed;
// Everything from here to the matching #endif exists only in SIMD builds.
#if USE_SIMD
// Indices for the three flag banks (m_flags below has three entries).
enum rsp_flags_t {
RSP_VCO = 0,
RSP_VCC = 1,
RSP_VCE = 2
};
// Offsets, in uint16_t elements, of each accumulator slice; they match the
// offsets used by read_acc_* / write_acc_* below.
enum rsp_acc_t {
RSP_ACC_LO = 16,
RSP_ACC_MD = 8,
RSP_ACC_HI = 0,
};
enum rsp_mem_request_type {
RSP_MEM_REQUEST_NONE,
RSP_MEM_REQUEST_INT_MEM,
RSP_MEM_REQUEST_VECTOR,
RSP_MEM_REQUEST_FOURTH,
RSP_MEM_REQUEST_HALF,
RSP_MEM_REQUEST_PACK,
RSP_MEM_REQUEST_QUAD,
RSP_MEM_REQUEST_REST,
RSP_MEM_REQUEST_UPACK
};
// Unions overlaying 1, 2 or 3 rsp_vec_t values with a flat uint16_t array
// of the same size.
union aligned_rsp_1vect_t {
rsp_vec_t __align;
uint16_t s[8];
};
union aligned_rsp_2vect_t {
rsp_vec_t __align[2];
uint16_t s[16];
};
union aligned_rsp_3vect_t {
rsp_vec_t __align[3];
uint16_t s[24];
};
__align16 aligned_rsp_1vect_t m_vdqm;
__align16 aligned_rsp_2vect_t m_flags[3];
__align16 aligned_rsp_3vect_t m_acc;
uint32_t m_dp_flag;
// Constant lookup tables used by the SIMD helpers; a single static instance
// (m_vec_helpers) is declared right after the type.
typedef struct
{
rsp_vec_t dummy_for_alignment;
const uint16_t logic_mask[2][8];
const uint16_t vrsq_mask_table[8][8];
const uint16_t shuffle_keys[16][8];
const uint16_t sll_b2l_keys[16][8];
const uint16_t sll_l2b_keys[16][8];
const uint16_t srl_b2l_keys[16][8];
const uint16_t ror_b2l_keys[16][8];
const uint16_t rol_l2b_keys[16][8];
const uint16_t ror_l2b_keys[16][8];
const uint16_t qr_lut[16][8];
const uint16_t bdls_lut[4][4];
const uint16_t word_reverse[8];
} vec_helpers_t;
static const vec_helpers_t m_vec_helpers;
rsp_vec_t vec_load_and_shuffle_operand(const uint16_t* src, uint32_t element);
// Despite the name, the shifts sign-extend from bit 6 (a 7-bit field); the
// result is then masked to its low 12 bits.
static inline uint32_t sign_extend_6(int32_t i) {
return ((i << (32 - 7)) >> (32 - 7)) & 0xfff;
}
// Aligned 128-bit load (_mm_load_si128) from src.
static inline rsp_vec_t vec_load_unshuffled_operand(const void* src)
{
return _mm_load_si128((rsp_vec_t*) src);
}
// Aligned 128-bit store (_mm_store_si128) to dest.
static inline void vec_write_operand(uint16_t* dest, rsp_vec_t src)
{
_mm_store_si128((rsp_vec_t*) dest, src);
}
// Slice accessors. The pointer arithmetic below is in uint16_t elements:
// sizeof(rsp_vec_t) elements for the LO slice and half of that for MD,
// which for a 16-byte rsp_vec_t equals RSP_ACC_LO (16) and RSP_ACC_MD (8).
static inline rsp_vec_t read_acc_lo(const uint16_t* acc)
{
return vec_load_unshuffled_operand(acc + sizeof(rsp_vec_t));
}
static inline rsp_vec_t read_acc_mid(const uint16_t* acc)
{
return vec_load_unshuffled_operand(acc + (sizeof(rsp_vec_t) >> 1));
}
static inline rsp_vec_t read_acc_hi(const void* acc)
{
return vec_load_unshuffled_operand(acc);
}
// Flag accessors: the "hi" half is read at the base pointer, while the "lo"
// half (and VCE) is read sizeof(rsp_vec_t)/2 uint16_t elements further on.
static inline rsp_vec_t read_vcc_lo(const uint16_t *vcc)
{
return vec_load_unshuffled_operand(vcc + (sizeof(rsp_vec_t) >> 1));
}
static inline rsp_vec_t read_vcc_hi(const uint16_t *vcc)
{
return vec_load_unshuffled_operand(vcc);
}
static inline rsp_vec_t read_vco_lo(const uint16_t *vco)
{
return vec_load_unshuffled_operand(vco + (sizeof(rsp_vec_t) >> 1));
}
static inline rsp_vec_t read_vco_hi(const uint16_t *vco)
{
return vec_load_unshuffled_operand(vco);
}
static inline rsp_vec_t read_vce(const uint16_t *vce)
{
return vec_load_unshuffled_operand(vce + (sizeof(rsp_vec_t) >> 1));
}
// Writers mirror the readers above and use the same element offsets.
static inline void write_acc_lo(uint16_t *acc, rsp_vec_t acc_lo)
{
return vec_write_operand(acc + sizeof(rsp_vec_t), acc_lo);
}
static inline void write_acc_mid(uint16_t *acc, rsp_vec_t acc_mid)
{
return vec_write_operand(acc + (sizeof(rsp_vec_t) >> 1), acc_mid);
}
static inline void write_acc_hi(uint16_t *acc, rsp_vec_t acc_hi)
{
return vec_write_operand(acc, acc_hi);
}
static inline void write_vcc_lo(uint16_t *vcc, rsp_vec_t vcc_lo)
{
return vec_write_operand(vcc + (sizeof(rsp_vec_t) >> 1), vcc_lo);
}
static inline void write_vcc_hi(uint16_t *vcc, rsp_vec_t vcc_hi)
{
return vec_write_operand(vcc, vcc_hi);
}
static inline void write_vco_lo(uint16_t *vcc, rsp_vec_t vco_lo)
{
return vec_write_operand(vcc + (sizeof(rsp_vec_t) >> 1), vco_lo);
}
static inline void write_vco_hi(uint16_t *vcc, rsp_vec_t vco_hi)
{
return vec_write_operand(vcc, vco_hi);
}
static inline void write_vce(uint16_t *vce, rsp_vec_t vce_r)
{
return vec_write_operand(vce + (sizeof(rsp_vec_t) >> 1), vce_r);
}
// Collapses sixteen 16-bit flag lanes into a 16-bit mask: both halves are
// packed to bytes with signed saturation and the byte sign bits gathered.
// The half stored at flags + sizeof(rsp_vec_t)/2 supplies the low eight bits.
static inline int16_t get_flags(const uint16_t *flags)
{
return _mm_movemask_epi8(_mm_packs_epi16(_mm_load_si128((rsp_vec_t*) (flags + (sizeof(rsp_vec_t) >> 1))), _mm_load_si128((rsp_vec_t*) flags)));
}
static inline rsp_vec_t vec_zero()
{
return _mm_setzero_si128();
}
void vec_load_group1(uint32_t addr, uint32_t element, uint16_t* regp, rsp_vec_t reg, rsp_vec_t dqm);
void vec_load_group2(uint32_t addr, uint32_t element, uint16_t* regp, rsp_vec_t reg, rsp_vec_t dqm, rsp_mem_request_type request_type);
void vec_load_group4(uint32_t addr, uint32_t element, uint16_t* regp, rsp_vec_t reg, rsp_vec_t dqm, rsp_mem_request_type request_type);
void vec_store_group1(uint32_t addr, uint32_t element, uint16_t* regp, rsp_vec_t reg, rsp_vec_t dqm);
void vec_store_group2(uint32_t addr, uint32_t element, uint16_t* regp, rsp_vec_t reg, rsp_vec_t dqm, rsp_mem_request_type request_type);
void vec_store_group4(uint32_t addr, uint32_t element, uint16_t* regp, rsp_vec_t reg, rsp_vec_t dqm, rsp_mem_request_type request_type);
// These headers are included inside the class body on purpose: whatever they
// define becomes a member of cop2.
#include "clamp.h"
#include "vabs.h"
#include "vadd.h"
#include "vaddc.h"
#include "vand.h"
#include "vch.h"
#include "vcmp.h"
#include "vcl.h"
#include "vcr.h"
#include "vdivh.h"
#include "vmac.h"
#include "vmov.h"
#include "vmrg.h"
#include "vmul.h"
#include "vmulh.h"
#include "vmull.h"
#include "vmulm.h"
#include "vmuln.h"
#include "vor.h"
#include "vrcpsq.h"
#include "vrsq.h"
#include "vsub.h"
#include "vsubc.h"
#include "vxor.h"
#include "vldst.h"
#endif
private:
void handle_lwc2(uint32_t op);
void handle_swc2(uint32_t op);
void handle_vector_ops(uint32_t op);
uint32_t m_div_in;
uint32_t m_div_out;
};
#endif // MAME_CPU_RSP_RSPCP2_H

File diff suppressed because it is too large Load Diff

View File

@ -1,179 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Ryan Holtz
/***************************************************************************
rspcp2d.h
Interface file for Reality Signal Processor (RSP) vector extensions
using Universal Machine Language (UML) dynamic recompilation.
***************************************************************************/
#ifndef MAME_CPU_RSP_RSPCP2D_H
#define MAME_CPU_RSP_RSPCP2D_H
#pragma once
#include "rsp.h"
#include "rspcp2.h"
#include "cpu/drcuml.h"
// COP2 implementation for the UML dynamic recompiler.
//
// Overrides every hook that rsp_device::cop2 declares with an empty or
// default body, and adds one static cfunc_* trampoline per hook.
class rsp_device::cop2_drc : public rsp_device::cop2
{
friend class rsp_device;
public:
// Forwards both arguments to the cop2 constructor; adds no state of its own.
cop2_drc(rsp_device &rsp, running_machine &machine) : cop2(rsp, machine) { }
private:
virtual bool generate_cop2(drcuml_block &block, rsp_device::compiler_state &compiler, const opcode_desc *desc) override;
virtual bool generate_lwc2(drcuml_block &block, rsp_device::compiler_state &compiler, const opcode_desc *desc) override;
virtual bool generate_swc2(drcuml_block &block, rsp_device::compiler_state &compiler, const opcode_desc *desc) override;
virtual void state_string_export(const int index, std::string &str) const override;
void cfunc_unimplemented_opcode() override;
// Static trampolines: each takes an untyped pointer, treats it as a cop2*
// and calls the virtual of the same name on it.
// NOTE(review): presumably these are the C callbacks handed to generated code — confirm.
static void unimplemented_opcode(void *param) { ((cop2 *)param)->cfunc_unimplemented_opcode(); }
static void cfunc_lbv(void *param) { ((cop2 *)param)->lbv(); }
static void cfunc_lsv(void *param) { ((cop2 *)param)->lsv(); }
static void cfunc_llv(void *param) { ((cop2 *)param)->llv(); }
static void cfunc_ldv(void *param) { ((cop2 *)param)->ldv(); }
static void cfunc_lqv(void *param) { ((cop2 *)param)->lqv(); }
static void cfunc_lrv(void *param) { ((cop2 *)param)->lrv(); }
static void cfunc_lpv(void *param) { ((cop2 *)param)->lpv(); }
static void cfunc_luv(void *param) { ((cop2 *)param)->luv(); }
static void cfunc_lhv(void *param) { ((cop2 *)param)->lhv(); }
static void cfunc_lfv(void *param) { ((cop2 *)param)->lfv(); }
static void cfunc_lwv(void *param) { ((cop2 *)param)->lwv(); }
static void cfunc_ltv(void *param) { ((cop2 *)param)->ltv(); }
static void cfunc_sbv(void *param) { ((cop2 *)param)->sbv(); }
static void cfunc_ssv(void *param) { ((cop2 *)param)->ssv(); }
static void cfunc_slv(void *param) { ((cop2 *)param)->slv(); }
static void cfunc_sdv(void *param) { ((cop2 *)param)->sdv(); }
static void cfunc_sqv(void *param) { ((cop2 *)param)->sqv(); }
static void cfunc_srv(void *param) { ((cop2 *)param)->srv(); }
static void cfunc_spv(void *param) { ((cop2 *)param)->spv(); }
static void cfunc_suv(void *param) { ((cop2 *)param)->suv(); }
static void cfunc_shv(void *param) { ((cop2 *)param)->shv(); }
static void cfunc_sfv(void *param) { ((cop2 *)param)->sfv(); }
static void cfunc_swv(void *param) { ((cop2 *)param)->swv(); }
static void cfunc_stv(void *param) { ((cop2 *)param)->stv(); }
static void cfunc_vmulf(void *param) { ((cop2 *)param)->vmulf(); }
static void cfunc_vmulu(void *param) { ((cop2 *)param)->vmulu(); }
static void cfunc_vmudl(void *param) { ((cop2 *)param)->vmudl(); }
static void cfunc_vmudm(void *param) { ((cop2 *)param)->vmudm(); }
static void cfunc_vmudn(void *param) { ((cop2 *)param)->vmudn(); }
static void cfunc_vmudh(void *param) { ((cop2 *)param)->vmudh(); }
static void cfunc_vmacf(void *param) { ((cop2 *)param)->vmacf(); }
static void cfunc_vmacu(void *param) { ((cop2 *)param)->vmacu(); }
static void cfunc_vmadl(void *param) { ((cop2 *)param)->vmadl(); }
static void cfunc_vmadm(void *param) { ((cop2 *)param)->vmadm(); }
static void cfunc_vmadn(void *param) { ((cop2 *)param)->vmadn(); }
static void cfunc_vmadh(void *param) { ((cop2 *)param)->vmadh(); }
static void cfunc_vadd(void *param) { ((cop2 *)param)->vadd(); }
static void cfunc_vsub(void *param) { ((cop2 *)param)->vsub(); }
static void cfunc_vabs(void *param) { ((cop2 *)param)->vabs(); }
static void cfunc_vaddc(void *param) { ((cop2 *)param)->vaddc(); }
static void cfunc_vsubc(void *param) { ((cop2 *)param)->vsubc(); }
static void cfunc_vaddb(void *param) { ((cop2 *)param)->vaddb(); }
static void cfunc_vsaw(void *param) { ((cop2 *)param)->vsaw(); }
static void cfunc_vlt(void *param) { ((cop2 *)param)->vlt(); }
static void cfunc_veq(void *param) { ((cop2 *)param)->veq(); }
static void cfunc_vne(void *param) { ((cop2 *)param)->vne(); }
static void cfunc_vge(void *param) { ((cop2 *)param)->vge(); }
static void cfunc_vcl(void *param) { ((cop2 *)param)->vcl(); }
static void cfunc_vch(void *param) { ((cop2 *)param)->vch(); }
static void cfunc_vcr(void *param) { ((cop2 *)param)->vcr(); }
static void cfunc_vmrg(void *param) { ((cop2 *)param)->vmrg(); }
static void cfunc_vand(void *param) { ((cop2 *)param)->vand(); }
static void cfunc_vnand(void *param) { ((cop2 *)param)->vnand(); }
static void cfunc_vor(void *param) { ((cop2 *)param)->vor(); }
static void cfunc_vnor(void *param) { ((cop2 *)param)->vnor(); }
static void cfunc_vxor(void *param) { ((cop2 *)param)->vxor(); }
static void cfunc_vnxor(void *param) { ((cop2 *)param)->vnxor(); }
static void cfunc_vrcp(void *param) { ((cop2 *)param)->vrcp(); }
static void cfunc_vrcpl(void *param) { ((cop2 *)param)->vrcpl(); }
static void cfunc_vrcph(void *param) { ((cop2 *)param)->vrcph(); }
static void cfunc_vmov(void *param) { ((cop2 *)param)->vmov(); }
static void cfunc_vrsq(void *param) { ((cop2 *)param)->vrsq(); }
static void cfunc_vrsql(void *param) { ((cop2 *)param)->vrsql(); }
static void cfunc_vrsqh(void *param) { ((cop2 *)param)->vrsqh(); }
static void cfunc_mfc2(void *param) { ((cop2 *)param)->mfc2(); }
static void cfunc_cfc2(void *param) { ((cop2 *)param)->cfc2(); }
static void cfunc_mtc2(void *param) { ((cop2 *)param)->mtc2(); }
static void cfunc_ctc2(void *param) { ((cop2 *)param)->ctc2(); }
public:
// Overrides of the per-instruction hooks; the definitions are not in this header.
virtual void lbv() override;
virtual void lsv() override;
virtual void llv() override;
virtual void ldv() override;
virtual void lqv() override;
virtual void lrv() override;
virtual void lpv() override;
virtual void luv() override;
virtual void lhv() override;
virtual void lfv() override;
virtual void lwv() override;
virtual void ltv() override;
virtual void sbv() override;
virtual void ssv() override;
virtual void slv() override;
virtual void sdv() override;
virtual void sqv() override;
virtual void srv() override;
virtual void spv() override;
virtual void suv() override;
virtual void shv() override;
virtual void sfv() override;
virtual void swv() override;
virtual void stv() override;
virtual void vmulf() override;
virtual void vmulu() override;
virtual void vmudl() override;
virtual void vmudm() override;
virtual void vmudn() override;
virtual void vmudh() override;
virtual void vmacf() override;
virtual void vmacu() override;
virtual void vmadl() override;
virtual void vmadm() override;
virtual void vmadn() override;
virtual void vmadh() override;
virtual void vadd() override;
virtual void vsub() override;
virtual void vabs() override;
virtual void vaddc() override;
virtual void vsubc() override;
virtual void vaddb() override;
virtual void vsaw() override;
virtual void vlt() override;
virtual void veq() override;
virtual void vne() override;
virtual void vge() override;
virtual void vcl() override;
virtual void vch() override;
virtual void vcr() override;
virtual void vmrg() override;
virtual void vand() override;
virtual void vnand() override;
virtual void vor() override;
virtual void vnor() override;
virtual void vxor() override;
virtual void vnxor() override;
virtual void vrcp() override;
virtual void vrcpl() override;
virtual void vrcph() override;
virtual void vmov() override;
virtual void vrsql() override;
virtual void vrsqh() override;
virtual void vrsq() override;
virtual void mfc2() override;
virtual void cfc2() override;
virtual void mtc2() override;
virtual void ctc2() override;
private:
virtual bool generate_vector_opcode(drcuml_block &block, rsp_device::compiler_state &compiler, const opcode_desc *desc) override;
};
#endif // MAME_CPU_RSP_RSPCP2D_H

File diff suppressed because it is too large Load Diff

View File

@ -1,307 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Ryan Holtz
/***************************************************************************
rspfe.cpp
Front-end for RSP recompiler
***************************************************************************/
#include "emu.h"
#include "rspfe.h"
#include "rspdefs.h"
//**************************************************************************
// RSP FRONTEND
//**************************************************************************
//-------------------------------------------------
//  frontend - constructor; hands the device, the
//  code window and the sequence limit to
//  drc_frontend and keeps a reference to the RSP
//-------------------------------------------------

rsp_device::frontend::frontend(
		rsp_device &rsp,
		uint32_t window_start,
		uint32_t window_end,
		uint32_t max_sequence)
	: drc_frontend(rsp, window_start, window_end, max_sequence)
	, m_rsp(rsp)
{
}
//-------------------------------------------------
//  describe - fetch one instruction word and fill
//  in its descriptor (length, cycles, register
//  usage, branch/memory flags); returns false for
//  major opcodes that are not handled here
//-------------------------------------------------

bool rsp_device::frontend::describe(opcode_desc &desc, const opcode_desc *prev)
{
	// read the instruction word through the program cache and record it in the descriptor
	const uint32_t op = desc.opptr.l[0] = m_rsp.m_pcache.read_dword((desc.physpc & 0x00000fff) | 0x1000);

	// baseline for every instruction: one cycle, four bytes
	desc.cycles = 1;
	desc.length = 4;

	// dispatch on the major opcode field
	const uint32_t opswitch = op >> 26;
	switch (opswitch)
	{
		// groups that are decoded by a dedicated helper
		case 0x00:  // SPECIAL
			return describe_special(op, desc);

		case 0x01:  // REGIMM
			return describe_regimm(op, desc);

		case 0x10:  // COP0
			return describe_cop0(op, desc);

		case 0x12:  // COP2
			return describe_cop2(op, desc);

		// absolute jumps: one delay slot, sequence ends here
		case 0x02:  // J
			desc.delayslots = 1;
			desc.targetpc = ((LIMMVAL << 2) & 0x00000fff) | 0x1000;
			desc.flags |= OPFLAG_IS_UNCONDITIONAL_BRANCH | OPFLAG_END_SEQUENCE;
			return true;

		case 0x03:  // JAL
			desc.delayslots = 1;
			desc.targetpc = ((LIMMVAL << 2) & 0x00000fff) | 0x1000;
			desc.flags |= OPFLAG_IS_UNCONDITIONAL_BRANCH | OPFLAG_END_SEQUENCE;
			desc.regout[0] |= REGFLAG_R(31);
			return true;

		// two-register compare-and-branch
		case 0x04:  // BEQ
		case 0x05:  // BNE
		{
			// comparing a register against itself is treated as an unconditional branch
			const bool always_taken = (opswitch == 0x04 || opswitch == 0x14) && RSREG == RTREG;
			if (always_taken)
			{
				desc.flags |= OPFLAG_IS_UNCONDITIONAL_BRANCH | OPFLAG_END_SEQUENCE;
			}
			else
			{
				desc.flags |= OPFLAG_IS_CONDITIONAL_BRANCH;
				desc.regin[0] |= REGFLAG_R(RSREG) | REGFLAG_R(RTREG);
			}
			desc.skipslots = (opswitch & 0x10) ? 1 : 0;
			desc.delayslots = 1;
			desc.targetpc = ((desc.pc + 4 + SIMMVAL * 4) & 0x00000fff) | 0x1000;
			return true;
		}

		// single-register compare-against-zero branch
		case 0x06:  // BLEZ
		case 0x07:  // BGTZ
		{
			const bool always_taken = (opswitch == 0x06 || opswitch == 0x16) && RSREG == 0;
			if (always_taken)
			{
				desc.flags |= OPFLAG_IS_UNCONDITIONAL_BRANCH | OPFLAG_END_SEQUENCE;
			}
			else
			{
				desc.flags |= OPFLAG_IS_CONDITIONAL_BRANCH;
				desc.regin[0] |= REGFLAG_R(RSREG);
			}
			desc.skipslots = (opswitch & 0x10) ? 1 : 0;
			desc.delayslots = 1;
			desc.targetpc = ((desc.pc + 4 + SIMMVAL * 4) & 0x00000fff) | 0x1000;
			return true;
		}

		// immediate ALU operations: read RS, write RT
		case 0x08:  // ADDI
		case 0x09:  // ADDIU
		case 0x0a:  // SLTI
		case 0x0b:  // SLTIU
		case 0x0c:  // ANDI
		case 0x0d:  // ORI
		case 0x0e:  // XORI
			desc.regout[0] |= REGFLAG_R(RTREG);
			desc.regin[0] |= REGFLAG_R(RSREG);
			return true;

		case 0x0f:  // LUI
			desc.regout[0] |= REGFLAG_R(RTREG);
			return true;

		// scalar loads: read RS, write RT
		case 0x20:  // LB
		case 0x21:  // LH
		case 0x23:  // LW
		case 0x24:  // LBU
		case 0x25:  // LHU
		case 0x27:  // LWU
			desc.flags |= OPFLAG_READS_MEMORY;
			desc.regout[0] |= REGFLAG_R(RTREG);
			desc.regin[0] |= REGFLAG_R(RSREG);
			return true;

		// scalar stores: read RS and RT
		case 0x28:  // SB
		case 0x29:  // SH
		case 0x2b:  // SW
			desc.flags |= OPFLAG_WRITES_MEMORY;
			desc.regin[0] |= REGFLAG_R(RSREG) | REGFLAG_R(RTREG);
			return true;

		// coprocessor 2 loads/stores: only RS is tracked
		case 0x32:  // LWC2
			desc.flags |= OPFLAG_READS_MEMORY;
			desc.regin[0] |= REGFLAG_R(RSREG);
			return true;

		case 0x3a:  // SWC2
			desc.flags |= OPFLAG_WRITES_MEMORY;
			desc.regin[0] |= REGFLAG_R(RSREG);
			return true;
	}

	// anything else is not described
	return false;
}
//-------------------------------------------------
//  describe_special - describe an instruction of
//  the 'special' group, selected by the low six
//  bits of the opcode; returns false when the
//  function code is not handled
//-------------------------------------------------

bool rsp_device::frontend::describe_special(uint32_t op, opcode_desc &desc)
{
	const uint32_t funct = op & 63;
	switch (funct)
	{
		// shifts by immediate amount: read RT, write RD
		case 0x00:  // SLL
		case 0x02:  // SRL
		case 0x03:  // SRA
			desc.regout[0] |= REGFLAG_R(RDREG);
			desc.regin[0] |= REGFLAG_R(RTREG);
			return true;

		// three-register operations: read RS and RT, write RD
		case 0x04:  // SLLV
		case 0x06:  // SRLV
		case 0x07:  // SRAV
		case 0x20:  // ADD
		case 0x21:  // ADDU
		case 0x22:  // SUB
		case 0x23:  // SUBU
		case 0x24:  // AND
		case 0x25:  // OR
		case 0x26:  // XOR
		case 0x27:  // NOR
		case 0x2a:  // SLT
		case 0x2b:  // SLTU
			desc.regout[0] |= REGFLAG_R(RDREG);
			desc.regin[0] |= REGFLAG_R(RSREG) | REGFLAG_R(RTREG);
			return true;

		// register jumps: target only known at run time, one delay slot
		case 0x08:  // JR
			desc.delayslots = 1;
			desc.targetpc = BRANCH_TARGET_DYNAMIC;
			desc.flags |= OPFLAG_IS_UNCONDITIONAL_BRANCH | OPFLAG_END_SEQUENCE;
			desc.regin[0] |= REGFLAG_R(RSREG);
			return true;

		case 0x09:  // JALR
			desc.delayslots = 1;
			desc.targetpc = BRANCH_TARGET_DYNAMIC;
			desc.flags |= OPFLAG_IS_UNCONDITIONAL_BRANCH | OPFLAG_END_SEQUENCE;
			desc.regout[0] |= REGFLAG_R(RDREG);
			desc.regin[0] |= REGFLAG_R(RSREG);
			return true;

		// BREAK ends the sequence; targetpc receives bits taken from the opcode word
		case 0x0d:  // BREAK
			desc.targetpc = (op >> 5) & 0x000fffff;
			desc.flags |= OPFLAG_IS_UNCONDITIONAL_BRANCH | OPFLAG_END_SEQUENCE;
			return true;
	}

	return false;
}
//-------------------------------------------------
//  describe_regimm - describe an instruction of
//  the 'regimm' group, selected by the RT field;
//  returns false for RT values other than
//  BLTZ/BGEZ/BLTZAL/BGEZAL
//-------------------------------------------------

bool rsp_device::frontend::describe_regimm(uint32_t op, opcode_desc &desc)
{
	const uint32_t rt = RTREG;

	// only the four compare-against-zero branches are described
	switch (rt)
	{
		case 0x00:  // BLTZ
		case 0x01:  // BGEZ
		case 0x10:  // BLTZAL
		case 0x11:  // BGEZAL
			break;

		default:
			return false;
	}

	// the ">= 0" forms (odd RT) testing register 0 are treated as unconditional
	const bool always_taken = (rt & 0x01) != 0 && RSREG == 0;
	if (always_taken)
	{
		desc.flags |= OPFLAG_IS_UNCONDITIONAL_BRANCH | OPFLAG_END_SEQUENCE;
	}
	else
	{
		desc.flags |= OPFLAG_IS_CONDITIONAL_BRANCH;
		desc.regin[0] |= REGFLAG_R(RSREG);
	}

	// the linking forms (RT bit 4 set) also write register 31
	if (rt & 0x10)
		desc.regout[0] |= REGFLAG_R(31);

	desc.skipslots = (rt & 0x02) ? 1 : 0;
	desc.delayslots = 1;
	desc.targetpc = ((desc.pc + 4 + SIMMVAL * 4) & 0x00000fff) | 0x1000;
	return true;
}
//-------------------------------------------------
//  describe_cop0 - describe an instruction of the
//  COP0 group, selected by the RS field; returns
//  false for anything other than MFCz/MTCz
//-------------------------------------------------

bool rsp_device::frontend::describe_cop0(uint32_t op, opcode_desc &desc)
{
	const uint32_t sub = RSREG;

	if (sub == 0x00)    // MFCz: writes RT
	{
		desc.regout[0] |= REGFLAG_R(RTREG);
		return true;
	}

	if (sub == 0x04)    // MTCz: reads RT
	{
		desc.regin[0] |= REGFLAG_R(RTREG);

		// a write to register 2 (SP_RD_LEN, initiating DMA) ends the sequence
		if (RDREG == 2)
			desc.flags |= OPFLAG_END_SEQUENCE;
		return true;
	}

	return false;
}
//-------------------------------------------------
//  describe_cop2 - describe an instruction of the
//  COP2 group, selected by the RS field; returns
//  false for anything other than the four
//  register-move forms
//-------------------------------------------------

bool rsp_device::frontend::describe_cop2(uint32_t op, opcode_desc &desc)
{
	const uint32_t sub = RSREG;

	// MFCz / CFCz: the scalar register RT is written
	if (sub == 0x00 || sub == 0x02)
	{
		desc.regout[0] |= REGFLAG_R(RTREG);
		return true;
	}

	// MTCz / CTCz: the scalar register RT is read
	if (sub == 0x04 || sub == 0x06)
	{
		desc.regin[0] |= REGFLAG_R(RTREG);
		return true;
	}

	return false;
}

View File

@ -1,55 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Ryan Holtz
/***************************************************************************
rspfe.h
Front-end for RSP recompiler
***************************************************************************/
#ifndef MAME_CPU_RSP_RSPFE_H
#define MAME_CPU_RSP_RSPFE_H
#pragma once
#include "rsp.h"
#include "cpu/drcfe.h"
//**************************************************************************
// CONSTANTS
//**************************************************************************
// register flags 0
// Bit mask for integer register n: bit n is set, except that register 0
// yields an empty mask. The arithmetic is unsigned so that REGFLAG_R(31)
// does not shift a 1 into the sign bit of a signed int (which would produce
// a negative value that sign-extends if it is ever widened).
#define REGFLAG_R(n) (((n) == 0) ? 0U : (1U << (n)))
//**************************************************************************
// TYPE DEFINITIONS
//**************************************************************************
// Instruction describer for the RSP recompiler, built on drc_frontend.
class rsp_device::frontend : public drc_frontend
{
public:
	// binds the front-end to an RSP device; the window bounds and sequence
	// limit are passed straight through to drc_frontend
	frontend(rsp_device &rsp, uint32_t window_start, uint32_t window_end, uint32_t max_sequence);

protected:
	// drc_frontend override: fills in the description of a single instruction
	virtual bool describe(opcode_desc &desc, const opcode_desc *prev) override;

private:
	// per-group describers, each returning false when the opcode is not handled
	bool describe_special(uint32_t op, opcode_desc &desc);
	bool describe_regimm(uint32_t op, opcode_desc &desc);
	bool describe_cop0(uint32_t op, opcode_desc &desc);
	bool describe_cop2(uint32_t op, opcode_desc &desc);

	// device whose program cache the opcodes are fetched from
	rsp_device &m_rsp;
};
#endif // MAME_CPU_RSP_RSPFE_H

View File

@ -1,15 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Per 16-bit lane: yields vt where vs > 0, 0 where vs == 0 and -vt where
// vs < 0. *acc_lo receives the wrapping result, the return value the
// signed-saturating one.
// NOTE(review): 'zero' is presumably an all-zero vector -- confirm at the caller.
inline rsp_vec_t vec_vabs(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo)
{
	const rsp_vec_t vs_negative = _mm_srai_epi16(vs, 15);   // all ones where vs < 0
	const rsp_vec_t vs_is_zero = _mm_cmpeq_epi16(vs, zero);

	// drop vt where vs == 0, then take the one's complement where vs < 0
	const rsp_vec_t flipped = _mm_xor_si128(_mm_andnot_si128(vs_is_zero, vt), vs_negative);

	// Subtracting the mask (-1) finishes the two's-complement negation.
	// The two results differ for vt = 0x8000 with vs negative: the wrapping
	// form stays 0x8000 while the saturating form becomes 0x7FFF.
	*acc_lo = _mm_sub_epi16(flipped, vs_negative);
	return _mm_subs_epi16(flipped, vs_negative);
}

View File

@ -1,16 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Per 16-bit lane: adds vs, vt and the carry-in. 'carry' is subtracted, so a
// lane holding 0xFFFF (-1) contributes +1. *acc_lo receives the wrapping sum;
// the return value is the signed-saturated sum.
inline rsp_vec_t vec_vadd(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t carry, rsp_vec_t *acc_lo)
{
	// Saturating a three-term sum in one go: apply the carry to the smaller
	// operand first, then add the larger one with saturation.
	const rsp_vec_t smaller = _mm_min_epi16(vs, vt);
	const rsp_vec_t larger = _mm_max_epi16(vs, vt);

	// the accumulator slice keeps the plain wrapping result
	*acc_lo = _mm_sub_epi16(_mm_add_epi16(vs, vt), carry);

	return _mm_adds_epi16(_mm_subs_epi16(smaller, carry), larger);
}

View File

@ -1,13 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Per 16-bit lane: returns the wrapping sum vs + vt and sets *sn to all ones
// in lanes where the unsigned addition overflowed (i.e. the saturated and the
// wrapping sums differ).
// NOTE(review): 'zero' is presumably an all-zero vector -- confirm at the caller.
inline rsp_vec_t vec_vaddc(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *sn)
{
	const rsp_vec_t wrapped = _mm_add_epi16(vs, vt);
	const rsp_vec_t clamped = _mm_adds_epu16(vs, vt);

	// equal sums mean no carry; comparing that mask against zero inverts it
	const rsp_vec_t no_carry = _mm_cmpeq_epi16(clamped, wrapped);
	*sn = _mm_cmpeq_epi16(no_carry, zero);

	return wrapped;
}

View File

@ -1,9 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Bitwise AND of vs and vt, XORed with the mask selected by bit 0 of iw.
// NOTE(review): logic_mask[0]/[1] are presumably all-zeros/all-ones so that
// bit 0 selects the negated (NAND) form -- confirm against the table.
inline rsp_vec_t vec_vand_vnand(uint32_t iw, rsp_vec_t vs, rsp_vec_t vt)
{
	const rsp_vec_t invert = _mm_load_si128((rsp_vec_t *) m_vec_helpers.logic_mask[iw & 0x1]);
	return _mm_xor_si128(_mm_and_si128(vs, vt), invert);
}

View File

@ -1,57 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Per 16-bit lane clip of vs against +/-vt.
// Out-parameters written: *sign (lanes where vs and vt differ in sign),
// *ge, *le, *vce, and *eq (which receives the INVERTED equality mask, see
// "Compute !eq" below).
// Returns, per lane, either vs or the sign-adjusted vt, selected by *le in
// lanes where *sign is set and by *ge elsewhere.
// NOTE(review): 'zero' is used as a comparison constant; presumably the
// caller passes an all-zero vector -- confirm.
inline rsp_vec_t vec_vch(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *ge, rsp_vec_t *le, rsp_vec_t *eq, rsp_vec_t *sign, rsp_vec_t *vce) {
// sign = (vs ^ vt) < 0
*sign = _mm_xor_si128(vs, vt);
*sign = _mm_cmplt_epi16(*sign, zero);
// sign_negvt = sign ? -vt : vt
// (xor with the mask followed by subtracting the mask is a conditional
// two's-complement negation)
rsp_vec_t sign_negvt = _mm_xor_si128(vt, *sign);
sign_negvt = _mm_sub_epi16(sign_negvt, *sign);
// Compute diff, diff_zero:
rsp_vec_t diff = _mm_sub_epi16(vs, sign_negvt);
rsp_vec_t diff_zero = _mm_cmpeq_epi16(diff, zero);
// Compute le/ge:
rsp_vec_t vt_neg = _mm_cmplt_epi16(vt, zero);
// Despite its name, diff_lez first holds the (diff > 0) mask; it is only
// inverted into (diff <= 0) after diff_gez has been derived from it.
rsp_vec_t diff_lez = _mm_cmpgt_epi16(diff, zero);
rsp_vec_t diff_gez = _mm_or_si128(diff_lez, diff_zero);
diff_lez = _mm_cmpeq_epi16(zero, diff_lez);
#if (defined(__SSE4_1__) || defined(_MSC_VER))
*ge = _mm_blendv_epi8(diff_gez, vt_neg, *sign);
*le = _mm_blendv_epi8(vt_neg, diff_lez, *sign);
#else
// Same selection as the blend above, spelled out with and/andnot/or.
*ge = _mm_and_si128(*sign, vt_neg);
diff_gez = _mm_andnot_si128(*sign, diff_gez);
*ge = _mm_or_si128(*ge, diff_gez);
*le = _mm_and_si128(*sign, diff_lez);
diff_lez = _mm_andnot_si128(*sign, vt_neg);
*le = _mm_or_si128(*le, diff_lez);
#endif
// Compute vce:
// (set in lanes where sign is set and diff equals the sign mask, i.e. -1)
*vce = _mm_cmpeq_epi16(diff, *sign);
*vce = _mm_and_si128(*vce, *sign);
// Compute !eq:
*eq = _mm_or_si128(diff_zero, *vce);
*eq = _mm_cmpeq_epi16(*eq, zero);
// Compute result:
#if (defined(__SSE4_1__) || defined(_MSC_VER))
rsp_vec_t diff_sel_mask = _mm_blendv_epi8(*ge, *le, *sign);
return _mm_blendv_epi8(vs, sign_negvt, diff_sel_mask);
#else
// diff_lez/diff_gez are reused here purely as scratch registers.
diff_lez = _mm_and_si128(*sign, *le);
diff_gez = _mm_andnot_si128(*sign, *ge);
rsp_vec_t diff_sel_mask = _mm_or_si128(diff_lez, diff_gez);
diff_lez = _mm_and_si128(diff_sel_mask, sign_negvt);
diff_gez = _mm_andnot_si128(diff_sel_mask, vs);
return _mm_or_si128(diff_lez, diff_gez);
#endif
}

View File

@ -1,65 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Per 16-bit lane clip of vs against +/-vt, driven by the masks passed in
// (eq, sign, vce). *ge and *le are read-modify-write: *le is replaced only in
// lanes where sign is set and eq is clear, *ge only in lanes where both sign
// and eq are clear. Returns, per lane, either vs or the sign-adjusted vt.
// NOTE(review): 'zero' is used as a comparison constant; presumably the
// caller passes an all-zero vector -- confirm.
inline rsp_vec_t vec_vcl(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *ge, rsp_vec_t *le, rsp_vec_t eq, rsp_vec_t sign, rsp_vec_t vce)
{
// sign_negvt = sign ? -vt : vt
rsp_vec_t sign_negvt = _mm_xor_si128(vt, sign);
sign_negvt = _mm_sub_epi16(sign_negvt, sign);
// Compute diff, diff_zero, ncarry, and nvce:
// Note: diff = sign ? (vs + vt) : (vs - vt).
rsp_vec_t diff = _mm_sub_epi16(vs, sign_negvt);
// ncarry: lanes where diff equals the unsigned-saturated vs + vt
rsp_vec_t ncarry = _mm_adds_epu16(vs, vt);
ncarry = _mm_cmpeq_epi16(diff, ncarry);
rsp_vec_t nvce = _mm_cmpeq_epi16(vce, zero);
rsp_vec_t diff_zero = _mm_cmpeq_epi16(diff, zero);
// Compute results for if (sign && ne):
// without vce both conditions must hold, with vce either one suffices
rsp_vec_t le_case1 = _mm_and_si128(diff_zero, ncarry);
le_case1 = _mm_and_si128(nvce, le_case1);
rsp_vec_t le_case2 = _mm_or_si128(diff_zero, ncarry);
le_case2 = _mm_and_si128(vce, le_case2);
rsp_vec_t le_eq = _mm_or_si128(le_case1, le_case2);
// Compute results for if (!sign && ne):
// (unsigned-saturating vt - vs is zero exactly when vs >= vt, unsigned)
rsp_vec_t ge_eq = _mm_subs_epu16(vt, vs);
ge_eq = _mm_cmpeq_epi16(ge_eq, zero);
// Blend everything together. Caveat: we don't update
// the results of ge/le if ne is false, so be careful.
rsp_vec_t do_le = _mm_andnot_si128(eq, sign);
#if (defined(__SSE4_1__) || defined(_MSC_VER))
*le = _mm_blendv_epi8(*le, le_eq, do_le);
#else
le_eq = _mm_and_si128(do_le, le_eq);
*le = _mm_andnot_si128(do_le, *le);
*le = _mm_or_si128(le_eq, *le);
#endif
// do_ge marks the lanes whose *ge is KEPT (sign or eq set)
rsp_vec_t do_ge = _mm_or_si128(sign, eq);
#if (defined(__SSE4_1__) || defined(_MSC_VER))
*ge = _mm_blendv_epi8(ge_eq, *ge, do_ge);
#else
*ge = _mm_and_si128(do_ge, *ge);
ge_eq = _mm_andnot_si128(do_ge, ge_eq);
*ge = _mm_or_si128(ge_eq, *ge);
#endif
// Mux the result based on the value of sign.
#if (defined(__SSE4_1__) || defined(_MSC_VER))
rsp_vec_t mux_mask = _mm_blendv_epi8(*ge, *le, sign);
#else
// do_le/do_ge are reused here purely as scratch registers.
do_le = _mm_and_si128(sign, *le);
do_ge = _mm_andnot_si128(sign, *ge);
rsp_vec_t mux_mask = _mm_or_si128(do_le, do_ge);
#endif
#if (defined(__SSE4_1__) || defined(_MSC_VER))
return _mm_blendv_epi8(vs, sign_negvt, mux_mask);
#else
sign_negvt = _mm_and_si128(mux_mask, sign_negvt);
vs = _mm_andnot_si128(mux_mask, vs);
return _mm_or_si128(sign_negvt, vs);
#endif
}

View File

@ -1,49 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Shared implementation of the four compare-and-select operations. The low
// two bits of iw pick the comparison (0 = VLT, 1 = VEQ, 2 = VNE, 3 = VGE).
// *le receives the per-lane comparison mask; the return value holds vs in
// lanes where the mask is set and vt elsewhere.
// NOTE(review): 'zero' is presumably an all-zero vector -- confirm at the caller.
inline rsp_vec_t vec_veq_vge_vlt_vne(uint32_t iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *le, rsp_vec_t eq, rsp_vec_t sign)
{
	const rsp_vec_t same = _mm_cmpeq_epi16(vs, vt);

	switch (iw & 0x3)
	{
		case 0x3: // VGE: greater, or equal unless both eq and sign are set
		{
			const rsp_vec_t greater = _mm_cmpgt_epi16(vs, vt);
			const rsp_vec_t excluded = _mm_and_si128(eq, sign);
			*le = _mm_or_si128(greater, _mm_andnot_si128(excluded, same));
			break;
		}

		case 0x2: // VNE: different, or equal with eq set
		{
			const rsp_vec_t differ = _mm_cmpeq_epi16(same, zero);
			*le = _mm_or_si128(_mm_and_si128(eq, same), differ);
			break;
		}

		case 0x1: // VEQ: equal with eq clear
			*le = _mm_andnot_si128(eq, same);
			break;

		default: // VLT: less, or equal with both eq and sign set
		{
			const rsp_vec_t less = _mm_cmplt_epi16(vs, vt);
			const rsp_vec_t tie = _mm_and_si128(sign, _mm_and_si128(eq, same));
			*le = _mm_or_si128(less, tie);
			break;
		}
	}

	// pick vs where the mask is set, vt elsewhere
#if (defined(__SSE4_1__) || defined(_MSC_VER))
	return _mm_blendv_epi8(vt, vs, *le);
#else
	return _mm_or_si128(_mm_and_si128(*le, vs), _mm_andnot_si128(*le, vt));
#endif
}

View File

@ -1,35 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Per 16-bit lane clip of vs against vt / ~vt (one's complement, see
// sign_notvt below). Writes the comparison masks to *le and *ge and returns,
// per lane, either vs or the sign-adjusted vt.
// NOTE(review): 'zero' is only touched in the non-SSE4.1 path, where it is
// overwritten and used as a scratch register; its incoming value is never read.
inline rsp_vec_t vec_vcr(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *ge, rsp_vec_t *le) {
// sign = (vs ^ vt) < 0
rsp_vec_t sign = _mm_xor_si128(vs, vt);
sign = _mm_srai_epi16(sign, 15);
// Compute le
// (sign bit of (sign ? vs : 0) + vt, broadcast across the lane)
rsp_vec_t diff_lez = _mm_and_si128(vs, sign);
diff_lez = _mm_add_epi16(diff_lez, vt);
*le = _mm_srai_epi16(diff_lez, 15);
// Compute ge
// (set where vt <= (sign ? -1 : vs), via min followed by compare-equal)
rsp_vec_t diff_gez = _mm_or_si128(vs, sign);
diff_gez = _mm_min_epi16(diff_gez, vt);
*ge = _mm_cmpeq_epi16(diff_gez, vt);
// sign_notvt = sn ? ~vt : vt
rsp_vec_t sign_notvt = _mm_xor_si128(vt, sign);
// Compute result:
#if (defined(__SSE4_1__) || defined(_MSC_VER))
rsp_vec_t diff_sel_mask = _mm_blendv_epi8(*ge, *le, sign);
return _mm_blendv_epi8(vs, sign_notvt, diff_sel_mask);
#else
// Arithmetic form of the two blends above: a + ((b - a) & mask) yields b
// where the mask is all ones and a where it is zero.
rsp_vec_t diff_sel_mask = _mm_sub_epi16(*le, *ge);
diff_sel_mask = _mm_and_si128(diff_sel_mask, sign);
diff_sel_mask = _mm_add_epi16(diff_sel_mask, *ge);
zero = _mm_sub_epi16(sign_notvt, vs);
zero = _mm_and_si128(zero, diff_sel_mask);
return _mm_add_epi16(zero, vs);
#endif
}

View File

@ -1,12 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Latches element e of register src into m_div_in, stores the pending
// m_div_out into element de of register dest, and returns the updated
// destination register. Both element indices are reduced modulo 8.
inline rsp_vec_t vec_vdivh(uint32_t src, uint32_t e, uint32_t dest, uint32_t de)
{
	auto &vd = m_v[dest];

	// read the input first: source and destination may be the same element
	m_div_in = m_v[src].s[e & 0x7];
	vd.s[de & 0x7] = m_div_out;

	return vec_load_unshuffled_operand(vd.s);
}

View File

@ -1,69 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// LBV, LDV, LLV, LSV, SBV, SDV, SLV, SSV
inline void vec_lbdlsv_sbdlsv(uint32_t iw, uint32_t rs)
{
const uint32_t shift_and_idx = (iw >> 11) & 0x3;
rsp_vec_t dqm = _mm_loadl_epi64((rsp_vec_t *) (m_vec_helpers.bdls_lut[shift_and_idx]));
const uint32_t addr = (rs + (sign_extend_6(iw) << shift_and_idx)) & 0xfff;
const uint32_t element = (iw >> 7) & 0xf;
uint16_t* regp = m_v[(iw >> 16) & 0x1f].s;
if (iw >> 29 & 0x1)
{
vec_store_group1(addr, element, regp, vec_load_unshuffled_operand(regp), dqm);
}
else
{
vec_load_group1(addr, element, regp, vec_load_unshuffled_operand(regp), dqm);
}
}
// LPV, LUV, SPV, SUV
inline void vec_lfhpuv_sfhpuv(uint32_t iw, uint32_t rs)
{
static const enum rsp_mem_request_type fhpu_type_lut[4] = {
RSP_MEM_REQUEST_PACK,
RSP_MEM_REQUEST_UPACK,
RSP_MEM_REQUEST_HALF,
RSP_MEM_REQUEST_FOURTH
};
const uint32_t addr = (rs + (sign_extend_6(iw) << 3)) & 0xfff;
const uint32_t element = (iw >> 7) & 0xf;
uint16_t* regp = m_v[(iw >> 16) & 0x1f].s;
rsp_mem_request_type request_type = fhpu_type_lut[((iw >> 11) & 0x1f) - 6];
if ((iw >> 29) & 0x1)
{
vec_store_group2(addr, element, regp, vec_load_unshuffled_operand(regp), _mm_setzero_si128(), request_type);
}
else
{
vec_load_group2(addr, element, regp, vec_load_unshuffled_operand(regp), _mm_setzero_si128(), request_type);
}
}
// LQV, LRV, SQV, SRV
inline void vec_lqrv_sqrv(uint32_t iw, uint32_t rs)
{
rs &= 0xfff;
const uint32_t addr = rs + (sign_extend_6(iw) << 4);
const uint32_t element = (iw >> 7) & 0xf;
uint16_t* regp = m_v[(iw >> 16) & 0x1f].s;
memcpy(m_vdqm.s, m_vec_helpers.qr_lut[addr & 0xf], sizeof(m_vdqm.s));
rsp_mem_request_type request_type = (iw >> 11 & 0x1) ? RSP_MEM_REQUEST_REST : RSP_MEM_REQUEST_QUAD;
if ((iw >> 29) & 0x1)
{
vec_store_group4(addr, element, regp, vec_load_unshuffled_operand(regp), vec_load_unshuffled_operand(m_vdqm.s), request_type);
}
else
{
vec_load_group4(addr, element, regp, vec_load_unshuffled_operand(regp), vec_load_unshuffled_operand(m_vdqm.s), request_type);
}
}

View File

@ -1,57 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Multiply-accumulate, per 16-bit lane: the signed 32-bit product vs * vt is
// doubled (shifted left by one) and added into the accumulator, which is held
// as three 16-bit slices (*acc_lo, *acc_mid, *acc_hi), all updated in place.
// Bit 0 of iw selects the result: set -> VMACU clamp computed inline,
// clear -> VMACF, clamped by sclamp_acc_to_mid().
// NOTE(review): 'zero' is used as a comparison constant; presumably the
// caller passes an all-zero vector -- confirm.
inline rsp_vec_t vec_vmacf_vmacu(uint32_t iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_mid, rsp_vec_t *acc_hi)
{
// Get the product and shift it over
// being sure to save the carries.
rsp_vec_t lo = _mm_mullo_epi16(vs, vt);
rsp_vec_t hi = _mm_mulhi_epi16(vs, vt);
rsp_vec_t mid = _mm_slli_epi16(hi, 1);
rsp_vec_t carry = _mm_srli_epi16(lo, 15);
hi = _mm_srai_epi16(hi, 15);
mid = _mm_or_si128(mid, carry);
lo = _mm_slli_epi16(lo, 1);
// Tricky part: start accumulating everything.
// Get/keep the carry as we'll add it in later.
// (a lane overflowed when the wrapping sum differs from the unsigned-
// saturated sum; overflow_mask ends up all ones, i.e. -1, in those lanes)
rsp_vec_t overflow_mask = _mm_adds_epu16(*acc_lo, lo);
*acc_lo = _mm_add_epi16(*acc_lo, lo);
overflow_mask = _mm_cmpeq_epi16(*acc_lo, overflow_mask);
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
// Add in the carry. If the middle portion is
// already 0xFFFF and we have a carry, we have
// to carry the all the way up to hi.
// (subtracting the -1 mask adds one)
mid = _mm_sub_epi16(mid, overflow_mask);
carry = _mm_cmpeq_epi16(mid, zero);
carry = _mm_and_si128(carry, overflow_mask);
hi = _mm_sub_epi16(hi, carry);
// Accumulate the middle portion.
overflow_mask = _mm_adds_epu16(*acc_mid, mid);
*acc_mid = _mm_add_epi16(*acc_mid, mid);
overflow_mask = _mm_cmpeq_epi16(*acc_mid, overflow_mask);
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
// Finish up the accumulation of the... accumulator.
*acc_hi = _mm_add_epi16(*acc_hi, hi);
*acc_hi = _mm_sub_epi16(*acc_hi, overflow_mask);
if (iw & 0x1) // VMACU
{
// Result: 0 where acc_hi is negative, 0xFFFF where acc_hi is positive or
// acc_mid has its top bit set, otherwise acc_mid.
rsp_vec_t overflow_hi_mask = _mm_srai_epi16(*acc_hi, 15);
rsp_vec_t overflow_mid_mask = _mm_srai_epi16(*acc_mid, 15);
mid = _mm_or_si128(overflow_mid_mask, *acc_mid);
overflow_mask = _mm_cmpgt_epi16(*acc_hi, zero);
mid = _mm_andnot_si128(overflow_hi_mask, mid);
return _mm_or_si128(overflow_mask, mid);
}
else // VMACF
{
return sclamp_acc_to_mid(*acc_mid, *acc_hi);
}
}

View File

@ -1,9 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Copies element e of register src into element de of register dest (both
// indices reduced modulo 8) and returns the updated destination register.
inline rsp_vec_t vec_vmov(uint32_t src, uint32_t e, uint32_t dest, uint32_t de)
{
	auto &vd = m_v[dest];
	vd.s[de & 0x7] = m_v[src].s[e & 0x7];
	return vec_load_unshuffled_operand(vd.s);
}

View File

@ -1,13 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Merge: takes vs where the mask le is set and vt where it is clear.
inline rsp_vec_t vec_vmrg(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t le)
{
#if (defined(__SSE4_1__) || defined(_MSC_VER))
	return _mm_blendv_epi8(vt, vs, le);
#else
	// no blend instruction available: combine the two masked halves by hand
	return _mm_or_si128(_mm_and_si128(le, vs), _mm_andnot_si128(le, vt));
#endif
}

View File

@ -1,10 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Signed 16x16 multiply per lane: the low half of each product goes to
// *acc_md, the high half to *acc_hi, and the return value is the pair clamped
// by sclamp_acc_to_mid().
inline rsp_vec_t rsp_vmudh(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
{
	const rsp_vec_t product_lo = _mm_mullo_epi16(vs, vt);
	const rsp_vec_t product_hi = _mm_mulhi_epi16(vs, vt);

	*acc_md = product_lo;
	*acc_hi = product_hi;

	return sclamp_acc_to_mid(*acc_md, *acc_hi);
}

View File

@ -1,39 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
//
// TODO: CHECK ME.
//
// Fractional multiply per 16-bit lane: the signed product vs * vt is doubled,
// 0x8000 is added, and the result is written to the accumulator slices
// (*acc_lo, *acc_md, *acc_hi). Bit 0 of iw selects the VMULU (set) or VMULF
// (clear) result.
// NOTE(review): 'zero' is only compared with itself to build an all-ones
// vector, so its incoming value does not affect the result.
inline rsp_vec_t vec_vmulf_vmulu(uint32_t iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
{
rsp_vec_t lo = _mm_mullo_epi16(vs, vt);
// all ones here; shifted left by 15 below it becomes the 0x8000 rounding term
rsp_vec_t round = _mm_cmpeq_epi16(zero, zero);
// sign1: the bit shifted out when the low half is doubled
rsp_vec_t sign1 = _mm_srli_epi16(lo, 15);
lo = _mm_add_epi16(lo, lo);
round = _mm_slli_epi16(round, 15);
rsp_vec_t hi = _mm_mulhi_epi16(vs, vt);
// sign2: the carry produced by adding 0x8000 to the doubled low half
rsp_vec_t sign2 = _mm_srli_epi16(lo, 15);
*acc_lo = _mm_add_epi16(round, lo);
sign1 = _mm_add_epi16(sign1, sign2);
hi = _mm_slli_epi16(hi, 1);
rsp_vec_t eq = _mm_cmpeq_epi16(vs, vt);
// neq is a plain copy of eq (it is NOT inverted); it preserves the vs == vt
// mask because eq is modified in the VMULF branch.
rsp_vec_t neq = eq;
*acc_md = _mm_add_epi16(hi, sign1);
rsp_vec_t neg = _mm_srai_epi16(*acc_md, 15);
if (iw & 0x1) // VMULU
{
// acc_hi is set where acc_md is negative and vs != vt; the result is 0 in
// those lanes, acc_md | neg elsewhere.
*acc_hi = _mm_andnot_si128(eq, neg);
hi =_mm_or_si128(*acc_md, neg);
return _mm_andnot_si128(*acc_hi, hi);
}
else // VMULF
{
// Where vs == vt and acc_md came out negative, the result is acc_md - 1
// (adding the all-ones mask).
eq = _mm_and_si128(eq, neg);
*acc_hi = _mm_andnot_si128(neq, neg);
return _mm_add_epi16(*acc_md, eq);
}
}

View File

@ -1,30 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Signed 16x16 multiply per lane, landing in the middle/high accumulator
// slices. Bit 3 of iw selects accumulate (VMADH) or overwrite (VMUDH, which
// also clears *acc_lo). The return value is sclamp_acc_to_mid() of the
// updated middle and high slices.
// NOTE(review): 'zero' is presumably an all-zero vector -- confirm at the caller.
inline rsp_vec_t vec_vmadh_vmudh(uint32_t iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
{
	const rsp_vec_t product_lo = _mm_mullo_epi16(vs, vt);
	const rsp_vec_t product_hi = _mm_mulhi_epi16(vs, vt);

	if (!(iw & 0x8)) // VMUDH
	{
		*acc_lo = zero;
		*acc_md = product_lo;
		*acc_hi = product_hi;
	}
	else // VMADH
	{
		// a lane carried out of the middle slice when the wrapping sum
		// differs from the unsigned-saturated one
		const rsp_vec_t saturated = _mm_adds_epu16(*acc_md, product_lo);
		*acc_md = _mm_add_epi16(*acc_md, product_lo);
		const rsp_vec_t no_carry = _mm_cmpeq_epi16(*acc_md, saturated);
		const rsp_vec_t carry = _mm_cmpeq_epi16(no_carry, zero);

		// the carry mask is -1, so subtracting it adds one to the high part
		*acc_hi = _mm_add_epi16(*acc_hi, _mm_sub_epi16(product_hi, carry));
	}

	return sclamp_acc_to_mid(*acc_md, *acc_hi);
}

View File

@ -1,44 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Unsigned 16x16 multiply per lane, keeping only the HIGH 16 bits of each
// product, which land in the low accumulator slice. Bit 3 of iw selects
// accumulate (VMADL, carries rippled into *acc_md and *acc_hi, result clamped
// by uclamp_acc()) or overwrite (VMUDL, upper slices cleared).
// NOTE(review): 'zero' is used as a constant; presumably the caller passes an
// all-zero vector -- confirm.
inline rsp_vec_t vec_vmadl_vmudl(uint32_t iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
{
rsp_vec_t hi = _mm_mulhi_epu16(vs, vt);
if (iw & 0x8) // VMADL
{
// Tricky part: start accumulating everything.
// Get/keep the carry as we'll add it in later.
rsp_vec_t overflow_mask = _mm_adds_epu16(*acc_lo, hi);
*acc_lo = _mm_add_epi16(*acc_lo, hi);
overflow_mask = _mm_cmpeq_epi16(*acc_lo, overflow_mask);
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
// 0 - (-1 mask) turns the carry mask into a numeric 0 or 1 per lane
hi = _mm_sub_epi16(zero, overflow_mask);
// Check for overflow of the upper sum.
//
// TODO: Since hi can only be {0,1}, we should
// be able to generalize this for performance.
overflow_mask = _mm_adds_epu16(*acc_md, hi);
*acc_md = _mm_add_epi16(*acc_md, hi);
overflow_mask = _mm_cmpeq_epi16(*acc_md, overflow_mask);
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
// Finish up the accumulation of the... accumulator.
// Since the product was unsigned, only worry about
// positive overflow (i.e.: borrowing not possible).
*acc_hi = _mm_sub_epi16(*acc_hi, overflow_mask);
return uclamp_acc(*acc_lo, *acc_md, *acc_hi, zero);
}
else // VMUDL
{
*acc_lo = hi;
*acc_md = zero;
*acc_hi = zero;
return hi;
}
}

View File

@ -1,56 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Mixed-sign 16x16 multiply per lane (vs treated as signed, vt as unsigned,
// as established by the correction step below). Bit 3 of iw selects
// accumulate (VMADM, result clamped by sclamp_acc_to_mid()) or overwrite
// (VMUDM, which returns the high half of the product).
// NOTE(review): 'zero' is used as a comparison constant; presumably the
// caller passes an all-zero vector -- confirm.
inline rsp_vec_t vec_vmadm_vmudm(uint32_t iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
{
rsp_vec_t lo = _mm_mullo_epi16(vs, vt);
rsp_vec_t hi = _mm_mulhi_epu16(vs, vt);
// What we really want to do is signed vs * unsigned vt.
// However, we have no such instructions to do so.
//
// There's a trick to "fix" an unsigned product, though:
// If vs was negative, take the upper 16-bits of the product
// and subtract vt.
rsp_vec_t sign = _mm_srai_epi16(vs, 15);
vt = _mm_and_si128(vt, sign);
hi = _mm_sub_epi16(hi, vt);
if (iw & 0x8) // VMADM
{
// Tricky part: start accumulating everything.
// Get/keep the carry as we'll add it in later.
rsp_vec_t overflow_mask = _mm_adds_epu16(*acc_lo, lo);
*acc_lo = _mm_add_epi16(*acc_lo, lo);
overflow_mask = _mm_cmpeq_epi16(*acc_lo, overflow_mask);
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
// This is REALLY clever. Since the product results from
// two 16-bit components, one positive and one negative,
// we don't have to worry about carrying the 1 (we can
// only borrow) past 32-bits. So we can just add it here.
hi = _mm_sub_epi16(hi, overflow_mask);
// Check for overflow of the upper sum.
overflow_mask = _mm_adds_epu16(*acc_md, hi);
*acc_md = _mm_add_epi16(*acc_md, hi);
overflow_mask = _mm_cmpeq_epi16(*acc_md, overflow_mask);
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
// Finish up the accumulation of the... accumulator.
// (the sign extension of hi is added first, then the carry out of acc_md)
*acc_hi = _mm_add_epi16(*acc_hi, _mm_srai_epi16(hi, 15));
*acc_hi = _mm_sub_epi16(*acc_hi, overflow_mask);
return sclamp_acc_to_mid(*acc_md, *acc_hi);
}
else // VMUDM
{
*acc_lo = lo;
*acc_md = hi;
*acc_hi = _mm_srai_epi16(hi, 15);
return hi;
}
}

View File

@ -1,55 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Mixed-sign 16x16 multiply per lane (vs treated as unsigned, vt as signed).
// Bit 3 of iw selects accumulate (VMADN, result clamped by uclamp_acc()) or
// overwrite (VMUDN, which returns the low half of the product).
// NOTE(review): 'zero' is used as a comparison constant; presumably the
// caller passes an all-zero vector -- confirm.
inline rsp_vec_t vec_vmadn_vmudn(uint32_t iw, rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *acc_lo, rsp_vec_t *acc_md, rsp_vec_t *acc_hi)
{
rsp_vec_t lo = _mm_mullo_epi16(vs, vt);
rsp_vec_t hi = _mm_mulhi_epu16(vs, vt);
// What we really want to do is unsigned vs * signed vt.
// However, we have no such instructions to do so.
//
// There's a trick to "fix" an unsigned product, though:
// If vt was negative, take the upper 16-bits of the product
// and subtract vs.
rsp_vec_t sign = _mm_srai_epi16(vt, 15);
vs = _mm_and_si128(vs, sign);
hi = _mm_sub_epi16(hi, vs);
if (iw & 0x8) // VMADN
{
// Tricky part: start accumulating everything.
// Get/keep the carry as we'll add it in later.
rsp_vec_t overflow_mask = _mm_adds_epu16(*acc_lo, lo);
*acc_lo = _mm_add_epi16(*acc_lo, lo);
overflow_mask = _mm_cmpeq_epi16(*acc_lo, overflow_mask);
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
// This is REALLY clever. Since the product results from
// two 16-bit components, one positive and one negative,
// we don't have to worry about carrying the 1 (we can
// only borrow) past 32-bits. So we can just add it here.
hi = _mm_sub_epi16(hi, overflow_mask);
// Check for overflow of the upper sum.
overflow_mask = _mm_adds_epu16(*acc_md, hi);
*acc_md = _mm_add_epi16(*acc_md, hi);
overflow_mask = _mm_cmpeq_epi16(*acc_md, overflow_mask);
overflow_mask = _mm_cmpeq_epi16(overflow_mask, zero);
// Finish up the accumulation of the... accumulator.
// (the sign extension of hi is added first, then the carry out of acc_md)
*acc_hi = _mm_add_epi16(*acc_hi, _mm_srai_epi16(hi, 15));
*acc_hi = _mm_sub_epi16(*acc_hi, overflow_mask);
return uclamp_acc(*acc_lo, *acc_md, *acc_hi, zero);
}
else // VMUDN
{
*acc_lo = lo;
*acc_md = hi;
*acc_hi = _mm_srai_epi16(hi, 15);
return lo;
}
}

View File

@ -1,10 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Bitwise OR of vs and vt, XORed with the mask selected by bit 0 of iw.
// NOTE(review): logic_mask[0]/[1] are presumably all-zeros/all-ones so that
// bit 0 selects the negated (NOR) form -- confirm against the table.
inline rsp_vec_t vec_vor_vnor(uint32_t iw, rsp_vec_t vs, rsp_vec_t vt)
{
	const rsp_vec_t invert = _mm_load_si128((rsp_vec_t *) m_vec_helpers.logic_mask[iw & 0x1]);
	return _mm_xor_si128(_mm_or_si128(vs, vt), invert);
}

View File

@ -1,58 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Table-driven reciprocal / reciprocal square root of one vector element.
//   iw   - bit 2 selects the VRSQ path, otherwise VRCP
//   dp   - non-zero: use m_div_in as the upper 16 bits of a 32-bit input;
//          zero: use the sign-extended 16-bit element alone
//   src/e   - source register and element (index reduced modulo 8)
//   dest/de - destination register and element (index reduced modulo 8)
// Side effects: m_div_out receives the upper 16 bits of the result, the
// destination element receives the 32-bit result narrowed to the element type.
// Returns the updated destination register.
inline rsp_vec_t vec_vrcp_vrsq(uint32_t iw, int32_t dp, uint32_t src, uint32_t e, uint32_t dest, uint32_t de)
{
// Get the element from VT.
int16_t vt = m_v[src].s[e & 0x7];
uint32_t dp_input = ((uint32_t) m_div_in << 16) | (uint16_t) vt;
uint32_t sp_input = vt;
int32_t input = (dp) ? dp_input : sp_input;
// input_mask is all ones for a negative input, zero otherwise
int32_t input_mask = input >> 31;
int32_t data = input ^ input_mask;
// For inputs above -32768 this completes the absolute value (subtracting
// the -1 mask adds one); inputs at or below -32768 keep the one's complement.
if (input > -32768)
{
data -= input_mask;
}
// Handle edge cases.
int32_t result;
if (data == 0)
{
result = 0x7fffffff;
}
else if (input == -32768)
{
result = 0xffff0000;
}
else // Main case: compute the reciprocal.
{
// Normalise: shift out the leading zeros and use the 9 bits that follow
// the leading one as the table index.
uint32_t shift = count_leading_zeros(data);
uint32_t idx = (((uint64_t) data << shift) & 0x7FC00000) >> 22;
if (iw & 0x4) // VRSQ
{
// Upper half of the table; the lowest index bit is replaced by the
// parity of the shift count.
idx = ((idx | 0x200) & 0x3fe) | (shift % 2);
result = rsp_divtable[idx];
result = ((0x10000 | result) << 14) >> ((31 - shift) >> 1);
}
else // VRCP
{
result = rsp_divtable[idx];
result = ((0x10000 | result) << 14) >> (31 - shift);
}
// restore the sign: one's complement of the result for negative inputs
result = result ^ input_mask;
}
// Write out the results.
m_div_out = result >> 16;
m_v[dest].s[de & 0x7] = result;
return vec_load_unshuffled_operand(m_v[dest].s);
}

View File

@ -1,66 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Table-driven reciprocal square root of one vector element.
//   dp   - non-zero: use m_div_in as the upper 16 bits of a 32-bit input;
//          zero: use the sign-extended 16-bit element alone
//   src/e   - source register and element (index reduced modulo 8)
//   dest/de - destination register and element (index reduced modulo 8)
// Side effects: m_div_out receives the upper 16 bits of the result, the
// destination element receives the 32-bit result narrowed to the element type.
// Returns the updated destination register.
rsp_vec_t vec_vrsq(int32_t dp, uint32_t src, uint32_t e, uint32_t dest, uint32_t de)
{
// Get the element from VT.
int16_t vt = m_v[src].s[e & 0x7];
uint32_t dp_input = ((uint32_t) m_div_in << 16) | (uint16_t) vt;
uint32_t sp_input = vt;
int32_t input = (dp) ? dp_input : sp_input;
// input_mask is all ones for a negative input, zero otherwise
int32_t input_mask = input >> 31;
int32_t data = input ^ input_mask;
// For inputs above -32768 this completes the absolute value (subtracting
// the -1 mask adds one); inputs at or below -32768 keep the one's complement.
if (input > -32768)
{
data -= input_mask;
}
// Handle edge cases.
int32_t result;
if (data == 0)
{
result = 0x7fffffff;
}
else if (input == -32768)
{
result = 0xffff0000;
}
else // Main case: compute the reciprocal.
{
// Normalise, then index the upper half of the table; the lowest index bit
// is replaced by the parity of the shift count.
uint32_t shift = count_leading_zeros(data);
uint32_t idx = (((uint64_t) data << shift) & 0x7fc00000) >> 22;
idx = ((idx | 0x200) & 0x3fe) | (shift % 2);
result = rsp_divtable[idx];
result = ((0x10000 | result) << 14) >> ((31 - shift) >> 1);
// restore the sign: one's complement of the result for negative inputs
result = result ^ input_mask;
}
// Write out the results.
m_div_out = result >> 16;
m_v[dest].s[de & 0x7] = result;
return vec_load_unshuffled_operand(m_v[dest].s);
}
// Latches element e of register src into m_div_in and returns register dest
// with element de replaced by the pending m_div_out. Unlike its siblings this
// routine does not store into m_v[dest] itself; it only returns the merged
// vector.
//   src/e   - source register and element
//   dest/de - destination register and element
// Both element indices are reduced modulo 8, matching the other element
// helpers in this family; previously they were used unmasked, so an index
// >= 8 read past the 8-entry local copy.
// NOTE(review): vrsq_mask_table is assumed to have at least 8 rows -- confirm.
rsp_vec_t vec_vrsqh(uint32_t src, uint32_t e, uint32_t dest, uint32_t de)
{
	int16_t elements[8];

	// Get the element from VT (copied out as signed 16-bit values).
	memcpy(elements, &m_v[src], sizeof(rsp_vec_t));
	m_div_in = elements[e & 0x7];

	// Write out the upper part of the result: clear the selected lane of the
	// destination and drop the broadcast m_div_out into it.
	rsp_vec_t vd_mask = _mm_load_si128((rsp_vec_t *) m_vec_helpers.vrsq_mask_table[de & 0x7]);
	rsp_vec_t vd = _mm_load_si128((rsp_vec_t *) &m_v[dest]);
	vd = _mm_andnot_si128(vd_mask, vd);

	rsp_vec_t b_result = _mm_set1_epi16(m_div_out);
	b_result = _mm_and_si128(vd_mask, b_result);
	return _mm_or_si128(b_result, vd);
}

View File

@ -1,17 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Per 16-bit lane: computes vs - (vt - carry). 'carry' is subtracted from vt,
// so a lane holding 0xFFFF (-1) increases the subtrahend by one. *acc_lo
// receives the wrapping difference; the return value is the signed-saturated
// one.
inline rsp_vec_t vec_vsub(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t carry, rsp_vec_t *acc_lo)
{
	// subtrahend with the carry applied, once wrapping and once saturating
	const rsp_vec_t subtrahend_wrap = _mm_sub_epi16(vt, carry);
	const rsp_vec_t subtrahend_sat = _mm_subs_epi16(vt, carry);

	// the accumulator slice keeps the plain wrapping result
	*acc_lo = _mm_sub_epi16(vs, subtrahend_wrap);

	// Saturating a three-term expression: where applying the carry itself
	// saturated (saturated > wrapped), the mask is -1 and is added back in
	// with saturation to correct the result.
	const rsp_vec_t clipped = _mm_cmpgt_epi16(subtrahend_sat, subtrahend_wrap);
	return _mm_adds_epi16(_mm_subs_epi16(vs, subtrahend_sat), clipped);
}

View File

@ -1,14 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Per-lane 16-bit wrapping subtraction vs - vt that also produces two
// comparison masks.
//
//   vs, vt - source operands.
//   zero   - vector compared against to build the masks (presumably all
//            zero bits, per its name - confirm against callers).
//   eq     - out: lanes where the vs == vt compare result equals `zero`,
//            i.e. all-ones where vs and vt differ when `zero` is all-zero.
//   sn     - out: lanes where vs != vt and the unsigned saturating
//            difference vs - vt equals `zero`.
//
// Returns the wrapping difference vs - vt.
inline rsp_vec_t vec_vsubc(rsp_vec_t vs, rsp_vec_t vt, rsp_vec_t zero, rsp_vec_t *eq, rsp_vec_t *sn)
{
	// All-ones in lanes where the operands match.
	const rsp_vec_t same = _mm_cmpeq_epi16(vs, vt);

	// The unsigned saturating difference bottoms out at zero, so comparing
	// it against `zero` flags lanes with nothing left over.
	const rsp_vec_t no_excess = _mm_cmpeq_epi16(_mm_subs_epu16(vs, vt), zero);

	*eq = _mm_cmpeq_epi16(same, zero);
	*sn = _mm_andnot_si128(same, no_excess);

	return _mm_sub_epi16(vs, vt);
}

View File

@ -1,10 +0,0 @@
// license:BSD-3-Clause
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
// Shared implementation for the XOR / NXOR pair: XORs vs with vt, then XORs
// the result with a mask chosen by the low bit of the instruction word.
//
//   iw     - instruction word; bit 0 selects the entry of
//            m_vec_helpers.logic_mask to apply.
//   vs, vt - source operands.
//
// NOTE(review): presumably logic_mask[0] is all-zero and logic_mask[1] is
// all-ones so that bit 0 toggles the inversion - the table is not visible
// here, confirm.
inline rsp_vec_t vec_vxor_vnxor(uint32_t iw, rsp_vec_t vs, rsp_vec_t vt)
{
	const rsp_vec_t *mask_ptr = (rsp_vec_t *) m_vec_helpers.logic_mask[iw & 0x1];
	const rsp_vec_t invert = _mm_load_si128(mask_ptr);
	return _mm_xor_si128(_mm_xor_si128(vs, vt), invert);
}

View File

@ -208,7 +208,8 @@ private:
void e90_map(address_map &map);
void n64_map(address_map &map);
void rsp_map(address_map &map);
void rsp_imem_map(address_map &map);
void rsp_dmem_map(address_map &map);
optional_shared_ptr<uint32_t> m_e90_vram;
optional_shared_ptr<uint32_t> m_e90_pal;
@ -380,12 +381,14 @@ void aleck64_state::e90_map(address_map &map)
map(0xd0030000, 0xd003001f).rw(FUNC(aleck64_state::e90_prot_r), FUNC(aleck64_state::e90_prot_w));
}
void aleck64_state::rsp_map(address_map &map)
void aleck64_state::rsp_imem_map(address_map &map)
{
map(0x00000000, 0x00000fff).ram().share("rsp_imem");
}
void aleck64_state::rsp_dmem_map(address_map &map)
{
map(0x00000000, 0x00000fff).ram().share("rsp_dmem");
map(0x00001000, 0x00001fff).ram().share("rsp_imem");
map(0x04000000, 0x04000fff).ram().share("rsp_dmem");
map(0x04001000, 0x04001fff).ram().share("rsp_imem");
}
static INPUT_PORTS_START( aleck64 )
@ -1044,8 +1047,8 @@ void aleck64_state::aleck64(machine_config &config)
m_rsp->sp_reg_r().set(m_rcp_periphs, FUNC(n64_periphs::sp_reg_r));
m_rsp->sp_reg_w().set(m_rcp_periphs, FUNC(n64_periphs::sp_reg_w));
m_rsp->status_set().set(m_rcp_periphs, FUNC(n64_periphs::sp_set_status));
m_rsp->set_addrmap(AS_PROGRAM, &aleck64_state::rsp_map);
m_rsp->set_force_no_drc(true);
m_rsp->set_addrmap(AS_PROGRAM, &aleck64_state::rsp_imem_map);
m_rsp->set_addrmap(AS_DATA, &aleck64_state::rsp_dmem_map);
screen_device &screen(SCREEN(config, "screen", SCREEN_TYPE_RASTER));
screen.set_refresh_hz(60);

View File

@ -45,7 +45,8 @@ private:
DECLARE_DEVICE_IMAGE_UNLOAD_MEMBER(unload_n64dd);
void n64_map(address_map &map);
void n64dd_map(address_map &map);
void rsp_map(address_map &map);
void rsp_imem_map(address_map &map);
void rsp_dmem_map(address_map &map);
};
uint32_t n64_mess_state::dd_null_r()
@ -96,12 +97,14 @@ void n64_mess_state::n64dd_map(address_map &map)
map(0x1fc007c0, 0x1fc007ff).rw("rcp", FUNC(n64_periphs::pif_ram_r), FUNC(n64_periphs::pif_ram_w));
}
void n64_mess_state::rsp_map(address_map &map)
void n64_mess_state::rsp_imem_map(address_map &map)
{
map(0x00000000, 0x00000fff).ram().share("rsp_imem");
}
void n64_mess_state::rsp_dmem_map(address_map &map)
{
map(0x00000000, 0x00000fff).ram().share("rsp_dmem");
map(0x00001000, 0x00001fff).ram().share("rsp_imem");
map(0x04000000, 0x04000fff).ram().share("rsp_dmem");
map(0x04001000, 0x04001fff).ram().share("rsp_imem");
}
static INPUT_PORTS_START( n64 )
@ -445,7 +448,8 @@ void n64_mess_state::n64(machine_config &config)
m_rsp->sp_reg_r().set(m_rcp_periphs, FUNC(n64_periphs::sp_reg_r));
m_rsp->sp_reg_w().set(m_rcp_periphs, FUNC(n64_periphs::sp_reg_w));
m_rsp->status_set().set(m_rcp_periphs, FUNC(n64_periphs::sp_set_status));
m_rsp->set_addrmap(AS_PROGRAM, &n64_mess_state::rsp_map);
m_rsp->set_addrmap(AS_PROGRAM, &n64_mess_state::rsp_imem_map);
m_rsp->set_addrmap(AS_DATA, &n64_mess_state::rsp_dmem_map);
config.set_maximum_quantum(attotime::from_hz(500000));

View File

@ -42,8 +42,6 @@ public:
n64_rdp* rdp() { return m_rdp.get(); }
uint32_t* rdram() { return m_rdram; }
uint32_t* sram() { return m_sram; }
uint32_t* rsp_imem() { return m_rsp_imem; }
uint32_t* rsp_dmem() { return m_rsp_dmem; }
protected:
required_device<mips3_device> m_vr4300;
@ -174,11 +172,11 @@ private:
address_space *m_mem_map;
required_device<mips3_device> m_vr4300;
required_device<rsp_device> m_rsp;
required_shared_ptr<uint32_t> m_rsp_imem;
required_shared_ptr<uint32_t> m_rsp_dmem;
uint32_t *m_rdram;
uint32_t *m_sram;
uint32_t *m_rsp_imem;
uint32_t *m_rsp_dmem;
void clear_rcp_interrupt(int interrupt);
@ -228,6 +226,8 @@ private:
uint32_t sp_mem_addr;
uint32_t sp_dram_addr;
uint32_t sp_mem_addr_start;
uint32_t sp_dram_addr_start;
int sp_dma_length;
int sp_dma_count;
int sp_dma_skip;

View File

@ -22,6 +22,8 @@ n64_periphs::n64_periphs(const machine_config &mconfig, const char *tag, device_
, cart_present(false)
, m_vr4300(*this, "^maincpu")
, m_rsp(*this, "^rsp")
, m_rsp_imem(*this, "^rsp_imem")
, m_rsp_dmem(*this, "^rsp_dmem")
, ai_dac(*this, "^dac%u", 1U)
{
for (int32_t i = 0; i < 256; i++)
@ -130,8 +132,6 @@ void n64_periphs::device_reset()
m_mem_map = &m_vr4300->space(AS_PROGRAM);
m_rdram = m_n64->rdram();
m_rsp_imem = m_n64->rsp_imem();
m_rsp_dmem = m_n64->rsp_dmem();
m_sram = m_n64->sram();
mi_version = 0x01010101;
@ -141,6 +141,8 @@ void n64_periphs::device_reset()
sp_mem_addr = 0;
sp_dram_addr = 0;
sp_mem_addr_start = 0;
sp_dram_addr_start = 0;
sp_dma_length = 0;
sp_dma_count = 0;
sp_dma_skip = 0;
@ -546,6 +548,9 @@ void n64_periphs::rdram_reg_w(offs_t offset, uint32_t data, uint32_t mem_mask)
void n64_periphs::sp_dma(int direction)
{
sp_mem_addr = sp_mem_addr_start;
sp_dram_addr = sp_dram_addr_start;
uint32_t length = sp_dma_length + 1;
if ((length & 7) != 0)
@ -571,17 +576,16 @@ void n64_periphs::sp_dma(int direction)
int sp_mem_page = (sp_mem_addr >> 12) & 1;
if(sp_mem_page == 1)
m_rsp->rspdrc_flush_drc_cache();
//printf("DMA %s RSP: mem %08x, dram %08x, sp_dma_length: %03x, length: %04x, count: %02x, skip: %03x\n", direction ? "from" : "to", sp_mem_addr, sp_dram_addr, sp_dma_length, length, sp_dma_count, sp_dma_skip);
if(direction == 0)// RDRAM -> I/DMEM
if (direction == 0)// RDRAM -> I/DMEM
{
for(int c = 0; c <= sp_dma_count; c++)
for (int c = 0; c <= sp_dma_count; c++)
{
uint32_t src = (sp_dram_addr & 0x007fffff) >> 2;
uint32_t src = sp_dram_addr >> 2;
uint32_t dst = (sp_mem_addr & 0xfff) >> 2;
for(int i = 0; i < length / 4; i++)
for (int i = 0; i < length / 4; i++)
{
sp_mem[sp_mem_page][(dst + i) & 0x3ff] = m_rdram[src + i];
}
@ -589,17 +593,18 @@ void n64_periphs::sp_dma(int direction)
sp_mem_addr += length;
sp_dram_addr += length;
sp_dram_addr += sp_dma_skip;
if (c != sp_dma_count)
sp_dram_addr += sp_dma_skip;
}
}
else // I/DMEM -> RDRAM
{
for(int c = 0; c <= sp_dma_count; c++)
for (int c = 0; c <= sp_dma_count; c++)
{
uint32_t src = (sp_mem_addr & 0xfff) >> 2;
uint32_t dst = (sp_dram_addr & 0x007fffff) >> 2;
uint32_t dst = sp_dram_addr >> 2;
for(int i = 0; i < length / 4; i++)
for (int i = 0; i < length / 4; i++)
{
m_rdram[dst + i] = sp_mem[sp_mem_page][(src + i) & 0x3ff];
}
@ -607,9 +612,13 @@ void n64_periphs::sp_dma(int direction)
sp_mem_addr += length;
sp_dram_addr += length;
sp_dram_addr += sp_dma_skip;
if (c != sp_dma_count)
sp_dram_addr += sp_dma_skip;
}
}
sp_dma_count = 0;
sp_dma_length = 0xff8;
}
void n64_periphs::sp_set_status(uint32_t data)
@ -642,6 +651,7 @@ uint32_t n64_periphs::sp_reg_r(offs_t offset)
return sp_dram_addr;
case 0x08/4: // SP_RD_LEN_REG
case 0x0c/4: // SP_WR_LEN_REG
return (sp_dma_skip << 20) | (sp_dma_count << 12) | sp_dma_length;
case 0x10/4: // SP_STATUS_REG
@ -710,24 +720,24 @@ void n64_periphs::sp_reg_w(offs_t offset, uint32_t data, uint32_t mem_mask)
switch (offset & 0xffff)
{
case 0x00/4: // SP_MEM_ADDR_REG
sp_mem_addr = data;
sp_mem_addr_start = data & 0x00001fff;
break;
case 0x04/4: // SP_DRAM_ADDR_REG
sp_dram_addr = data & 0xffffff;
sp_dram_addr_start = data & 0x007fffff;
break;
case 0x08/4: // SP_RD_LEN_REG
sp_dma_length = data & 0xfff;
sp_dma_count = (data >> 12) & 0xff;
sp_dma_skip = (data >> 20) & 0xfff;
sp_dma_skip = (data >> 20) & 0xff8;
sp_dma(0);
break;
case 0x0c/4: // SP_WR_LEN_REG
sp_dma_length = data & 0xfff;
sp_dma_count = (data >> 12) & 0xff;
sp_dma_skip = (data >> 20) & 0xfff;
sp_dma_skip = (data >> 20) & 0xff8;
sp_dma(1);
break;
@ -864,13 +874,13 @@ void n64_periphs::sp_reg_w(offs_t offset, uint32_t data, uint32_t mem_mask)
switch (offset & 0xffff)
{
case 0x00/4: // SP_PC_REG
if( m_rsp->state_int(RSP_NEXTPC) != 0xffffffff )
if (m_rsp->state_int(RSP_NEXTPC) != 0xffff)
{
m_rsp->set_state_int(RSP_NEXTPC, 0x1000 | (data & 0xfff));
m_rsp->set_state_int(RSP_NEXTPC, data & 0xfff);
}
else
{
m_rsp->set_state_int(RSP_PC, 0x1000 | (data & 0xfff));
m_rsp->set_state_int(RSP_PC, data & 0xfff);
}
break;
@ -998,8 +1008,10 @@ void n64_periphs::vi_recalculate_resolution()
int x_end = vi_hstart & 0x000003ff;
int y_start = ((vi_vstart & 0x03ff0000) >> 16) >> 1;
int y_end = (vi_vstart & 0x000003ff) >> 1;
int width = x_end - x_start;
int height = y_end - y_start;
const float hcoeff = ((float)(vi_xscale & 0xfff) / (1 << 10));
const float vcoeff = ((float)(vi_yscale & 0xfff) / (1 << 10));
int width = (x_end - x_start) * hcoeff;
int height = (y_end - y_start) * vcoeff;
rectangle visarea = screen().visible_area();
// DACRATE is the quarter pixel clock and period will be for a field, not a frame
@ -1494,6 +1506,14 @@ uint32_t n64_periphs::pi_reg_r(offs_t offset, uint32_t mem_mask)
ret = pi_cart_addr;
break;
case 0x08/4: // PI_RD_LEN_REG
ret = pi_rd_len;
break;
case 0x0c/4: // PI_WR_LEN_REG
ret = pi_wr_len;
break;
case 0x10/4: // PI_STATUS_REG
ret = pi_status;
break;
@ -1543,13 +1563,10 @@ void n64_periphs::pi_reg_w(offs_t offset, uint32_t data, uint32_t mem_mask)
switch (offset)
{
case 0x00/4: // PI_DRAM_ADDR_REG
{
pi_dram_addr = data;
break;
}
case 0x04/4: // PI_CART_ADDR_REG
{
pi_cart_addr = data;
if(pi_cart_addr == 0x05000400 && dd_present)
{
@ -1566,15 +1583,15 @@ void n64_periphs::pi_reg_w(offs_t offset, uint32_t data, uint32_t mem_mask)
m_vr4300->set_input_line(INPUT_LINE_IRQ1, CLEAR_LINE);
}
break;
}
case 0x08/4: // PI_RD_LEN_REG
{
//printf("pi_rd_len_reg: %08x\n", data);
//logerror("Start PI Read\n");
pi_rd_len = data;
pi_dma_dir = 0;
pi_status |= 1;
//pi_dma_tick();
attotime dma_period = attotime::from_hz(93750000) * (int)((float)(pi_rd_len + 1) * 5.08f); // Measured as between 2.53 cycles per byte and 2.55 cycles per byte
pi_dma_timer->adjust(dma_period);
break;
@ -1582,11 +1599,12 @@ void n64_periphs::pi_reg_w(offs_t offset, uint32_t data, uint32_t mem_mask)
case 0x0c/4: // PI_WR_LEN_REG
{
//printf("pi_wr_len_reg: %08x\n", data);
//logerror("Start PI Write\n");
pi_wr_len = data;
pi_dma_dir = 1;
pi_status |= 1;
//pi_dma_tick();
attotime dma_period = attotime::from_hz(93750000) * (int)((float)(pi_wr_len + 1) * 5.08f); // Measured as between 2.53 cycles per byte and 2.55 cycles per byte
pi_dma_timer->adjust(dma_period);
break;
@ -1762,6 +1780,7 @@ int n64_periphs::pif_channel_handle_command(int channel, int slength, uint8_t *s
}
case 4:
{
//printf("Read EEPROM status, type: %02x\n", (machine().root_device().ioport("input")->read() >> 8) & 0xC0);
// Read EEPROM status
rdata[0] = 0x00;
rdata[1] = (machine().root_device().ioport("input")->read() >> 8) & 0xC0;
@ -1928,6 +1947,7 @@ int n64_periphs::pif_channel_handle_command(int channel, int slength, uint8_t *s
case 0x04: // Read from EEPROM
{
//printf("Read from EEPROM, channel %d: slength %d, rlength %d, sdata[1] %02x\n", channel, slength, rlength, sdata[1]);
if (channel != 4)
{
return 1;
@ -2752,11 +2772,6 @@ void n64_state::machine_start()
/* configure fast RAM regions */
//m_vr4300->add_fastram(0x00000000, 0x007fffff, false, m_rdram);
m_rsp->rspdrc_set_options(RSPDRC_STRICT_VERIFY);
m_rsp->rspdrc_flush_drc_cache();
m_rsp->rsp_add_dmem(m_rsp_dmem);
m_rsp->rsp_add_imem(m_rsp_imem);
/* add a hook for battery save */
machine().add_notifier(MACHINE_NOTIFY_EXIT, machine_notify_delegate(&n64_state::n64_machine_stop,this));
}

File diff suppressed because it is too large Load Diff

View File

@ -127,8 +127,6 @@ class n64_rdp;
#include "video/rdpblend.h"
#include "video/rdptpipe.h"
typedef void (*rdp_command_t)(uint64_t w1);
class n64_state;
class n64_rdp : public poly_manager<uint32_t, rdp_poly_state, 8>
@ -164,24 +162,24 @@ public:
}
void process_command_list();
uint64_t read_data(uint32_t address);
void disassemble(char* buffer);
uint64_t read_data(uint32_t address);
void disassemble(uint64_t *cmd_buf, char* buffer);
void set_machine(running_machine& machine) { m_machine = &machine; }
void set_n64_periphs(n64_periphs* periphs) { m_n64_periphs = periphs; }
// CPU-visible registers
void set_start(uint32_t val) { m_start = val; }
uint32_t get_start() const { return m_start; }
uint32_t get_start() const { return m_start; }
void set_end(uint32_t val) { m_end = val; }
uint32_t get_end() const { return m_end; }
uint32_t get_end() const { return m_end; }
void set_current(uint32_t val) { m_current = val; }
uint32_t get_current() const { return m_current; }
uint32_t get_current() const { return m_current; }
void set_status(uint32_t val) { m_status = val; }
uint32_t get_status() const { return m_status; }
uint32_t get_status() const { return m_status; }
// Color Combiner
int32_t color_combiner_equation(int32_t a, int32_t b, int32_t c, int32_t d);
@ -226,48 +224,39 @@ public:
bool z_compare(uint32_t zcurpixel, uint32_t dzcurpixel, uint32_t sz, uint16_t dzpix, rdp_span_aux* userdata, const rdp_poly_state &object);
// Commands
void cmd_invalid(uint64_t w1);
void cmd_noop(uint64_t w1);
void cmd_triangle(uint64_t w1);
void cmd_triangle_z(uint64_t w1);
void cmd_triangle_t(uint64_t w1);
void cmd_triangle_tz(uint64_t w1);
void cmd_triangle_s(uint64_t w1);
void cmd_triangle_sz(uint64_t w1);
void cmd_triangle_st(uint64_t w1);
void cmd_triangle_stz(uint64_t w1);
void cmd_tex_rect(uint64_t w1);
void cmd_tex_rect_flip(uint64_t w1);
void cmd_sync_load(uint64_t w1);
void cmd_sync_pipe(uint64_t w1);
void cmd_sync_tile(uint64_t w1);
void cmd_sync_full(uint64_t w1);
void cmd_set_key_gb(uint64_t w1);
void cmd_set_key_r(uint64_t w1);
void cmd_set_fill_color32(uint64_t w1);
void cmd_set_convert(uint64_t w1);
void cmd_set_scissor(uint64_t w1);
void cmd_set_prim_depth(uint64_t w1);
void cmd_set_other_modes(uint64_t w1);
void cmd_load_tlut(uint64_t w1);
void cmd_set_tile_size(uint64_t w1);
void cmd_load_block(uint64_t w1);
void cmd_load_tile(uint64_t w1);
void cmd_fill_rect(uint64_t w1);
void cmd_set_tile(uint64_t w1);
void cmd_set_fog_color(uint64_t w1);
void cmd_set_blend_color(uint64_t w1);
void cmd_set_prim_color(uint64_t w1);
void cmd_set_env_color(uint64_t w1);
void cmd_set_combine(uint64_t w1);
void cmd_set_texture_image(uint64_t w1);
void cmd_set_mask_image(uint64_t w1);
void cmd_set_color_image(uint64_t w1);
void cmd_noop(uint64_t *cmd_buf);
void cmd_tex_rect(uint64_t *cmd_buf);
void cmd_tex_rect_flip(uint64_t *cmd_buf);
void cmd_sync_load(uint64_t *cmd_buf);
void cmd_sync_pipe(uint64_t *cmd_buf);
void cmd_sync_tile(uint64_t *cmd_buf);
void cmd_sync_full(uint64_t *cmd_buf);
void cmd_set_key_gb(uint64_t *cmd_buf);
void cmd_set_key_r(uint64_t *cmd_buf);
void cmd_set_fill_color32(uint64_t *cmd_buf);
void cmd_set_convert(uint64_t *cmd_buf);
void cmd_set_scissor(uint64_t *cmd_buf);
void cmd_set_prim_depth(uint64_t *cmd_buf);
void cmd_set_other_modes(uint64_t *cmd_buf);
void cmd_load_tlut(uint64_t *cmd_buf);
void cmd_set_tile_size(uint64_t *cmd_buf);
void cmd_load_block(uint64_t *cmd_buf);
void cmd_load_tile(uint64_t *cmd_buf);
void cmd_fill_rect(uint64_t *cmd_buf);
void cmd_set_tile(uint64_t *cmd_buf);
void cmd_set_fog_color(uint64_t *cmd_buf);
void cmd_set_blend_color(uint64_t *cmd_buf);
void cmd_set_prim_color(uint64_t *cmd_buf);
void cmd_set_env_color(uint64_t *cmd_buf);
void cmd_set_combine(uint64_t *cmd_buf);
void cmd_set_texture_image(uint64_t *cmd_buf);
void cmd_set_mask_image(uint64_t *cmd_buf);
void cmd_set_color_image(uint64_t *cmd_buf);
void rgbaz_clip(int32_t sr, int32_t sg, int32_t sb, int32_t sa, int32_t* sz, rdp_span_aux* userdata);
void rgbaz_correct_triangle(int32_t offx, int32_t offy, int32_t* r, int32_t* g, int32_t* b, int32_t* a, int32_t* z, rdp_span_aux* userdata, const rdp_poly_state &object);
void triangle(bool shade, bool texture, bool zbuffer);
void triangle(uint64_t *cmd_buf, bool shade, bool texture, bool zbuffer);
void get_dither_values(int32_t x, int32_t y, int32_t* cdith, int32_t* adith, const rdp_poly_state &object);
@ -309,7 +298,7 @@ public:
rectangle_t m_scissor;
span_base_t m_span_base;
void draw_triangle(bool shade, bool texture, bool zbuffer, bool rect);
void draw_triangle(uint64_t *cmd_buf, bool shade, bool texture, bool zbuffer, bool rect);
std::unique_ptr<uint8_t[]> m_aux_buf;
uint32_t m_aux_buf_ptr;
@ -374,9 +363,6 @@ private:
uint64_t m_cmd_data[0x800];
uint64_t m_temp_rect_data[0x800];
int32_t m_cmd_ptr;
int32_t m_cmd_cur;
uint32_t m_start;
uint32_t m_end;
uint32_t m_current;
@ -401,7 +387,6 @@ private:
static uint8_t const s_bayer_matrix[16];
static uint8_t const s_magic_matrix[16];
static rdp_command_t const m_commands[0x40];
static int32_t const s_rdp_command_length[];
static char const *const s_image_format[];
static char const *const s_image_size[];

View File

@ -306,6 +306,7 @@ struct rdp_span_aux
uint8_t* m_tmem; /* pointer to texture cache for this polygon */
bool m_start_span;
rgbaint_t m_clamp_diff[8];
combine_modes_t m_combine;
};
struct z_decompress_entry_t