dsp16: move most core state into DRC cache - keeps a lot of details out of the main header (DRC is still stubbed out) (nw)

This commit is contained in:
Vas Crabb 2018-03-21 21:01:36 +11:00
parent 31be84ea40
commit b787818d0c
8 changed files with 736 additions and 519 deletions

View File

@ -271,8 +271,13 @@ if (CPUS["DSP16"]~=null) then
files {
MAME_DIR .. "src/devices/cpu/dsp16/dsp16.cpp",
MAME_DIR .. "src/devices/cpu/dsp16/dsp16.h",
MAME_DIR .. "src/devices/cpu/dsp16/dsp16core.cpp",
MAME_DIR .. "src/devices/cpu/dsp16/dsp16core.h",
MAME_DIR .. "src/devices/cpu/dsp16/dsp16core.ipp",
MAME_DIR .. "src/devices/cpu/dsp16/dsp16fe.cpp",
MAME_DIR .. "src/devices/cpu/dsp16/dsp16fe.h",
MAME_DIR .. "src/devices/cpu/dsp16/dsp16rc.cpp",
MAME_DIR .. "src/devices/cpu/dsp16/dsp16rc.h",
}
end

File diff suppressed because it is too large Load Diff

View File

@ -12,6 +12,9 @@
#include "dsp16dis.h"
#include "cpu/drccache.h"
#include <memory>
#include <utility>
@ -124,6 +127,7 @@ protected:
// device_t implementation
virtual void device_resolve_objects() override;
virtual void device_start() override;
virtual void device_stop() override;
virtual void device_reset() override;
// device_execute_interface implementation
@ -163,6 +167,12 @@ private:
DSP16_YH, DSP16_A0H, DSP16_A1H, DSP16_YL, DSP16_A0L, DSP16_A1L
};
// recompiler setup
// CACHE_SIZE evaluates to 1,048,576 (1U * 1024 * 1024).
// NOTE(review): presumably the byte size used when constructing m_drc_cache - confirm in dsp16.cpp
enum : size_t
{
CACHE_SIZE = 1U * 1024 * 1024
};
// masks for registers that aren't power-of-two sizes
enum : s16
{
@ -214,7 +224,11 @@ private:
friend sio_flags &operator|=(sio_flags &, sio_flags);
// recompiler helpers
class core_state;
class frontend;
class recompiler;
using core_state_ptr = std::unique_ptr<core_state, void (*)(core_state *)>;
using recompiler_ptr = std::unique_ptr<recompiler>;
// internal address maps
void program_map(address_map &map);
@ -227,8 +241,6 @@ private:
s16 yaau_read(u16 op);
void yaau_write(u16 op, s16 value);
void yaau_write_z(u16 op);
u64 dau_f1(u16 op);
void dau_f2(u16 op);
// inline helpers
static bool op_interruptible(u16 op);
@ -236,25 +248,11 @@ private:
// Replaces the FLAGS_PRED_MASK bits of m_flags with the matching bits of
// predicate, leaves every other bit untouched, and returns m_flags.
flags &set_predicate(flags predicate)
{
	m_flags = (m_flags & ~FLAGS_PRED_MASK) | (predicate & FLAGS_PRED_MASK);
	return m_flags;
}
// Replaces the FLAGS_IACK_MASK bits of m_flags with the matching bits of
// iack, leaves every other bit untouched, and returns m_flags.
flags &set_iack(flags iack)
{
	m_flags = (m_flags & ~FLAGS_IACK_MASK) | (iack & FLAGS_IACK_MASK);
	return m_flags;
}
u16 &set_xaau_pc_offset(u16 offset);
void xaau_increment_pt(s16 increment) { m_xaau_pt = (m_xaau_pt & XAAU_I_EXT) | ((m_xaau_pt + increment) & XAAU_I_MASK); }
s16 get_r(u16 op);
void set_r(u16 op, s16 value);
void yaau_postmodify_r(u16 op);
void set_dau_y(u16 op, s16 value);
s64 dau_saturate(u16 a) const;
void set_dau_at(u16 op, s16 value);
u64 set_dau_psw_flags(s64 d);
u64 get_dau_p_aligned() const;
bool op_dau_con(u16 op, bool inc);
// flag accessors
bool dau_auc_sat(u16 a) const { return bool(BIT(m_dau_auc, 2 + a)); }
u16 dau_auc_align() const { return m_dau_auc & 0x0003U; }
bool dau_psw_lmi() const { return bool(BIT(m_dau_psw, 15)); }
bool dau_psw_leq() const { return bool(BIT(m_dau_psw, 14)); }
bool dau_psw_llv() const { return bool(BIT(m_dau_psw, 13)); }
bool dau_psw_lmv() const { return bool(BIT(m_dau_psw, 12)); }
// opcode field handling
// JA field: opcode bits 0-11
static constexpr u16 op_ja(u16 op)
{
	return op & ((1U << 12) - 1U);
}
// B field: opcode bits 8-10
static constexpr u16 op_b(u16 op)
{
	return (op >> 8) & ((1U << 3) - 1U);
}
@ -267,11 +265,6 @@ private:
// CON field: opcode bits 0-4
static constexpr u16 op_con(u16 op)
{
	return op & ((1U << 5) - 1U);
}
// NI field: opcode bits 7-10
static constexpr u16 op_ni(u16 op)
{
	return (op >> 7) & ((1U << 4) - 1U);
}
// K field: opcode bits 0-6
static constexpr u16 op_k(u16 op)
{
	return op & ((1U << 7) - 1U);
}
s16 op_xaau_increment(u16 op) const { return op_x(op) ? m_xaau_i : 1; }
u16 &op_yaau_r(u16 op) { return m_yaau_r[(op >> 2) & 0x0003U]; }
s64 &op_dau_as(u16 op) { return m_dau_a[op_s(op)]; }
s64 &op_dau_ad(u16 op) { return m_dau_a[op_d(op)]; }
s64 &op_dau_at(u16 op) { return m_dau_a[op_d(~op)]; }
// serial I/O
// true while bit 9 of the SIOC register is clear
bool sio_ld_ick() const
{
	return 0 == BIT(m_sio_sioc, 9);
}
@ -323,15 +316,18 @@ private:
// configuration
address_space_config const m_space_config[3];
u16 const m_yaau_mask;
u16 const m_yaau_sign;
u8 const m_yaau_bits;
// memory system access
address_space *m_spaces[3];
direct_read_data<-1> *m_direct;
// recompiler stuff
drc_cache m_drc_cache;
core_state_ptr m_core;
recompiler_ptr m_recompiler;
// execution state
int m_icount;
cache m_cache_mode;
phase m_phase;
u8 m_int_enable[2];
@ -358,30 +354,6 @@ private:
u8 m_pids_out; // parallel input data strobe (sampled on rising edge)
u8 m_pods_out; // parallel output data strobe
// XAAU - ROM Address Arithmetic Unit
u16 m_xaau_pc; // 16 bits unsigned
u16 m_xaau_pt; // 16 bits unsigned
u16 m_xaau_pr; // 16 bits unsigned
u16 m_xaau_pi; // 16 bits unsigned
s16 m_xaau_i; // 12 bits signed
// YAAU - RAM Address Arithmetic Unit
u16 m_yaau_r[4]; // 9/16 bits unsigned
u16 m_yaau_rb; // 9/16 bits unsigned
u16 m_yaau_re; // 9/16 bits unsigned
s16 m_yaau_j; // 9/16 bits signed
s16 m_yaau_k; // 9/16 bits signed
// DAU - Data Arithmetic Unit
s16 m_dau_x; // 16 bits signed
s32 m_dau_y; // 32 bits signed
s32 m_dau_p; // 32 bits signed
s64 m_dau_a[2]; // 36 bits signed
s8 m_dau_c[3]; // 8 bits signed
u8 m_dau_auc; // 7 bits unsigned
u16 m_dau_psw; // 16 bits
s16 m_dau_temp; // 16 bits
// SIO - Serial I/O
u16 m_sio_sioc; // 10 bits
u16 m_sio_obuf; // 16 bits

View File

@ -0,0 +1,38 @@
// license:BSD-3-Clause
// copyright-holders:Vas Crabb
/***************************************************************************
WE|AT&T DSP16 series core state
***************************************************************************/
#include "dsp16core.h"
/***********************************************************************
setup
***********************************************************************/
// Registers the XAAU, YAAU and DAU register members of this object with the
// host device via save_item().  The icount, yaau_mask and yaau_sign members
// are not registered here.
// NOTE(review): NAME() is handed each member expression verbatim and presumably
// derives the saved item's name from it, so these arguments should not be
// rewritten (e.g. qualified with this->) without checking save compatibility.
void dsp16_device_base::core_state::register_save_items(device_t &host)
{
// XAAU registers
host.save_item(NAME(xaau_pc));
host.save_item(NAME(xaau_pt));
host.save_item(NAME(xaau_pr));
host.save_item(NAME(xaau_pi));
host.save_item(NAME(xaau_i));
// YAAU registers
host.save_item(NAME(yaau_r));
host.save_item(NAME(yaau_rb));
host.save_item(NAME(yaau_re));
host.save_item(NAME(yaau_j));
host.save_item(NAME(yaau_k));
// DAU registers
host.save_item(NAME(dau_x));
host.save_item(NAME(dau_y));
host.save_item(NAME(dau_p));
host.save_item(NAME(dau_a));
host.save_item(NAME(dau_c));
host.save_item(NAME(dau_auc));
host.save_item(NAME(dau_psw));
host.save_item(NAME(dau_temp));
}

View File

@ -0,0 +1,116 @@
// license:BSD-3-Clause
// copyright-holders:Vas Crabb
/***************************************************************************
WE|AT&T DSP16 series core state
***************************************************************************/
#ifndef MAME_CPU_DSP16_DSP16CORE_H
#define MAME_CPU_DSP16_DSP16CORE_H
#pragma once
#include "dsp16.h"
// Register state of the DSP16 core (XAAU, YAAU and DAU registers plus the
// cycle counter) together with the inline helpers that operate on it.
// Names that are not declared here - XAAU_I_MASK, XAAU_I_SIGN, XAAU_I_EXT,
// DAU_A_MASK, DAU_A_SIGN, DAU_A_EXT and the op_x/op_s/op_d field extractors -
// are looked up in the enclosing dsp16_device_base class.
class dsp16_device_base::core_state
{
public:
	// construction/destruction
	// yaau_mask gets the low yaau_bits bits set and yaau_sign gets bit (yaau_bits - 1) set.
	// yaau_bits must be at least 1, otherwise the sign-bit shift count would be negative.
	constexpr core_state(u8 yaau_bits) : yaau_mask(u16((u32(1) << yaau_bits) - 1)), yaau_sign(u16(1) << (yaau_bits - 1)) { }
	~core_state() = default;

	// setup
	void register_save_items(device_t &host);

	// execution state helpers
	constexpr bool icount_remaining() const { return 0 < icount; } // true while icount is positive
	void decrement_icount() { --icount; }

	// XAAU helpers
	// pc + 1 within the XAAU_I_MASK bits; the XAAU_I_EXT bits of pc are carried over unchanged
	constexpr u16 xaau_next_pc() const { return (xaau_pc & XAAU_I_EXT) | ((xaau_pc + 1) & XAAU_I_MASK); }
	// advances pt by i or by 1 (chosen by the X field of op), again only within the XAAU_I_MASK bits
	void xaau_increment_pt(u16 op) { xaau_pt = (xaau_pt & XAAU_I_EXT) | ((xaau_pt + op_xaau_increment(op)) & XAAU_I_MASK); }
	// sign-extends i from its XAAU_I_SIGN bit
	void xaau_extend_i() { xaau_i = (xaau_i & XAAU_I_MASK) | ((xaau_i & XAAU_I_SIGN) ? XAAU_I_EXT : 0); }

	// YAAU helpers
	u16 &yaau_postmodify_r(u16 op);
	s16 &yaau_set_j(s16 value) { return yaau_j = yaau_extend(value); }
	s16 &yaau_set_k(s16 value) { return yaau_k = yaau_extend(value); }
	u16 &yaau_set_rb(s16 value) { return yaau_rb = value & yaau_mask; }
	u16 &yaau_set_re(s16 value) { return yaau_re = value & yaau_mask; }
	// Sign-extends value from the configured YAAU width.  This reads only the constant configuration,
	// so it is const-qualified: constexpr member functions are not implicitly const from C++14 onwards,
	// and without the qualifier it could not be called on a const core_state.
	constexpr s16 yaau_extend(s16 value) const { return (value & yaau_mask) | ((value & yaau_sign) ? ~yaau_mask : 0); }
	s16 &yaau_extend_j() { return yaau_j = yaau_extend(yaau_j); }
	s16 &yaau_extend_k() { return yaau_k = yaau_extend(yaau_k); }

	// DAU helpers
	u64 dau_f1(u16 op);
	void dau_f2(u16 op);
	// high half of y when the X field of op is set, low half otherwise
	constexpr s16 dau_get_y(u16 op) const { return u16(u32(dau_y) >> (op_x(op) ? 16 : 0)); }
	// writes the half of y selected by the X field of op
	void dau_set_y(u16 op, s16 value) { op_x(op) ? dau_set_y(value) : dau_set_yl(value); }
	// writes the high half of y; the low half is kept only while auc bit 6 is set, otherwise it is zeroed
	void dau_set_y(s16 value) { dau_y = (u32(u16(value)) << 16) | u32(BIT(dau_auc, 6) ? u16(u32(dau_y)) : u16(0)); }
	// writes the low half of y and leaves the high half alone
	void dau_set_yl(s16 value) { dau_y = (dau_y & ~((u32(1) << 16) - 1)) | u16(value); }
	s64 &dau_set_at(u16 op, s16 value);
	// sign-extends accumulator T from its DAU_A_SIGN bit
	template <unsigned T> void dau_extend_a() { dau_a[T] = (dau_a[T] & DAU_A_MASK) | ((dau_a[T] & DAU_A_SIGN) ? DAU_A_EXT : 0); }
	// copies bits 32-35 of a0 into psw bits 0-3 and bits 32-35 of a1 into psw bits 5-8,
	// keeping the psw bits selected by 0xfe10, and returns the updated psw
	u16 dau_export_psw()
	{
		return dau_psw = (dau_psw & 0xfe10U) | (u16(u64(dau_a[0]) >> 32) & 0x000fU) | (u16(u64(dau_a[1]) >> 27) & 0x01e0U);
	}
	// the reverse operation: rebuilds bits 32-35 of a0/a1 from psw bits 0-3/5-8, keeps the low 32 bits of each
	// accumulator, and applies DAU_A_EXT when the top imported bit (psw bit 3 or 8) is set
	void dau_import_psw()
	{
		dau_a[0] = u64(u32(dau_a[0])) | (u64(dau_psw & 0x000fU) << 32) | (BIT(dau_psw, 3) ? DAU_A_EXT : 0U);
		dau_a[1] = u64(u32(dau_a[1])) | (u64(dau_psw & 0x01e0U) << 27) | (BIT(dau_psw, 8) ? DAU_A_EXT : 0U);
	}

	// DAU flag accessors
	constexpr bool dau_auc_sat(u16 a) const { return bool(BIT(dau_auc, 2 + a)); } // auc bit 2 + a
	constexpr u16 dau_auc_align() const { return dau_auc & 0x0003U; } // auc bits 0-1
	constexpr bool dau_psw_lmi() const { return bool(BIT(dau_psw, 15)); }
	constexpr bool dau_psw_leq() const { return bool(BIT(dau_psw, 14)); }
	constexpr bool dau_psw_llv() const { return bool(BIT(dau_psw, 13)); }
	constexpr bool dau_psw_lmv() const { return bool(BIT(dau_psw, 12)); }

	// opcode field handling
	constexpr s16 op_xaau_increment(u16 op) const { return op_x(op) ? xaau_i : 1; }
	u16 &op_yaau_r(u16 op) { return yaau_r[(op >> 2) & 0x0003U]; } // register index comes from opcode bits 2-3
	s64 &op_dau_as(u16 op) { return dau_a[op_s(op)]; }
	s64 &op_dau_ad(u16 op) { return dau_a[op_d(op)]; }
	s64 &op_dau_at(u16 op) { return dau_a[op_d(~op)]; } // the accumulator *not* selected by the D field

	// configuration
	u16 const yaau_mask;
	u16 const yaau_sign;

	// execution state
	int icount = 0;

	// XAAU - ROM Address Arithmetic Unit
	u16 xaau_pc = 0U; // 16 bits unsigned
	u16 xaau_pt = 0U; // 16 bits unsigned
	u16 xaau_pr = 0U; // 16 bits unsigned
	u16 xaau_pi = 0U; // 16 bits unsigned
	s16 xaau_i = 0; // 12 bits signed

	// YAAU - RAM Address Arithmetic Unit
	u16 yaau_r[4] = { 0U, 0U, 0U, 0U }; // 9/16 bits unsigned
	u16 yaau_rb = 0U; // 9/16 bits unsigned
	u16 yaau_re = 0U; // 9/16 bits unsigned
	s16 yaau_j = 0; // 9/16 bits signed
	s16 yaau_k = 0; // 9/16 bits signed

	// DAU - Data Arithmetic Unit
	s16 dau_x = 0; // 16 bits signed
	s32 dau_y = 0; // 32 bits signed
	s32 dau_p = 0; // 32 bits signed
	s64 dau_a[2] = { 0, 0 }; // 36 bits signed
	s8 dau_c[3] = { 0, 0, 0 }; // 8 bits signed
	u8 dau_auc = 0U; // 7 bits unsigned
	u16 dau_psw = 0U; // 16 bits
	s16 dau_temp = 0; // 16 bits

private:
	// internal helpers
	u64 dau_get_p_aligned() const;
	u64 dau_set_psw_flags(s64 d);
};
#endif // MAME_CPU_DSP16_DSP16CORE_H

View File

@ -0,0 +1,223 @@
// license:BSD-3-Clause
// copyright-holders:Vas Crabb
/***************************************************************************
WE|AT&T DSP16 series core state
***************************************************************************/
#ifndef MAME_CPU_DSP16_DSP16CORE_IPP
#define MAME_CPU_DSP16_DSP16CORE_IPP
#pragma once
#include "dsp16core.h"
/***********************************************************************
YAAU helpers
***********************************************************************/
// Applies the post-modification encoded in the low two bits of op to the
// YAAU pointer register selected by op, masks the result to the configured
// YAAU width and returns a reference to that register.
inline u16 &dsp16_device_base::core_state::yaau_postmodify_r(u16 op)
{
	u16 &reg(op_yaau_r(op));
	u16 const mode(op & 0x0003U);
	if (0x1 == mode)
	{
		// *rN++ - reload from rb when re is non-zero and has been reached
		if (yaau_re && (yaau_re == reg))
			reg = yaau_rb;
		else
			++reg;
	}
	else if (0x2 == mode)
	{
		// *rN--
		--reg;
	}
	else if (0x3 == mode)
	{
		// *rN++j
		reg += yaau_j;
	}
	// mode 0x0 is plain *rN: no modification, but the mask is still applied
	reg &= yaau_mask;
	return reg;
}
/***********************************************************************
DAU helpers
***********************************************************************/
// Evaluates the F1 function encoded in op.  The product register is updated
// here where the encoding calls for it, but the destination accumulator is
// not written: the value it should receive is returned instead.  Encodings
// that leave aD alone return its current contents; all others return the
// result after it has been run through dau_set_psw_flags().
inline u64 dsp16_device_base::core_state::dau_f1(u16 op)
{
	// p = x * (high half of y); must run after any read of the old p
	auto const multiply([this] () { dau_p = dau_x * (dau_y >> 16); });

	s64 const &src(op_dau_as(op));
	s64 result(0);
	switch (op_f1(op))
	{
	case 0x0: // aD = p ; p = x*y
		result = dau_get_p_aligned();
		multiply();
		break;
	case 0x1: // aD = aS + p ; p = x*y
		result = src + dau_get_p_aligned();
		multiply();
		break;
	case 0x2: // p = x*y
		multiply();
		return op_dau_ad(op);
	case 0x3: // aD = aS - p ; p = x*y
		result = src - dau_get_p_aligned();
		multiply();
		break;
	case 0x4: // aD = p
		result = dau_get_p_aligned();
		break;
	case 0x5: // aD = aS + p
		result = src + dau_get_p_aligned();
		break;
	case 0x6: // NOP
		return op_dau_ad(op);
	case 0x7: // aD = aS - p
		result = src - dau_get_p_aligned();
		break;
	case 0x8: // aD = aS | y
		result = src | dau_y;
		break;
	case 0x9: // aD = aS ^ y
		result = src ^ dau_y;
		break;
	case 0xa: // aS & y
		// flags only - the destination keeps its value
		dau_set_psw_flags(src & dau_y);
		return op_dau_ad(op);
	case 0xb: // aS - y
		// flags only - the destination keeps its value
		dau_set_psw_flags(src - dau_y);
		return op_dau_ad(op);
	case 0xc: // aD = y
		result = dau_y;
		break;
	case 0xd: // aD = aS + y
		result = src + dau_y;
		break;
	case 0xe: // aD = aS & y
		result = src & dau_y;
		break;
	case 0xf: // aD = aS - y
		result = src - dau_y;
		break;
	}
	return dau_set_psw_flags(result);
}
// Evaluates the F2 function encoded in op and writes the result, after it has
// been run through dau_set_psw_flags(), to the destination accumulator.
// Throws emu_fatalerror for the reserved encoding 0xa, and (via
// dau_get_p_aligned) for a reserved ALIGN setting when encoding 0x8 is used.
//
// Note that d is a reference to the destination accumulator, which may be the
// same accumulator as s.  Every case must therefore read what it needs from
// the old destination value before assigning to d.
inline void dsp16_device_base::core_state::dau_f2(u16 op)
{
	s64 const &s(op_dau_as(op));
	s64 &d(op_dau_ad(op));
	switch (op_f2(op))
	{
	case 0x0: // aD = aS >> 1
		d = s >> 1;
		break;
	case 0x1: // aD = aS << 1
		d = s32(u32(u64(s)) << 1);
		break;
	case 0x2: // aD = aS >> 4
		d = s >> 4;
		break;
	case 0x3: // aD = aS << 4
		d = s32(u32(u64(s)) << 4);
		break;
	case 0x4: // aD = aS >> 8
		d = s >> 8;
		break;
	case 0x5: // aD = aS << 8
		d = s32(u32(u64(s)) << 8);
		break;
	case 0x6: // aD = aS >> 16
		d = s >> 16;
		break;
	case 0x7: // aD = aS << 16
		d = s32(u32(u64(s)) << 16);
		break;
	case 0x8: // aD = p
		d = dau_get_p_aligned();
		break;
	case 0x9: // aDh = aSh + 1
		{
			// The old low word of aD has to be captured first: once d has been assigned its low word is zero,
			// so reading it back through op_dau_ad(op) afterwards would make the preservation a no-op.
			s64 const dl(d & ((s64(1) << 16) - 1));
			d = s64(s32(u32(u64(s)) & ~((u32(1) << 16) - 1))) + (s32(1) << 16);
			// NOTE(review): the control bit is indexed by the *source* field here - confirm whether the
			// destination's bit (op_d) is what the hardware actually consults
			if (BIT(dau_auc, op_s(op) + 4))
				d |= dl;
		}
		break;
	case 0xa: // Reserved
		throw emu_fatalerror("DSP16: reserved F2 value %01X (PC = %04X)\n", op_f2(op), xaau_pc/*FIXME: st_pcbase*/);
	case 0xb: // aD = rnd(aS)
		// FIXME: behaviour is not clear
		// p 3-13: "Round upper 20 bits of accumulator."
		// p 3-14: "The contents of the source accumulator, aS, are rounded to 16 bits, and the sign-extended result is placed in aD[35 - 16] with zeroes in aD[15 - 0]."
		// It presumably rounds to nearest, but does it yield a 16-bit or 20-bit result, and what does it do about overflow?
		d = (s + ((0 > s) ? -(s16(1) << 15) : (s16(1) << 15))) & ~((u64(1) << 16) - 1);
		break;
	case 0xc: // aD = y
		d = dau_y;
		break;
	case 0xd: // aD = aS + 1
		d = s + 1;
		break;
	case 0xe: // aD = aS
		d = s;
		break;
	case 0xf: // aD = -aS
		// FIXME: does this detect negation of largest negative number as overflow?
		d = -s;
		break;
	}
	// update the psw flags from the raw result and store the normalised value
	d = dau_set_psw_flags(d);
}
// Writes a 16-bit value to one half of the accumulator selected by op_dau_at()
// (the one not named by the D field of op) and returns a reference to it.
// With the X field of op set the high half is written: the result is
// sign-extended from bit 31, and the low half is either kept or zeroed
// depending on that accumulator's clear control in auc.  With the X field
// clear only the low 16 bits are replaced.
inline s64 &dsp16_device_base::core_state::dau_set_at(u16 op, s16 value)
{
	s64 &at(op_dau_at(op));
	if (op_x(op))
	{
		// The per-accumulator clear controls are auc bits 4 and 5, alongside the y control in auc bit 6
		// used by dau_set_y() and matching the indexing used by dau_f2().  The psw must not be consulted
		// here: its bit 5 carries an a1 guard bit (see dau_export_psw()), not a control bit.
		bool const clear(!BIT(dau_auc, 4 + op_d(~op)));
		return at = s32((u32(u16(value)) << 16) | u32(clear ? u16(0) : u16(u64(at))));
	}
	else
	{
		return at = (at & ~((s64(1) << 16) - 1)) | u64(u16(value));
	}
}
/***********************************************************************
internal helpers
***********************************************************************/
// Returns the product register adjusted according to the ALIGN field of auc:
// 0 = unchanged, 1 = arithmetic shift right by two, 2 = shift left by two.
// Throws emu_fatalerror for the reserved ALIGN value 3.
inline u64 dsp16_device_base::core_state::dau_get_p_aligned() const
{
	// TODO: manual is contradictory
	// p 2-10: "Bits 1 and 2 of the accumulator are not changed by the load of the accumulator with the data in p, since 00 is added to or copied into these accumulator bits as indicated in Figure 2-8."
	// I'm reading this as copying p to aD clears the low two bits, but adding it leaves them unchanged.
	u16 const align(dau_auc_align());
	if (0x0 == align)
		return dau_p;
	if (0x1 == align)
		return dau_p >> 2;
	if (0x2 == align)
		return u64(dau_p) << 2;

	// only the reserved setting is left
	throw emu_fatalerror("DSP16: reserved ALIGN value %01X (PC = %04X)\n", dau_auc_align(), xaau_pc/*FIXME: st_pcbase*/);
}
// Recomputes the top four psw bits from the raw result d and returns d
// normalised to accumulator width (DAU_A_EXT applied when the DAU_A_SIGN bit
// is set, masked with DAU_A_MASK otherwise).  The low twelve psw bits are
// left as they were.
inline u64 dsp16_device_base::core_state::dau_set_psw_flags(s64 d)
{
	bool const minus(d & DAU_A_SIGN);
	u16 updated(dau_psw & 0x0fffU);

	// bit 15: sign bit of the result is set
	if (minus)
		updated |= 0x8000U;

	// bit 14: all bits under DAU_A_MASK are zero
	if (!(d & DAU_A_MASK))
		updated |= 0x4000U;

	// bit 13: bits 36 and up are not a sign extension of the result
	if ((d >> 36) != (minus ? -1 : 0))
		updated |= 0x2000U;

	// bit 12: bits 32-35 are not a sign extension of bit 31
	if (((d >> 32) & ((1 << 4) - 1)) != (BIT(d, 31) ? 15 : 0))
		updated |= 0x1000U;

	dau_psw = updated;

	if (minus)
		return d | DAU_A_EXT;
	else
		return d & DAU_A_MASK;
}
#endif // MAME_CPU_DSP16_DSP16CORE_IPP

View File

@ -0,0 +1,50 @@
// license:BSD-3-Clause
// copyright-holders:Vas Crabb
/***************************************************************************
WE|AT&T DSP16 series recompiler
There are a number of easy optimisations:
* The RAM space is entirely internal, so the memory system can be
bypassed if debugging is not enabled.
* The DSP16A has 16-bit YAAU registers, so sign extension can be
elided.
* The same code rarely runs with IACK asserted and clear, allowing
optimisation of PI register accesses and interrupt checks.
* The PSW is rarely accessed directly, so the accumulator guard bits
don't need to be kept in sync (interpreter already does this).
* The DAU flags are used infrequently, so it's far cheaper to
calculate them on-demand rather than preemptively.
* The same code will rarely be run with different AUC modes, so it's
cheaper to make assumptions and recompile if they break.
There are some more complex optimisations that give good gains:
* Multiplication is free with F1, so code will sometimes multiply
when it doesn't need to - this can be elided.
* Address register accesses can be elided in many cases as the
values can be computed at translation time.
***************************************************************************/
#include "dsp16rc.h"
#include "cpu/drcumlsh.h"
/***********************************************************************
construction/destruction
***********************************************************************/
// Binds the recompiler to its host device and to the host's core state, and
// constructs the instruction frontend and the UML engine.
//   host  - owning device; host.m_core is dereferenced here, so it must already point at a core_state
//   flags - passed through unchanged to the drcuml_state constructor
// NOTE(review): the trailing 2, 16, 0 arguments are drcuml_state constructor parameters whose meaning
// is not visible in this file - check cpu/drcuml.h before changing them
dsp16_device_base::recompiler::recompiler(dsp16_device_base &host, u32 flags)
: m_host(host)
, m_core(*host.m_core)
, m_frontend(host, COMPILE_BACKWARDS_BYTES, COMPILE_FORWARDS_BYTES, COMPILE_MAX_SEQUENCE)
, m_uml(host, host.m_drc_cache, flags, 2, 16, 0)
{
// m_host and m_core are not otherwise referenced in this file yet;
// presumably these casts silence unused-member warnings while the recompiler is still a stub
(void)m_host;
(void)m_core;
}
// nothing to release by hand - the members clean up after themselves; the
// definition stays out of line in this translation unit
dsp16_device_base::recompiler::~recompiler() = default;

View File

@ -0,0 +1,51 @@
// license:BSD-3-Clause
// copyright-holders:Vas Crabb
/***************************************************************************
WE|AT&T DSP16 series recompiler
***************************************************************************/
#ifndef MAME_CPU_DSP16_DSP16RC_H
#define MAME_CPU_DSP16_DSP16RC_H
#pragma once
#include "dsp16.h"
#include "dsp16fe.h"
#include "cpu/drcuml.h"
// Dynamic recompiler for the DSP16 core.  At this point it only holds the
// pieces it is built from: references to the host device and its core state,
// an instruction frontend, and a UML engine.
// Member declaration order matters - it is the order in which the
// constructor's initialiser list runs.
class dsp16_device_base::recompiler
{
public:
// construction/destruction
// flags is forwarded to the drcuml_state constructor (see dsp16rc.cpp)
recompiler(dsp16_device_base &host, u32 flags);
~recompiler();
private:
// compilation boundaries
// The BACKWARDS/FORWARDS/MAX_SEQUENCE values are handed to the frontend constructor.
// COMPILE_MAX_INSTRUCTIONS works out to 32 + 128 = 160.
// NOTE(review): the division by 2 presumably reflects two bytes per instruction word - confirm
enum : u32
{
COMPILE_BACKWARDS_BYTES = 64,
COMPILE_FORWARDS_BYTES = 256,
COMPILE_MAX_INSTRUCTIONS = (COMPILE_BACKWARDS_BYTES / 2) + (COMPILE_FORWARDS_BYTES / 2),
COMPILE_MAX_SEQUENCE = 64
};
// NOTE(review): presumably the reasons generated code can return to the dispatcher - not used in the code shown here
enum : int
{
EXEC_OUT_OF_CYCLES,
EXEC_MISSING_CODE,
EXEC_UNMAPPED_CODE,
EXEC_RESET_CACHE
};
// host CPU device, frontend to describe instructions, and UML engine
dsp16_device_base &m_host;
core_state &m_core;
frontend m_frontend;
drcuml_state m_uml;
};
#endif // MAME_CPU_DSP16_DSP16RC_H