Added m62(kidniki) to nl.lua. Started some experiments around parallel

solving of linear equations. This code is not active. Fix kidniki audio
performance. [Couriersud]
This commit is contained in:
couriersud 2016-03-30 02:39:11 +02:00
parent 86007e7e29
commit bc936dbc26
5 changed files with 238 additions and 17 deletions

View File

@ -4,7 +4,7 @@
#include "netlist/devices/nld_system.h"
#include "netlist/analog/nld_bjt.h"
#define USE_FRONTIERS 1
#define USE_FRONTIERS 0
#define USE_FIXED_STV 1
NETLIST_START(dummy)
@ -14,8 +14,8 @@ NETLIST_START(dummy)
PARAM(Solver.NR_LOOPS, 300)
PARAM(Solver.GS_LOOPS, 1)
PARAM(Solver.GS_THRESHOLD, 6)
PARAM(Solver.ITERATIVE, "W")
//PARAM(Solver.ITERATIVE, "MAT")
//PARAM(Solver.ITERATIVE, "W")
PARAM(Solver.ITERATIVE, "MAT")
//PARAM(Solver.ITERATIVE, "GMRES")
//PARAM(Solver.ITERATIVE, "SOR")
PARAM(Solver.DYNAMIC_TS, 0)

View File

@ -17,7 +17,9 @@
--------------------------------------------------
CPUS["Z80"] = true
--CPUS["M6502"] = true
CPUS["M6800"] = true
CPUS["M6803"] = true
CPUS["M6809"] = true
--CPUS["MCS48"] = true
--CPUS["MCS51"] = true
--CPUS["M6800"] = true
@ -33,9 +35,9 @@ CPUS["Z80"] = true
--SOUNDS["SAMPLES"] = true
SOUNDS["DAC"] = true
--SOUNDS["DISCRETE"] = true
SOUNDS["DISCRETE"] = true
SOUNDS["AY8910"] = true
--SOUNDS["YM2151"] = true
SOUNDS["MSM5205"] = true
--SOUNDS["ASTROCADE"] = true
--SOUNDS["TMS5220"] = true
--SOUNDS["OKIM6295"] = true
@ -84,6 +86,8 @@ function createProjects_mame_nl(_target, _subtarget)
targetsubdir(_target .."_" .. _subtarget)
kind (LIBTYPE)
uuid (os.uuid("drv-mame-nl"))
addprojectflags()
precompiledheaders()
includedirs {
MAME_DIR .. "src/osd",
@ -109,6 +113,12 @@ files{
MAME_DIR .. "src/mame/drivers/popeye.cpp",
MAME_DIR .. "src/mame/includes/popeye.h",
MAME_DIR .. "src/mame/video/popeye.cpp",
MAME_DIR .. "src/mame/drivers/m62.cpp",
MAME_DIR .. "src/mame/includes/m62.h",
MAME_DIR .. "src/mame/video/m62.cpp",
MAME_DIR .. "src/mame/audio/irem.cpp",
MAME_DIR .. "src/mame/audio/irem.h",
}
end

View File

@ -18,7 +18,12 @@
* going forward in case we implement cuda solvers in the future.
*/
#define NL_USE_DYNAMIC_ALLOCATION (0)
#define TEST_PARALLEL (0)
#if TEST_PARALLEL
#include <thread>
#include <atomic>
#endif
NETLIB_NAMESPACE_DEVICES_START()
@ -26,8 +31,92 @@ NETLIB_NAMESPACE_DEVICES_START()
//#define nl_ext_double long double // slightly slower
#define nl_ext_double nl_double
#if TEST_PARALLEL
#define MAXTHR 10
static const int num_thr = 2;
struct thr_intf
{
virtual void do_work(const int id, void *param) = 0;
};
struct ti_t
{
volatile std::atomic<int> lo;
thr_intf *intf;
void *params;
int _block[29]; /* make it 256 bytes */
};
static ti_t ti[MAXTHR];
//static std::thread thr[MAXTHR];
static std::thread thr[num_thr];
int thr_init = 0;
static void thr_process_proc(int id)
{
while (true)
{
while (ti[id].lo.load() == 0)
;
if (ti[id].lo.load() == 2)
return;
ti[id].intf->do_work(id, ti[id].params);
ti[id].lo.store(0);
}
}
static void thr_process(int id, thr_intf *intf, void *params)
{
ti[id].intf = intf;
ti[id].params = params;
ti[id].lo.store(1);
}
static void thr_wait()
{
int c=1;
while (c > 0)
{
c=0;
for (int i=0; i<num_thr; i++)
c += ti[i].lo.load();
}
}
static void thr_initialize()
{
thr_init++;
if (thr_init == 1)
{
for (int i=0; i<num_thr; i++)
{
ti[i].lo = 0;
thr[i] = std::thread(thr_process_proc, i);
}
}
}
static void thr_dispose()
{
thr_init--;
if (thr_init == 0)
{
for (int i=0; i<num_thr; i++)
ti[i].lo = 2;
for (int i=0; i<num_thr; i++)
thr[i].join();
}
}
#endif
template <unsigned m_N, unsigned _storage_N>
#if TEST_PARALLEL
class matrix_solver_direct_t: public matrix_solver_t, public thr_intf
#else
class matrix_solver_direct_t: public matrix_solver_t
#endif
{
public:
@ -61,6 +150,13 @@ protected:
virtual netlist_time compute_next_timestep() override;
#if TEST_PARALLEL
int x_i[10];
int x_start[10];
int x_stop[10];
virtual void do_work(const int id, void *param) override;
#endif
#if (NL_USE_DYNAMIC_ALLOCATION)
template <typename T1, typename T2>
inline nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r * m_pitch + c]; }
@ -80,11 +176,12 @@ protected:
private:
static const std::size_t m_pitch = (((_storage_N + 1) + 7) / 8) * 8;
//static const std::size_t m_pitch = (((_storage_N + 1) + 15) / 16) * 16;
//static const std::size_t m_pitch = (((_storage_N + 1) + 31) / 32) * 32;
#if (NL_USE_DYNAMIC_ALLOCATION)
ATTR_ALIGN nl_ext_double * RESTRICT m_A;
#else
ATTR_ALIGN nl_ext_double m_A[_storage_N][m_pitch];
ATTR_ALIGN nl_ext_double m_B[_storage_N][m_pitch];
#endif
//ATTR_ALIGN nl_ext_double m_RHSx[_storage_N];
@ -107,6 +204,10 @@ matrix_solver_direct_t<m_N, _storage_N>::~matrix_solver_direct_t()
#if (NL_USE_DYNAMIC_ALLOCATION)
pfree_array(m_A);
#endif
#if TEST_PARALLEL
thr_dispose();
#endif
}
template <unsigned m_N, unsigned _storage_N>
@ -399,6 +500,37 @@ void matrix_solver_direct_t<m_N, _storage_N>::build_LE_RHS()
}
}
#if TEST_PARALLEL
template <unsigned m_N, unsigned _storage_N>
void matrix_solver_direct_t<m_N, _storage_N>::do_work(const int id, void *param)
{
const int i = x_i[id];
/* FIXME: Singular matrix? */
const nl_double f = 1.0 / A(i,i);
const unsigned * RESTRICT const p = m_terms[i]->m_nzrd.data();
const unsigned e = m_terms[i]->m_nzrd.size();
//nl_double A_cache[128];
//for (unsigned k = 0; k < e; k++)
// A_cache[k] = A(i,p[k]);
/* Eliminate column i from row j */
const unsigned * RESTRICT const pb = m_terms[i]->m_nzbd.data();
//const unsigned eb = m_terms[i]->m_nzbd.size();
const unsigned sj = x_start[id];
const unsigned se = x_stop[id];
for (unsigned jb = sj; jb < se; jb++)
{
const unsigned j = pb[jb];
const nl_double f1 = - A(j,i) * f;
for (unsigned k = 0; k < e; k++)
//A(j,p[k]) += A_cache[k] * f1;
A(j,p[k]) += A(i,p[k]) * f1;
//RHS(j) += RHS(i) * f1;
}
}
#endif
template <unsigned m_N, unsigned _storage_N>
void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
{
@ -406,7 +538,7 @@ void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
for (unsigned i = 0; i < kN; i++) {
// FIXME: use a parameter to enable pivoting? m_pivot
if (m_params.m_pivot)
if (!TEST_PARALLEL && m_params.m_pivot)
{
/* Find the row with the largest first value */
unsigned maxrow = i;
@ -452,23 +584,62 @@ void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
}
else
{
#if TEST_PARALLEL
const unsigned eb = m_terms[i]->m_nzbd.size();
if (eb > 16)
{
printf("here %d\n", eb);
unsigned chunks = (eb) / (num_thr + 1);
for (int p=0; p < num_thr + 1; p++)
{
x_i[p] = i;
x_start[p] = chunks * p;
x_stop[p] = std::min(chunks*(p+1), eb);
if (p<num_thr) thr_process(p, this, NULL);
}
do_work(num_thr, NULL);
thr_wait();
}
else if (eb > 0)
{
x_i[0] = i;
x_start[0] = 0;
x_stop[0] = eb;
do_work(0, NULL);
}
#else
#if 0
/* FIXME: Singular matrix? */
const nl_double f = 1.0 / A(i,i);
const unsigned * RESTRICT const p = m_terms[i]->m_nzrd.data();
const unsigned e = m_terms[i]->m_nzrd.size();
const auto &nzrd = m_terms[i]->m_nzrd;
const auto &nzbd = m_terms[i]->m_nzbd;
/* Eliminate column i from row j */
const unsigned * RESTRICT const pb = m_terms[i]->m_nzbd.data();
const unsigned eb = m_terms[i]->m_nzbd.size();
for (unsigned jb = 0; jb < eb; jb++)
for (auto j : nzbd)
{
const unsigned j = pb[jb];
const nl_double f1 = - A(j,i) * f;
for (unsigned k = 0; k < e; k++)
A(j,p[k]) += A(i,p[k]) * f1;
for (auto k : nzrd)
A(j,k) += A(i,k) * f1;
//RHS(j) += RHS(i) * f1;
}
#else
/* FIXME: Singular matrix? */
const nl_double f = 1.0 / A(i,i);
const auto &nzrd = m_terms[i]->m_nzrd;
const auto &nzbd = m_terms[i]->m_nzbd;
/* Eliminate column i from row j */
for (auto j : nzbd)
{
const nl_double f1 = - A(j,i) * f;
for (auto k : nzrd)
A(j,k) += A(i,k) * f1;
//RHS(j) += RHS(i) * f1;
}
#endif
#endif
}
}
}
@ -591,6 +762,9 @@ matrix_solver_direct_t<m_N, _storage_N>::matrix_solver_direct_t(const solver_par
m_last_RHS[k] = 0.0;
m_last_V[k] = 0.0;
}
#if TEST_PARALLEL
thr_initialize();
#endif
}
template <unsigned m_N, unsigned _storage_N>
@ -608,6 +782,9 @@ matrix_solver_direct_t<m_N, _storage_N>::matrix_solver_direct_t(const eSolverTyp
m_last_RHS[k] = 0.0;
m_last_V[k] = 0.0;
}
#if TEST_PARALLEL
thr_initialize();
#endif
}
NETLIB_NAMESPACE_DEVICES_END()

View File

@ -624,7 +624,16 @@ int matrix_solver_w_t<m_N, _storage_N>::solve_non_dynamic(ATTR_UNUSED const bool
}
m_cnt++;
for (unsigned i=0; i<iN; i++)
{
nl_double tmp = 0.0;
for (unsigned j=0; j<iN; j++)
{
tmp += A(i,j) * new_V[j];
}
if (std::fabs(tmp-RHS(i)) > 1e-6)
printf("%s failed on row %d: %f RHS: %f\n", this->name().cstr(), i, std::fabs(tmp-RHS(i)), RHS(i));
}
if (newton_raphson)
{
nl_double err = delta(new_V);

View File

@ -29,3 +29,28 @@ pong // (c) 1972 Atari
pongd // (c) 1975 Atari
pongf // (c) 1972 Atari
breakout // (c) 1976 Atari
@source:m62.cpp
battroad // (c) 1984
horizon // (c) 1985
kidniki // (c) 1986 + Data East USA license
kidnikiu // (c) 1986 + Data East USA license
kungfub // bootleg
kungfub2 // bootleg
kungfum // (c) 1984
kungfumd // (c) 1984 + Data East license
ldrun // (c) 1984 licensed from Broderbund
ldrun2 // (c) 1984 licensed from Broderbund
ldrun3 // (c) 1985 licensed from Broderbund
ldrun3j // (c) 1985 licensed from Broderbund
ldrun4 // (c) 1986 licensed from Broderbund
ldruna // (c) 1984 licensed from Broderbund
lithero // bootleg
lotlot // (c) 1985 licensed from Tokuma Shoten
spartanx // (c) 1984 (Japan)
spelunk2 // (c) 1986 licensed from Broderbund
spelunkr // (c) 1985 licensed from Broderbund
spelunkrj // (c) 1985 licensed from Broderbund
yanchamr // (c) 1986 (Japan)
youjyudn // (c) 1986 (Japan)