From bc936dbc261a5eeb1400bdf0d0bc945577470905 Mon Sep 17 00:00:00 2001 From: couriersud Date: Wed, 30 Mar 2016 02:39:11 +0200 Subject: [PATCH] Added m62(kidniki) to nl.lua. Started some experiments around parallel solving of linear equations. This code is not active. Fix kidniki audio performance. [Couriersud] --- nl_examples/kidniki.c | 6 +- scripts/target/mame/nl.lua | 16 +- src/lib/netlist/solver/nld_ms_direct.h | 197 +++++++++++++++++++++++-- src/lib/netlist/solver/nld_ms_w.h | 11 +- src/mame/nl.lst | 25 ++++ 5 files changed, 238 insertions(+), 17 deletions(-) diff --git a/nl_examples/kidniki.c b/nl_examples/kidniki.c index 0b558514b0b..6616a5c2a5d 100644 --- a/nl_examples/kidniki.c +++ b/nl_examples/kidniki.c @@ -4,7 +4,7 @@ #include "netlist/devices/nld_system.h" #include "netlist/analog/nld_bjt.h" -#define USE_FRONTIERS 1 +#define USE_FRONTIERS 0 #define USE_FIXED_STV 1 NETLIST_START(dummy) @@ -14,8 +14,8 @@ NETLIST_START(dummy) PARAM(Solver.NR_LOOPS, 300) PARAM(Solver.GS_LOOPS, 1) PARAM(Solver.GS_THRESHOLD, 6) - PARAM(Solver.ITERATIVE, "W") - //PARAM(Solver.ITERATIVE, "MAT") + //PARAM(Solver.ITERATIVE, "W") + PARAM(Solver.ITERATIVE, "MAT") //PARAM(Solver.ITERATIVE, "GMRES") //PARAM(Solver.ITERATIVE, "SOR") PARAM(Solver.DYNAMIC_TS, 0) diff --git a/scripts/target/mame/nl.lua b/scripts/target/mame/nl.lua index bf0fd462beb..3409ce3eb44 100644 --- a/scripts/target/mame/nl.lua +++ b/scripts/target/mame/nl.lua @@ -17,7 +17,9 @@ -------------------------------------------------- CPUS["Z80"] = true ---CPUS["M6502"] = true +CPUS["M6800"] = true +CPUS["M6803"] = true +CPUS["M6809"] = true --CPUS["MCS48"] = true --CPUS["MCS51"] = true --CPUS["M6800"] = true @@ -33,9 +35,9 @@ CPUS["Z80"] = true --SOUNDS["SAMPLES"] = true SOUNDS["DAC"] = true ---SOUNDS["DISCRETE"] = true +SOUNDS["DISCRETE"] = true SOUNDS["AY8910"] = true ---SOUNDS["YM2151"] = true +SOUNDS["MSM5205"] = true --SOUNDS["ASTROCADE"] = true --SOUNDS["TMS5220"] = true --SOUNDS["OKIM6295"] = true @@ -84,6 +86,8 @@ function createProjects_mame_nl(_target, _subtarget) targetsubdir(_target .."_" .. _subtarget) kind (LIBTYPE) uuid (os.uuid("drv-mame-nl")) + addprojectflags() + precompiledheaders() includedirs { MAME_DIR .. "src/osd", @@ -109,6 +113,12 @@ files{ MAME_DIR .. "src/mame/drivers/popeye.cpp", MAME_DIR .. "src/mame/includes/popeye.h", MAME_DIR .. "src/mame/video/popeye.cpp", + + MAME_DIR .. "src/mame/drivers/m62.cpp", + MAME_DIR .. "src/mame/includes/m62.h", + MAME_DIR .. "src/mame/video/m62.cpp", + MAME_DIR .. "src/mame/audio/irem.cpp", + MAME_DIR .. "src/mame/audio/irem.h", } end diff --git a/src/lib/netlist/solver/nld_ms_direct.h b/src/lib/netlist/solver/nld_ms_direct.h index 0badcdce863..dbd8929a725 100644 --- a/src/lib/netlist/solver/nld_ms_direct.h +++ b/src/lib/netlist/solver/nld_ms_direct.h @@ -18,7 +18,12 @@ * going forward in case we implement cuda solvers in the future. */ #define NL_USE_DYNAMIC_ALLOCATION (0) +#define TEST_PARALLEL (0) +#if TEST_PARALLEL +#include +#include +#endif NETLIB_NAMESPACE_DEVICES_START() @@ -26,8 +31,92 @@ NETLIB_NAMESPACE_DEVICES_START() //#define nl_ext_double long double // slightly slower #define nl_ext_double nl_double +#if TEST_PARALLEL +#define MAXTHR 10 +static const int num_thr = 2; + +struct thr_intf +{ + virtual void do_work(const int id, void *param) = 0; +}; + +struct ti_t +{ + volatile std::atomic lo; + thr_intf *intf; + void *params; + int _block[29]; /* make it 256 bytes */ +}; + +static ti_t ti[MAXTHR]; +//static std::thread thr[MAXTHR]; +static std::thread thr[num_thr]; + +int thr_init = 0; + +static void thr_process_proc(int id) +{ + while (true) + { + while (ti[id].lo.load() == 0) + ; + if (ti[id].lo.load() == 2) + return; + ti[id].intf->do_work(id, ti[id].params); + ti[id].lo.store(0); + } +} + +static void thr_process(int id, thr_intf *intf, void *params) +{ + ti[id].intf = intf; + ti[id].params = params; + ti[id].lo.store(1); +} + +static void thr_wait() +{ + int c=1; + while (c > 0) + { + c=0; + for (int i=0; i +#if TEST_PARALLEL +class matrix_solver_direct_t: public matrix_solver_t, public thr_intf +#else class matrix_solver_direct_t: public matrix_solver_t +#endif { public: @@ -61,6 +150,13 @@ protected: virtual netlist_time compute_next_timestep() override; +#if TEST_PARALLEL + int x_i[10]; + int x_start[10]; + int x_stop[10]; + virtual void do_work(const int id, void *param) override; +#endif + #if (NL_USE_DYNAMIC_ALLOCATION) template inline nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r * m_pitch + c]; } @@ -80,11 +176,12 @@ protected: private: static const std::size_t m_pitch = (((_storage_N + 1) + 7) / 8) * 8; + //static const std::size_t m_pitch = (((_storage_N + 1) + 15) / 16) * 16; + //static const std::size_t m_pitch = (((_storage_N + 1) + 31) / 32) * 32; #if (NL_USE_DYNAMIC_ALLOCATION) ATTR_ALIGN nl_ext_double * RESTRICT m_A; #else ATTR_ALIGN nl_ext_double m_A[_storage_N][m_pitch]; - ATTR_ALIGN nl_ext_double m_B[_storage_N][m_pitch]; #endif //ATTR_ALIGN nl_ext_double m_RHSx[_storage_N]; @@ -107,6 +204,10 @@ matrix_solver_direct_t::~matrix_solver_direct_t() #if (NL_USE_DYNAMIC_ALLOCATION) pfree_array(m_A); #endif +#if TEST_PARALLEL + thr_dispose(); +#endif + } template @@ -399,6 +500,37 @@ void matrix_solver_direct_t::build_LE_RHS() } } +#if TEST_PARALLEL +template +void matrix_solver_direct_t::do_work(const int id, void *param) +{ + const int i = x_i[id]; + /* FIXME: Singular matrix? */ + const nl_double f = 1.0 / A(i,i); + const unsigned * RESTRICT const p = m_terms[i]->m_nzrd.data(); + const unsigned e = m_terms[i]->m_nzrd.size(); + //nl_double A_cache[128]; + //for (unsigned k = 0; k < e; k++) + // A_cache[k] = A(i,p[k]); + + /* Eliminate column i from row j */ + + const unsigned * RESTRICT const pb = m_terms[i]->m_nzbd.data(); + //const unsigned eb = m_terms[i]->m_nzbd.size(); + const unsigned sj = x_start[id]; + const unsigned se = x_stop[id]; + for (unsigned jb = sj; jb < se; jb++) + { + const unsigned j = pb[jb]; + const nl_double f1 = - A(j,i) * f; + for (unsigned k = 0; k < e; k++) + //A(j,p[k]) += A_cache[k] * f1; + A(j,p[k]) += A(i,p[k]) * f1; + //RHS(j) += RHS(i) * f1; + } +} +#endif + template void matrix_solver_direct_t::LE_solve() { @@ -406,7 +538,7 @@ void matrix_solver_direct_t::LE_solve() for (unsigned i = 0; i < kN; i++) { // FIXME: use a parameter to enable pivoting? m_pivot - if (m_params.m_pivot) + if (!TEST_PARALLEL && m_params.m_pivot) { /* Find the row with the largest first value */ unsigned maxrow = i; @@ -452,23 +584,62 @@ void matrix_solver_direct_t::LE_solve() } else { +#if TEST_PARALLEL + const unsigned eb = m_terms[i]->m_nzbd.size(); + if (eb > 16) + { + printf("here %d\n", eb); + unsigned chunks = (eb) / (num_thr + 1); + for (int p=0; p < num_thr + 1; p++) + { + x_i[p] = i; + x_start[p] = chunks * p; + x_stop[p] = std::min(chunks*(p+1), eb); + if (p 0) + { + x_i[0] = i; + x_start[0] = 0; + x_stop[0] = eb; + do_work(0, NULL); + } +#else +#if 0 /* FIXME: Singular matrix? */ const nl_double f = 1.0 / A(i,i); - const unsigned * RESTRICT const p = m_terms[i]->m_nzrd.data(); - const unsigned e = m_terms[i]->m_nzrd.size(); + const auto &nzrd = m_terms[i]->m_nzrd; + const auto &nzbd = m_terms[i]->m_nzbd; /* Eliminate column i from row j */ - const unsigned * RESTRICT const pb = m_terms[i]->m_nzbd.data(); - const unsigned eb = m_terms[i]->m_nzbd.size(); - for (unsigned jb = 0; jb < eb; jb++) + for (auto j : nzbd) { - const unsigned j = pb[jb]; const nl_double f1 = - A(j,i) * f; - for (unsigned k = 0; k < e; k++) - A(j,p[k]) += A(i,p[k]) * f1; + for (auto k : nzrd) + A(j,k) += A(i,k) * f1; //RHS(j) += RHS(i) * f1; } +#else + /* FIXME: Singular matrix? */ + const nl_double f = 1.0 / A(i,i); + const auto &nzrd = m_terms[i]->m_nzrd; + const auto &nzbd = m_terms[i]->m_nzbd; + + /* Eliminate column i from row j */ + + for (auto j : nzbd) + { + const nl_double f1 = - A(j,i) * f; + for (auto k : nzrd) + A(j,k) += A(i,k) * f1; + //RHS(j) += RHS(i) * f1; + } +#endif +#endif } } } @@ -591,6 +762,9 @@ matrix_solver_direct_t::matrix_solver_direct_t(const solver_par m_last_RHS[k] = 0.0; m_last_V[k] = 0.0; } +#if TEST_PARALLEL + thr_initialize(); +#endif } template @@ -608,6 +782,9 @@ matrix_solver_direct_t::matrix_solver_direct_t(const eSolverTyp m_last_RHS[k] = 0.0; m_last_V[k] = 0.0; } +#if TEST_PARALLEL + thr_initialize(); +#endif } NETLIB_NAMESPACE_DEVICES_END() diff --git a/src/lib/netlist/solver/nld_ms_w.h b/src/lib/netlist/solver/nld_ms_w.h index b3af357611d..8929edcc1e2 100644 --- a/src/lib/netlist/solver/nld_ms_w.h +++ b/src/lib/netlist/solver/nld_ms_w.h @@ -624,7 +624,16 @@ int matrix_solver_w_t::solve_non_dynamic(ATTR_UNUSED const bool } m_cnt++; - + for (unsigned i=0; i 1e-6) + printf("%s failed on row %d: %f RHS: %f\n", this->name().cstr(), i, std::fabs(tmp-RHS(i)), RHS(i)); + } if (newton_raphson) { nl_double err = delta(new_V); diff --git a/src/mame/nl.lst b/src/mame/nl.lst index 67723374e4b..0b36b3e4e6f 100644 --- a/src/mame/nl.lst +++ b/src/mame/nl.lst @@ -29,3 +29,28 @@ pong // (c) 1972 Atari pongd // (c) 1975 Atari pongf // (c) 1972 Atari breakout // (c) 1976 Atari + +@source:m62.cpp +battroad // (c) 1984 +horizon // (c) 1985 +kidniki // (c) 1986 + Data East USA license +kidnikiu // (c) 1986 + Data East USA license +kungfub // bootleg +kungfub2 // bootleg +kungfum // (c) 1984 +kungfumd // (c) 1984 + Data East license +ldrun // (c) 1984 licensed from Broderbund +ldrun2 // (c) 1984 licensed from Broderbund +ldrun3 // (c) 1985 licensed from Broderbund +ldrun3j // (c) 1985 licensed from Broderbund +ldrun4 // (c) 1986 licensed from Broderbund +ldruna // (c) 1984 licensed from Broderbund +lithero // bootleg +lotlot // (c) 1985 licensed from Tokuma Shoten +spartanx // (c) 1984 (Japan) +spelunk2 // (c) 1986 licensed from Broderbund +spelunkr // (c) 1985 licensed from Broderbund +spelunkrj // (c) 1985 licensed from Broderbund +yanchamr // (c) 1986 (Japan) +youjyudn // (c) 1986 (Japan) +