diff --git a/nl_examples/kidniki.c b/nl_examples/kidniki.c index 422477b94b3..6b2a5c66215 100644 --- a/nl_examples/kidniki.c +++ b/nl_examples/kidniki.c @@ -15,8 +15,8 @@ NETLIST_START(dummy) PARAM(Solver.GS_LOOPS, 1) PARAM(Solver.GS_THRESHOLD, 6) //PARAM(Solver.ITERATIVE, "W") - //PARAM(Solver.ITERATIVE, "MAT_CR") - PARAM(Solver.ITERATIVE, "MAT") + PARAM(Solver.ITERATIVE, "MAT_CR") + //PARAM(Solver.ITERATIVE, "MAT") //PARAM(Solver.ITERATIVE, "GMRES") //PARAM(Solver.ITERATIVE, "SOR") PARAM(Solver.DYNAMIC_TS, 0) diff --git a/scripts/src/netlist.lua b/scripts/src/netlist.lua index b2b9165d550..cb389db8d8d 100644 --- a/scripts/src/netlist.lua +++ b/scripts/src/netlist.lua @@ -42,6 +42,8 @@ project "netlist" MAME_DIR .. "src/lib/netlist/plib/pfmtlog.cpp", MAME_DIR .. "src/lib/netlist/plib/pfmtlog.h", MAME_DIR .. "src/lib/netlist/plib/plists.h", + MAME_DIR .. "src/lib/netlist/plib/pdynlib.cpp", + MAME_DIR .. "src/lib/netlist/plib/pdynlib.h", MAME_DIR .. "src/lib/netlist/plib/poptions.h", MAME_DIR .. "src/lib/netlist/plib/pparser.cpp", MAME_DIR .. "src/lib/netlist/plib/pparser.h", diff --git a/src/lib/netlist/nl_base.cpp b/src/lib/netlist/nl_base.cpp index 16bba23ed3c..c37217c6bbb 100644 --- a/src/lib/netlist/nl_base.cpp +++ b/src/lib/netlist/nl_base.cpp @@ -202,7 +202,8 @@ netlist_t::netlist_t(const pstring &aname) m_params(NULL), m_name(aname), m_setup(NULL), - m_log(this) + m_log(this), + m_lib(nullptr) { save_item(static_cast(m_queue), this, "m_queue"); save_item(m_time, this, "m_time"); @@ -221,6 +222,7 @@ netlist_t::~netlist_t() m_nets.clear(); m_devices.clear_and_free(); + pfree(m_lib); pstring::resetmem(); } @@ -231,6 +233,12 @@ ATTR_HOT nl_double netlist_t::gmin() const ATTR_COLD void netlist_t::start() { + /* load the library ... */ + + pstring libpath = nl_util::environment("NL_BOOSTLIB", nl_util::buildpath({".", "nlboost.so"})); + + m_lib = palloc(pdynlib(libpath)); + /* find the main clock and solver ... */ log().debug("Searching for mainclock and solver ...\n"); diff --git a/src/lib/netlist/nl_base.h b/src/lib/netlist/nl_base.h index b8239addcf6..55be15527de 100644 --- a/src/lib/netlist/nl_base.h +++ b/src/lib/netlist/nl_base.h @@ -162,6 +162,7 @@ #include "nl_util.h" #include "plib/pstate.h" #include "plib/pfmtlog.h" +#include "plib/pdynlib.h" // ---------------------------------------------------------------------------------------- // Type definitions @@ -1273,7 +1274,9 @@ namespace netlist virtual void reset(); - protected: + ATTR_COLD pdynlib &lib() { return *m_lib; } + +protected: #if (NL_KEEP_STATISTICS) // performance @@ -1298,6 +1301,7 @@ namespace netlist pstring m_name; setup_t *m_setup; plog_base m_log; + pdynlib *m_lib; // external lib needs to be loaded as long as netlist exists }; // ----------------------------------------------------------------------------- diff --git a/src/lib/netlist/nl_util.h b/src/lib/netlist/nl_util.h index ab09eac0408..76f49918c18 100644 --- a/src/lib/netlist/nl_util.h +++ b/src/lib/netlist/nl_util.h @@ -12,6 +12,7 @@ #include #include #include +#include #include "plib/pstring.h" #include "plib/plists.h" @@ -24,6 +25,22 @@ private: public: + static const pstring buildpath(std::initializer_list list ) + { + pstring ret = ""; + for( auto elem : list ) + { + if (ret == "") + ret = elem; + else +#ifdef _WIN32 + ret = ret + '\\' + elem; +#else + ret = ret + '/' + elem; +#endif + } + return ret; + } static const pstring environment(const pstring &var, const pstring &default_val = "") { diff --git a/src/lib/netlist/plib/plists.h b/src/lib/netlist/plib/plists.h index 43aa28a229a..b9a9a70e195 100644 --- a/src/lib/netlist/plib/plists.h +++ b/src/lib/netlist/plib/plists.h @@ -413,6 +413,7 @@ struct phash_functor m_hash = result; } friend unsigned operator%(const phash_functor &lhs, const unsigned &rhs) { return lhs.m_hash % rhs; } + unsigned operator()() { return m_hash; } bool operator==(const phash_functor &lhs) const { return (m_hash == lhs.m_hash); } private: unsigned m_hash; diff --git a/src/lib/netlist/prg/nltool.cpp b/src/lib/netlist/prg/nltool.cpp index 6d34683731e..fe4cca62572 100644 --- a/src/lib/netlist/prg/nltool.cpp +++ b/src/lib/netlist/prg/nltool.cpp @@ -36,63 +36,8 @@ inline osd_ticks_t osd_ticks_per_second() { return CLOCKS_PER_SEC; } osd_ticks_t osd_ticks(void) { return clock(); } -#else - #endif -/*************************************************************************** - * MAME COMPATIBILITY ... - * - * These are needed if we link without libutil - ***************************************************************************/ - -#if 0 -void ATTR_PRINTF(1,2) osd_printf_warning(const char *format, ...) -{ - va_list argptr; - - /* do the output */ - va_start(argptr, format); - vprintf(format, argptr); - va_end(argptr); -} - -void *malloc_file_line(size_t size, const char *file, int line) -{ - // allocate the memory and fail if we can't - void *ret = osd_malloc(size); - memset(ret, 0, size); - return ret; -} - -void *malloc_array_file_line(size_t size, const char *file, int line) -{ - // allocate the memory and fail if we can't - void *ret = osd_malloc_array(size); - memset(ret, 0, size); - return ret; -} - -void free_file_line( void *memory, const char *file, int line ) -{ - osd_free( memory ); -} - -void CLIB_DECL logerror(const char *format, ...) -{ - va_list arg; - va_start(arg, format); - vprintf(format, arg); - va_end(arg); -} - -void report_bad_cast(const std::type_info &src_type, const std::type_info &dst_type) -{ - printf("Error: bad downcast<> or device<>. Tried to convert a %s to a %s, which are incompatible.\n", - src_type.name(), dst_type.name()); - throw; -} -#endif class tool_options_t : public poptions { @@ -104,7 +49,7 @@ public: opt_logs("l", "logs", "", "colon separated list of terminals to log", this), opt_file("f", "file", "-", "file to process (default is stdin)", this), opt_type("y", "type", "spice", "spice:eagle", "type of file to be converted: spice,eagle", this), - opt_cmd ("c", "cmd", "run", "run|convert|listdevices", this), + opt_cmd ("c", "cmd", "run", "run|convert|listdevices|static", this), opt_inp( "i", "input", "", "input file to process (default is none)", this), opt_verb("v", "verbose", "be verbose - this produces lots of output", this), opt_quiet("q", "quiet", "be quiet - no warnings", this), @@ -209,7 +154,7 @@ void usage(tool_options_t &opts) { perr("{}", "Usage:\n" - " nltool -help\n" + " nltool --help\n" " nltool [options]\n" "\n" "Where:\n" @@ -319,6 +264,24 @@ static void run(tool_options_t &opts) pout("{1:f} seconds emulation took {2:f} real time ==> {3:5.2f}%\n", ttr, emutime, ttr/emutime*100.0); } +static void static_compile(tool_options_t &opts) +{ + netlist_tool_t nt("netlist"); + + nt.m_opts = &opts; + nt.init(); + + nt.log().verbose.set_enabled(false); + nt.log().warning.set_enabled(false); + + nt.read_netlist(opts.opt_file(), opts.opt_name()); + + nt.solver()->create_solver_code(pout_strm); + + nt.stop(); + +} + /*------------------------------------------------- listdevices - list all known devices -------------------------------------------------*/ @@ -384,6 +347,7 @@ static void listdevices() #include "corealloc.h" #endif +#if 0 static const char *pmf_verbose[] = { "NL_PMF_TYPE_VIRTUAL", @@ -391,6 +355,7 @@ static const char *pmf_verbose[] = "NL_PMF_TYPE_GNUC_PMF_CONV", "NL_PMF_TYPE_INTERNAL" }; +#endif int main(int argc, char *argv[]) { @@ -398,7 +363,7 @@ int main(int argc, char *argv[]) int ret; perr("{}", "WARNING: This is Work In Progress! - It may fail anytime\n"); - perr("Update dispatching using method {}\n", pmf_verbose[NL_PMF_TYPE]); + //perr("Update dispatching using method {}\n", pmf_verbose[NL_PMF_TYPE]); if ((ret = opts.parse(argc, argv)) != argc) { perr("Error parsing {}\n", argv[ret]); @@ -417,6 +382,8 @@ int main(int argc, char *argv[]) listdevices(); else if (cmd == "run") run(opts); + else if (cmd == "static") + static_compile(opts); else if (cmd == "convert") { pstring contents; diff --git a/src/lib/netlist/solver/nld_matrix_solver.h b/src/lib/netlist/solver/nld_matrix_solver.h index 80c932823e9..89dec8eee65 100644 --- a/src/lib/netlist/solver/nld_matrix_solver.h +++ b/src/lib/netlist/solver/nld_matrix_solver.h @@ -9,6 +9,7 @@ #define NLD_MATRIX_SOLVER_H_ #include "solver/nld_solver.h" +#include "plib/pstream.h" #include @@ -112,6 +113,11 @@ public: virtual void log_stats(); + virtual void create_solver_code(postream &strm) + { + strm.writeline(pfmt("/* {1} doesn't support static compile */")); + } + protected: ATTR_COLD void setup_base(analog_net_t::list_t &nets); diff --git a/src/lib/netlist/solver/nld_ms_direct.h b/src/lib/netlist/solver/nld_ms_direct.h index ee3f3cfecb9..444ddd85176 100644 --- a/src/lib/netlist/solver/nld_ms_direct.h +++ b/src/lib/netlist/solver/nld_ms_direct.h @@ -247,612 +247,6 @@ void matrix_solver_direct_t::do_work(const int id, void *param) template void matrix_solver_direct_t::LE_solve() { -#if 0 - // Static matrix compilation - const double fd0 = 1.0 / A(0,0); - const double f1 = -fd0 * A(23,0); - A(23,23) += f1 * A(0,23); - RHS(23) += f1 * RHS(0); - const double fd1 = 1.0 / A(1,1); - const double f2 = -fd1 * A(39,1); - A(39,39) += f2 * A(1,39); - RHS(39) += f2 * RHS(1); - const double fd2 = 1.0 / A(2,2); - const double f3 = -fd2 * A(31,2); - A(31,31) += f3 * A(2,31); - RHS(31) += f3 * RHS(2); - const double fd3 = 1.0 / A(3,3); - const double f4 = -fd3 * A(19,3); - A(19,19) += f4 * A(3,19); - RHS(19) += f4 * RHS(3); - const double fd4 = 1.0 / A(4,4); - const double f5 = -fd4 * A(29,4); - A(29,29) += f5 * A(4,29); - RHS(29) += f5 * RHS(4); - const double fd5 = 1.0 / A(5,5); - const double f6 = -fd5 * A(32,5); - A(32,32) += f6 * A(5,32); - RHS(32) += f6 * RHS(5); - const double fd6 = 1.0 / A(6,6); - const double f7 = -fd6 * A(69,6); - A(69,69) += f7 * A(6,69); - RHS(69) += f7 * RHS(6); - const double fd7 = 1.0 / A(7,7); - const double f8 = -fd7 * A(22,7); - A(22,22) += f8 * A(7,22); - RHS(22) += f8 * RHS(7); - const double fd8 = 1.0 / A(8,8); - const double f9 = -fd8 * A(28,8); - A(28,28) += f9 * A(8,28); - RHS(28) += f9 * RHS(8); - const double fd9 = 1.0 / A(9,9); - const double f10 = -fd9 * A(82,9); - A(82,82) += f10 * A(9,82); - RHS(82) += f10 * RHS(9); - const double fd10 = 1.0 / A(10,10); - const double f11 = -fd10 * A(82,10); - A(82,82) += f11 * A(10,82); - RHS(82) += f11 * RHS(10); - const double fd11 = 1.0 / A(11,11); - const double f12 = -fd11 * A(38,11); - A(38,38) += f12 * A(11,38); - RHS(38) += f12 * RHS(11); - const double fd12 = 1.0 / A(12,12); - const double f13 = -fd12 * A(43,12); - A(43,84) += f13 * A(12,84); - RHS(43) += f13 * RHS(12); - const double f14 = -fd12 * A(84,12); - A(84,84) += f14 * A(12,84); - RHS(84) += f14 * RHS(12); - const double fd13 = 1.0 / A(13,13); - const double f15 = -fd13 * A(48,13); - A(48,48) += f15 * A(13,48); - RHS(48) += f15 * RHS(13); - const double fd14 = 1.0 / A(14,14); - const double f16 = -fd14 * A(56,14); - A(56,56) += f16 * A(14,56); - RHS(56) += f16 * RHS(14); - const double fd15 = 1.0 / A(15,15); - const double f17 = -fd15 * A(60,15); - A(60,60) += f17 * A(15,60); - RHS(60) += f17 * RHS(15); - const double fd16 = 1.0 / A(16,16); - const double f18 = -fd16 * A(81,16); - A(81,81) += f18 * A(16,81); - A(81,85) = f18 * A(16,85); - RHS(81) += f18 * RHS(16); - const double f19 = -fd16 * A(85,16); - A(85,81) = f19 * A(16,81); - A(85,85) += f19 * A(16,85); - RHS(85) += f19 * RHS(16); - const double fd17 = 1.0 / A(17,17); - const double f20 = -fd17 * A(72,17); - A(72,72) += f20 * A(17,72); - A(72,85) = f20 * A(17,85); - RHS(72) += f20 * RHS(17); - const double f21 = -fd17 * A(85,17); - A(85,72) = f21 * A(17,72); - A(85,85) += f21 * A(17,85); - RHS(85) += f21 * RHS(17); - const double fd18 = 1.0 / A(18,18); - const double f22 = -fd18 * A(72,18); - A(72,72) += f22 * A(18,72); - A(72,78) += f22 * A(18,78); - RHS(72) += f22 * RHS(18); - const double f23 = -fd18 * A(78,18); - A(78,72) += f23 * A(18,72); - A(78,78) += f23 * A(18,78); - RHS(78) += f23 * RHS(18); - const double fd19 = 1.0 / A(19,19); - const double f24 = -fd19 * A(87,19); - A(87,87) += f24 * A(19,87); - RHS(87) += f24 * RHS(19); - const double fd20 = 1.0 / A(20,20); - const double f25 = -fd20 * A(71,20); - A(71,69) += f25 * A(20,69); - A(71,85) = f25 * A(20,85); - RHS(71) += f25 * RHS(20); - const double fd21 = 1.0 / A(21,21); - const double f26 = -fd21 * A(24,21); - A(24,63) = f26 * A(21,63); - A(24,74) += f26 * A(21,74); - RHS(24) += f26 * RHS(21); - const double f27 = -fd21 * A(63,21); - A(63,63) += f27 * A(21,63); - A(63,74) += f27 * A(21,74); - RHS(63) += f27 * RHS(21); - const double f28 = -fd21 * A(74,21); - A(74,63) += f28 * A(21,63); - A(74,74) += f28 * A(21,74); - RHS(74) += f28 * RHS(21); - const double fd22 = 1.0 / A(22,22); - const double f29 = -fd22 * A(63,22); - A(63,63) += f29 * A(22,63); - RHS(63) += f29 * RHS(22); - const double fd23 = 1.0 / A(23,23); - const double f30 = -fd23 * A(87,23); - A(87,87) += f30 * A(23,87); - RHS(87) += f30 * RHS(23); - const double fd24 = 1.0 / A(24,24); - const double f31 = -fd24 * A(74,24); - A(74,63) += f31 * A(24,63); - A(74,74) += f31 * A(24,74); - RHS(74) += f31 * RHS(24); - const double fd25 = 1.0 / A(25,25); - const double f32 = -fd25 * A(77,25); - A(77,26) += f32 * A(25,26); - A(77,77) += f32 * A(25,77); - RHS(77) += f32 * RHS(25); - const double fd26 = 1.0 / A(26,26); - const double f33 = -fd26 * A(62,26); - A(62,62) += f33 * A(26,62); - A(62,77) += f33 * A(26,77); - RHS(62) += f33 * RHS(26); - const double f34 = -fd26 * A(77,26); - A(77,62) += f34 * A(26,62); - A(77,77) += f34 * A(26,77); - RHS(77) += f34 * RHS(26); - const double fd27 = 1.0 / A(27,27); - const double f35 = -fd27 * A(62,27); - A(62,62) += f35 * A(27,62); - A(62,68) = f35 * A(27,68); - RHS(62) += f35 * RHS(27); - const double f36 = -fd27 * A(68,27); - A(68,62) = f36 * A(27,62); - A(68,68) += f36 * A(27,68); - RHS(68) += f36 * RHS(27); - const double fd28 = 1.0 / A(28,28); - const double f37 = -fd28 * A(30,28); - A(30,68) += f37 * A(28,68); - RHS(30) += f37 * RHS(28); - const double f38 = -fd28 * A(68,28); - A(68,68) += f38 * A(28,68); - RHS(68) += f38 * RHS(28); - const double fd29 = 1.0 / A(29,29); - const double f39 = -fd29 * A(87,29); - A(87,87) += f39 * A(29,87); - RHS(87) += f39 * RHS(29); - const double fd30 = 1.0 / A(30,30); - const double f40 = -fd30 * A(68,30); - A(68,68) += f40 * A(30,68); - RHS(68) += f40 * RHS(30); - const double fd31 = 1.0 / A(31,31); - const double f41 = -fd31 * A(87,31); - A(87,87) += f41 * A(31,87); - RHS(87) += f41 * RHS(31); - const double fd32 = 1.0 / A(32,32); - const double f42 = -fd32 * A(87,32); - A(87,87) += f42 * A(32,87); - RHS(87) += f42 * RHS(32); - const double fd33 = 1.0 / A(33,33); - const double f43 = -fd33 * A(79,33); - A(79,79) += f43 * A(33,79); - A(79,85) = f43 * A(33,85); - RHS(79) += f43 * RHS(33); - const double f44 = -fd33 * A(85,33); - A(85,79) = f44 * A(33,79); - A(85,85) += f44 * A(33,85); - RHS(85) += f44 * RHS(33); - const double fd34 = 1.0 / A(34,34); - const double f45 = -fd34 * A(50,34); - A(50,65) = f45 * A(34,65); - A(50,79) += f45 * A(34,79); - RHS(50) += f45 * RHS(34); - const double f46 = -fd34 * A(65,34); - A(65,65) += f46 * A(34,65); - A(65,79) += f46 * A(34,79); - RHS(65) += f46 * RHS(34); - const double f47 = -fd34 * A(79,34); - A(79,65) += f47 * A(34,65); - A(79,79) += f47 * A(34,79); - RHS(79) += f47 * RHS(34); - const double fd35 = 1.0 / A(35,35); - const double f48 = -fd35 * A(36,35); - A(36,36) += f48 * A(35,36); - A(36,66) = f48 * A(35,66); - RHS(36) += f48 * RHS(35); - const double f49 = -fd35 * A(66,35); - A(66,36) = f49 * A(35,36); - A(66,66) += f49 * A(35,66); - RHS(66) += f49 * RHS(35); - const double fd36 = 1.0 / A(36,36); - const double f50 = -fd36 * A(37,36); - A(37,37) += f50 * A(36,37); - A(37,66) = f50 * A(36,66); - RHS(37) += f50 * RHS(36); - const double f51 = -fd36 * A(66,36); - A(66,37) = f51 * A(36,37); - A(66,66) += f51 * A(36,66); - RHS(66) += f51 * RHS(36); - const double fd37 = 1.0 / A(37,37); - const double f52 = -fd37 * A(38,37); - A(38,38) += f52 * A(37,38); - A(38,66) = f52 * A(37,66); - RHS(38) += f52 * RHS(37); - const double f53 = -fd37 * A(66,37); - A(66,38) = f53 * A(37,38); - A(66,66) += f53 * A(37,66); - RHS(66) += f53 * RHS(37); - const double fd38 = 1.0 / A(38,38); - const double f54 = -fd38 * A(66,38); - A(66,66) += f54 * A(38,66); - RHS(66) += f54 * RHS(38); - const double fd39 = 1.0 / A(39,39); - const double f55 = -fd39 * A(87,39); - A(87,87) += f55 * A(39,87); - RHS(87) += f55 * RHS(39); - const double fd40 = 1.0 / A(40,40); - const double f56 = -fd40 * A(41,40); - A(41,41) += f56 * A(40,41); - A(41,86) = f56 * A(40,86); - RHS(41) += f56 * RHS(40); - const double f57 = -fd40 * A(86,40); - A(86,41) = f57 * A(40,41); - A(86,86) += f57 * A(40,86); - RHS(86) += f57 * RHS(40); - const double fd41 = 1.0 / A(41,41); - const double f58 = -fd41 * A(42,41); - A(42,42) += f58 * A(41,42); - A(42,86) = f58 * A(41,86); - RHS(42) += f58 * RHS(41); - const double f59 = -fd41 * A(86,41); - A(86,42) = f59 * A(41,42); - A(86,86) += f59 * A(41,86); - RHS(86) += f59 * RHS(41); - const double fd42 = 1.0 / A(42,42); - const double f60 = -fd42 * A(86,42); - A(86,43) = f60 * A(42,43); - A(86,86) += f60 * A(42,86); - RHS(86) += f60 * RHS(42); - const double fd43 = 1.0 / A(43,43); - const double f61 = -fd43 * A(86,43); - A(86,84) = f61 * A(43,84); - RHS(86) += f61 * RHS(43); - const double fd44 = 1.0 / A(44,44); - const double f62 = -fd44 * A(84,44); - A(84,84) += f62 * A(44,84); - A(84,87) = f62 * A(44,87); - RHS(84) += f62 * RHS(44); - const double f63 = -fd44 * A(87,44); - A(87,84) = f63 * A(44,84); - A(87,87) += f63 * A(44,87); - RHS(87) += f63 * RHS(44); - const double fd45 = 1.0 / A(45,45); - const double f64 = -fd45 * A(46,45); - A(46,46) += f64 * A(45,46); - A(46,67) = f64 * A(45,67); - RHS(46) += f64 * RHS(45); - const double f65 = -fd45 * A(67,45); - A(67,46) = f65 * A(45,46); - A(67,67) += f65 * A(45,67); - RHS(67) += f65 * RHS(45); - const double fd46 = 1.0 / A(46,46); - const double f66 = -fd46 * A(47,46); - A(47,47) += f66 * A(46,47); - A(47,67) = f66 * A(46,67); - RHS(47) += f66 * RHS(46); - const double f67 = -fd46 * A(67,46); - A(67,47) = f67 * A(46,47); - A(67,67) += f67 * A(46,67); - RHS(67) += f67 * RHS(46); - const double fd47 = 1.0 / A(47,47); - const double f68 = -fd47 * A(48,47); - A(48,48) += f68 * A(47,48); - A(48,67) = f68 * A(47,67); - RHS(48) += f68 * RHS(47); - const double f69 = -fd47 * A(67,47); - A(67,48) = f69 * A(47,48); - A(67,67) += f69 * A(47,67); - RHS(67) += f69 * RHS(47); - const double fd48 = 1.0 / A(48,48); - const double f70 = -fd48 * A(67,48); - A(67,67) += f70 * A(48,67); - RHS(67) += f70 * RHS(48); - const double fd49 = 1.0 / A(49,49); - const double f71 = -fd49 * A(80,49); - A(80,80) += f71 * A(49,80); - A(80,84) += f71 * A(49,84); - RHS(80) += f71 * RHS(49); - const double f72 = -fd49 * A(84,49); - A(84,80) += f72 * A(49,80); - A(84,84) += f72 * A(49,84); - RHS(84) += f72 * RHS(49); - const double fd50 = 1.0 / A(50,50); - const double f73 = -fd50 * A(79,50); - A(79,65) += f73 * A(50,65); - A(79,79) += f73 * A(50,79); - RHS(79) += f73 * RHS(50); - const double fd51 = 1.0 / A(51,51); - const double f74 = -fd51 * A(83,51); - A(83,83) += f74 * A(51,83); - A(83,85) = f74 * A(51,85); - RHS(83) += f74 * RHS(51); - const double f75 = -fd51 * A(85,51); - A(85,83) = f75 * A(51,83); - A(85,85) += f75 * A(51,85); - RHS(85) += f75 * RHS(51); - const double fd52 = 1.0 / A(52,52); - const double f76 = -fd52 * A(76,52); - A(76,76) += f76 * A(52,76); - A(76,83) += f76 * A(52,83); - RHS(76) += f76 * RHS(52); - const double f77 = -fd52 * A(83,52); - A(83,76) += f77 * A(52,76); - A(83,83) += f77 * A(52,83); - RHS(83) += f77 * RHS(52); - const double fd53 = 1.0 / A(53,53); - const double f78 = -fd53 * A(54,53); - A(54,54) += f78 * A(53,54); - A(54,64) = f78 * A(53,64); - RHS(54) += f78 * RHS(53); - const double f79 = -fd53 * A(64,53); - A(64,54) = f79 * A(53,54); - A(64,64) += f79 * A(53,64); - RHS(64) += f79 * RHS(53); - const double fd54 = 1.0 / A(54,54); - const double f80 = -fd54 * A(64,54); - A(64,64) += f80 * A(54,64); - A(64,83) = f80 * A(54,83); - RHS(64) += f80 * RHS(54); - const double f81 = -fd54 * A(83,54); - A(83,64) = f81 * A(54,64); - A(83,83) += f81 * A(54,83); - RHS(83) += f81 * RHS(54); - const double fd55 = 1.0 / A(55,55); - const double f82 = -fd55 * A(56,55); - A(56,56) += f82 * A(55,56); - A(56,64) = f82 * A(55,64); - RHS(56) += f82 * RHS(55); - const double f83 = -fd55 * A(64,55); - A(64,56) = f83 * A(55,56); - A(64,64) += f83 * A(55,64); - RHS(64) += f83 * RHS(55); - const double fd56 = 1.0 / A(56,56); - const double f84 = -fd56 * A(64,56); - A(64,64) += f84 * A(56,64); - RHS(64) += f84 * RHS(56); - const double fd57 = 1.0 / A(57,57); - const double f85 = -fd57 * A(78,57); - A(78,78) += f85 * A(57,78); - A(78,80) = f85 * A(57,80); - RHS(78) += f85 * RHS(57); - const double f86 = -fd57 * A(80,57); - A(80,78) = f86 * A(57,78); - A(80,80) += f86 * A(57,80); - RHS(80) += f86 * RHS(57); - const double fd58 = 1.0 / A(58,58); - const double f87 = -fd58 * A(75,58); - A(75,75) += f87 * A(58,75); - A(75,81) += f87 * A(58,81); - RHS(75) += f87 * RHS(58); - const double f88 = -fd58 * A(81,58); - A(81,75) += f88 * A(58,75); - A(81,81) += f88 * A(58,81); - RHS(81) += f88 * RHS(58); - const double fd59 = 1.0 / A(59,59); - const double f89 = -fd59 * A(60,59); - A(60,60) += f89 * A(59,60); - A(60,70) = f89 * A(59,70); - RHS(60) += f89 * RHS(59); - const double f90 = -fd59 * A(70,59); - A(70,60) = f90 * A(59,60); - A(70,70) += f90 * A(59,70); - RHS(70) += f90 * RHS(59); - const double fd60 = 1.0 / A(60,60); - const double f91 = -fd60 * A(70,60); - A(70,70) += f91 * A(60,70); - RHS(70) += f91 * RHS(60); - const double fd61 = 1.0 / A(61,61); - const double f92 = -fd61 * A(73,61); - A(73,73) += f92 * A(61,73); - A(73,75) = f92 * A(61,75); - RHS(73) += f92 * RHS(61); - const double f93 = -fd61 * A(75,61); - A(75,73) = f93 * A(61,73); - A(75,75) += f93 * A(61,75); - RHS(75) += f93 * RHS(61); - const double fd62 = 1.0 / A(62,62); - const double f94 = -fd62 * A(68,62); - A(68,68) += f94 * A(62,68); - A(68,77) = f94 * A(62,77); - RHS(68) += f94 * RHS(62); - const double f95 = -fd62 * A(77,62); - A(77,68) = f95 * A(62,68); - A(77,77) += f95 * A(62,77); - RHS(77) += f95 * RHS(62); - const double fd63 = 1.0 / A(63,63); - const double f96 = -fd63 * A(74,63); - A(74,74) += f96 * A(63,74); - RHS(74) += f96 * RHS(63); - const double fd64 = 1.0 / A(64,64); - const double f97 = -fd64 * A(76,64); - A(76,76) += f97 * A(64,76); - A(76,83) += f97 * A(64,83); - RHS(76) += f97 * RHS(64); - const double f98 = -fd64 * A(83,64); - A(83,76) += f98 * A(64,76); - A(83,83) += f98 * A(64,83); - RHS(83) += f98 * RHS(64); - const double fd65 = 1.0 / A(65,65); - const double f99 = -fd65 * A(79,65); - A(79,79) += f99 * A(65,79); - A(79,86) = f99 * A(65,86); - RHS(79) += f99 * RHS(65); - const double f100 = -fd65 * A(86,65); - A(86,79) = f100 * A(65,79); - A(86,86) += f100 * A(65,86); - RHS(86) += f100 * RHS(65); - const double fd66 = 1.0 / A(66,66); - const double f101 = -fd66 * A(86,66); - A(86,86) += f101 * A(66,86); - RHS(86) += f101 * RHS(66); - const double fd67 = 1.0 / A(67,67); - const double f102 = -fd67 * A(86,67); - A(86,86) += f102 * A(67,86); - RHS(86) += f102 * RHS(67); - const double fd68 = 1.0 / A(68,68); - const double f103 = -fd68 * A(77,68); - A(77,77) += f103 * A(68,77); - RHS(77) += f103 * RHS(68); - const double fd69 = 1.0 / A(69,69); - const double f104 = -fd69 * A(71,69); - A(71,71) += f104 * A(69,71); - A(71,85) += f104 * A(69,85); - RHS(71) += f104 * RHS(69); - const double f105 = -fd69 * A(85,69); - A(85,71) = f105 * A(69,71); - A(85,85) += f105 * A(69,85); - RHS(85) += f105 * RHS(69); - const double fd70 = 1.0 / A(70,70); - const double f106 = -fd70 * A(73,70); - A(73,73) += f106 * A(70,73); - A(73,78) = f106 * A(70,78); - RHS(73) += f106 * RHS(70); - const double f107 = -fd70 * A(78,70); - A(78,73) = f107 * A(70,73); - A(78,78) += f107 * A(70,78); - RHS(78) += f107 * RHS(70); - const double fd71 = 1.0 / A(71,71); - const double f108 = -fd71 * A(82,71); - A(82,82) += f108 * A(71,82); - A(82,85) = f108 * A(71,85); - RHS(82) += f108 * RHS(71); - const double f109 = -fd71 * A(85,71); - A(85,82) = f109 * A(71,82); - A(85,85) += f109 * A(71,85); - RHS(85) += f109 * RHS(71); - const double fd72 = 1.0 / A(72,72); - const double f110 = -fd72 * A(78,72); - A(78,78) += f110 * A(72,78); - A(78,85) = f110 * A(72,85); - RHS(78) += f110 * RHS(72); - const double f111 = -fd72 * A(85,72); - A(85,78) = f111 * A(72,78); - A(85,85) += f111 * A(72,85); - RHS(85) += f111 * RHS(72); - const double fd73 = 1.0 / A(73,73); - const double f112 = -fd73 * A(75,73); - A(75,75) += f112 * A(73,75); - A(75,78) = f112 * A(73,78); - A(75,81) += f112 * A(73,81); - RHS(75) += f112 * RHS(73); - const double f113 = -fd73 * A(78,73); - A(78,75) = f113 * A(73,75); - A(78,78) += f113 * A(73,78); - A(78,81) = f113 * A(73,81); - RHS(78) += f113 * RHS(73); - const double f114 = -fd73 * A(81,73); - A(81,75) += f114 * A(73,75); - A(81,78) = f114 * A(73,78); - A(81,81) += f114 * A(73,81); - RHS(81) += f114 * RHS(73); - const double fd74 = 1.0 / A(74,74); - const double f115 = -fd74 * A(82,74); - A(82,82) += f115 * A(74,82); - RHS(82) += f115 * RHS(74); - const double fd75 = 1.0 / A(75,75); - const double f116 = -fd75 * A(78,75); - A(78,78) += f116 * A(75,78); - A(78,81) += f116 * A(75,81); - RHS(78) += f116 * RHS(75); - const double f117 = -fd75 * A(81,75); - A(81,78) += f117 * A(75,78); - A(81,81) += f117 * A(75,81); - RHS(81) += f117 * RHS(75); - const double fd76 = 1.0 / A(76,76); - const double f118 = -fd76 * A(83,76); - A(83,83) += f118 * A(76,83); - RHS(83) += f118 * RHS(76); - const double fd77 = 1.0 / A(77,77); - const double f119 = -fd77 * A(82,77); - A(82,82) += f119 * A(77,82); - RHS(82) += f119 * RHS(77); - const double fd78 = 1.0 / A(78,78); - const double f120 = -fd78 * A(80,78); - A(80,80) += f120 * A(78,80); - A(80,81) = f120 * A(78,81); - A(80,85) = f120 * A(78,85); - RHS(80) += f120 * RHS(78); - const double f121 = -fd78 * A(81,78); - A(81,80) = f121 * A(78,80); - A(81,81) += f121 * A(78,81); - A(81,85) += f121 * A(78,85); - RHS(81) += f121 * RHS(78); - const double f122 = -fd78 * A(85,78); - A(85,80) = f122 * A(78,80); - A(85,81) += f122 * A(78,81); - A(85,85) += f122 * A(78,85); - RHS(85) += f122 * RHS(78); - const double fd79 = 1.0 / A(79,79); - const double f123 = -fd79 * A(85,79); - A(85,85) += f123 * A(79,85); - A(85,86) = f123 * A(79,86); - RHS(85) += f123 * RHS(79); - const double f124 = -fd79 * A(86,79); - A(86,85) = f124 * A(79,85); - A(86,86) += f124 * A(79,86); - RHS(86) += f124 * RHS(79); - const double fd80 = 1.0 / A(80,80); - const double f125 = -fd80 * A(81,80); - A(81,81) += f125 * A(80,81); - A(81,84) = f125 * A(80,84); - A(81,85) += f125 * A(80,85); - RHS(81) += f125 * RHS(80); - const double f126 = -fd80 * A(84,80); - A(84,81) = f126 * A(80,81); - A(84,84) += f126 * A(80,84); - A(84,85) = f126 * A(80,85); - RHS(84) += f126 * RHS(80); - const double f127 = -fd80 * A(85,80); - A(85,81) += f127 * A(80,81); - A(85,84) = f127 * A(80,84); - A(85,85) += f127 * A(80,85); - RHS(85) += f127 * RHS(80); - const double fd81 = 1.0 / A(81,81); - const double f128 = -fd81 * A(84,81); - A(84,84) += f128 * A(81,84); - A(84,85) += f128 * A(81,85); - RHS(84) += f128 * RHS(81); - const double f129 = -fd81 * A(85,81); - A(85,84) += f129 * A(81,84); - A(85,85) += f129 * A(81,85); - RHS(85) += f129 * RHS(81); - const double fd82 = 1.0 / A(82,82); - const double f130 = -fd82 * A(85,82); - A(85,85) += f130 * A(82,85); - RHS(85) += f130 * RHS(82); - const double fd83 = 1.0 / A(83,83); - const double f131 = -fd83 * A(85,83); - A(85,85) += f131 * A(83,85); - RHS(85) += f131 * RHS(83); - const double fd84 = 1.0 / A(84,84); - const double f132 = -fd84 * A(85,84); - A(85,85) += f132 * A(84,85); - A(85,87) = f132 * A(84,87); - RHS(85) += f132 * RHS(84); - const double f133 = -fd84 * A(86,84); - A(86,85) += f133 * A(84,85); - A(86,87) = f133 * A(84,87); - RHS(86) += f133 * RHS(84); - const double f134 = -fd84 * A(87,84); - A(87,85) = f134 * A(84,85); - A(87,87) += f134 * A(84,87); - RHS(87) += f134 * RHS(84); - const double fd85 = 1.0 / A(85,85); - const double f135 = -fd85 * A(86,85); - A(86,86) += f135 * A(85,86); - A(86,87) += f135 * A(85,87); - RHS(86) += f135 * RHS(85); - const double f136 = -fd85 * A(87,85); - A(87,86) = f136 * A(85,86); - A(87,87) += f136 * A(85,87); - RHS(87) += f136 * RHS(85); - const double fd86 = 1.0 / A(86,86); - const double f137 = -fd86 * A(87,86); - A(87,87) += f137 * A(86,87); - RHS(87) += f137 * RHS(86); - return; -#endif const unsigned kN = N(); for (unsigned i = 0; i < kN; i++) { diff --git a/src/lib/netlist/solver/nld_ms_gcr.h b/src/lib/netlist/solver/nld_ms_gcr.h index 9aecb3ba8fe..4dc6ff6015c 100644 --- a/src/lib/netlist/solver/nld_ms_gcr.h +++ b/src/lib/netlist/solver/nld_ms_gcr.h @@ -18,18 +18,22 @@ #include "solver/nld_ms_direct.h" #include "solver/nld_solver.h" #include "solver/vector_base.h" +#include "plib/pdynlib.h" +#include "plib/pstream.h" #define NL_USE_SSE 0 NETLIB_NAMESPACE_DEVICES_START() template -class matrix_solver_GCR_t: public matrix_solver_direct_t +class matrix_solver_GCR_t: public matrix_solver_t { public: matrix_solver_GCR_t(const solver_parameters_t *params, int size) - : matrix_solver_direct_t(matrix_solver_t::ASCENDING, params, size) + : matrix_solver_t(matrix_solver_t::ASCENDING, params) + , m_dim(size) + , m_proc(nullptr) { } @@ -37,15 +41,35 @@ public: { } + inline unsigned N() const { if (m_N == 0) return m_dim; else return m_N; } + virtual void vsetup(analog_net_t::list_t &nets) override; virtual int vsolve_non_dynamic(const bool newton_raphson) override; + virtual void create_solver_code(postream &strm) override; + private: + void csc_private(postream &strm); + + typedef void extsolver(double * RESTRICT m_A, double * RESTRICT RHS); + + pstring static_compile_name() + { + postringstream t; + csc_private(t); + phash_functor h(t.str()); + + return pfmt("nl_gcr_{1:x}_{2}")(h())(mat.nz_num); + } + + unsigned m_dim; pvector_t m_term_cr[_storage_N]; mat_cr_t<_storage_N> mat; nl_double m_A[_storage_N * _storage_N]; + extsolver *m_proc; + }; // ---------------------------------------------------------------------------------------- @@ -55,7 +79,7 @@ private: template void matrix_solver_GCR_t::vsetup(analog_net_t::list_t &nets) { - matrix_solver_direct_t::vsetup(nets); + setup_base(nets); unsigned nz = 0; const unsigned iN = this->N(); @@ -123,22 +147,93 @@ void matrix_solver_GCR_t::vsetup(analog_net_t::list_t &nets) /* build pointers into the compressed row format matrix for each terminal */ for (unsigned j=0; j< this->m_terms[k]->m_railstart;j++) { - for (unsigned i = mat.ia[k]; im_terms[k]->net_other()[j] == (int) mat.ja[i]) + int other = this->m_terms[k]->net_other()[j]; + for (unsigned i = mat.ia[k]; i < nz; i++) + if (other == (int) mat.ja[i]) { m_term_cr[k].push_back(i); break; } - nl_assert(m_term_cr[k].size() == this->m_terms[k]->m_railstart); } + nl_assert(m_term_cr[k].size() == this->m_terms[k]->m_railstart); } mat.ia[iN] = nz; mat.nz_num = nz; this->log().verbose("Ops: {1} Occupancy ratio: {2}\n", ops, (double) nz / double (iN * iN)); + + // FIXME: Move me + + if (netlist().lib().isLoaded()) + { + pstring symname = static_compile_name(); + m_proc = this->netlist().lib().template getsym(symname); + if (m_proc != NULL) + this->log().verbose("External static solver {1} found ...", symname); + else + this->log().verbose("External static solver {1} not found ...", symname); + } + } +template +void matrix_solver_GCR_t::csc_private(postream &strm) +{ + const unsigned iN = N(); + for (unsigned i = 0; i < iN - 1; i++) + { + const auto &nzbd = this->m_terms[i]->m_nzbd; + + if (nzbd.size() > 0) + { + unsigned pi = mat.diag[i]; + + //const nl_double f = 1.0 / m_A[pi++]; + strm.writeline(pfmt("const double f{1} = 1.0 / m_A[{2}];")(i)(pi)); + pi++; + const unsigned piie = mat.ia[i+1]; + + for (auto & j : nzbd) + { + // proceed to column i + unsigned pj = mat.ia[j]; + + while (mat.ja[pj] < i) + pj++; + + //const nl_double f1 = - m_A[pj++] * f; + strm.writeline(pfmt("\tconst double f{1}_{2} = -f{3} * m_A[{4}];")(i)(j)(i)(pj)); + pj++; + + // subtract row i from j */ + for (unsigned pii = pi; pii +void matrix_solver_GCR_t::create_solver_code(postream &strm) +{ + //const unsigned iN = N(); + + strm.writeline(pfmt("extern \"C\" void {1}(double * __restrict m_A, double * __restrict RHS)")(static_compile_name())); + strm.writeline("{"); + csc_private(strm); + strm.writeline("}"); +} + + template int matrix_solver_GCR_t::vsolve_non_dynamic(const bool newton_raphson) { @@ -152,17 +247,18 @@ int matrix_solver_GCR_t::vsolve_non_dynamic(const bool newton_r for (unsigned k = 0; k < iN; k++) { + terms_t *t = this->m_terms[k]; nl_double gtot_t = 0.0; nl_double RHS_t = 0.0; - const unsigned term_count = this->m_terms[k]->count(); - const unsigned railstart = this->m_terms[k]->m_railstart; - const nl_double * const RESTRICT gt = this->m_terms[k]->gt(); - const nl_double * const RESTRICT go = this->m_terms[k]->go(); - const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr(); - const nl_double * const * RESTRICT other_cur_analog = this->m_terms[k]->other_curanalog(); + const unsigned term_count = t->count(); + const unsigned railstart = t->m_railstart; + const nl_double * const RESTRICT gt = t->gt(); + const nl_double * const RESTRICT go = t->go(); + const nl_double * const RESTRICT Idr = t->Idr(); + const nl_double * const * RESTRICT other_cur_analog = t->other_curanalog(); -#if (NL_USE_SSE) +#if (0 ||NL_USE_SSE) __m128d mg = _mm_set_pd(0.0, 0.0); __m128d mr = _mm_set_pd(0.0, 0.0); unsigned i = 0; @@ -203,35 +299,43 @@ int matrix_solver_GCR_t::vsolve_non_dynamic(const bool newton_r /* now solve it */ - for (unsigned i = 0; i < iN - 1; i++) + if (m_proc != nullptr) { - const auto &nzbd = this->m_terms[i]->m_nzbd; - - if (nzbd.size() > 0) + //static_solver(m_A, RHS); + m_proc(m_A, RHS); + } + else + { + for (unsigned i = 0; i < iN - 1; i++) { - unsigned pi = mat.diag[i]; - const nl_double f = 1.0 / m_A[pi++]; - const unsigned piie = mat.ia[i+1]; + const auto &nzbd = this->m_terms[i]->m_nzbd; - for (auto & j : nzbd) + if (nzbd.size() > 0) { - // proceed to column i - //__builtin_prefetch(&m_A[mat.diag[j+1]], 1); - unsigned pj = mat.ia[j]; + unsigned pi = mat.diag[i]; + const nl_double f = 1.0 / m_A[pi++]; + const unsigned piie = mat.ia[i+1]; - while (mat.ja[pj] < i) - pj++; - - const nl_double f1 = - m_A[pj++] * f; - - // subtract row i from j */ - for (unsigned pii = pi; pii public: matrix_solver_SOR_t(const solver_parameters_t *params, int size) - : matrix_solver_direct_t(matrix_solver_t::DESCENDING, params, size) + : matrix_solver_direct_t(matrix_solver_t::ASCENDING, params, size) , m_lp_fact(0) { } diff --git a/src/lib/netlist/solver/nld_solver.cpp b/src/lib/netlist/solver/nld_solver.cpp index da04a62eb8a..f9b64c9a93d 100644 --- a/src/lib/netlist/solver/nld_solver.cpp +++ b/src/lib/netlist/solver/nld_solver.cpp @@ -921,4 +921,11 @@ ATTR_COLD void NETLIB_NAME(solver)::post_start() } } +void NETLIB_NAME(solver)::create_solver_code(postream &strm) +{ + for (auto & s : m_mat_solvers) + s->create_solver_code(strm); +} + + NETLIB_NAMESPACE_DEVICES_END() diff --git a/src/lib/netlist/solver/nld_solver.h b/src/lib/netlist/solver/nld_solver.h index 6d08e0bbb73..7d11086edac 100644 --- a/src/lib/netlist/solver/nld_solver.h +++ b/src/lib/netlist/solver/nld_solver.h @@ -10,6 +10,7 @@ #include "nl_setup.h" #include "nl_base.h" +#include "plib/pstream.h" //#define ATTR_ALIGNED(N) __attribute__((aligned(N))) #define ATTR_ALIGNED(N) ATTR_ALIGN @@ -63,6 +64,8 @@ public: inline nl_double gmin() { return m_gmin.Value(); } + void create_solver_code(postream &strm); + protected: void update() override; void start() override; diff --git a/src/mame/audio/nl_kidniki.cpp b/src/mame/audio/nl_kidniki.cpp index 31d31776669..c3003100a02 100644 --- a/src/mame/audio/nl_kidniki.cpp +++ b/src/mame/audio/nl_kidniki.cpp @@ -308,7 +308,7 @@ NETLIST_END() NETLIST_START(kidniki) -#if (USE_FRONTIERS) +#if (1 || USE_FRONTIERS) SOLVER(Solver, 18000) PARAM(Solver.ACCURACY, 1e-8) PARAM(Solver.NR_LOOPS, 300)