// license:GPL-2.0+ // copyright-holders:Couriersud /* * nld_ms_direct.h * * * Woodbury Solver * * Computes the updated solution of A given that the change in A is * * A <- A + (U x transpose(V)) U,V matrices * * The approach is describes in "Numerical Recipes in C", Second edition, Page 75ff * * Whilst the book proposes to invert the matrix R=(I+transpose(V)*Z) we define * * w = transpose(V)*y * a = R^-1 * w * * and consequently * * R * a = w * * And solve for a using Gaussian elimination. This is a lot faster. * * One fact omitted in the book is the fact that actually the matrix Z which contains * in it's columns the solutions of * * A * zk = uk * * for uk being unit vectors for full rank (max(k) == n) is identical to the * inverse of A. * * The approach performs relatively well for matrices up to n ~ 40 (kidniki using frontiers). * Kidniki without frontiers has n==88. Here, the average number of Newton-Raphson * loops increase to 20. It looks like that the approach for larger matrices * introduces numerical instability. */ #ifndef NLD_MS_W_H_ #define NLD_MS_W_H_ #include "nld_matrix_solver.h" #include "nld_solver.h" #include "plib/vector_ops.h" #include namespace netlist { namespace solver { template class matrix_solver_w_t: public matrix_solver_ext_t { friend class matrix_solver_t; public: using float_ext_type = FT; using float_type = FT; // FIXME: dirty hack to make this compile static constexpr const std::size_t storage_N = 100; matrix_solver_w_t(netlist_state_t &anetlist, const pstring &name, const analog_net_t::list_t &nets, const solver_parameters_t *params, const std::size_t size) : matrix_solver_ext_t(anetlist, name, nets, params, size) , m_cnt(0) { this->build_mat_ptr(m_A); } void reset() override { matrix_solver_t::reset(); } protected: unsigned vsolve_non_dynamic(const bool newton_raphson) override; unsigned solve_non_dynamic(const bool newton_raphson); void LE_invert(); template void LE_compute_x(T & x); template float_ext_type &A(const T1 &r, const T2 &c) { return m_A[r][c]; } template float_ext_type &W(const T1 &r, const T2 &c) { return m_W[r][c]; } /* access to Ainv for fixed columns over row, there store transposed */ template float_ext_type &Ainv(const T1 &r, const T2 &c) { return m_Ainv[c][r]; } template float_ext_type &RHS(const T1 &r) { return m_RHS[r]; } template float_ext_type &lA(const T1 &r, const T2 &c) { return m_lA[r][c]; } private: template using array2D = std::array, N>; static constexpr std::size_t m_pitch = ((( storage_N) + 7) / 8) * 8; array2D m_A; array2D m_Ainv; array2D m_W; std::array m_RHS; // right hand side - contains currents array2D m_lA; /* temporary */ array2D H; std::array rows; array2D cols; std::array colcount; unsigned m_cnt; }; // ---------------------------------------------------------------------------------------- // matrix_solver_direct // ---------------------------------------------------------------------------------------- template void matrix_solver_w_t::LE_invert() { const std::size_t kN = this->size(); for (std::size_t i = 0; i < kN; i++) { for (std::size_t j = 0; j < kN; j++) { W(i,j) = lA(i,j) = A(i,j); Ainv(i,j) = plib::constants::zero(); } Ainv(i,i) = plib::constants::one(); } /* down */ for (std::size_t i = 0; i < kN; i++) { /* FIXME: Singular matrix? */ const float_type f = plib::reciprocal(W(i,i)); const auto * const p = this->m_terms[i].m_nzrd.data(); const size_t e = this->m_terms[i].m_nzrd.size(); /* Eliminate column i from row j */ const auto * const pb = this->m_terms[i].m_nzbd.data(); const size_t eb = this->m_terms[i].m_nzbd.size(); for (std::size_t jb = 0; jb < eb; jb++) { const auto j = pb[jb]; const float_type f1 = - W(j,i) * f; // FIXME: comparison to zero if (f1 != plib::constants::zero()) { for (std::size_t k = 0; k < e; k++) W(j,p[k]) += W(i,p[k]) * f1; for (std::size_t k = 0; k <= i; k ++) Ainv(j,k) += Ainv(i,k) * f1; } } } /* up */ for (std::size_t i = kN; i-- > 0; ) { /* FIXME: Singular matrix? */ const float_type f = plib::reciprocal(W(i,i)); for (std::size_t j = i; j-- > 0; ) { const float_type f1 = - W(j,i) * f; // FIXME: comparison to zero if (f1 != plib::constants::zero()) { for (std::size_t k = i; k < kN; k++) W(j,k) += W(i,k) * f1; for (std::size_t k = 0; k < kN; k++) Ainv(j,k) += Ainv(i,k) * f1; } } for (std::size_t k = 0; k < kN; k++) { Ainv(i,k) *= f; } } } template template void matrix_solver_w_t::LE_compute_x( T & x) { const std::size_t kN = this->size(); for (std::size_t i=0; i::zero(); for (std::size_t k=0; k unsigned matrix_solver_w_t::solve_non_dynamic(const bool newton_raphson) { const auto iN = this->size(); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) std::array t; // FIXME: convert to member // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) std::array w; if ((m_cnt % 50) == 0) { /* complete calculation */ this->LE_invert(); this->LE_compute_x(this->m_new_V); } else { /* Solve Ay = b for y */ this->LE_compute_x(this->m_new_V); /* determine changed rows */ unsigned rowcount=0; #define VT(r,c) (A(r,c) - lA(r,c)) for (unsigned row = 0; row < iN; row ++) { unsigned cc=0; auto &nz = this->m_terms[row].m_nz; for (auto & col : nz) { if (A(row,col) != lA(row,col)) cols[rowcount][cc++] = col; } if (cc > 0) { colcount[rowcount] = cc; rows[rowcount++] = row; } } if (rowcount > 0) { /* construct w = transform(V) * y * dim: rowcount x iN * */ for (unsigned i = 0; i < rowcount; i++) { const unsigned r = rows[i]; FT tmp = plib::constants::zero(); for (unsigned k = 0; k < iN; k++) tmp += VT(r,k) * this->m_new_V[k]; w[i] = tmp; } for (unsigned i = 0; i < rowcount; i++) for (unsigned k=0; k< rowcount; k++) H[i][k] = plib::constants::zero(); for (unsigned i = 0; i < rowcount; i++) H[i][i] = plib::constants::one(); /* Construct H = (I + VT*Z) */ for (unsigned i = 0; i < rowcount; i++) for (unsigned k=0; k< colcount[i]; k++) { const unsigned col = cols[i][k]; float_type f = VT(rows[i],col); // FIXME: comparison to zero if (f != plib::constants::zero()) for (unsigned j= 0; j < rowcount; j++) H[i][j] += f * Ainv(col,rows[j]); } /* Gaussian elimination of H */ for (unsigned i = 0; i < rowcount; i++) { // FIXME: comparison to zero if (H[i][i] == plib::constants::zero()) plib::perrlogger("{} H singular\n", this->name()); const float_type f = plib::reciprocal(H[i][i]); for (unsigned j = i+1; j < rowcount; j++) { const float_type f1 = - f * H[j][i]; // FIXME: comparison to zero if (f1 != plib::constants::zero()) { float_type *pj = &H[j][i+1]; const float_type *pi = &H[i][i+1]; for (unsigned k = 0; k < rowcount-i-1; k++) pj[k] += f1 * pi[k]; //H[j][k] += f1 * H[i][k]; w[j] += f1 * w[i]; } } } /* Back substitution */ //inv(H) w = t w = H t for (unsigned j = rowcount; j-- > 0; ) { float_type tmp = 0; const float_type *pj = &H[j][j+1]; const float_type *tj = &t[j+1]; for (unsigned k = 0; k < rowcount-j-1; k++) tmp += pj[k] * tj[k]; //tmp += H[j][k] * t[k]; t[j] = (w[j] - tmp) / H[j][j]; } /* x = y - Zt */ for (unsigned i=0; i::zero(); for (unsigned j=0; jm_new_V[i] -= tmp; } } } m_cnt++; if (false) for (unsigned i=0; i::zero(); for (unsigned j=0; jm_new_V[j]; } if (plib::abs(tmp-RHS(i)) > static_cast(1e-6)) plib::perrlogger("{} failed on row {}: {} RHS: {}\n", this->name(), i, plib::abs(tmp-RHS(i)), RHS(i)); } bool err(false); if (newton_raphson) err = this->check_err(); this->store(); return (err) ? 2 : 1; } template unsigned matrix_solver_w_t::vsolve_non_dynamic(const bool newton_raphson) { this->clear_square_mat(this->m_A); this->fill_matrix_and_rhs(); this->m_stat_calculations++; return this->solve_non_dynamic(newton_raphson); } } // namespace solver } // namespace netlist #endif /* NLD_MS_DIRECT_H_ */