Added a GMRES solver to netlist. The generalized minimal residual method

ist certainly more modern than Gaussian elimination and Gauss-Seidel. However, more the current maximum matrix (KidNiki, 89x89) a combination of Gauss-Seidel to solve for maximum one step to catch quasi-stable conditions and fall-back to optimized Gaussian elimination (for sparse matrix) outperforms GMRES by up to 100%. [Couriersud]
2025-07-05 18:08:04 +03:00 · 2015-06-12 00:29:26 +02:00 · 2015-06-12 00:29:26 +02:00 · 3010f89079
commit 3010f89079
parent 6fd6de50ff
3 changed files with 961 additions and 3 deletions
--- a/src/emu/netlist/analog/mgmres.cpp
+++ b/src/emu/netlist/analog/mgmres.cpp
@ -0,0 +1,911 @@
+# include <cstdlib>
+# include <iostream>
+# include <fstream>
+# include <iomanip>
+# include <cmath>
+# include <ctime>
+
+using namespace std;
+
+#include "mgmres.hpp"
+
+//****************************************************************************80
+// http://people.sc.fsu.edu/~jburkardt/cpp_src/mgmres/mgmres.html
+//****************************************************************************80
+
+void gmres_t::ax_cr(const int * RESTRICT ia, const int * RESTRICT ja, const double * RESTRICT a,
+		const double * RESTRICT x, double * RESTRICT w)
+
+//****************************************************************************80
+//
+//  Purpose:
+//
+//    AX_CR computes A*x for a matrix stored in sparse compressed row form.
+//
+//  Discussion:
+//
+//    The Sparse Compressed Row storage format is used.
+//
+//    The matrix A is assumed to be sparse.  To save on storage, only
+//    the nonzero entries of A are stored.  The vector JA stores the
+//    column index of the nonzero value.  The nonzero values are sorted
+//    by row, and the compressed row vector IA then has the property that
+//    the entries in A and JA that correspond to row I occur in indices
+//    IA[I] through IA[I+1]-1.
+//
+//    For this version of MGMRES, the row and column indices are assumed
+//    to use the C/C++ convention, in which indexing begins at 0.
+//
+//    If your index vectors IA and JA are set up so that indexing is based 
+//    at 1, then each use of those vectors should be shifted down by 1.
+//
+//  Licensing:
+//
+//    This code is distributed under the GNU LGPL license. 
+//
+//  Modified:
+//
+//    18 July 2007
+//
+//  Author:
+//
+//    Original C version by Lili Ju.
+//    C++ version by John Burkardt.
+//
+//  Reference:
+//
+//    Richard Barrett, Michael Berry, Tony Chan, James Demmel,
+//    June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo,
+//    Charles Romine, Henk van der Vorst,
+//    Templates for the Solution of Linear Systems:
+//    Building Blocks for Iterative Methods,
+//    SIAM, 1994,
+//    ISBN: 0898714710,
+//    LC: QA297.8.T45.
+//
+//    Tim Kelley,
+//    Iterative Methods for Linear and Nonlinear Equations,
+//    SIAM, 2004,
+//    ISBN: 0898713528,
+//    LC: QA297.8.K45.
+//
+//    Yousef Saad,
+//    Iterative Methods for Sparse Linear Systems,
+//    Second Edition,
+//    SIAM, 2003,
+//    ISBN: 0898715342,
+//    LC: QA188.S17.
+//
+//  Parameters:
+//
+//    Input, int N, the order of the system.
+//
+//    Input, int NZ_NUM, the number of nonzeros.
+//
+//    Input, int IA[N+1], JA[NZ_NUM], the row and column indices
+//    of the matrix values.  The row vector has been compressed.
+//
+//    Input, double A[NZ_NUM], the matrix values.
+//
+//    Input, double X[N], the vector to be multiplied by A.
+//
+//    Output, double W[N], the value of A*X.
+//
+{
+	const int n = m_n;
+	for ( int i = 0; i < n; i++ )
+	{
+		double tmp = 0.0;
+		int k1 = ia[i];
+		int k2 = ia[i+1];
+
+		for (int k = k1; k < k2; k++ )
+		{
+			tmp += a[k] * x[ja[k]];
+		}
+		w[i] = tmp;
+	}
+	return;
+}
+
+//****************************************************************************80
+
+void gmres_t::diagonal_pointer_cr (const int nz_num, const int ia[], const int ja[])
+
+//****************************************************************************80
+//
+//  Purpose:
+//
+//    DIAGONAL_POINTER_CR finds diagonal entries in a sparse compressed row matrix.
+//
+//  Discussion:
+//
+//    The matrix A is assumed to be stored in compressed row format.  Only
+//    the nonzero entries of A are stored.  The vector JA stores the
+//    column index of the nonzero value.  The nonzero values are sorted
+//    by row, and the compressed row vector IA then has the property that
+//    the entries in A and JA that correspond to row I occur in indices
+//    IA[I] through IA[I+1]-1.
+//
+//    The array UA can be used to locate the diagonal elements of the matrix.
+//
+//    It is assumed that every row of the matrix includes a diagonal element,
+//    and that the elements of each row have been ascending sorted.
+//
+//  Licensing:
+//
+//    This code is distributed under the GNU LGPL license. 
+//
+//  Modified:
+//
+//    18 July 2007
+//
+//  Author:
+//
+//    Original C version by Lili Ju.
+//    C++ version by John Burkardt.
+//
+//  Parameters:
+//
+//    Input, int N, the order of the system.
+//
+//    Input, int NZ_NUM, the number of nonzeros.
+//
+//    Input, int IA[N+1], JA[NZ_NUM], the row and column indices
+//    of the matrix values.  The row vector has been compressed.  On output,
+//    the order of the entries of JA may have changed because of the sorting.
+//
+//    Output, int UA[N], the index of the diagonal element of each row.
+//
+{
+	const int n = m_n;
+
+	for (int i = 0; i < n; i++)
+	{
+		m_ua[i] = -1;
+		const int j1 = ia[i];
+		const int j2 = ia[i + 1];
+
+		for (int j = j1; j < j2; j++)
+		{
+			if (ja[j] == i)
+			{
+				m_ua[i] = j;
+			}
+		}
+
+	}
+  return;
+}
+//****************************************************************************80
+
+void gmres_t::ilu_cr (const int nz_num, const int ia[], const int ja[], const double a[])
+
+//****************************************************************************80
+//
+//  Purpose:
+//
+//    ILU_CR computes the incomplete LU factorization of a matrix.
+//
+//  Discussion:
+//
+//    The matrix A is assumed to be stored in compressed row format.  Only
+//    the nonzero entries of A are stored.  The vector JA stores the
+//    column index of the nonzero value.  The nonzero values are sorted
+//    by row, and the compressed row vector IA then has the property that
+//    the entries in A and JA that correspond to row I occur in indices
+//    IA[I] through IA[I+1]-1.
+//
+//  Licensing:
+//
+//    This code is distributed under the GNU LGPL license. 
+//
+//  Modified:
+//
+//    25 July 2007
+//
+//  Author:
+//
+//    Original C version by Lili Ju.
+//    C++ version by John Burkardt.
+//
+//  Parameters:
+//
+//    Input, int N, the order of the system.
+//
+//    Input, int NZ_NUM, the number of nonzeros.
+//
+//    Input, int IA[N+1], JA[NZ_NUM], the row and column indices
+//    of the matrix values.  The row vector has been compressed.
+//
+//    Input, double A[NZ_NUM], the matrix values.
+//
+//    Input, int UA[N], the index of the diagonal element of each row.
+//
+//    Output, double L[NZ_NUM], the ILU factorization of A.
+//
+{
+	int *iw;
+	int i;
+	int j;
+	int jj;
+	int jrow;
+	int jw;
+	int k;
+	double tl;
+
+	const int n = m_n;
+
+	iw = new int[n];
+//
+//  Copy A.
+//
+	for (k = 0; k < nz_num; k++)
+	{
+		m_l[k] = a[k];
+	}
+
+	for (i = 0; i < n; i++)
+	{
+//
+//  IW points to the nonzero entries in row I.
+//
+		for (j = 0; j < n; j++)
+		{
+			iw[j] = -1;
+		}
+
+		for (k = ia[i]; k <= ia[i + 1] - 1; k++)
+		{
+			iw[ja[k]] = k;
+		}
+
+		j = ia[i];
+		do
+		{
+			jrow = ja[j];
+			if (i <= jrow)
+			{
+				break;
+			}
+			tl = m_l[j] * m_l[m_ua[jrow]];
+			m_l[j] = tl;
+			for (jj = m_ua[jrow] + 1; jj <= ia[jrow + 1] - 1; jj++)
+			{
+				jw = iw[ja[jj]];
+				if (jw != -1)
+				{
+					m_l[jw] = m_l[jw] - tl * m_l[jj];
+				}
+			}
+			j = j + 1;
+		} while (j <= ia[i + 1] - 1);
+
+		m_ua[i] = j;
+
+		if (jrow != i)
+		{
+			cout << "\n";
+			cout << "ILU_CR - Fatal error!\n";
+			cout << "  JROW != I\n";
+			cout << "  JROW = " << jrow << "\n";
+			cout << "  I    = " << i << "\n";
+			exit(1);
+		}
+
+		if (m_l[j] == 0.0)
+		{
+			cout << "\n";
+			cout << "ILU_CR - Fatal error!\n";
+			cout << "  Zero pivot on step I = " << i << "\n";
+			cout << "  L[" << j << "] = 0.0\n";
+			exit(1);
+		}
+
+		m_l[j] = 1.0 / m_l[j];
+	}
+
+	for (k = 0; k < n; k++)
+	{
+		m_l[m_ua[k]] = 1.0 / m_l[m_ua[k]];
+	}
+
+	delete[] iw;
+}
+//****************************************************************************80
+
+void gmres_t::lus_cr (const int * RESTRICT ia, const int * RESTRICT ja, double * RESTRICT r)
+
+//****************************************************************************80
+//
+//  Purpose:
+//
+//    LUS_CR applies the incomplete LU preconditioner.
+//
+//  Discussion:
+//
+//    The linear system M * Z = R is solved for Z.  M is the incomplete
+//    LU preconditioner matrix, and R is a vector supplied by the user.
+//    So essentially, we're solving L * U * Z = R.
+//
+//    The matrix A is assumed to be stored in compressed row format.  Only
+//    the nonzero entries of A are stored.  The vector JA stores the
+//    column index of the nonzero value.  The nonzero values are sorted
+//    by row, and the compressed row vector IA then has the property that
+//    the entries in A and JA that correspond to row I occur in indices
+//    IA[I] through IA[I+1]-1.
+//
+//  Licensing:
+//
+//    This code is distributed under the GNU LGPL license. 
+//
+//  Modified:
+//
+//    18 July 2007
+//
+//  Author:
+//
+//    Original C version by Lili Ju.
+//    C++ version by John Burkardt.
+//
+//  Parameters:
+//
+//    Input, int N, the order of the system.
+//
+//    Input, int NZ_NUM, the number of nonzeros.
+//
+//    Input, int IA[N+1], JA[NZ_NUM], the row and column indices
+//    of the matrix values.  The row vector has been compressed.
+//
+//    Input, double L[NZ_NUM], the matrix values.
+//
+//    Input, int UA[N], the index of the diagonal element of each row.
+//
+//    Input, double R[N], the right hand side.
+//
+//    Output, double Z[N], the solution of the system M * Z = R.
+//
+{
+	const int n = m_n;
+//
+//  Solve L * w = w where L is unit lower triangular.
+//
+	for (int i = 1; i < n; i++ )
+	{
+		double tmp = 0.0;
+		for (int j = ia[i]; j < m_ua[i]; j++ )
+		{
+			tmp +=  m_l[j] * r[ja[j]];
+		}
+		r[i] -= tmp;
+	}
+//
+//  Solve U * w = w, where U is upper triangular.
+//
+	for (int i = n - 1; 0 <= i; i-- )
+	{
+		double tmp = 0.0;
+		for (int j = m_ua[i] + 1; j < ia[i+1]; j++ )
+		{
+			tmp += m_l[j] * r[ja[j]];
+		}
+		r[i] = (r[i] - tmp) / m_l[m_ua[i]];
+	}
+
+	return;
+}
+//****************************************************************************80
+
+static inline void mult_givens ( const double c, const double s, const int k, double g[] )
+
+//****************************************************************************80
+//
+//  Purpose:
+//
+//    MULT_GIVENS applies a Givens rotation to two successive entries of a vector.
+//
+//  Licensing:
+//
+//    This code is distributed under the GNU LGPL license. 
+//
+//  Modified:
+//
+//    08 August 2006
+//
+//  Author:
+//
+//    Original C version by Lili Ju.
+//    C++ version by John Burkardt.
+//
+//  Reference:
+//
+//    Richard Barrett, Michael Berry, Tony Chan, James Demmel,
+//    June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo,
+//    Charles Romine, Henk van der Vorst,
+//    Templates for the Solution of Linear Systems:
+//    Building Blocks for Iterative Methods,
+//    SIAM, 1994,
+//    ISBN: 0898714710,
+//    LC: QA297.8.T45.
+//
+//    Tim Kelley,
+//    Iterative Methods for Linear and Nonlinear Equations,
+//    SIAM, 2004,
+//    ISBN: 0898713528,
+//    LC: QA297.8.K45.
+//
+//    Yousef Saad,
+//    Iterative Methods for Sparse Linear Systems,
+//    Second Edition,
+//    SIAM, 2003,
+//    ISBN: 0898715342,
+//    LC: QA188.S17.
+//
+//  Parameters:
+//
+//    Input, double C, S, the cosine and sine of a Givens
+//    rotation.
+//
+//    Input, int K, indicates the location of the first vector entry.
+//
+//    Input/output, double G[K+2], the vector to be modified.  On output,
+//    the Givens rotation has been applied to entries G(K) and G(K+1).
+//
+{
+
+  double g1;
+  double g2;
+
+  g1 = c * g[k] - s * g[k+1];
+  g2 = s * g[k] + c * g[k+1];
+
+  g[k]   = g1;
+  g[k+1] = g2;
+
+  return;
+}
+//****************************************************************************80
+
+
+gmres_t::gmres_t(const int n)
+: m_n(n)
+{
+	int mr=n; /* FIXME: maximum iterations locked in here */
+
+	m_c = new double[mr+1];
+	m_g = new double[mr+1];
+	m_h = new double *[mr];
+	for (int i = 0; i < mr; i++)
+		m_h[i] = new double[mr+1];
+	/* This is using too much memory, but we are interested in speed for now*/
+	m_l = new double[n*n]; //[ia[n]+1];
+	m_r = new double[n];
+	m_s = new double[mr+1];
+	m_ua = new int[n];
+	m_v = new double *[n];
+	for (int i = 0; i < n; i++)
+		m_v[i] = new double[mr+1];
+	m_y = new double[mr+1];
+}
+
+gmres_t::~gmres_t()
+{
+	  delete [] m_c;
+	  delete [] m_g;
+	  delete [] m_h;
+	  delete [] m_l;
+	  delete [] m_r;
+	  delete [] m_s;
+	  delete [] m_ua;
+	  delete [] m_v;
+	  delete [] m_y;
+}
+
+int gmres_t::pmgmres_ilu_cr (const int nz_num, int ia[], int ja[], double a[],
+  double x[], const double rhs[], const int itr_max, const int mr, const double tol_abs,
+  const double tol_rel )
+
+//****************************************************************************80
+//
+//  Purpose:
+//
+//    PMGMRES_ILU_CR applies the preconditioned restarted GMRES algorithm.
+//
+//  Discussion:
+//
+//    The matrix A is assumed to be stored in compressed row format.  Only
+//    the nonzero entries of A are stored.  The vector JA stores the
+//    column index of the nonzero value.  The nonzero values are sorted
+//    by row, and the compressed row vector IA then has the property that
+//    the entries in A and JA that correspond to row I occur in indices
+//    IA[I] through IA[I+1]-1.
+//
+//    This routine uses the incomplete LU decomposition for the
+//    preconditioning.  This preconditioner requires that the sparse
+//    matrix data structure supplies a storage position for each diagonal
+//    element of the matrix A, and that each diagonal element of the
+//    matrix A is not zero.
+//
+//    Thanks to Jesus Pueblas Sanchez-Guerra for supplying two
+//    corrections to the code on 31 May 2007.
+//
+//
+//    This implementation of the code stores the doubly-dimensioned arrays
+//    H and V as vectors.  However, it follows the C convention of storing
+//    them by rows, rather than my own preference for storing them by
+//    columns.   I may come back and change this some time.
+//
+//  Licensing:
+//
+//    This code is distributed under the GNU LGPL license. 
+//
+//  Modified:
+//
+//    26 July 2007
+//
+//  Author:
+//
+//    Original C version by Lili Ju.
+//    C++ version by John Burkardt.
+//
+//  Reference:
+//
+//    Richard Barrett, Michael Berry, Tony Chan, James Demmel,
+//    June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo,
+//    Charles Romine, Henk van der Vorst,
+//    Templates for the Solution of Linear Systems:
+//    Building Blocks for Iterative Methods,
+//    SIAM, 1994.
+//    ISBN: 0898714710,
+//    LC: QA297.8.T45.
+//
+//    Tim Kelley,
+//    Iterative Methods for Linear and Nonlinear Equations,
+//    SIAM, 2004,
+//    ISBN: 0898713528,
+//    LC: QA297.8.K45.
+//
+//    Yousef Saad,
+//    Iterative Methods for Sparse Linear Systems,
+//    Second Edition,
+//    SIAM, 2003,
+//    ISBN: 0898715342,
+//    LC: QA188.S17.
+//
+//  Parameters:
+//
+//    Input, int N, the order of the linear system.
+//
+//    Input, int NZ_NUM, the number of nonzero matrix values.
+//
+//    Input, int IA[N+1], JA[NZ_NUM], the row and column indices
+//    of the matrix values.  The row vector has been compressed.
+//
+//    Input, double A[NZ_NUM], the matrix values.
+//
+//    Input/output, double X[N]; on input, an approximation to
+//    the solution.  On output, an improved approximation.
+//
+//    Input, double RHS[N], the right hand side of the linear system.
+//
+//    Input, int ITR_MAX, the maximum number of (outer) iterations to take.
+//
+//    Input, int MR, the maximum number of (inner) iterations to take.
+//    MR must be less than N.
+//
+//    Input, double TOL_ABS, an absolute tolerance applied to the
+//    current residual.
+//
+//    Input, double TOL_REL, a relative tolerance comparing the
+//    current residual to the initial residual.
+//
+{
+	double av;
+	double delta = 1.0e-03;
+	double htmp;
+	int itr;
+	int itr_used = 0;
+	int k_copy = 0;
+	double mu;
+	double rho;
+	double rho_tol = 0;
+	const int verbose = 0;
+	const bool pre_ilu = true;
+
+	const int n = m_n;
+
+	rearrange_cr(nz_num, ia, ja, a);
+
+	if (pre_ilu)
+	{
+		diagonal_pointer_cr(nz_num, ia, ja);
+		ilu_cr(nz_num, ia, ja, a);
+	}
+
+	if (verbose)
+	{
+		cout << "\n";
+		cout << "PMGMRES_ILU_CR\n";
+		cout << "  Number of unknowns = " << n << "\n";
+	}
+
+	for (itr = 0; itr < itr_max; itr++)
+	{
+		ax_cr(ia, ja, a, x, m_r);
+
+		for (int i = 0; i < n; i++)
+			m_r[i] = rhs[i] - m_r[i];
+
+		if (pre_ilu)
+			lus_cr(ia, ja, m_r);
+
+		rho = sqrt(r8vec_dot2(m_r));
+
+		if (verbose)
+			cout << "  ITR = " << itr << "  Residual = " << rho << "\n";
+
+		if (itr == 0)
+			rho_tol = rho * tol_rel;
+
+		double rhoq = 1.0 / rho;
+
+		for (int i = 0; i < n; i++)
+			m_v[0][i] = m_r[i] * rhoq;
+
+		for (int i = 0; i < mr + 1; i++)
+			m_g[i] = 0.0;
+
+		m_g[0] = rho;
+
+		for (int i = 0; i < mr; i++)
+			for (int j = 0; j < mr + 1; j++)
+				m_h[i][j] = 0.0;
+
+		for (int k = 0; k < mr; k++)
+		{
+			k_copy = k;
+
+			ax_cr(ia, ja, a, m_v[k], m_v[k + 1]);
+
+			if (pre_ilu)
+				lus_cr(ia, ja, m_v[k + 1]);
+
+			av = sqrt(r8vec_dot2(m_v[k + 1]));
+
+			for (int j = 0; j <= k; j++)
+			{
+				m_h[j][k] = r8vec_dot(m_v[k + 1], m_v[j]);
+				for (int i = 0; i < n; i++)
+				{
+					m_v[k + 1][i] = m_v[k + 1][i]
+							- m_h[j][k] * m_v[j][i];
+				}
+			}
+			m_h[k + 1][k] = sqrt(r8vec_dot2(m_v[k + 1]));
+
+			if ((av + delta * m_h[k + 1][k]) == av)
+			{
+				for (int j = 0; j < k + 1; j++)
+				{
+					htmp = r8vec_dot(m_v[k + 1], m_v[j]);
+					m_h[j][k] = m_h[j][k] + htmp;
+					for (int i = 0; i < n; i++)
+					{
+						m_v[k + 1][i] = m_v[k + 1][i] - htmp * m_v[j][i];
+					}
+				}
+				m_h[k + 1][k] = sqrt(r8vec_dot2(m_v[k + 1]));
+			}
+
+			if (m_h[k + 1][k] != 0.0)
+			{
+				for (int i = 0; i < n; i++)
+				{
+					m_v[k + 1][i] = m_v[k + 1][i]
+							/ m_h[k + 1][k];
+				}
+			}
+
+			if (0 < k)
+			{
+				for (int i = 0; i < k + 2; i++)
+				{
+					m_y[i] = m_h[i][k];
+				}
+				for (int j = 0; j < k; j++)
+				{
+					mult_givens(m_c[j], m_s[j], j, m_y);
+				}
+				for (int i = 0; i < k + 2; i++)
+				{
+					m_h[i][k] = m_y[i];
+				}
+			}
+			mu = sqrt(
+					m_h[k][k] * m_h[k][k]
+							+ m_h[k + 1][k] * m_h[k + 1][k]);
+			m_c[k] = m_h[k][k] / mu;
+			m_s[k] = -m_h[k + 1][k] / mu;
+			m_h[k][k] = m_c[k] * m_h[k][k]
+					- m_s[k] * m_h[k + 1][k];
+			m_h[k + 1][k] = 0.0;
+			mult_givens(m_c[k], m_s[k], k, m_g);
+
+			rho = std::abs(m_g[k + 1]);
+
+			itr_used = itr_used + 1;
+
+			if (verbose)
+			{
+				cout << "  K   = " << k << "  Residual = " << rho << "\n";
+			}
+
+			if (rho <= rho_tol && rho <= tol_abs)
+			{
+				break;
+			}
+		}
+
+		m_y[k_copy] = m_g[k_copy] / m_h[k_copy][k_copy];
+		for (int i = k_copy - 1; 0 <= i; i--)
+		{
+			double tmp = m_g[i];
+			for (int j = i + 1; j < k_copy + 1; j++)
+			{
+				tmp -= m_h[i][j] * m_y[j];
+			}
+			m_y[i] = tmp / m_h[i][i];
+		}
+		double cerr = 0;
+		for (int i = 0; i < n; i++)
+		{
+			double tmp = 0.0;
+			for (int j = 0; j < k_copy + 1; j++)
+			{
+				tmp += m_v[j][i] * m_y[j];
+			}
+			cerr = std::max(std::abs(tmp), cerr);
+			x[i] += tmp;
+		}
+#if 0
+		if (cerr < 1e-8)
+			return 1; //break;
+#else
+		if (rho <= rho_tol && rho <= tol_abs)
+		{
+			break;
+		}
+#endif
+	}
+
+	if (verbose)
+	{
+		cout << "\n";
+		;
+		cout << "PMGMRES_ILU_CR:\n";
+		cout << "  Iterations = " << itr_used << "\n";
+		cout << "  Final residual = " << rho << "\n";
+	}
+
+	//if ( rho >= tol_abs )
+//	  printf("missed!\n");
+
+	return (itr_used - 1) * mr + k_copy;
+}
+//****************************************************************************80
+
+double gmres_t::r8vec_dot (const double a1[], const double a2[] )
+{
+	const int n = m_n;
+	double value = 0.0;
+
+	for ( int i = 0; i < n; i++ )
+		value = value + a1[i] * a2[i];
+	return value;
+}
+
+double gmres_t::r8vec_dot2 (const double a1[])
+{
+	const int n = m_n;
+	double value = 0.0;
+
+	for ( int i = 0; i < n; i++ )
+		value = value + a1[i] * a1[i];
+	return value;
+}
+
+
+//****************************************************************************80
+
+//****************************************************************************80
+
+void gmres_t::rearrange_cr (const int nz_num, int ia[], int ja[], double a[] )
+
+//****************************************************************************80
+//
+//  Purpose:
+//
+//    REARRANGE_CR sorts a sparse compressed row matrix.
+//
+//  Discussion:
+//
+//    This routine guarantees that the entries in the CR matrix
+//    are properly sorted.
+//
+//    After the sorting, the entries of the matrix are rearranged in such
+//    a way that the entries of each column are listed in ascending order
+//    of their column values.
+//
+//    The matrix A is assumed to be stored in compressed row format.  Only
+//    the nonzero entries of A are stored.  The vector JA stores the
+//    column index of the nonzero value.  The nonzero values are sorted
+//    by row, and the compressed row vector IA then has the property that
+//    the entries in A and JA that correspond to row I occur in indices
+//    IA[I] through IA[I+1]-1.
+//
+//  Licensing:
+//
+//    This code is distributed under the GNU LGPL license. 
+//
+//  Modified:
+//
+//    18 July 2007
+//
+//  Author:
+//
+//    Original C version by Lili Ju.
+//    C++ version by John Burkardt.
+//
+//  Parameters:
+//
+//    Input, int N, the order of the system.
+//
+//    Input, int NZ_NUM, the number of nonzeros.
+//
+//    Input, int IA[N+1], the compressed row index.
+//
+//    Input/output, int JA[NZ_NUM], the column indices.  On output,
+//    the order of the entries of JA may have changed because of the sorting.
+//
+//    Input/output, double A[NZ_NUM], the matrix values.  On output, the
+//    order of the entries may have changed because of the sorting.
+//
+{
+  double dtemp;
+  int i;
+  int is;
+  int itemp;
+  int j;
+  int j1;
+  int j2;
+  int k;
+
+  const int n = m_n;
+
+  for ( i = 0; i < n; i++ )
+  {
+    j1 = ia[i];
+    j2 = ia[i+1];
+    is = j2 - j1;
+
+    for ( k = 1; k < is; k++ ) 
+    {
+      for ( j = j1; j < j2 - k; j++ ) 
+      {
+        if ( ja[j+1] < ja[j] ) 
+        {
+          itemp = ja[j+1];
+          ja[j+1] =  ja[j];
+          ja[j] =  itemp;
+
+          dtemp = a[j+1];
+          a[j+1] =  a[j];
+          a[j] = dtemp;
+        }
+      }
+    }
+  }
+  return;
+}
+//****************************************************************************80
+
--- a/src/emu/netlist/analog/mgmres.hpp
+++ b/src/emu/netlist/analog/mgmres.hpp
@ -0,0 +1,43 @@
+
+#ifndef __MGMRES
+#define __MGMRES
+
+class gmres_t
+{
+public:
+	gmres_t(const int n);
+	~gmres_t();
+
+	int pmgmres_ilu_cr (const int nz_num, int ia[], int ja[], double a[],
+	  double x[], const double rhs[], const int itr_max, const int mr, const double tol_abs,
+	  const double tol_rel );
+
+private:
+
+	void diagonal_pointer_cr(const int nz_num, const int ia[], const int ja[]);
+	void rearrange_cr (const int nz_num, int ia[], int ja[], double a[] );
+	inline double r8vec_dot (const double a1[], const double a2[] );
+	inline double r8vec_dot2 (const double a1[]);
+
+	void ax_cr(const int * RESTRICT ia, const int * RESTRICT ja, const double * RESTRICT a,
+			const double * RESTRICT x, double * RESTRICT w);
+
+	void lus_cr (const int * RESTRICT ia, const int * RESTRICT ja, double * RESTRICT r);
+
+	void ilu_cr (const int nz_num, const int ia[], const int ja[], const double a[]);
+
+	const int m_n;
+
+
+	double * RESTRICT m_c;
+	double * RESTRICT m_g;
+	double ** RESTRICT m_h;
+	double * RESTRICT m_l;
+	double * RESTRICT m_r;
+	double * RESTRICT m_s;
+	double ** RESTRICT m_v;
+	int * RESTRICT m_ua;
+	double * RESTRICT m_y;
+};
+
+#endif
--- a/src/emu/netlist/analog/nld_solver.c
+++ b/src/emu/netlist/analog/nld_solver.c
@ -11,8 +11,8 @@

 #if 0
 #pragma GCC optimize "-ffast-math"
-#pragma GCC optimize "-ftree-parallelize-loops=4"
-//#pragma GCC optimize "-funroll-loops"
+//#pragma GCC optimize "-ftree-parallelize-loops=4"
+#pragma GCC optimize "-funroll-loops"
 #pragma GCC optimize "-funswitch-loops"
 #pragma GCC optimize "-fvariable-expansion-in-unroller"
 #pragma GCC optimize "-funsafe-loop-optimizations"
@ -32,6 +32,7 @@
 #include "nld_ms_direct2.h"
 #include "nld_ms_sor.h"
 #include "nld_ms_sor_mat.h"
+#include "nld_ms_gmres.h"
 #include "nld_twoterm.h"
 #include "../nl_lists.h"

@ -410,6 +411,7 @@ netlist_matrix_solver_t * NETLIB_NAME(solver)::create_solver(int size, const int
 			else
 			{
 				typedef netlist_matrix_solver_SOR_t<m_N,_storage_N> solver_GS;
+				//typedef netlist_matrix_solver_GMRES_t<m_N,_storage_N> solver_GS;
 				return palloc(solver_GS, &m_params, size);
 			}
 		}
@ -423,7 +425,7 @@ netlist_matrix_solver_t * NETLIB_NAME(solver)::create_solver(int size, const int

 ATTR_COLD void NETLIB_NAME(solver)::post_start()
 {
-	netlist_analog_net_t::list_t groups[100];
+	netlist_analog_net_t::list_t groups[256];
 	int cur_group = -1;
 	const int gs_threshold = m_gs_threshold.Value();
 	const bool use_specific = true;
@ -559,3 +561,5 @@ ATTR_COLD void NETLIB_NAME(solver)::post_start()
 		}
 	}
 }
+
+#include "mgmres.cpp"