Added a GMRES solver to netlist. The generalized minimal residual method

is certainly more modern than Gaussian elimination and Gauss-Seidel. However, for the current maximum matrix (KidNiki, 89x89), a combination of Gauss-Seidel (run for at most one step to catch quasi-stable conditions) with a fall-back to optimized Gaussian elimination (for sparse matrices) outperforms GMRES by up to 100%. [Couriersud]
2025-10-08 09:30:17 +03:00 · 2015-06-12 00:29:26 +02:00 · 2015-06-12 00:29:26 +02:00 · 3010f89079
commit 3010f89079
parent 6fd6de50ff
3 changed files with 961 additions and 3 deletions
--- a/src/emu/netlist/analog/mgmres.cpp
+++ b/src/emu/netlist/analog/mgmres.cpp
@ -0,0 +1,911 @@
 # include <cstdlib>
 # include <iostream>
 # include <fstream>
 # include <iomanip>
 # include <cmath>
 # include <ctime>
 using namespace std;
 #include "mgmres.hpp"
 //****************************************************************************80
 // http://people.sc.fsu.edu/~jburkardt/cpp_src/mgmres/mgmres.html
 //****************************************************************************80
 void gmres_t::ax_cr(const int * RESTRICT ia, const int * RESTRICT ja, const double * RESTRICT a,
 		const double * RESTRICT x, double * RESTRICT w)
 //****************************************************************************80
 //
 //  Purpose:
 //
 //    AX_CR computes A*x for a matrix stored in sparse compressed row form.
 //
 //  Discussion:
 //
 //    The Sparse Compressed Row storage format is used.
 //
 //    The matrix A is assumed to be sparse.  To save on storage, only
 //    the nonzero entries of A are stored.  The vector JA stores the
 //    column index of the nonzero value.  The nonzero values are sorted
 //    by row, and the compressed row vector IA then has the property that
 //    the entries in A and JA that correspond to row I occur in indices
 //    IA[I] through IA[I+1]-1.
 //
 //    For this version of MGMRES, the row and column indices are assumed
 //    to use the C/C++ convention, in which indexing begins at 0.
 //
 //    If your index vectors IA and JA are set up so that indexing is based 
 //    at 1, then each use of those vectors should be shifted down by 1.
 //
 //  Licensing:
 //
 //    This code is distributed under the GNU LGPL license. 
 //
 //  Modified:
 //
 //    18 July 2007
 //
 //  Author:
 //
 //    Original C version by Lili Ju.
 //    C++ version by John Burkardt.
 //
 //  Reference:
 //
 //    Richard Barrett, Michael Berry, Tony Chan, James Demmel,
 //    June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo,
 //    Charles Romine, Henk van der Vorst,
 //    Templates for the Solution of Linear Systems:
 //    Building Blocks for Iterative Methods,
 //    SIAM, 1994,
 //    ISBN: 0898714710,
 //    LC: QA297.8.T45.
 //
 //    Tim Kelley,
 //    Iterative Methods for Linear and Nonlinear Equations,
 //    SIAM, 2004,
 //    ISBN: 0898713528,
 //    LC: QA297.8.K45.
 //
 //    Yousef Saad,
 //    Iterative Methods for Sparse Linear Systems,
 //    Second Edition,
 //    SIAM, 2003,
 //    ISBN: 0898715342,
 //    LC: QA188.S17.
 //
 //  Parameters:
 //
 //    Input, int N, the order of the system.
 //
 //    Input, int NZ_NUM, the number of nonzeros.
 //
 //    Input, int IA[N+1], JA[NZ_NUM], the row and column indices
 //    of the matrix values.  The row vector has been compressed.
 //
 //    Input, double A[NZ_NUM], the matrix values.
 //
 //    Input, double X[N], the vector to be multiplied by A.
 //
 //    Output, double W[N], the value of A*X.
 //
 {
 	const int n = m_n;
 	for ( int i = 0; i < n; i++ )
 	{
 		double tmp = 0.0;
 		int k1 = ia[i];
 		int k2 = ia[i+1];
 		for (int k = k1; k < k2; k++ )
 		{
 			tmp += a[k] * x[ja[k]];
 		}
 		w[i] = tmp;
 	}
 	return;
 }
 //****************************************************************************80
 void gmres_t::diagonal_pointer_cr (const int nz_num, const int ia[], const int ja[])
 //****************************************************************************80
 //
 //  Purpose:
 //
 //    DIAGONAL_POINTER_CR locates the diagonal entry of every row of a
 //    sparse compressed row matrix.
 //
 //  Discussion:
 //
 //    The matrix is stored in compressed row format: the entries of row I
 //    occupy positions IA[I] through IA[I+1]-1, and JA holds the column
 //    index of each stored entry.
 //
 //    For every row I the position whose column index equals I is written
 //    to the member array m_ua.  A row without a stored diagonal entry gets
 //    -1.  Should a row list the diagonal column more than once, the last
 //    occurrence is the one recorded.
 //
 //    The order of the system is read from the member m_n.
 //
 //  Licensing:
 //
 //    This code is distributed under the GNU LGPL license.
 //
 //  Modified:
 //
 //    18 July 2007
 //
 //  Author:
 //
 //    Original C version by Lili Ju.
 //    C++ version by John Burkardt.
 //
 //  Parameters:
 //
 //    Input, int NZ_NUM, the number of nonzeros.  Not used by this routine.
 //
 //    Input, int IA[m_n+1], JA[NZ_NUM], the row and column indices
 //    of the matrix values.  The row vector has been compressed.
 //
 //    Output (member), int m_ua[m_n], the index of the diagonal element of
 //    each row, or -1 when the row has none.
 //
 {
 	const int order = m_n;
 	for (int row = 0; row < order; ++row)
 	{
 		int found = -1;
 		const int stop = ia[row + 1];
 		// Scan the whole row; no early exit, so the last match is kept.
 		for (int pos = ia[row]; pos < stop; ++pos)
 		{
 			if (ja[pos] == row)
 				found = pos;
 		}
 		m_ua[row] = found;
 	}
 }
 //****************************************************************************80
 void gmres_t::ilu_cr (const int nz_num, const int ia[], const int ja[], const double a[])
 //****************************************************************************80
 //
 //  Purpose:
 //
 //    ILU_CR computes the incomplete LU factorization of a matrix.
 //
 //  Discussion:
 //
 //    The matrix A is assumed to be stored in compressed row format.  Only
 //    the nonzero entries of A are stored.  The vector JA stores the
 //    column index of the nonzero value.  The nonzero values are sorted
 //    by row, and the compressed row vector IA then has the property that
 //    the entries in A and JA that correspond to row I occur in indices
 //    IA[I] through IA[I+1]-1.
 //
 //    The factorization keeps the sparsity pattern of A: updates are only
 //    applied to positions that already exist in the row being eliminated.
 //    The result is written to the member array m_l, and m_ua is
 //    (re)written with the position of the diagonal entry of each row.
 //
 //    While the elimination runs, the diagonal entry of every finished row
 //    is held as its reciprocal so that the multiplier can be formed by a
 //    multiplication.  The final loop inverts those entries again, so on
 //    return m_l holds the actual diagonal of U.
 //
 //    Every row must contain its diagonal entry and the entries of a row
 //    must be sorted by ascending column index.  A row that violates this
 //    (including an empty row) or a zero pivot terminates the program.
 //
 //    The order of the system is read from the member m_n.
 //
 //  Licensing:
 //
 //    This code is distributed under the GNU LGPL license.
 //
 //  Modified:
 //
 //    25 July 2007
 //
 //  Author:
 //
 //    Original C version by Lili Ju.
 //    C++ version by John Burkardt.
 //
 //  Parameters:
 //
 //    Input, int NZ_NUM, the number of nonzeros, i.e. the number of values
 //    copied from A into m_l.
 //
 //    Input, int IA[m_n+1], JA[NZ_NUM], the row and column indices
 //    of the matrix values.  The row vector has been compressed.
 //
 //    Input, double A[NZ_NUM], the matrix values.
 //
 //    Input/output (member), int m_ua[m_n], the index of the diagonal
 //    element of each row.
 //
 //    Output (member), double m_l[NZ_NUM], the ILU factorization of A.
 //
 {
 	const int n = m_n;
 	int *iw = new int[n];
 //
 //  Copy A.
 //
 	for (int k = 0; k < nz_num; k++)
 	{
 		m_l[k] = a[k];
 	}
 //
 //  IW maps a column index to its position in the current row, -1 meaning
 //  "not present".  It is cleared once here; after each row only the
 //  entries that row has set are cleared again, which avoids an O(n) sweep
 //  per row.
 //
 	for (int j = 0; j < n; j++)
 	{
 		iw[j] = -1;
 	}
 	for (int i = 0; i < n; i++)
 	{
 		const int row_begin = ia[i];
 		const int row_end = ia[i + 1];
 //
 //  IW points to the nonzero entries in row I.
 //
 		for (int k = row_begin; k < row_end; k++)
 		{
 			iw[ja[k]] = k;
 		}
 //
 //  Eliminate the entries left of the diagonal.  JROW stays at -1 when the
 //  row is empty, so nothing outside the row is ever read and the check
 //  below reports the malformed row.
 //
 		int j = row_begin;
 		int jrow = -1;
 		while (j < row_end)
 		{
 			jrow = ja[j];
 			if (i <= jrow)
 			{
 				break;
 			}
 			// m_l[m_ua[jrow]] currently holds the reciprocal of the pivot.
 			const double tl = m_l[j] * m_l[m_ua[jrow]];
 			m_l[j] = tl;
 			for (int jj = m_ua[jrow] + 1; jj < ia[jrow + 1]; jj++)
 			{
 				const int jw = iw[ja[jj]];
 				if (jw != -1)
 				{
 					m_l[jw] = m_l[jw] - tl * m_l[jj];
 				}
 			}
 			j = j + 1;
 		}
 		m_ua[i] = j;
 		if (jrow != i)
 		{
 			cout << "\n";
 			cout << "ILU_CR - Fatal error!\n";
 			cout << "  JROW != I\n";
 			cout << "  JROW = " << jrow << "\n";
 			cout << "  I    = " << i << "\n";
 			exit(1);
 		}
 		if (m_l[j] == 0.0)
 		{
 			cout << "\n";
 			cout << "ILU_CR - Fatal error!\n";
 			cout << "  Zero pivot on step I = " << i << "\n";
 			cout << "  L[" << j << "] = 0.0\n";
 			exit(1);
 		}
 		m_l[j] = 1.0 / m_l[j];
 //
 //  Forget row I again so that IW is all -1 for the next row.
 //
 		for (int k = row_begin; k < row_end; k++)
 		{
 			iw[ja[k]] = -1;
 		}
 	}
 //
 //  Turn the stored reciprocals back into the diagonal entries of U.
 //
 	for (int k = 0; k < n; k++)
 	{
 		m_l[m_ua[k]] = 1.0 / m_l[m_ua[k]];
 	}
 	delete[] iw;
 }
 //****************************************************************************80
 void gmres_t::lus_cr (const int * RESTRICT ia, const int * RESTRICT ja, double * RESTRICT r)
 //****************************************************************************80
 //
 //  Purpose:
 //
 //    LUS_CR applies the incomplete LU preconditioner.
 //
 //  Discussion:
 //
 //    The linear system M * Z = R is solved for Z.  M is the incomplete
 //    LU preconditioner matrix, and R is a vector supplied by the user.
 //    So essentially, we're solving L * U * Z = R.
 //
 //    The matrix A is assumed to be stored in compressed row format.  Only
 //    the nonzero entries of A are stored.  The vector JA stores the
 //    column index of the nonzero value.  The nonzero values are sorted
 //    by row, and the compressed row vector IA then has the property that
 //    the entries in A and JA that correspond to row I occur in indices
 //    IA[I] through IA[I+1]-1.
 //
 //  Licensing:
 //
 //    This code is distributed under the GNU LGPL license. 
 //
 //  Modified:
 //
 //    18 July 2007
 //
 //  Author:
 //
 //    Original C version by Lili Ju.
 //    C++ version by John Burkardt.
 //
 //  Parameters:
 //
 //    Input, int N, the order of the system.
 //
 //    Input, int NZ_NUM, the number of nonzeros.
 //
 //    Input, int IA[N+1], JA[NZ_NUM], the row and column indices
 //    of the matrix values.  The row vector has been compressed.
 //
 //    Input, double L[NZ_NUM], the matrix values.
 //
 //    Input, int UA[N], the index of the diagonal element of each row.
 //
 //    Input, double R[N], the right hand side.
 //
 //    Output, double Z[N], the solution of the system M * Z = R.
 //
 {
 	const int n = m_n;
 //
 //  Solve L * w = w where L is unit lower triangular.
 //
 	for (int i = 1; i < n; i++ )
 	{
 		double tmp = 0.0;
 		for (int j = ia[i]; j < m_ua[i]; j++ )
 		{
 			tmp +=  m_l[j] * r[ja[j]];
 		}
 		r[i] -= tmp;
 	}
 //
 //  Solve U * w = w, where U is upper triangular.
 //
 	for (int i = n - 1; 0 <= i; i-- )
 	{
 		double tmp = 0.0;
 		for (int j = m_ua[i] + 1; j < ia[i+1]; j++ )
 		{
 			tmp += m_l[j] * r[ja[j]];
 		}
 		r[i] = (r[i] - tmp) / m_l[m_ua[i]];
 	}
 	return;
 }
 //****************************************************************************80
 static inline void mult_givens ( const double c, const double s, const int k, double g[] )
 //****************************************************************************80
 //
 //  Purpose:
 //
 //    MULT_GIVENS rotates two neighbouring entries of a vector with a
 //    Givens rotation.
 //
 //  Discussion:
 //
 //    The pair ( G[K], G[K+1] ) is replaced by
 //
 //      ( C * G[K] - S * G[K+1],  S * G[K] + C * G[K+1] ).
 //
 //    Both new values are computed from the old pair before either entry
 //    is overwritten.
 //
 //  Licensing:
 //
 //    This code is distributed under the GNU LGPL license.
 //
 //  Modified:
 //
 //    08 August 2006
 //
 //  Author:
 //
 //    Original C version by Lili Ju.
 //    C++ version by John Burkardt.
 //
 //  Reference:
 //
 //    Richard Barrett, Michael Berry, Tony Chan, James Demmel,
 //    June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo,
 //    Charles Romine, Henk van der Vorst,
 //    Templates for the Solution of Linear Systems:
 //    Building Blocks for Iterative Methods,
 //    SIAM, 1994,
 //    ISBN: 0898714710,
 //    LC: QA297.8.T45.
 //
 //    Tim Kelley,
 //    Iterative Methods for Linear and Nonlinear Equations,
 //    SIAM, 2004,
 //    ISBN: 0898713528,
 //    LC: QA297.8.K45.
 //
 //    Yousef Saad,
 //    Iterative Methods for Sparse Linear Systems,
 //    Second Edition,
 //    SIAM, 2003,
 //    ISBN: 0898715342,
 //    LC: QA188.S17.
 //
 //  Parameters:
 //
 //    Input, double C, S, the cosine and sine of a Givens
 //    rotation.
 //
 //    Input, int K, indicates the location of the first vector entry.
 //
 //    Input/output, double G[K+2], the vector to be modified.  On output,
 //    the Givens rotation has been applied to entries G(K) and G(K+1).
 //
 {
 	double &upper = g[k];
 	double &lower = g[k + 1];
 	const double rotated_upper = c * upper - s * lower;
 	const double rotated_lower = s * upper + c * lower;
 	upper = rotated_upper;
 	lower = rotated_lower;
 }
 //****************************************************************************80
 //
 //  Constructor: allocates every work array for a system of order N.
 //
 //  The restart length is fixed to N here.  pmgmres_ilu_cr indexes the
 //  Hessenberg matrix as m_h[k + 1][k] and the Krylov basis as m_v[k + 1]
 //  for k < mr, so both need mr + 1 rows; with only mr rows a caller using
 //  the full restart length would run past the end of the row tables.
 //
 gmres_t::gmres_t(const int n)
 : m_n(n)
 {
 	const int mr = n; /* FIXME: maximum iterations locked in here */
 	const int rows = mr + 1; /* rows 0..mr of m_h and m_v are addressed */
 	m_c = new double[mr + 1];
 	m_g = new double[mr + 1];
 	m_h = new double *[rows];
 	for (int i = 0; i < rows; i++)
 		m_h[i] = new double[mr + 1];
 	/* This is using too much memory, but we are interested in speed for now*/
 	m_l = new double[n*n]; //[ia[n]+1];
 	m_r = new double[n];
 	m_s = new double[mr + 1];
 	m_ua = new int[n];
 	/* each basis vector has one entry per unknown */
 	m_v = new double *[rows];
 	for (int i = 0; i < rows; i++)
 		m_v[i] = new double[n];
 	m_y = new double[mr + 1];
 }
 //
 //  Destructor: releases everything the constructor allocated, including
 //  the individual rows of m_h and m_v (the row count must match the
 //  constructor: m_n + 1).
 //
 gmres_t::~gmres_t()
 {
 	const int rows = m_n + 1;
 	for (int i = 0; i < rows; i++)
 	{
 		delete [] m_h[i];
 		delete [] m_v[i];
 	}
 	delete [] m_c;
 	delete [] m_g;
 	delete [] m_h;
 	delete [] m_l;
 	delete [] m_r;
 	delete [] m_s;
 	delete [] m_ua;
 	delete [] m_v;
 	delete [] m_y;
 }
 int gmres_t::pmgmres_ilu_cr (const int nz_num, int ia[], int ja[], double a[],
  double x[], const double rhs[], const int itr_max, const int mr, const double tol_abs,
  const double tol_rel )
 //****************************************************************************80
 //
 //  Purpose:
 //
 //    PMGMRES_ILU_CR applies the preconditioned restarted GMRES algorithm.
 //
 //  Discussion:
 //
 //    The matrix A is assumed to be stored in compressed row format.  Only
 //    the nonzero entries of A are stored.  The vector JA stores the
 //    column index of the nonzero value.  The nonzero values are sorted
 //    by row, and the compressed row vector IA then has the property that
 //    the entries in A and JA that correspond to row I occur in indices
 //    IA[I] through IA[I+1]-1.
 //
 //    This routine uses the incomplete LU decomposition for the
 //    preconditioning.  This preconditioner requires that the sparse
 //    matrix data structure supplies a storage position for each diagonal
 //    element of the matrix A, and that each diagonal element of the
 //    matrix A is not zero.
 //
 //    Thanks to Jesus Pueblas Sanchez-Guerra for supplying two
 //    corrections to the code on 31 May 2007.
 //
 //    Each call first sorts the rows of the matrix (REARRANGE_CR), locates
 //    the diagonal entries and recomputes the ILU factors before iterating.
 //    The Hessenberg matrix and the Krylov basis are kept in the member
 //    arrays m_h and m_v, which are indexed as m_h[row][column] and
 //    m_v[vector][component].
 //
 //    An outer iteration stops as soon as the preconditioned residual is
 //    exactly zero: X already solves the system, and normalising a zero
 //    residual would fill X with NaN.
 //
 //    The iteration is considered converged when the residual estimate is
 //    not larger than TOL_ABS and not larger than TOL_REL times the
 //    residual of the first outer iteration.
 //
 //    The order of the system is read from the member m_n.
 //
 //  Licensing:
 //
 //    This code is distributed under the GNU LGPL license.
 //
 //  Modified:
 //
 //    26 July 2007
 //
 //  Author:
 //
 //    Original C version by Lili Ju.
 //    C++ version by John Burkardt.
 //
 //  Reference:
 //
 //    Richard Barrett, Michael Berry, Tony Chan, James Demmel,
 //    June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo,
 //    Charles Romine, Henk van der Vorst,
 //    Templates for the Solution of Linear Systems:
 //    Building Blocks for Iterative Methods,
 //    SIAM, 1994.
 //    ISBN: 0898714710,
 //    LC: QA297.8.T45.
 //
 //    Tim Kelley,
 //    Iterative Methods for Linear and Nonlinear Equations,
 //    SIAM, 2004,
 //    ISBN: 0898713528,
 //    LC: QA297.8.K45.
 //
 //    Yousef Saad,
 //    Iterative Methods for Sparse Linear Systems,
 //    Second Edition,
 //    SIAM, 2003,
 //    ISBN: 0898715342,
 //    LC: QA188.S17.
 //
 //  Parameters:
 //
 //    Input, int NZ_NUM, the number of nonzero matrix values.
 //
 //    Input, int IA[m_n+1], the compressed row index.
 //
 //    Input/output, int JA[NZ_NUM], the column indices.  The entries of a
 //    row may be reordered by the sorting.
 //
 //    Input/output, double A[NZ_NUM], the matrix values, reordered together
 //    with JA.
 //
 //    Input/output, double X[m_n]; on input, an approximation to
 //    the solution.  On output, an improved approximation.
 //
 //    Input, double RHS[m_n], the right hand side of the linear system.
 //
 //    Input, int ITR_MAX, the maximum number of (outer) iterations to take.
 //
 //    Input, int MR, the maximum number of (inner) iterations to take.
 //    Rows up to index MR of m_h and m_v are addressed, so MR must not
 //    exceed what the work arrays allocated by the constructor can hold.
 //    Nothing is iterated when MR is smaller than 1.
 //
 //    Input, double TOL_ABS, an absolute tolerance applied to the
 //    current residual.
 //
 //    Input, double TOL_REL, a relative tolerance comparing the
 //    current residual to the initial residual.
 //
 //    Output, the value (ITR_USED - 1) * MR + K_COPY, where ITR_USED is the
 //    number of inner iterations performed over all outer iterations and
 //    K_COPY is the index of the last inner iteration of the final outer
 //    iteration.  0 is returned when no inner iteration was performed.
 //    NOTE(review): this is not a plain iteration count; confirm what the
 //    callers expect before changing the formula.
 //
 {
 	const double delta = 1.0e-03;
 	const int verbose = 0;
 	const bool pre_ilu = true;
 	const int n = m_n;
 	int itr_used = 0;
 	int k_copy = 0;
 	double rho = 0.0;
 	double rho_tol = 0;
 	rearrange_cr(nz_num, ia, ja, a);
 	if (pre_ilu)
 	{
 		diagonal_pointer_cr(nz_num, ia, ja);
 		ilu_cr(nz_num, ia, ja, a);
 	}
 	if (verbose)
 	{
 		cout << "\n";
 		cout << "PMGMRES_ILU_CR\n";
 		cout << "  Number of unknowns = " << n << "\n";
 	}
 	// Without a single inner step the update below would divide by an
 	// unset m_h[0][0]; leave X untouched instead.
 	if (mr < 1)
 		return 0;
 	for (int itr = 0; itr < itr_max; itr++)
 	{
 		// Preconditioned residual r = M^-1 * (rhs - A*x).
 		ax_cr(ia, ja, a, x, m_r);
 		for (int i = 0; i < n; i++)
 			m_r[i] = rhs[i] - m_r[i];
 		if (pre_ilu)
 			lus_cr(ia, ja, m_r);
 		rho = sqrt(r8vec_dot2(m_r));
 		if (verbose)
 			cout << "  ITR = " << itr << "  Residual = " << rho << "\n";
 		if (itr == 0)
 			rho_tol = rho * tol_rel;
 		// X is already an exact solution; 1.0 / rho would be infinite.
 		if (rho == 0.0)
 			break;
 		const double rhoq = 1.0 / rho;
 		for (int i = 0; i < n; i++)
 			m_v[0][i] = m_r[i] * rhoq;
 		for (int i = 0; i < mr + 1; i++)
 			m_g[i] = 0.0;
 		m_g[0] = rho;
 		for (int i = 0; i < mr; i++)
 			for (int j = 0; j < mr + 1; j++)
 				m_h[i][j] = 0.0;
 		for (int k = 0; k < mr; k++)
 		{
 			k_copy = k;
 			// Next Krylov direction: v[k+1] = M^-1 * A * v[k].
 			ax_cr(ia, ja, a, m_v[k], m_v[k + 1]);
 			if (pre_ilu)
 				lus_cr(ia, ja, m_v[k + 1]);
 			const double av = sqrt(r8vec_dot2(m_v[k + 1]));
 			// Modified Gram-Schmidt against all previous basis vectors.
 			for (int j = 0; j <= k; j++)
 			{
 				m_h[j][k] = r8vec_dot(m_v[k + 1], m_v[j]);
 				for (int i = 0; i < n; i++)
 				{
 					m_v[k + 1][i] = m_v[k + 1][i]
 							- m_h[j][k] * m_v[j][i];
 				}
 			}
 			m_h[k + 1][k] = sqrt(r8vec_dot2(m_v[k + 1]));
 			// Orthogonalise a second time when the remainder is negligible
 			// compared with the vector before orthogonalisation.
 			if ((av + delta * m_h[k + 1][k]) == av)
 			{
 				for (int j = 0; j < k + 1; j++)
 				{
 					const double htmp = r8vec_dot(m_v[k + 1], m_v[j]);
 					m_h[j][k] = m_h[j][k] + htmp;
 					for (int i = 0; i < n; i++)
 					{
 						m_v[k + 1][i] = m_v[k + 1][i] - htmp * m_v[j][i];
 					}
 				}
 				m_h[k + 1][k] = sqrt(r8vec_dot2(m_v[k + 1]));
 			}
 			if (m_h[k + 1][k] != 0.0)
 			{
 				for (int i = 0; i < n; i++)
 				{
 					m_v[k + 1][i] = m_v[k + 1][i]
 							/ m_h[k + 1][k];
 				}
 			}
 			// Apply the rotations of the previous steps to column k of H.
 			if (0 < k)
 			{
 				for (int i = 0; i < k + 2; i++)
 				{
 					m_y[i] = m_h[i][k];
 				}
 				for (int j = 0; j < k; j++)
 				{
 					mult_givens(m_c[j], m_s[j], j, m_y);
 				}
 				for (int i = 0; i < k + 2; i++)
 				{
 					m_h[i][k] = m_y[i];
 				}
 			}
 			// New rotation that annihilates the subdiagonal entry H[k+1][k].
 			const double mu = sqrt(
 					m_h[k][k] * m_h[k][k]
 							+ m_h[k + 1][k] * m_h[k + 1][k]);
 			m_c[k] = m_h[k][k] / mu;
 			m_s[k] = -m_h[k + 1][k] / mu;
 			m_h[k][k] = m_c[k] * m_h[k][k]
 					- m_s[k] * m_h[k + 1][k];
 			m_h[k + 1][k] = 0.0;
 			mult_givens(m_c[k], m_s[k], k, m_g);
 			// |g[k+1]| is the residual estimate after this step.
 			rho = std::abs(m_g[k + 1]);
 			itr_used = itr_used + 1;
 			if (verbose)
 			{
 				cout << "  K   = " << k << "  Residual = " << rho << "\n";
 			}
 			if (rho <= rho_tol && rho <= tol_abs)
 			{
 				break;
 			}
 		}
 		// Back substitution on the triangularised H gives the coefficients y.
 		m_y[k_copy] = m_g[k_copy] / m_h[k_copy][k_copy];
 		for (int i = k_copy - 1; 0 <= i; i--)
 		{
 			double tmp = m_g[i];
 			for (int j = i + 1; j < k_copy + 1; j++)
 			{
 				tmp -= m_h[i][j] * m_y[j];
 			}
 			m_y[i] = tmp / m_h[i][i];
 		}
 		// x += V * y
 		for (int i = 0; i < n; i++)
 		{
 			double tmp = 0.0;
 			for (int j = 0; j < k_copy + 1; j++)
 			{
 				tmp += m_v[j][i] * m_y[j];
 			}
 			x[i] += tmp;
 		}
 		if (rho <= rho_tol && rho <= tol_abs)
 		{
 			break;
 		}
 	}
 	if (verbose)
 	{
 		cout << "\n";
 		cout << "PMGMRES_ILU_CR:\n";
 		cout << "  Iterations = " << itr_used << "\n";
 		cout << "  Final residual = " << rho << "\n";
 	}
 	//if ( rho >= tol_abs )
 //	  printf("missed!\n");
 	// No inner step was taken (nothing to do or already solved).
 	if (itr_used == 0)
 		return 0;
 	return (itr_used - 1) * mr + k_copy;
 }
 //****************************************************************************80
 // Dot product of two vectors with m_n entries each, accumulated from the
 // first entry to the last.
 double gmres_t::r8vec_dot (const double a1[], const double a2[] )
 {
 	double sum = 0.0;
 	const double *lhs = a1;
 	const double *rhs = a2;
 	for (int left = m_n; left > 0; --left)
 	{
 		sum = sum + (*lhs) * (*rhs);
 		++lhs;
 		++rhs;
 	}
 	return sum;
 }
 // Sum of the squares of the m_n entries of a vector (its dot product with
 // itself), accumulated from the first entry to the last.
 double gmres_t::r8vec_dot2 (const double a1[])
 {
 	double sum = 0.0;
 	const double *cur = a1;
 	for (int left = m_n; left > 0; --left)
 	{
 		sum = sum + (*cur) * (*cur);
 		++cur;
 	}
 	return sum;
 }
 //****************************************************************************80
 //****************************************************************************80
 void gmres_t::rearrange_cr (const int nz_num, int ia[], int ja[], double a[] )
 //****************************************************************************80
 //
 //  Purpose:
 //
 //    REARRANGE_CR sorts a sparse compressed row matrix.
 //
 //  Discussion:
 //
 //    This routine guarantees that the entries in the CR matrix
 //    are properly sorted.
 //
 //    After the sorting, the entries of the matrix are rearranged in such
 //    a way that the entries of each row are listed in ascending order
 //    of their column values.
 //
 //    The matrix A is assumed to be stored in compressed row format.  Only
 //    the nonzero entries of A are stored.  The vector JA stores the
 //    column index of the nonzero value.  The nonzero values are sorted
 //    by row, and the compressed row vector IA then has the property that
 //    the entries in A and JA that correspond to row I occur in indices
 //    IA[I] through IA[I+1]-1.
 //
 //    Each row is bubble sorted.  A row is finished as soon as a pass
 //    makes no exchange, so rows that are already in order cost a single
 //    pass.
 //
 //    The order of the system is read from the member m_n.
 //
 //  Licensing:
 //
 //    This code is distributed under the GNU LGPL license.
 //
 //  Modified:
 //
 //    18 July 2007
 //
 //  Author:
 //
 //    Original C version by Lili Ju.
 //    C++ version by John Burkardt.
 //
 //  Parameters:
 //
 //    Input, int NZ_NUM, the number of nonzeros.  Not used by this routine.
 //
 //    Input, int IA[m_n+1], the compressed row index.
 //
 //    Input/output, int JA[NZ_NUM], the column indices.  On output,
 //    the order of the entries of JA may have changed because of the sorting.
 //
 //    Input/output, double A[NZ_NUM], the matrix values.  On output, the
 //    order of the entries may have changed because of the sorting.
 //
 {
 	const int n = m_n;
 	for (int i = 0; i < n; i++)
 	{
 		const int j1 = ia[i];
 		const int j2 = ia[i + 1];
 		const int is = j2 - j1;
 		for (int k = 1; k < is; k++)
 		{
 			// Pass k moves the largest remaining column index to j2 - k.
 			bool swapped = false;
 			for (int j = j1; j < j2 - k; j++)
 			{
 				if (ja[j + 1] < ja[j])
 				{
 					const int itemp = ja[j + 1];
 					ja[j + 1] = ja[j];
 					ja[j] = itemp;
 					const double dtemp = a[j + 1];
 					a[j + 1] = a[j];
 					a[j] = dtemp;
 					swapped = true;
 				}
 			}
 			// No exchange means the remaining part is already in order.
 			if (!swapped)
 				break;
 		}
 	}
 }
 //****************************************************************************80
--- a/src/emu/netlist/analog/mgmres.hpp
+++ b/src/emu/netlist/analog/mgmres.hpp
@ -0,0 +1,43 @@
 #ifndef __MGMRES
 #define __MGMRES
 // Restarted GMRES solver with ILU preconditioning for sparse matrices in
 // compressed row storage.  An instance owns all work arrays for a system
 // of a fixed order.
 class gmres_t
 {
 public:
 	// Allocates the work arrays for a system of order n.
 	gmres_t(const int n);
 	~gmres_t();
 	// Improves the approximation x of the solution of A*x = rhs; see the
 	// definition for the meaning of the parameters and the return value.
 	int pmgmres_ilu_cr (const int nz_num, int ia[], int ja[], double a[],
 	  double x[], const double rhs[], const int itr_max, const int mr, const double tol_abs,
 	  const double tol_rel );
 private:
 	// The object owns raw arrays that the destructor releases; copying it
 	// would release them twice.  Declared but intentionally not defined.
 	gmres_t(const gmres_t &);
 	gmres_t &operator=(const gmres_t &);
 	// Stores the position of each row's diagonal entry in m_ua.
 	void diagonal_pointer_cr(const int nz_num, const int ia[], const int ja[]);
 	// Sorts the entries of every row by ascending column index.
 	void rearrange_cr (const int nz_num, int ia[], int ja[], double a[] );
 	// Dot product of two vectors of length m_n.
 	inline double r8vec_dot (const double a1[], const double a2[] );
 	// Dot product of a vector of length m_n with itself.
 	inline double r8vec_dot2 (const double a1[]);
 	// w = A * x
 	void ax_cr(const int * RESTRICT ia, const int * RESTRICT ja, const double * RESTRICT a,
 			const double * RESTRICT x, double * RESTRICT w);
 	// Applies the ILU preconditioner to r in place.
 	void lus_cr (const int * RESTRICT ia, const int * RESTRICT ja, double * RESTRICT r);
 	// Computes the ILU factors of A into m_l.
 	void ilu_cr (const int nz_num, const int ia[], const int ja[], const double a[]);
 	const int m_n;           // order of the system
 	double * RESTRICT m_c;   // cosines of the Givens rotations
 	double * RESTRICT m_g;   // right hand side of the least squares problem
 	double ** RESTRICT m_h;  // Hessenberg matrix, indexed [row][column]
 	double * RESTRICT m_l;   // values of the ILU factors
 	double * RESTRICT m_r;   // residual vector
 	double * RESTRICT m_s;   // sines of the Givens rotations
 	double ** RESTRICT m_v;  // Krylov basis, indexed [vector][component]
 	int * RESTRICT m_ua;     // position of the diagonal entry of each row
 	double * RESTRICT m_y;   // scratch column / least squares solution
 };
 #endif
--- a/src/emu/netlist/analog/nld_solver.c
+++ b/src/emu/netlist/analog/nld_solver.c
@ -11,8 +11,8 @@
 #if 0
 #pragma GCC optimize "-ffast-math"
-#pragma GCC optimize "-ftree-parallelize-loops=4"
+//#pragma GCC optimize "-ftree-parallelize-loops=4"
-//#pragma GCC optimize "-funroll-loops"
+#pragma GCC optimize "-funroll-loops"
 #pragma GCC optimize "-funswitch-loops"
 #pragma GCC optimize "-fvariable-expansion-in-unroller"
 #pragma GCC optimize "-funsafe-loop-optimizations"
@ -32,6 +32,7 @@
 #include "nld_ms_direct2.h"
 #include "nld_ms_sor.h"
 #include "nld_ms_sor_mat.h"
 #include "nld_ms_gmres.h"
 #include "nld_twoterm.h"
 #include "../nl_lists.h"
@ -410,6 +411,7 @@ netlist_matrix_solver_t * NETLIB_NAME(solver)::create_solver(int size, const int
 			else
 			{
 				typedef netlist_matrix_solver_SOR_t<m_N,_storage_N> solver_GS;
 				//typedef netlist_matrix_solver_GMRES_t<m_N,_storage_N> solver_GS;
 				return palloc(solver_GS, &m_params, size);
 			}
 		}
@ -423,7 +425,7 @@ netlist_matrix_solver_t * NETLIB_NAME(solver)::create_solver(int size, const int
 ATTR_COLD void NETLIB_NAME(solver)::post_start()
 {
-	netlist_analog_net_t::list_t groups[100];
+	netlist_analog_net_t::list_t groups[256];
 	int cur_group = -1;
 	const int gs_threshold = m_gs_threshold.Value();
 	const bool use_specific = true;
@ -559,3 +561,5 @@ ATTR_COLD void NETLIB_NAME(solver)::post_start()
 		}
 	}
 }
 #include "mgmres.cpp"