Completely integrated the GMRES solver into netlist solver templates and

recoded it from scratch. GMRES now runs at 122% (kidniki), that's a real
improement from 60% before. AT the same time, the code should be easier
to read and closer to the GMRES algorithm. Ultimately, kidniki will not
use this solver but instead use some frontiers to keep it playable. But
going forward, for larger matrices this solver is an opton. [Couriersud]
This commit is contained in:
couriersud 2015-06-13 23:19:27 +02:00
parent 06e3aa2324
commit 57ce24e0df
6 changed files with 519 additions and 1003 deletions

View File

@ -0,0 +1,135 @@
// license:GPL-2.0+
// copyright-holders:Couriersud
/*
* mat_cr.h
*
* Compressed row format matrices
*
*/
#ifndef MAT_CR_H_
#define MAT_CR_H_
#include <algorithm>
#include "../plib/pconfig.h"
template<int _storage_N>
struct mat_cr_t
{
unsigned nz_num;
unsigned ia[_storage_N + 1];
unsigned ja[_storage_N * _storage_N];
unsigned diag[_storage_N]; /* n */
void mult_vec(const double * RESTRICT A, const double * RESTRICT x, double * RESTRICT res)
{
/*
* res = A * x
*/
unsigned i = 0;
unsigned k = 0;
const unsigned oe = nz_num;
while (k < oe)
{
double tmp = 0.0;
const unsigned e = ia[i+1];
for (; k < e; k++)
tmp += A[k] * x[ja[k]];
res[i++] = tmp;
}
}
void incomplete_LU_factorization(const double * RESTRICT A, double * RESTRICT LU)
{
/*
* incomplete LU Factorization according to http://de.wikipedia.org/wiki/ILU-Zerlegung
*
* Result is stored in matrix LU
*
*/
const unsigned lnz = nz_num;
for (unsigned k = 0; k < lnz; k++)
LU[k] = A[k];
for (unsigned i = 1; ia[i] < lnz; i++) // row i
{
const unsigned iai1 = ia[i + 1];
for (unsigned pk = ia[i]; pk < diag[i]; pk++) // all columns left of diag in row i
{
// pk == (i, k)
const unsigned k = ja[pk];
const unsigned iak1 = ia[k + 1];
const double LUpk = LU[pk] = LU[pk] / LU[diag[k]];
unsigned pt = ia[k];
for (unsigned pj = pk + 1; pj < iai1; pj++) // pj = (i, j)
{
// we can assume that within a row ja increases continuously */
const unsigned ej = ja[pj];
while (ja[pt] < ej && pt < iak1)
pt++;
if (pt < iak1 && ja[pt] == ej)
LU[pj] = LU[pj] - LUpk * LU[pt];
}
}
}
}
void solveLUx (const double * RESTRICT LU, double * RESTRICT r)
{
/*
* Solve a linear equation Ax = r
* where
* A = L*U
*
* L unit lower triangular
* U upper triangular
*
* ==> LUx = r
*
* ==> Ux = L¹r = w
*
* ==> r = Lw
*
* This can be solved for w using backwards elimination in L.
*
* Now Ux = w
*
* This can be solved for x using backwards elimination in U.
*
*/
unsigned i;
for (i = 1; ia[i] < nz_num; i++ )
{
double tmp = 0.0;
const unsigned j1 = ia[i];
const unsigned j2 = diag[i];
for (unsigned j = j1; j < j2; j++ )
tmp += LU[j] * r[ja[j]];
r[i] -= tmp;
}
// i now is equal to n;
for (; 0 < i; i-- )
{
const unsigned im1 = i - 1;
double tmp = 0.0;
const unsigned j1 = diag[im1] + 1;
const unsigned j2 = ia[im1+1];
for (int j = j1; j < j2; j++ )
tmp += LU[j] * r[ja[j]];
r[im1] = (r[im1] - tmp) / LU[diag[im1]];
}
}
};
#endif /* MAT_CR_H_ */

View File

@ -1,912 +0,0 @@
# include <cstdlib>
# include <iostream>
# include <fstream>
# include <iomanip>
# include <cmath>
# include <ctime>
# include <cstdio>
using namespace std;
#include "mgmres.hpp"
//****************************************************************************80
// http://people.sc.fsu.edu/~jburkardt/cpp_src/mgmres/mgmres.html
//****************************************************************************80
void gmres_t::ax_cr(const int * RESTRICT ia, const int * RESTRICT ja, const double * RESTRICT a,
const double * RESTRICT x, double * RESTRICT w)
//****************************************************************************80
//
// Purpose:
//
// AX_CR computes A*x for a matrix stored in sparse compressed row form.
//
// Discussion:
//
// The Sparse Compressed Row storage format is used.
//
// The matrix A is assumed to be sparse. To save on storage, only
// the nonzero entries of A are stored. The vector JA stores the
// column index of the nonzero value. The nonzero values are sorted
// by row, and the compressed row vector IA then has the property that
// the entries in A and JA that correspond to row I occur in indices
// IA[I] through IA[I+1]-1.
//
// For this version of MGMRES, the row and column indices are assumed
// to use the C/C++ convention, in which indexing begins at 0.
//
// If your index vectors IA and JA are set up so that indexing is based
// at 1, then each use of those vectors should be shifted down by 1.
//
// Licensing:
//
// This code is distributed under the GNU LGPL license.
//
// Modified:
//
// 18 July 2007
//
// Author:
//
// Original C version by Lili Ju.
// C++ version by John Burkardt.
//
// Reference:
//
// Richard Barrett, Michael Berry, Tony Chan, James Demmel,
// June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo,
// Charles Romine, Henk van der Vorst,
// Templates for the Solution of Linear Systems:
// Building Blocks for Iterative Methods,
// SIAM, 1994,
// ISBN: 0898714710,
// LC: QA297.8.T45.
//
// Tim Kelley,
// Iterative Methods for Linear and Nonlinear Equations,
// SIAM, 2004,
// ISBN: 0898713528,
// LC: QA297.8.K45.
//
// Yousef Saad,
// Iterative Methods for Sparse Linear Systems,
// Second Edition,
// SIAM, 2003,
// ISBN: 0898715342,
// LC: QA188.S17.
//
// Parameters:
//
// Input, int N, the order of the system.
//
// Input, int NZ_NUM, the number of nonzeros.
//
// Input, int IA[N+1], JA[NZ_NUM], the row and column indices
// of the matrix values. The row vector has been compressed.
//
// Input, double A[NZ_NUM], the matrix values.
//
// Input, double X[N], the vector to be multiplied by A.
//
// Output, double W[N], the value of A*X.
//
{
const int n = m_n;
for ( int i = 0; i < n; i++ )
{
double tmp = 0.0;
int k1 = ia[i];
int k2 = ia[i+1];
for (int k = k1; k < k2; k++ )
{
tmp += a[k] * x[ja[k]];
}
w[i] = tmp;
}
return;
}
//****************************************************************************80
void gmres_t::diagonal_pointer_cr (const int nz_num, const int ia[], const int ja[])
//****************************************************************************80
//
// Purpose:
//
// DIAGONAL_POINTER_CR finds diagonal entries in a sparse compressed row matrix.
//
// Discussion:
//
// The matrix A is assumed to be stored in compressed row format. Only
// the nonzero entries of A are stored. The vector JA stores the
// column index of the nonzero value. The nonzero values are sorted
// by row, and the compressed row vector IA then has the property that
// the entries in A and JA that correspond to row I occur in indices
// IA[I] through IA[I+1]-1.
//
// The array UA can be used to locate the diagonal elements of the matrix.
//
// It is assumed that every row of the matrix includes a diagonal element,
// and that the elements of each row have been ascending sorted.
//
// Licensing:
//
// This code is distributed under the GNU LGPL license.
//
// Modified:
//
// 18 July 2007
//
// Author:
//
// Original C version by Lili Ju.
// C++ version by John Burkardt.
//
// Parameters:
//
// Input, int N, the order of the system.
//
// Input, int NZ_NUM, the number of nonzeros.
//
// Input, int IA[N+1], JA[NZ_NUM], the row and column indices
// of the matrix values. The row vector has been compressed. On output,
// the order of the entries of JA may have changed because of the sorting.
//
// Output, int UA[N], the index of the diagonal element of each row.
//
{
const int n = m_n;
for (int i = 0; i < n; i++)
{
m_ua[i] = -1;
const int j1 = ia[i];
const int j2 = ia[i + 1];
for (int j = j1; j < j2; j++)
{
if (ja[j] == i)
{
m_ua[i] = j;
}
}
}
return;
}
//****************************************************************************80
void gmres_t::ilu_cr (const int nz_num, const int * RESTRICT ia, const int * RESTRICT ja, const double * RESTRICT a)
//****************************************************************************80
//
// Purpose:
//
// ILU_CR computes the incomplete LU factorization of a matrix.
//
// Discussion:
//
// The matrix A is assumed to be stored in compressed row format. Only
// the nonzero entries of A are stored. The vector JA stores the
// column index of the nonzero value. The nonzero values are sorted
// by row, and the compressed row vector IA then has the property that
// the entries in A and JA that correspond to row I occur in indices
// IA[I] through IA[I+1]-1.
//
// Licensing:
//
// This code is distributed under the GNU LGPL license.
//
// Modified:
//
// 25 July 2007
//
// Author:
//
// Original C version by Lili Ju.
// C++ version by John Burkardt.
//
// Parameters:
//
// Input, int N, the order of the system.
//
// Input, int NZ_NUM, the number of nonzeros.
//
// Input, int IA[N+1], JA[NZ_NUM], the row and column indices
// of the matrix values. The row vector has been compressed.
//
// Input, double A[NZ_NUM], the matrix values.
//
// Input, int UA[N], the index of the diagonal element of each row.
//
// Output, double L[NZ_NUM], the ILU factorization of A.
//
{
int *iw;
int i;
int j;
int jj;
int jrow;
int jw;
int k;
double tl;
const int n = m_n;
iw = new int[n];
//
// Copy A.
//
for (k = 0; k < nz_num; k++)
{
m_l[k] = a[k];
}
for (i = 0; i < n; i++)
{
//
// IW points to the nonzero entries in row I.
//
for (j = 0; j < n; j++)
{
iw[j] = -1;
}
for (k = ia[i]; k <= ia[i + 1] - 1; k++)
{
iw[ja[k]] = k;
}
j = ia[i];
do
{
jrow = ja[j];
if (i <= jrow)
{
break;
}
tl = m_l[j] * m_l[m_ua[jrow]];
m_l[j] = tl;
for (jj = m_ua[jrow] + 1; jj <= ia[jrow + 1] - 1; jj++)
{
jw = iw[ja[jj]];
if (jw != -1)
{
m_l[jw] = m_l[jw] - tl * m_l[jj];
}
}
j = j + 1;
} while (j <= ia[i + 1] - 1);
m_ua[i] = j;
if (jrow != i)
{
cout << "\n";
cout << "ILU_CR - Fatal error!\n";
cout << " JROW != I\n";
cout << " JROW = " << jrow << "\n";
cout << " I = " << i << "\n";
exit(1);
}
if (m_l[j] == 0.0)
{
cout << "\n";
cout << "ILU_CR - Fatal error!\n";
cout << " Zero pivot on step I = " << i << "\n";
cout << " L[" << j << "] = 0.0\n";
exit(1);
}
m_l[j] = 1.0 / m_l[j];
}
for (k = 0; k < n; k++)
{
m_l[m_ua[k]] = 1.0 / m_l[m_ua[k]];
}
delete[] iw;
}
//****************************************************************************80
void gmres_t::lus_cr (const int * RESTRICT ia, const int * RESTRICT ja, double * RESTRICT r)
//****************************************************************************80
//
// Purpose:
//
// LUS_CR applies the incomplete LU preconditioner.
//
// Discussion:
//
// The linear system M * Z = R is solved for Z. M is the incomplete
// LU preconditioner matrix, and R is a vector supplied by the user.
// So essentially, we're solving L * U * Z = R.
//
// The matrix A is assumed to be stored in compressed row format. Only
// the nonzero entries of A are stored. The vector JA stores the
// column index of the nonzero value. The nonzero values are sorted
// by row, and the compressed row vector IA then has the property that
// the entries in A and JA that correspond to row I occur in indices
// IA[I] through IA[I+1]-1.
//
// Licensing:
//
// This code is distributed under the GNU LGPL license.
//
// Modified:
//
// 18 July 2007
//
// Author:
//
// Original C version by Lili Ju.
// C++ version by John Burkardt.
//
// Parameters:
//
// Input, int N, the order of the system.
//
// Input, int NZ_NUM, the number of nonzeros.
//
// Input, int IA[N+1], JA[NZ_NUM], the row and column indices
// of the matrix values. The row vector has been compressed.
//
// Input, double L[NZ_NUM], the matrix values.
//
// Input, int UA[N], the index of the diagonal element of each row.
//
// Input, double R[N], the right hand side.
//
// Output, double Z[N], the solution of the system M * Z = R.
//
{
const int n = m_n;
//
// Solve L * w = w where L is unit lower triangular.
//
for (int i = 1; i < n; i++ )
{
double tmp = 0.0;
for (int j = ia[i]; j < m_ua[i]; j++ )
{
tmp += m_l[j] * r[ja[j]];
}
r[i] -= tmp;
}
//
// Solve U * w = w, where U is upper triangular.
//
for (int i = n - 1; 0 <= i; i-- )
{
double tmp = 0.0;
for (int j = m_ua[i] + 1; j < ia[i+1]; j++ )
{
tmp += m_l[j] * r[ja[j]];
}
r[i] = (r[i] - tmp) / m_l[m_ua[i]];
}
return;
}
//****************************************************************************80
static inline void mult_givens ( const double c, const double s, const int k, double g[] )
//****************************************************************************80
//
// Purpose:
//
// MULT_GIVENS applies a Givens rotation to two successive entries of a vector.
//
// Licensing:
//
// This code is distributed under the GNU LGPL license.
//
// Modified:
//
// 08 August 2006
//
// Author:
//
// Original C version by Lili Ju.
// C++ version by John Burkardt.
//
// Reference:
//
// Richard Barrett, Michael Berry, Tony Chan, James Demmel,
// June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo,
// Charles Romine, Henk van der Vorst,
// Templates for the Solution of Linear Systems:
// Building Blocks for Iterative Methods,
// SIAM, 1994,
// ISBN: 0898714710,
// LC: QA297.8.T45.
//
// Tim Kelley,
// Iterative Methods for Linear and Nonlinear Equations,
// SIAM, 2004,
// ISBN: 0898713528,
// LC: QA297.8.K45.
//
// Yousef Saad,
// Iterative Methods for Sparse Linear Systems,
// Second Edition,
// SIAM, 2003,
// ISBN: 0898715342,
// LC: QA188.S17.
//
// Parameters:
//
// Input, double C, S, the cosine and sine of a Givens
// rotation.
//
// Input, int K, indicates the location of the first vector entry.
//
// Input/output, double G[K+2], the vector to be modified. On output,
// the Givens rotation has been applied to entries G(K) and G(K+1).
//
{
double g1;
double g2;
g1 = c * g[k] - s * g[k+1];
g2 = s * g[k] + c * g[k+1];
g[k] = g1;
g[k+1] = g2;
return;
}
//****************************************************************************80
gmres_t::gmres_t(const int n)
: m_n(n)
{
int mr=n; /* FIXME: maximum iterations locked in here */
m_c = new double[mr+1];
m_g = new double[mr+1];
m_h = new double *[mr];
for (int i = 0; i < mr; i++)
m_h[i] = new double[mr+1];
/* This is using too much memory, but we are interested in speed for now*/
m_l = new double[n*n]; //[ia[n]+1];
m_r = new double[n];
m_s = new double[mr+1];
m_ua = new int[n];
m_v = new double *[n];
for (int i = 0; i < n; i++)
m_v[i] = new double[mr+1];
m_y = new double[mr+1];
}
gmres_t::~gmres_t()
{
delete [] m_c;
delete [] m_g;
delete [] m_h;
delete [] m_l;
delete [] m_r;
delete [] m_s;
delete [] m_ua;
delete [] m_v;
delete [] m_y;
}
int gmres_t::pmgmres_ilu_cr (const int nz_num, int ia[], int ja[], double * RESTRICT a,
double * RESTRICT x, const double * RESTRICT rhs, const int itr_max, const int mr, const double tol_abs,
const double tol_rel )
//****************************************************************************80
//
// Purpose:
//
// PMGMRES_ILU_CR applies the preconditioned restarted GMRES algorithm.
//
// Discussion:
//
// The matrix A is assumed to be stored in compressed row format. Only
// the nonzero entries of A are stored. The vector JA stores the
// column index of the nonzero value. The nonzero values are sorted
// by row, and the compressed row vector IA then has the property that
// the entries in A and JA that correspond to row I occur in indices
// IA[I] through IA[I+1]-1.
//
// This routine uses the incomplete LU decomposition for the
// preconditioning. This preconditioner requires that the sparse
// matrix data structure supplies a storage position for each diagonal
// element of the matrix A, and that each diagonal element of the
// matrix A is not zero.
//
// Thanks to Jesus Pueblas Sanchez-Guerra for supplying two
// corrections to the code on 31 May 2007.
//
//
// This implementation of the code stores the doubly-dimensioned arrays
// H and V as vectors. However, it follows the C convention of storing
// them by rows, rather than my own preference for storing them by
// columns. I may come back and change this some time.
//
// Licensing:
//
// This code is distributed under the GNU LGPL license.
//
// Modified:
//
// 26 July 2007
//
// Author:
//
// Original C version by Lili Ju.
// C++ version by John Burkardt.
//
// Reference:
//
// Richard Barrett, Michael Berry, Tony Chan, James Demmel,
// June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo,
// Charles Romine, Henk van der Vorst,
// Templates for the Solution of Linear Systems:
// Building Blocks for Iterative Methods,
// SIAM, 1994.
// ISBN: 0898714710,
// LC: QA297.8.T45.
//
// Tim Kelley,
// Iterative Methods for Linear and Nonlinear Equations,
// SIAM, 2004,
// ISBN: 0898713528,
// LC: QA297.8.K45.
//
// Yousef Saad,
// Iterative Methods for Sparse Linear Systems,
// Second Edition,
// SIAM, 2003,
// ISBN: 0898715342,
// LC: QA188.S17.
//
// Parameters:
//
// Input, int N, the order of the linear system.
//
// Input, int NZ_NUM, the number of nonzero matrix values.
//
// Input, int IA[N+1], JA[NZ_NUM], the row and column indices
// of the matrix values. The row vector has been compressed.
//
// Input, double A[NZ_NUM], the matrix values.
//
// Input/output, double X[N]; on input, an approximation to
// the solution. On output, an improved approximation.
//
// Input, double RHS[N], the right hand side of the linear system.
//
// Input, int ITR_MAX, the maximum number of (outer) iterations to take.
//
// Input, int MR, the maximum number of (inner) iterations to take.
// MR must be less than N.
//
// Input, double TOL_ABS, an absolute tolerance applied to the
// current residual.
//
// Input, double TOL_REL, a relative tolerance comparing the
// current residual to the initial residual.
//
{
double av;
const double delta = 1.0e-03;
double htmp;
int itr;
int itr_used = 0;
int k_copy = 0;
double mu;
double rho;
double rho_tol = 0;
const int verbose = 0;
const bool pre_ilu = true;
const int n = m_n;
rearrange_cr(nz_num, ia, ja, a);
if (pre_ilu)
{
diagonal_pointer_cr(nz_num, ia, ja);
ilu_cr(nz_num, ia, ja, a);
}
if (verbose)
{
cout << "\n";
cout << "PMGMRES_ILU_CR\n";
cout << " Number of unknowns = " << n << "\n";
}
for (itr = 0; itr < itr_max; itr++)
{
ax_cr(ia, ja, a, x, m_r);
for (int i = 0; i < n; i++)
m_r[i] = rhs[i] - m_r[i];
if (pre_ilu)
lus_cr(ia, ja, m_r);
rho = std::sqrt(r8vec_dot2(m_r));
if (verbose)
cout << " ITR = " << itr << " Residual = " << rho << "\n";
if (itr == 0)
rho_tol = rho * tol_rel;
const double rhoq = 1.0 / rho;
for (int i = 0; i < n; i++)
m_v[0][i] = m_r[i] * rhoq;
for (int i = 0; i < mr + 1; i++)
m_g[i] = 0.0;
m_g[0] = rho;
for (int i = 0; i < mr; i++)
for (int j = 0; j < mr + 1; j++)
m_h[i][j] = 0.0;
for (int k = 0; k < mr; k++)
{
k_copy = k;
const int k1 = k + 1;
ax_cr(ia, ja, a, m_v[k], m_v[k1]);
if (pre_ilu)
lus_cr(ia, ja, m_v[k1]);
av = std::sqrt(r8vec_dot2(m_v[k1]));
for (int j = 0; j <= k; j++)
{
m_h[j][k] = r8vec_dot(m_v[k1], m_v[j]);
for (int i = 0; i < n; i++)
{
m_v[k1][i] -= m_h[j][k] * m_v[j][i];
}
}
m_h[k1][k] = std::sqrt(r8vec_dot2(m_v[k1]));
if ((av + delta * m_h[k1][k]) == av)
{
for (int j = 0; j < k1; j++)
{
htmp = r8vec_dot(m_v[k1], m_v[j]);
m_h[j][k] = m_h[j][k] + htmp;
for (int i = 0; i < n; i++)
{
m_v[k1][i] -= htmp * m_v[j][i];
}
}
m_h[k1][k] = sqrt(r8vec_dot2(m_v[k1]));
}
if (m_h[k1][k] != 0.0)
{
for (int i = 0; i < n; i++)
{
m_v[k1][i] = m_v[k1][i]
/ m_h[k1][k];
}
}
if (0 < k)
{
for (int i = 0; i < k + 2; i++)
{
m_y[i] = m_h[i][k];
}
for (int j = 0; j < k; j++)
{
mult_givens(m_c[j], m_s[j], j, m_y);
}
for (int i = 0; i < k + 2; i++)
{
m_h[i][k] = m_y[i];
}
}
mu = std::sqrt(
m_h[k][k] * m_h[k][k]
+ m_h[k1][k] * m_h[k1][k]);
m_c[k] = m_h[k][k] / mu;
m_s[k] = -m_h[k1][k] / mu;
m_h[k][k] = m_c[k] * m_h[k][k]
- m_s[k] * m_h[k1][k];
m_h[k1][k] = 0.0;
mult_givens(m_c[k], m_s[k], k, m_g);
rho = std::abs(m_g[k1]);
itr_used = itr_used + 1;
if (verbose)
{
cout << " K = " << k << " Residual = " << rho << "\n";
}
if (rho <= rho_tol && rho <= tol_abs)
{
break;
}
}
m_y[k_copy] = m_g[k_copy] / m_h[k_copy][k_copy];
for (int i = k_copy - 1; 0 <= i; i--)
{
double tmp = m_g[i];
for (int j = i + 1; j < k_copy + 1; j++)
{
tmp -= m_h[i][j] * m_y[j];
}
m_y[i] = tmp / m_h[i][i];
}
//double cerr = 0;
for (int i = 0; i < n; i++)
{
double tmp = 0.0;
for (int j = 0; j < k_copy + 1; j++)
{
tmp += m_v[j][i] * m_y[j];
}
//cerr = std::max(std::abs(tmp), cerr);
x[i] += tmp;
}
#if 0
if (cerr < 1e-8)
return 1; //break;
#else
if (rho <= rho_tol && rho <= tol_abs)
{
break;
}
#endif
}
if (verbose)
{
cout << "\n";
;
cout << "PMGMRES_ILU_CR:\n";
cout << " Iterations = " << itr_used << "\n";
cout << " Final residual = " << rho << "\n";
}
//if ( rho >= tol_abs )
// printf("missed!\n");
return (itr_used - 1) * mr + k_copy;
}
//****************************************************************************80
double gmres_t::r8vec_dot (const double * RESTRICT a1, const double * RESTRICT a2 )
{
const int n = m_n;
double value = 0.0;
for ( int i = 0; i < n; i++ )
value = value + a1[i] * a2[i];
return value;
}
double gmres_t::r8vec_dot2 (const double a1[])
{
const int n = m_n;
double value = 0.0;
for ( int i = 0; i < n; i++ )
value = value + a1[i] * a1[i];
return value;
}
//****************************************************************************80
//****************************************************************************80
void gmres_t::rearrange_cr (const int nz_num, int ia[], int ja[], double a[] )
//****************************************************************************80
//
// Purpose:
//
// REARRANGE_CR sorts a sparse compressed row matrix.
//
// Discussion:
//
// This routine guarantees that the entries in the CR matrix
// are properly sorted.
//
// After the sorting, the entries of the matrix are rearranged in such
// a way that the entries of each column are listed in ascending order
// of their column values.
//
// The matrix A is assumed to be stored in compressed row format. Only
// the nonzero entries of A are stored. The vector JA stores the
// column index of the nonzero value. The nonzero values are sorted
// by row, and the compressed row vector IA then has the property that
// the entries in A and JA that correspond to row I occur in indices
// IA[I] through IA[I+1]-1.
//
// Licensing:
//
// This code is distributed under the GNU LGPL license.
//
// Modified:
//
// 18 July 2007
//
// Author:
//
// Original C version by Lili Ju.
// C++ version by John Burkardt.
//
// Parameters:
//
// Input, int N, the order of the system.
//
// Input, int NZ_NUM, the number of nonzeros.
//
// Input, int IA[N+1], the compressed row index.
//
// Input/output, int JA[NZ_NUM], the column indices. On output,
// the order of the entries of JA may have changed because of the sorting.
//
// Input/output, double A[NZ_NUM], the matrix values. On output, the
// order of the entries may have changed because of the sorting.
//
{
double dtemp;
int i;
int is;
int itemp;
int j;
int j1;
int j2;
int k;
const int n = m_n;
for ( i = 0; i < n; i++ )
{
j1 = ia[i];
j2 = ia[i+1];
is = j2 - j1;
for ( k = 1; k < is; k++ )
{
for ( j = j1; j < j2 - k; j++ )
{
if ( ja[j+1] < ja[j] )
{
itemp = ja[j+1];
ja[j+1] = ja[j];
ja[j] = itemp;
dtemp = a[j+1];
a[j+1] = a[j];
a[j] = dtemp;
}
}
}
}
return;
}
//****************************************************************************80

View File

@ -1,43 +0,0 @@
#ifndef __MGMRES
#define __MGMRES
class gmres_t
{
public:
gmres_t(const int n);
~gmres_t();
int pmgmres_ilu_cr (const int nz_num, int ia[], int ja[], double * RESTRICT a,
double * RESTRICT x, const double * RESTRICT rhs, const int itr_max, const int mr, const double tol_abs,
const double tol_rel );
private:
void diagonal_pointer_cr(const int nz_num, const int ia[], const int ja[]);
void rearrange_cr (const int nz_num, int ia[], int ja[], double a[] );
inline double r8vec_dot (const double * RESTRICT a1, const double * RESTRICT a2 );
inline double r8vec_dot2 (const double a1[]);
void ax_cr(const int * RESTRICT ia, const int * RESTRICT ja, const double * RESTRICT a,
const double * RESTRICT x, double * RESTRICT w);
void lus_cr (const int * RESTRICT ia, const int * RESTRICT ja, double * RESTRICT r);
void ilu_cr (const int nz_num, const int * RESTRICT ia, const int * RESTRICT ja, const double * RESTRICT a);
const int m_n;
double * RESTRICT m_c;
double * RESTRICT m_g;
double ** RESTRICT m_h;
double * RESTRICT m_l;
double * RESTRICT m_r;
double * RESTRICT m_s;
double ** RESTRICT m_v;
int * RESTRICT m_ua;
double * RESTRICT m_y;
};
#endif

View File

@ -17,7 +17,8 @@
#include "nld_solver.h"
#include "nld_ms_direct.h"
#include "mgmres.hpp"
#include "mat_cr.h"
#include "vector_base.h"
NETLIB_NAMESPACE_DEVICES_START()
@ -28,13 +29,31 @@ public:
matrix_solver_GMRES_t(const solver_parameters_t *params, int size)
: matrix_solver_direct_t<m_N, _storage_N>(matrix_solver_t::GAUSS_SEIDEL, params, size)
, m_gmres(m_N ? m_N : size)
, m_use_iLU_preconditioning(true)
, m_use_more_precise_stop_condition(false)
, m_gs_fail(0)
, m_gs_total(0)
{
int mr=this->N(); /* FIXME: maximum iterations locked in here */
for (int i = 0; i < mr; i++)
m_ht[i] = new double[mr + 1];
for (int i = 0; i < this->N(); i++)
m_v[i] = new double[_storage_N];
}
virtual ~matrix_solver_GMRES_t() {}
virtual ~matrix_solver_GMRES_t()
{
int mr=this->N(); /* FIXME: maximum iterations locked in here */
for (int i = 0; i < mr; i++)
delete[] m_ht[i];
for (int i = 0; i < this->N(); i++)
delete[] m_v[i];
}
virtual void log_stats();
@ -44,7 +63,26 @@ protected:
ATTR_HOT virtual nl_double vsolve();
private:
gmres_t m_gmres;
int solve_ilu_gmres(double * RESTRICT x, double * RESTRICT rhs, const unsigned restart_max, const unsigned mr, double accuracy);
plist_t<int> m_term_cr[_storage_N];
bool m_use_iLU_preconditioning;
bool m_use_more_precise_stop_condition;
mat_cr_t<_storage_N> mat;
double m_A[_storage_N * _storage_N];
double m_LU[_storage_N * _storage_N];
double m_c[_storage_N + 1]; /* mr + 1 */
double m_g[_storage_N + 1]; /* mr + 1 */
double * RESTRICT m_ht[_storage_N]; /* mr, (mr + 1) */
double m_s[_storage_N]; /* mr + 1 */
double * RESTRICT m_v[_storage_N + 1]; /*(mr + 1), n */
//double m_y[_storage_N]; /* mr + 1 */
int m_gs_fail;
int m_gs_total;
};
@ -79,6 +117,39 @@ void matrix_solver_GMRES_t<m_N, _storage_N>::vsetup(analog_net_t::list_t &nets)
matrix_solver_direct_t<m_N, _storage_N>::vsetup(nets);
this->save(NLNAME(m_gs_fail));
this->save(NLNAME(m_gs_total));
int nz = 0;
const int iN = this->N();
for (unsigned k=0; k<iN; k++)
{
terms_t * RESTRICT row = this->m_terms[k];
mat.ia[k] = nz;
for (unsigned j=0; j<row->m_nz.size(); j++)
{
mat.ja[nz] = row->m_nz[j];
if (row->m_nz[j] == k)
mat.diag[k] = nz;
nz++;
}
/* build pointers into the compressed row format matrix for each terminal */
for (int j=0; j< this->m_terms[k]->m_railstart;j++)
{
for (int i = mat.ia[k]; i<nz; i++)
if (this->m_terms[k]->net_other()[j] == mat.ja[i])
{
m_term_cr[k].add(i);
break;
}
nl_assert(m_term_cr[k].size() == this->m_terms[k]->m_railstart);
}
}
mat.ia[iN] = nz;
mat.nz_num = nz;
}
template <unsigned m_N, unsigned _storage_N>
@ -101,14 +172,14 @@ ATTR_HOT inline int matrix_solver_GMRES_t<m_N, _storage_N>::vsolve_non_dynamic(c
* omega = 2.0 / (1.0 + nl_math::sqrt(1-rho))
*/
int nz_num = 0;
int ia[_storage_N + 1];
int ja[_storage_N * _storage_N];
double a[_storage_N * _storage_N];
//nz_num = 0;
ATTR_ALIGN nl_double RHS[_storage_N];
ATTR_ALIGN nl_double new_V[_storage_N];
ATTR_ALIGN nl_double l_V[_storage_N];
for (int i=0, e=mat.nz_num; i<e; i++)
m_A[i] = 0.0;
for (int k = 0; k < iN; k++)
{
nl_double gtot_t = 0.0;
@ -119,60 +190,49 @@ ATTR_HOT inline int matrix_solver_GMRES_t<m_N, _storage_N>::vsolve_non_dynamic(c
const nl_double * const RESTRICT gt = this->m_terms[k]->gt();
const nl_double * const RESTRICT go = this->m_terms[k]->go();
const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr();
const nl_double * const *other_cur_analog = this->m_terms[k]->other_curanalog();
const int * RESTRICT net_other = this->m_terms[k]->net_other();
const nl_double * const * RESTRICT other_cur_analog = this->m_terms[k]->other_curanalog();
ia[k] = nz_num;
l_V[k] = new_V[k] = this->m_nets[k]->m_cur_Analog;
for (unsigned i = 0; i < term_count; i++)
{
gtot_t = gtot_t + gt[i];
RHS_t = RHS_t + Idr[i];
}
for (int i = this->m_terms[k]->m_railstart; i < term_count; i++)
for (unsigned i = railstart; i < term_count; i++)
RHS_t = RHS_t + go[i] * *other_cur_analog[i];
RHS[k] = RHS_t;
// add diagonal element
a[nz_num] = gtot_t;
ja[nz_num] = k;
nz_num++;
m_A[mat.diag[k]] = gtot_t;
int n=0;
for (int i = 0; i < railstart; i++)
for (unsigned i = 0; i < railstart; i++)
{
int j;
for (j = 0; j < n; j++)
{
if (net_other[i] == ja[nz_num+j])
{
a[nz_num+j] -= go[i];
break;
}
}
if (j>=n)
{
a[nz_num+n] = -go[i];
ja[nz_num+n] = net_other[i];
n++;
}
const unsigned pi = m_term_cr[k][i];
m_A[pi] -= go[i];
}
nz_num += n;
}
ia[iN] = nz_num;
mat.ia[iN] = mat.nz_num;
const nl_double accuracy = this->m_params.m_accuracy;
int gsl = m_gmres.pmgmres_ilu_cr(nz_num, ia, ja, a, new_V, RHS, 12, std::min(iN-1,10), accuracy * (double) (iN), 1e6);
#if 1
int mr = std::min(iN-1,(int) sqrt(iN));
int iter = 4;
int gsl = solve_ilu_gmres(new_V, RHS, iter, mr, accuracy * 2.0); // * (double) (iN));
int failed = mr * iter;
#else
int failed = 6;
//int gsl = tt_ilu_cr(new_V, RHS, failed, accuracy);
int gsl = tt_gs_cr(new_V, RHS, failed, accuracy);
#endif
m_gs_total += gsl;
this->m_stat_calculations++;
if (gsl>119)
if (gsl>=failed)
{
for (int k = 0; k < iN; k++)
this->m_nets[k]->m_cur_Analog = new_V[k];
//for (int k = 0; k < iN; k++)
// this->m_nets[k]->m_cur_Analog = new_V[k];
// Fallback to direct solver ...
this->m_gs_fail++;
return matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic(newton_raphson);
@ -181,11 +241,11 @@ ATTR_HOT inline int matrix_solver_GMRES_t<m_N, _storage_N>::vsolve_non_dynamic(c
if (newton_raphson)
{
double err = 0;
for (int k = 0; k < iN; k++)
for (unsigned k = 0; k < iN; k++)
err = std::max(nl_math::abs(l_V[k] - new_V[k]), err);
//printf("here %s\n", this->name().cstr());
for (int k = 0; k < iN; k++)
for (unsigned k = 0; k < iN; k++)
this->m_nets[k]->m_cur_Analog += 1.0 * (new_V[k] - this->m_nets[k]->m_cur_Analog);
if (err > accuracy)
return 2;
@ -200,6 +260,177 @@ ATTR_HOT inline int matrix_solver_GMRES_t<m_N, _storage_N>::vsolve_non_dynamic(c
}
}
static inline void givens_mult( const double c, const double s, double * RESTRICT g0, double * RESTRICT g1 )
{
const double tg0 = c * *g0 - s * *g1;
const double tg1 = s * *g0 + c * *g1;
*g0 = tg0;
*g1 = tg1;
}
template <unsigned m_N, unsigned _storage_N>
int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (double * RESTRICT x, double * RESTRICT rhs, const unsigned restart_max, const unsigned mr, double accuracy)
{
/*-------------------------------------------------------------------------
* The code below was inspired by code published by John Burkardt under
* the LPGL here:
*
* http://people.sc.fsu.edu/~jburkardt/cpp_src/mgmres/mgmres.html
*
* The code below was completely written from scratch based on the pseudo code
* found here:
*
* http://de.wikipedia.org/wiki/GMRES-Verfahren
*
* The Algorithm itself is described in
*
* Yousef Saad,
* Iterative Methods for Sparse Linear Systems,
* Second Edition,
* SIAM, 20003,
* ISBN: 0898715342,
* LC: QA188.S17.
*
*------------------------------------------------------------------------*/
unsigned itr_used = 0;
const unsigned n = this->N();
if (m_use_iLU_preconditioning)
mat.incomplete_LU_factorization(m_A, m_LU);
if (m_use_more_precise_stop_condition)
{
/* derive residual for a given delta x
*
* LU y = A dx
*
* ==> rho / accuracy = sqrt(y * y)
*
* This approach will approximate the iterative stop condition
* based |xnew - xold| pretty precisely. But it is slow, or expressed
* differently: The invest doesn't pay off.
* Therefore we use the approach in the else part.
*/
double t[_storage_N];
double Ax[_storage_N];
vec_set(n, accuracy, t);
mat.mult_vec(m_A, t, Ax);
mat.solveLUx(m_LU, Ax);
const double rho_to_accuracy = std::sqrt(vecmult2(n, Ax)) / accuracy;
//printf("rho/accuracy = %f\n", rho_to_accuracy);
accuracy *= rho_to_accuracy;
}
else
accuracy *= std::sqrt((double) n);
for (unsigned itr = 0; itr < restart_max; itr++)
{
unsigned last_k = mr;
double mu;
double rho;
double Ax[_storage_N];
double residual[_storage_N];
mat.mult_vec(m_A, x, Ax);
vec_sub(n, rhs, Ax, residual);
if (m_use_iLU_preconditioning)
{
mat.solveLUx(m_LU, residual);
}
rho = std::sqrt(vecmult2(n, residual));
vec_mult_scalar(n, residual, 1.0 / rho, m_v[0]);
vec_set(mr+1, 0.0, m_g);
m_g[0] = rho;
for (unsigned i = 0; i < mr; i++)
vec_set(mr + 1, 0.0, m_ht[i]);
for (unsigned k = 0; k < mr; k++)
{
const unsigned k1 = k + 1;
mat.mult_vec(m_A, m_v[k], m_v[k1]);
if (m_use_iLU_preconditioning)
mat.solveLUx(m_LU, m_v[k1]);
for (unsigned j = 0; j <= k; j++)
{
m_ht[j][k] = vecmult(n, m_v[k1], m_v[j]);
vec_add_mult_scalar(n, m_v[j], -m_ht[j][k], m_v[k1]);
}
m_ht[k1][k] = std::sqrt(vecmult2(n, m_v[k1]));
if (m_ht[k1][k] != 0.0)
vec_scale(n, m_v[k1], 1.0 / m_ht[k1][k]);
for (unsigned j = 0; j < k; j++)
givens_mult(m_c[j], m_s[j], &m_ht[j][k], &m_ht[j+1][k]);
mu = std::sqrt(std::pow(m_ht[k][k], 2) + std::pow(m_ht[k1][k], 2));
m_c[k] = m_ht[k][k] / mu;
m_s[k] = -m_ht[k1][k] / mu;
m_ht[k][k] = m_c[k] * m_ht[k][k] - m_s[k] * m_ht[k1][k];
m_ht[k1][k] = 0.0;
givens_mult(m_c[k], m_s[k], &m_g[k], &m_g[k1]);
rho = std::abs(m_g[k1]);
itr_used = itr_used + 1;
if (rho <= accuracy)
{
last_k = k;
break;
}
}
if (last_k >= mr)
/* didn't converge within accuracy */
last_k = mr - 1;
double m_y[_storage_N];
/* Solve the system H * y = g */
/* x += m_v[j] * m_y[j] */
for (int i = last_k; i >= 0; i--)
{
double tmp = m_g[i];
for (unsigned j = i + 1; j <= last_k; j++)
{
tmp -= m_ht[i][j] * m_y[j];
}
m_y[i] = tmp / m_ht[i][i];
}
for (unsigned i = 0; i <= last_k; i++)
vec_add_mult_scalar(n, m_v[i], m_y[i], x);
if (rho <= accuracy)
{
break;
}
}
return itr_used;
}
NETLIB_NAMESPACE_DEVICES_END()
#endif /* NLD_MS_GMRES_H_ */

View File

@ -9,6 +9,7 @@
* the vectorizations fast-math enables pretty expensive
*/
//#pragma GCC optimize "-ffast-math"
#if 0
#pragma GCC optimize "-ffast-math"
//#pragma GCC optimize "-ftree-parallelize-loops=4"
@ -427,8 +428,8 @@ matrix_solver_t * NETLIB_NAME(solver)::create_solver(int size, const int gs_thre
}
else
{
typedef matrix_solver_SOR_t<m_N,_storage_N> solver_GS;
//typedef matrix_solver_GMRES_t<m_N,_storage_N> solver_GS;
//typedef matrix_solver_SOR_t<m_N,_storage_N> solver_GS;
typedef matrix_solver_GMRES_t<m_N,_storage_N> solver_GS;
return palloc(solver_GS, &m_params, size);
}
}
@ -500,6 +501,7 @@ ATTR_COLD void NETLIB_NAME(solver)::post_start()
switch (net_count)
{
#if 1
case 1:
ms = create_solver<1,1>(1, gs_threshold, use_specific);
break;
@ -530,7 +532,9 @@ ATTR_COLD void NETLIB_NAME(solver)::post_start()
case 87:
ms = create_solver<87,87>(87, gs_threshold, use_specific);
break;
#endif
default:
#if 0
if (net_count <= 16)
{
ms = create_solver<0,16>(net_count, gs_threshold, use_specific);
@ -543,7 +547,9 @@ ATTR_COLD void NETLIB_NAME(solver)::post_start()
{
ms = create_solver<0,64>(net_count, gs_threshold, use_specific);
}
else if (net_count <= 128)
else
#endif
if (net_count <= 128)
{
ms = create_solver<0,128>(net_count, gs_threshold, use_specific);
}
@ -580,5 +586,3 @@ ATTR_COLD void NETLIB_NAME(solver)::post_start()
}
NETLIB_NAMESPACE_DEVICES_END()
#include "mgmres.cpp"

View File

@ -0,0 +1,101 @@
// license:GPL-2.0+
// copyright-holders:Couriersud
/*
* vector_base.h
*
* Base vector operations
*
*/
#ifndef VECTOR_BASE_H_
#define VECTOR_BASE_H_
#include <algorithm>
#include "../plib/pconfig.h"
#if 0
template <unsigned _storage_N>
struct pvector
{
pvector(unsigned size)
: m_N(size) { }
unsigned size() {
if (_storage_N)
}
double m_V[_storage_N];
private:
unsigned m_N;
};
#endif
inline void vec_set (const unsigned n, const double &scalar, double * RESTRICT result)
{
for ( unsigned i = 0; i < n; i++ )
result[i] = scalar;
}
inline double vecmult (const unsigned n, const double * RESTRICT a1, const double * RESTRICT a2 )
{
double value = 0.0;
for ( unsigned i = 0; i < n; i++ )
value = value + a1[i] * a2[i];
return value;
}
inline double vecmult2 (const unsigned n, const double *a1)
{
double value = 0.0;
for ( unsigned i = 0; i < n; i++ )
{
const double temp = a1[i];
value = value + temp * temp;
}
return value;
}
inline void vec_mult_scalar (const int n, const double * RESTRICT v, const double scalar, double * RESTRICT result)
{
for ( unsigned i = 0; i < n; i++ )
{
result[i] = scalar * v[i];
}
}
inline void vec_add_mult_scalar (const int n, const double * RESTRICT v, const double scalar, double * RESTRICT result)
{
for ( unsigned i = 0; i < n; i++ )
{
result[i] += scalar * v[i];
}
}
inline void vec_add_ip(const int n, const double * RESTRICT v, double * RESTRICT result)
{
for ( unsigned i = 0; i < n; i++ )
{
result[i] += v[i];
}
}
inline void vec_sub(const int n, const double * RESTRICT v1, const double * RESTRICT v2, double * RESTRICT result)
{
for ( unsigned i = 0; i < n; i++ )
result[i] = v1[i] - v2[i];
}
inline void vec_scale (const int n, double * RESTRICT v, const double scalar)
{
for ( unsigned i = 0; i < n; i++ )
{
v[i] = scalar * v[i];
}
}
#endif /* MAT_CR_H_ */