Actual source code: epskrylov.c
slepc-3.8.0 2017-10-20
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2017, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Common subroutines for all Krylov-type solvers
12: */
14: #include <slepc/private/epsimpl.h>
15: #include <slepc/private/slepcimpl.h>
16: #include <slepcblaslapack.h>
18: /*
19: EPSBasicArnoldi - Computes an m-step Arnoldi factorization. The first k
20: columns are assumed to be locked and therefore they are not modified. On
21: exit, the following relation is satisfied:
23: OP * V - V * H = beta*v_m * e_m^T
25: where the columns of V are the Arnoldi vectors (which are B-orthonormal),
26: H is an upper Hessenberg matrix, e_m is the m-th vector of the canonical basis.
27: On exit, beta contains the B-norm of V[m] before normalization.
28: */
29: PetscErrorCode EPSBasicArnoldi(EPS eps,PetscBool trans,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
30: {
32: PetscScalar *a;
33: PetscInt j,nc,n,m = *M;
34: Vec vj,vj1,buf;
37: BVSetActiveColumns(eps->V,0,m);
38: for (j=k;j<m;j++) {
39: BVGetColumn(eps->V,j,&vj);
40: BVGetColumn(eps->V,j+1,&vj1);
41: if (trans) {
42: STApplyTranspose(eps->st,vj,vj1);
43: } else {
44: STApply(eps->st,vj,vj1);
45: }
46: BVRestoreColumn(eps->V,j,&vj);
47: BVRestoreColumn(eps->V,j+1,&vj1);
48: BVOrthonormalizeColumn(eps->V,j+1,PETSC_FALSE,beta,breakdown);
49: if (*breakdown) {
50: *M = j+1;
51: break;
52: }
53: }
54: /* extract Hessenberg matrix from the BV object */
55: BVGetNumConstraints(eps->V,&nc);
56: BVGetSizes(eps->V,NULL,NULL,&n);
57: BVGetBufferVec(eps->V,&buf);
58: VecGetArray(buf,&a);
59: for (j=k;j<*M;j++) {
60: PetscMemcpy(H+j*ldh,a+nc+(j+1)*(nc+n),(j+2)*sizeof(PetscScalar));
61: }
62: VecRestoreArray(buf,&a);
63: return(0);
64: }
66: /*
67: EPSDelayedArnoldi - This function is equivalent to EPSBasicArnoldi but
68: performs the computation in a different way. The main idea is that
69: reorthogonalization is delayed to the next Arnoldi step. This version is
70: more scalable but in some cases convergence may stagnate.
71: */
72: PetscErrorCode EPSDelayedArnoldi(EPS eps,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
73: {
75: PetscInt i,j,m=*M;
76: Vec u,t;
77: PetscScalar shh[100],*lhh,dot,dot2;
78: PetscReal norm1=0.0,norm2=1.0;
79: Vec vj,vj1,vj2;
82: if (m<=100) lhh = shh;
83: else {
84: PetscMalloc1(m,&lhh);
85: }
86: BVCreateVec(eps->V,&u);
87: BVCreateVec(eps->V,&t);
89: BVSetActiveColumns(eps->V,0,m);
90: for (j=k;j<m;j++) {
91: BVGetColumn(eps->V,j,&vj);
92: BVGetColumn(eps->V,j+1,&vj1);
93: STApply(eps->st,vj,vj1);
94: BVRestoreColumn(eps->V,j,&vj);
95: BVRestoreColumn(eps->V,j+1,&vj1);
97: BVDotColumnBegin(eps->V,j+1,H+ldh*j);
98: if (j>k) {
99: BVDotColumnBegin(eps->V,j,lhh);
100: BVGetColumn(eps->V,j,&vj);
101: VecDotBegin(vj,vj,&dot);
102: }
103: if (j>k+1) {
104: BVNormVecBegin(eps->V,u,NORM_2,&norm2);
105: BVGetColumn(eps->V,j-2,&vj2);
106: VecDotBegin(u,vj2,&dot2);
107: }
109: BVDotColumnEnd(eps->V,j+1,H+ldh*j);
110: if (j>k) {
111: BVDotColumnEnd(eps->V,j,lhh);
112: VecDotEnd(vj,vj,&dot);
113: BVRestoreColumn(eps->V,j,&vj);
114: }
115: if (j>k+1) {
116: BVNormVecEnd(eps->V,u,NORM_2,&norm2);
117: VecDotEnd(u,vj2,&dot2);
118: BVRestoreColumn(eps->V,j-2,&vj2);
119: }
121: if (j>k) {
122: norm1 = PetscSqrtReal(PetscRealPart(dot));
123: for (i=0;i<j;i++)
124: H[ldh*j+i] = H[ldh*j+i]/norm1;
125: H[ldh*j+j] = H[ldh*j+j]/dot;
127: BVCopyVec(eps->V,j,t);
128: BVScaleColumn(eps->V,j,1.0/norm1);
129: BVScaleColumn(eps->V,j+1,1.0/norm1);
130: }
132: BVMultColumn(eps->V,-1.0,1.0,j+1,H+ldh*j);
134: if (j>k) {
135: BVSetActiveColumns(eps->V,0,j);
136: BVMultVec(eps->V,-1.0,1.0,t,lhh);
137: BVSetActiveColumns(eps->V,0,m);
138: for (i=0;i<j;i++)
139: H[ldh*(j-1)+i] += lhh[i];
140: }
142: if (j>k+1) {
143: BVGetColumn(eps->V,j-1,&vj1);
144: VecCopy(u,vj1);
145: BVRestoreColumn(eps->V,j-1,&vj1);
146: BVScaleColumn(eps->V,j-1,1.0/norm2);
147: H[ldh*(j-2)+j-1] = norm2;
148: }
150: if (j<m-1) {
151: VecCopy(t,u);
152: }
153: }
155: BVNormVec(eps->V,t,NORM_2,&norm2);
156: VecScale(t,1.0/norm2);
157: BVGetColumn(eps->V,m-1,&vj1);
158: VecCopy(t,vj1);
159: BVRestoreColumn(eps->V,m-1,&vj1);
160: H[ldh*(m-2)+m-1] = norm2;
162: BVDotColumn(eps->V,m,lhh);
164: BVMultColumn(eps->V,-1.0,1.0,m,lhh);
165: for (i=0;i<m;i++)
166: H[ldh*(m-1)+i] += lhh[i];
168: BVNormColumn(eps->V,m,NORM_2,beta);
169: BVScaleColumn(eps->V,m,1.0 / *beta);
170: *breakdown = PETSC_FALSE;
172: if (m>100) { PetscFree(lhh); }
173: VecDestroy(&u);
174: VecDestroy(&t);
175: return(0);
176: }
178: /*
179: EPSDelayedArnoldi1 - This function is similar to EPSDelayedArnoldi,
180: but without reorthogonalization (only delayed normalization).
181: */
182: PetscErrorCode EPSDelayedArnoldi1(EPS eps,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
183: {
185: PetscInt i,j,m=*M;
186: PetscScalar dot;
187: PetscReal norm=0.0;
188: Vec vj,vj1;
191: BVSetActiveColumns(eps->V,0,m);
192: for (j=k;j<m;j++) {
193: BVGetColumn(eps->V,j,&vj);
194: BVGetColumn(eps->V,j+1,&vj1);
195: STApply(eps->st,vj,vj1);
196: BVRestoreColumn(eps->V,j+1,&vj1);
197: BVDotColumnBegin(eps->V,j+1,H+ldh*j);
198: if (j>k) {
199: VecDotBegin(vj,vj,&dot);
200: }
201: BVDotColumnEnd(eps->V,j+1,H+ldh*j);
202: if (j>k) {
203: VecDotEnd(vj,vj,&dot);
204: }
205: BVRestoreColumn(eps->V,j,&vj);
207: if (j>k) {
208: norm = PetscSqrtReal(PetscRealPart(dot));
209: BVScaleColumn(eps->V,j,1.0/norm);
210: H[ldh*(j-1)+j] = norm;
212: for (i=0;i<j;i++)
213: H[ldh*j+i] = H[ldh*j+i]/norm;
214: H[ldh*j+j] = H[ldh*j+j]/dot;
215: BVScaleColumn(eps->V,j+1,1.0/norm);
216: *beta = norm;
217: }
218: BVMultColumn(eps->V,-1.0,1.0,j+1,H+ldh*j);
219: }
221: *breakdown = PETSC_FALSE;
222: return(0);
223: }
225: /*
226: EPSKrylovConvergence_Filter - Specialized version for STFILTER.
227: */
228: PetscErrorCode EPSKrylovConvergence_Filter(EPS eps,PetscBool getall,PetscInt kini,PetscInt nits,PetscReal beta,PetscReal gamma,PetscInt *kout)
229: {
231: PetscInt k,ninside,nconv;
232: PetscScalar re,im;
233: PetscReal resnorm;
236: ninside = 0; /* count how many eigenvalues are located in the interval */
237: for (k=kini;k<kini+nits;k++) {
238: if (PetscRealPart(eps->eigr[k]) < gamma) break;
239: ninside++;
240: }
241: eps->nev = ninside+kini; /* adjust eigenvalue count */
242: nconv = 0; /* count how many eigenvalues satisfy the convergence criterion */
243: for (k=kini;k<kini+ninside;k++) {
244: /* eigenvalue */
245: re = eps->eigr[k];
246: im = eps->eigi[k];
247: DSVectors(eps->ds,DS_MAT_X,&k,&resnorm);
248: resnorm *= beta;
249: /* error estimate */
250: (*eps->converged)(eps,re,im,resnorm,&eps->errest[k],eps->convergedctx);
251: if (eps->errest[k] < eps->tol) nconv++;
252: else break;
253: }
254: *kout = kini+nconv;
255: PetscInfo4(eps,"Found %D eigenvalue approximations inside the inverval (gamma=%g), k=%D nconv=%D\n",ninside,(double)gamma,k,nconv);
256: return(0);
257: }
259: /*
260: EPSKrylovConvergence - Implements the loop that checks for convergence
261: in Krylov methods.
263: Input Parameters:
264: eps - the eigensolver; some error estimates are updated in eps->errest
265: getall - whether all residuals must be computed
266: kini - initial value of k (the loop variable)
267: nits - number of iterations of the loop
268: V - set of basis vectors (used only if trueresidual is activated)
269: nv - number of vectors to process (dimension of Q, columns of V)
270: beta - norm of f (the residual vector of the Arnoldi/Lanczos factorization)
271: corrf - correction factor for residual estimates (only in harmonic KS)
273: Output Parameters:
274: kout - the first index where the convergence test failed
275: */
276: PetscErrorCode EPSKrylovConvergence(EPS eps,PetscBool getall,PetscInt kini,PetscInt nits,PetscReal beta,PetscReal corrf,PetscInt *kout)
277: {
279: PetscInt k,newk,marker,ld,inside;
280: PetscScalar re,im,*Zr,*Zi,*X;
281: PetscReal resnorm,gamma;
282: PetscBool isshift,isfilter,refined,istrivial;
283: Vec x,y,w[3];
286: if (eps->which == EPS_ALL) {
287: PetscObjectTypeCompare((PetscObject)eps->st,STFILTER,&isfilter);
288: if (isfilter) {
289: STFilterGetThreshold(eps->st,&gamma);
290: EPSKrylovConvergence_Filter(eps,getall,kini,nits,beta,gamma,kout);
291: return(0);
292: }
293: }
294: RGIsTrivial(eps->rg,&istrivial);
295: if (eps->trueres) {
296: BVCreateVec(eps->V,&x);
297: BVCreateVec(eps->V,&y);
298: BVCreateVec(eps->V,&w[0]);
299: BVCreateVec(eps->V,&w[2]);
300: #if !defined(PETSC_USE_COMPLEX)
301: BVCreateVec(eps->V,&w[1]);
302: #else
303: w[1] = NULL;
304: #endif
305: }
306: DSGetLeadingDimension(eps->ds,&ld);
307: DSGetRefined(eps->ds,&refined);
308: PetscObjectTypeCompare((PetscObject)eps->st,STSHIFT,&isshift);
309: marker = -1;
310: if (eps->trackall) getall = PETSC_TRUE;
311: for (k=kini;k<kini+nits;k++) {
312: /* eigenvalue */
313: re = eps->eigr[k];
314: im = eps->eigi[k];
315: if (!istrivial || eps->trueres || isshift || eps->conv==EPS_CONV_NORM) {
316: STBackTransform(eps->st,1,&re,&im);
317: }
318: if (!istrivial) {
319: RGCheckInside(eps->rg,1,&re,&im,&inside);
320: if (marker==-1 && inside<0) marker = k;
321: if (!(eps->trueres || isshift || eps->conv==EPS_CONV_NORM)) { /* make sure eps->converged below uses the right value */
322: re = eps->eigr[k];
323: im = eps->eigi[k];
324: }
325: }
326: newk = k;
327: DSVectors(eps->ds,DS_MAT_X,&newk,&resnorm);
328: if (eps->trueres) {
329: DSGetArray(eps->ds,DS_MAT_X,&X);
330: Zr = X+k*ld;
331: if (newk==k+1) Zi = X+newk*ld;
332: else Zi = NULL;
333: EPSComputeRitzVector(eps,Zr,Zi,eps->V,x,y);
334: DSRestoreArray(eps->ds,DS_MAT_X,&X);
335: EPSComputeResidualNorm_Private(eps,re,im,x,y,w,&resnorm);
336: }
337: else if (!refined) resnorm *= beta*corrf;
338: /* error estimate */
339: (*eps->converged)(eps,re,im,resnorm,&eps->errest[k],eps->convergedctx);
340: if (marker==-1 && eps->errest[k] >= eps->tol) marker = k;
341: if (newk==k+1) {
342: eps->errest[k+1] = eps->errest[k];
343: k++;
344: }
345: if (marker!=-1 && !getall) break;
346: }
347: if (marker!=-1) k = marker;
348: *kout = k;
349: if (eps->trueres) {
350: VecDestroy(&x);
351: VecDestroy(&y);
352: VecDestroy(&w[0]);
353: VecDestroy(&w[2]);
354: #if !defined(PETSC_USE_COMPLEX)
355: VecDestroy(&w[1]);
356: #endif
357: }
358: return(0);
359: }
361: /*
362: EPSFullLanczos - Computes an m-step Lanczos factorization with full
363: reorthogonalization. At each Lanczos step, the corresponding Lanczos
364: vector is orthogonalized with respect to all previous Lanczos vectors.
365: This is equivalent to computing an m-step Arnoldi factorization and
366: exploting symmetry of the operator.
368: The first k columns are assumed to be locked and therefore they are
369: not modified. On exit, the following relation is satisfied:
371: OP * V - V * T = beta_m*v_m * e_m^T
373: where the columns of V are the Lanczos vectors (which are B-orthonormal),
374: T is a real symmetric tridiagonal matrix, and e_m is the m-th vector of
375: the canonical basis. The tridiagonal is stored as two arrays: alpha
376: contains the diagonal elements, beta the off-diagonal. On exit, the last
377: element of beta contains the B-norm of V[m] before normalization.
378: */
379: PetscErrorCode EPSFullLanczos(EPS eps,PetscReal *alpha,PetscReal *beta,PetscInt k,PetscInt *M,PetscBool *breakdown)
380: {
382: PetscScalar *a;
383: PetscInt j,nc,n,m = *M;
384: Vec vj,vj1,buf;
387: BVSetActiveColumns(eps->V,0,m);
388: for (j=k;j<m;j++) {
389: BVGetColumn(eps->V,j,&vj);
390: BVGetColumn(eps->V,j+1,&vj1);
391: STApply(eps->st,vj,vj1);
392: BVRestoreColumn(eps->V,j,&vj);
393: BVRestoreColumn(eps->V,j+1,&vj1);
394: BVOrthonormalizeColumn(eps->V,j+1,PETSC_FALSE,beta+j,breakdown);
395: if (*breakdown) {
396: *M = j+1;
397: break;
398: }
399: }
400: /* extract tridiagonal matrix from the BV object (only alpha, beta is already in its place) */
401: BVGetNumConstraints(eps->V,&nc);
402: BVGetSizes(eps->V,NULL,NULL,&n);
403: BVGetBufferVec(eps->V,&buf);
404: VecGetArray(buf,&a);
405: for (j=k;j<*M;j++) alpha[j] = PetscRealPart(a[nc+j+(j+1)*(nc+n)]);
406: VecRestoreArray(buf,&a);
407: return(0);
408: }
410: PetscErrorCode EPSPseudoLanczos(EPS eps,PetscReal *alpha,PetscReal *beta,PetscReal *omega,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscBool *symmlost,PetscReal *cos,Vec w)
411: {
413: PetscInt j,m = *M,i,ld,l;
414: Vec vj,vj1;
415: PetscScalar *hwork,lhwork[100];
416: PetscReal norm,norm1,norm2,t,*f,sym=0.0,fro=0.0;
417: PetscBLASInt j_,one=1;
420: DSGetLeadingDimension(eps->ds,&ld);
421: DSGetDimensions(eps->ds,NULL,NULL,&l,NULL,NULL);
422: if (cos) *cos = 1.0;
423: if (m > 100) {
424: PetscMalloc1(m,&hwork);
425: } else hwork = lhwork;
427: BVSetActiveColumns(eps->V,0,m);
428: for (j=k;j<m;j++) {
429: BVGetColumn(eps->V,j,&vj);
430: BVGetColumn(eps->V,j+1,&vj1);
431: STApply(eps->st,vj,vj1);
432: BVRestoreColumn(eps->V,j,&vj);
433: BVRestoreColumn(eps->V,j+1,&vj1);
434: BVOrthogonalizeColumn(eps->V,j+1,hwork,&norm,breakdown);
435: alpha[j] = PetscRealPart(hwork[j]);
436: beta[j] = PetscAbsReal(norm);
437: DSGetArrayReal(eps->ds,DS_MAT_T,&f);
438: if (j==k) {
439: for (i=l;i<j-1;i++) hwork[i]-= f[2*ld+i];
440: for (i=0;i<l;i++) hwork[i] = 0.0;
441: }
442: DSRestoreArrayReal(eps->ds,DS_MAT_T,&f);
443: hwork[j-1] -= beta[j-1];
444: PetscBLASIntCast(j,&j_);
445: sym = SlepcAbs(BLASnrm2_(&j_,hwork,&one),sym);
446: fro = SlepcAbs(fro,SlepcAbs(alpha[j],beta[j]));
447: if (j>0) fro = SlepcAbs(fro,beta[j-1]);
448: if (sym/fro>PetscMax(PETSC_SQRT_MACHINE_EPSILON,10*eps->tol)) { *symmlost = PETSC_TRUE; *M=j+1; break; }
449: omega[j+1] = (norm<0.0)? -1.0: 1.0;
450: BVScaleColumn(eps->V,j+1,1.0/norm);
451: /* */
452: if (cos) {
453: BVGetColumn(eps->V,j+1,&vj1);
454: VecNorm(vj1,NORM_2,&norm1);
455: BVApplyMatrix(eps->V,vj1,w);
456: BVRestoreColumn(eps->V,j+1,&vj1);
457: VecNorm(w,NORM_2,&norm2);
458: t = 1.0/(norm1*norm2);
459: if (*cos>t) *cos = t;
460: }
461: }
462: if (m > 100) {
463: PetscFree(hwork);
464: }
465: return(0);
466: }