Actual source code: ks-slice.c
slepc-3.8.0 2017-10-20
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2017, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: SLEPc eigensolver: "krylovschur"
13: Method: Krylov-Schur with spectrum slicing for symmetric eigenproblems
15: References:
17: [1] R.G. Grimes et al., "A shifted block Lanczos algorithm for
18: solving sparse symmetric generalized eigenproblems", SIAM J.
19: Matrix Anal. Appl. 15(1):228-272, 1994.
21: [2] C. Campos and J.E. Roman, "Spectrum slicing strategies based
22: on restarted Lanczos methods", Numer. Algor. 60(2):279-295,
23: 2012.
24: */
26: #include <slepc/private/epsimpl.h>
27: #include "krylovschur.h"
29: static PetscBool cited = PETSC_FALSE;
30: static const char citation[] =
31: "@Article{slepc-slice,\n"
32: " author = \"C. Campos and J. E. Roman\",\n"
33: " title = \"Strategies for spectrum slicing based on restarted {Lanczos} methods\",\n"
34: " journal = \"Numer. Algorithms\",\n"
35: " volume = \"60\",\n"
36: " number = \"2\",\n"
37: " pages = \"279--295\",\n"
38: " year = \"2012,\"\n"
39: " doi = \"https://doi.org/10.1007/s11075-012-9564-z\"\n"
40: "}\n";
42: #define SLICE_PTOL PETSC_SQRT_MACHINE_EPSILON
44: static PetscErrorCode EPSSliceResetSR(EPS eps) {
45: PetscErrorCode ierr;
46: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
47: EPS_SR sr=ctx->sr;
48: EPS_shift s;
51: if (sr) {
52: if (ctx->npart>1) {
53: BVDestroy(&sr->V);
54: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
55: }
56: /* Reviewing list of shifts to free memory */
57: s = sr->s0;
58: if (s) {
59: while (s->neighb[1]) {
60: s = s->neighb[1];
61: PetscFree(s->neighb[0]);
62: }
63: PetscFree(s);
64: }
65: PetscFree(sr);
66: }
67: ctx->sr = NULL;
68: return(0);
69: }
71: PetscErrorCode EPSReset_KrylovSchur_Slice(EPS eps)
72: {
73: PetscErrorCode ierr;
74: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
77: if (!ctx->global) return(0);
78: /* Destroy auxiliary EPS */
79: EPSSliceResetSR(ctx->eps);
80: EPSDestroy(&ctx->eps);
81: if (ctx->npart>1) {
82: PetscSubcommDestroy(&ctx->subc);
83: if (ctx->commset) {
84: MPI_Comm_free(&ctx->commrank);
85: ctx->commset = PETSC_FALSE;
86: }
87: }
88: PetscFree(ctx->subintervals);
89: PetscFree(ctx->nconv_loc);
90: EPSSliceResetSR(eps);
91: PetscFree(ctx->inertias);
92: PetscFree(ctx->shifts);
93: if (ctx->npart>1) {
94: ISDestroy(&ctx->isrow);
95: ISDestroy(&ctx->iscol);
96: MatDestroyMatrices(1,&ctx->submata);
97: MatDestroyMatrices(1,&ctx->submatb);
98: }
99: return(0);
100: }
102: /*
103: EPSSliceAllocateSolution - Allocate memory storage for common variables such
104: as eigenvalues and eigenvectors. The argument extra is used for methods
105: that require a working basis slightly larger than ncv.
106: */
107: static PetscErrorCode EPSSliceAllocateSolution(EPS eps,PetscInt extra)
108: {
109: PetscErrorCode ierr;
110: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
111: PetscReal eta;
112: PetscInt k;
113: PetscLogDouble cnt;
114: BVType type;
115: BVOrthogType orthog_type;
116: BVOrthogRefineType orthog_ref;
117: BVOrthogBlockType ob_type;
118: Mat matrix;
119: Vec t;
120: EPS_SR sr = ctx->sr;
123: /* allocate space for eigenvalues and friends */
124: k = PetscMax(1,sr->numEigs);
125: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
126: PetscMalloc4(k,&sr->eigr,k,&sr->eigi,k,&sr->errest,k,&sr->perm);
127: cnt = 2*k*sizeof(PetscScalar) + 2*k*sizeof(PetscReal) + k*sizeof(PetscInt);
128: PetscLogObjectMemory((PetscObject)eps,cnt);
130: /* allocate sr->V and transfer options from eps->V */
131: BVDestroy(&sr->V);
132: BVCreate(PetscObjectComm((PetscObject)eps),&sr->V);
133: PetscLogObjectParent((PetscObject)eps,(PetscObject)sr->V);
134: if (!eps->V) { EPSGetBV(eps,&eps->V); }
135: if (!((PetscObject)(eps->V))->type_name) {
136: BVSetType(sr->V,BVSVEC);
137: } else {
138: BVGetType(eps->V,&type);
139: BVSetType(sr->V,type);
140: }
141: STMatCreateVecsEmpty(eps->st,&t,NULL);
142: BVSetSizesFromVec(sr->V,t,k);
143: VecDestroy(&t);
144: EPS_SetInnerProduct(eps);
145: BVGetMatrix(eps->V,&matrix,NULL);
146: BVSetMatrix(sr->V,matrix,PETSC_FALSE);
147: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
148: BVSetOrthogonalization(sr->V,orthog_type,orthog_ref,eta,ob_type);
149: return(0);
150: }
152: static PetscErrorCode EPSSliceGetEPS(EPS eps)
153: {
154: PetscErrorCode ierr;
155: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data,*ctx_local;
156: BV V;
157: BVType type;
158: PetscReal eta;
159: BVOrthogType orthog_type;
160: BVOrthogRefineType orthog_ref;
161: BVOrthogBlockType ob_type;
162: Mat A,B=NULL,Ar,Br=NULL;
163: PetscInt i;
164: PetscReal h,a,b;
165: PetscMPIInt rank;
166: EPS_SR sr=ctx->sr;
167: PC pc;
168: PCType pctype;
169: KSP ksp;
170: KSPType ksptype;
171: STType sttype;
172: PetscObjectState Astate,Bstate=0;
173: PetscObjectId Aid,Bid=0;
174: const MatSolverPackage stype;
177: EPSGetOperators(eps,&A,&B);
178: if (ctx->npart==1) {
179: if (!ctx->eps) { EPSCreate(((PetscObject)eps)->comm,&ctx->eps); }
180: EPSSetType(ctx->eps,((PetscObject)eps)->type_name);
181: EPSSetOperators(ctx->eps,A,B);
182: a = eps->inta; b = eps->intb;
183: } else {
184: PetscObjectStateGet((PetscObject)A,&Astate);
185: PetscObjectGetId((PetscObject)A,&Aid);
186: if (B) {
187: PetscObjectStateGet((PetscObject)B,&Bstate);
188: PetscObjectGetId((PetscObject)B,&Bid);
189: }
190: if (!ctx->subc) {
191: /* Create context for subcommunicators */
192: PetscSubcommCreate(PetscObjectComm((PetscObject)eps),&ctx->subc);
193: PetscSubcommSetNumber(ctx->subc,ctx->npart);
194: PetscSubcommSetType(ctx->subc,PETSC_SUBCOMM_CONTIGUOUS);
195: PetscLogObjectMemory((PetscObject)eps,sizeof(PetscSubcomm));
197: /* Duplicate matrices */
198: MatCreateRedundantMatrix(A,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Ar);
199: ctx->Astate = Astate;
200: ctx->Aid = Aid;
201: if (B) {
202: MatCreateRedundantMatrix(B,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Br);
203: ctx->Bstate = Bstate;
204: ctx->Bid = Bid;
205: }
206: } else {
207: if (ctx->Astate != Astate || (B && ctx->Bstate != Bstate) || ctx->Aid != Aid || (B && ctx->Bid != Bid)) {
208: EPSGetOperators(ctx->eps,&Ar,&Br);
209: MatCreateRedundantMatrix(A,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Ar);
210: ctx->Astate = Astate;
211: ctx->Aid = Aid;
212: if (B) {
213: MatCreateRedundantMatrix(B,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Br);
214: ctx->Bstate = Bstate;
215: ctx->Bid = Bid;
216: }
217: EPSSetOperators(ctx->eps,Ar,Br);
218: MatDestroy(&Ar);
219: MatDestroy(&Br);
220: }
221: }
223: /* Determine subintervals */
224: if (!ctx->subintset) { /* uniform distribution if no set by user */
225: if (!sr->hasEnd) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"Global interval must be bounded for splitting it in uniform subintervals");
226: h = (eps->intb-eps->inta)/ctx->npart;
227: a = eps->inta+ctx->subc->color*h;
228: b = (ctx->subc->color==ctx->npart-1)?eps->intb:eps->inta+(ctx->subc->color+1)*h;
229: PetscFree(ctx->subintervals);
230: PetscMalloc1(ctx->npart+1,&ctx->subintervals);
231: for (i=0;i<ctx->npart;i++) ctx->subintervals[i] = eps->inta+h*i;
232: ctx->subintervals[ctx->npart] = eps->intb;
233: } else {
234: a = ctx->subintervals[ctx->subc->color];
235: b = ctx->subintervals[ctx->subc->color+1];
236: }
238: if (!ctx->eps) {
239: /* Create auxiliary EPS */
240: EPSCreate(PetscSubcommChild(ctx->subc),&ctx->eps);
241: EPSSetOperators(ctx->eps,Ar,Br);
242: MatDestroy(&Ar);
243: MatDestroy(&Br);
244: }
246: /* Create subcommunicator grouping processes with same rank */
247: if (ctx->commset) { MPI_Comm_free(&ctx->commrank); }
248: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
249: MPI_Comm_split(((PetscObject)eps)->comm,rank,ctx->subc->color,&ctx->commrank);
250: ctx->commset = PETSC_TRUE;
251: }
252: EPSSetType(ctx->eps,((PetscObject)eps)->type_name);
254: /* Transfer options for ST, KSP and PC */
255: STGetType(eps->st,&sttype);
256: STSetType(ctx->eps->st,sttype);
257: STGetKSP(eps->st,&ksp);
258: KSPGetType(ksp,&ksptype);
259: KSPGetPC(ksp,&pc);
260: PCGetType(pc,&pctype);
261: PCFactorGetMatSolverPackage(pc,&stype);
262: STGetKSP(ctx->eps->st,&ksp);
263: KSPSetType(ksp,ksptype);
264: KSPGetPC(ksp,&pc);
265: PCSetType(pc,pctype);
266: if (stype) { PCFactorSetMatSolverPackage(pc,stype); }
268: EPSSetConvergenceTest(ctx->eps,eps->conv);
269: EPSSetInterval(ctx->eps,a,b);
270: ctx_local = (EPS_KRYLOVSCHUR*)ctx->eps->data;
271: ctx_local->npart = ctx->npart;
272: ctx_local->detect = ctx->detect;
273: ctx_local->global = PETSC_FALSE;
274: ctx_local->eps = eps;
275: ctx_local->subc = ctx->subc;
276: ctx_local->commrank = ctx->commrank;
278: EPSSetDimensions(ctx->eps,ctx->nev,ctx->ncv,ctx->mpd);
279: EPSKrylovSchurSetLocking(ctx->eps,ctx->lock);
281: /* transfer options from eps->V */
282: EPSGetBV(ctx->eps,&V);
283: if (!eps->V) { EPSGetBV(eps,&eps->V); }
284: if (!((PetscObject)(eps->V))->type_name) {
285: BVSetType(V,BVSVEC);
286: } else {
287: BVGetType(eps->V,&type);
288: BVSetType(V,type);
289: }
290: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
291: BVSetOrthogonalization(V,orthog_type,orthog_ref,eta,ob_type);
292: ctx->eps->which = eps->which;
293: ctx->eps->max_it = eps->max_it;
294: ctx->eps->tol = eps->tol;
295: ctx->eps->purify = eps->purify;
296: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL;
297: EPSSetProblemType(ctx->eps,eps->problem_type);
298: EPSSetUp(ctx->eps);
299: ctx->eps->nconv = 0;
300: ctx->eps->its = 0;
301: for (i=0;i<ctx->eps->ncv;i++) {
302: ctx->eps->eigr[i] = 0.0;
303: ctx->eps->eigi[i] = 0.0;
304: ctx->eps->errest[i] = 0.0;
305: }
306: return(0);
307: }
309: static PetscErrorCode EPSSliceGetInertia(EPS eps,PetscReal shift,PetscInt *inertia,PetscInt *zeros)
310: {
312: KSP ksp;
313: PC pc;
314: Mat F;
315: PetscReal nzshift;
318: if (shift >= PETSC_MAX_REAL) { /* Right-open interval */
319: if (inertia) *inertia = eps->n;
320: } else if (shift <= PETSC_MIN_REAL) {
321: if (inertia) *inertia = 0;
322: if (zeros) *zeros = 0;
323: } else {
324: /* If the shift is zero, perturb it to a very small positive value.
325: The goal is that the nonzero pattern is the same in all cases and reuse
326: the symbolic factorizations */
327: nzshift = (shift==0.0)? 10.0/PETSC_MAX_REAL: shift;
328: STSetShift(eps->st,nzshift);
329: STSetUp(eps->st);
330: STGetKSP(eps->st,&ksp);
331: KSPGetPC(ksp,&pc);
332: PCFactorGetMatrix(pc,&F);
333: MatGetInertia(F,inertia,zeros,NULL);
334: }
335: return(0);
336: }
338: /*
339: Dummy backtransform operation
340: */
341: static PetscErrorCode EPSBackTransform_Skip(EPS eps)
342: {
344: return(0);
345: }
347: PetscErrorCode EPSSetUp_KrylovSchur_Slice(EPS eps)
348: {
349: PetscErrorCode ierr;
350: PetscBool issinv;
351: EPS_KRYLOVSCHUR *ctx = (EPS_KRYLOVSCHUR*)eps->data,*ctx_glob;
352: EPS_SR sr,sr_loc,sr_glob;
353: PetscInt nEigs,dssz=1,i,zeros=0,off=0;
354: PetscMPIInt nproc,rank=0,aux;
355: PetscReal r;
356: MPI_Request req;
357: Mat A,B=NULL;
358: SlepcSC sc;
359: PetscInt flg=0;
360: DSParallelType ptype;
363: if (ctx->global) {
364: if (eps->intb >= PETSC_MAX_REAL && eps->inta <= PETSC_MIN_REAL) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"The defined computational interval should have at least one of their sides bounded");
365: if (!eps->ishermitian) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Spectrum slicing only available for symmetric/Hermitian eigenproblems");
366: if (eps->arbitrary) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Arbitrary selection of eigenpairs cannot be used with spectrum slicing");
367: PetscObjectTypeCompareAny((PetscObject)eps->st,&issinv,STSINVERT,STCAYLEY,"");
368: if (!issinv) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Shift-and-invert or Cayley ST is needed for spectrum slicing");
369: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL*1e-2; /* use tighter tolerance */
370: if (!eps->max_it) eps->max_it = 100;
371: if (ctx->nev==1) ctx->nev = PetscMin(40,eps->n); /* nev not set, use default value */
372: if (eps->n>10 && ctx->nev<10) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"nev cannot be less than 10 in spectrum slicing runs");
373: }
374: eps->ops->backtransform = EPSBackTransform_Skip;
376: /* create spectrum slicing context and initialize it */
377: EPSSliceResetSR(eps);
378: PetscNewLog(eps,&sr);
379: ctx->sr = sr;
380: sr->itsKs = 0;
381: sr->nleap = 0;
382: sr->nMAXCompl = eps->nev/4;
383: sr->iterCompl = eps->max_it/4;
384: sr->sPres = NULL;
385: sr->nS = 0;
387: if (ctx->npart==1 || ctx->global) {
388: /* check presence of ends and finding direction */
389: if ((eps->inta > PETSC_MIN_REAL && !(ctx->subintervals && ctx->subintervals[0]==ctx->subintervals[1])) || eps->intb >= PETSC_MAX_REAL) {
390: sr->int0 = eps->inta;
391: sr->int1 = eps->intb;
392: sr->dir = 1;
393: if (eps->intb >= PETSC_MAX_REAL) { /* Right-open interval */
394: sr->hasEnd = PETSC_FALSE;
395: } else sr->hasEnd = PETSC_TRUE;
396: } else {
397: sr->int0 = eps->intb;
398: sr->int1 = eps->inta;
399: sr->dir = -1;
400: sr->hasEnd = PetscNot(eps->inta <= PETSC_MIN_REAL);
401: }
402: }
403: if (ctx->global) {
404: /* prevent computation of factorization in global eps */
405: STSetTransform(eps->st,PETSC_FALSE);
406: EPSSetDimensions_Default(eps,ctx->nev,&ctx->ncv,&ctx->mpd);
407: /* create subintervals and initialize auxiliary eps for slicing runs */
408: EPSSliceGetEPS(eps);
409: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
410: if (ctx->npart>1) {
411: if ((sr->dir>0&&ctx->subc->color==0)||(sr->dir<0&&ctx->subc->color==ctx->npart-1)) sr->inertia0 = sr_loc->inertia0;
412: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
413: if (!rank) {
414: MPI_Bcast(&sr->inertia0,1,MPIU_INT,(sr->dir>0)?0:ctx->npart-1,ctx->commrank);
415: }
416: MPI_Bcast(&sr->inertia0,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
417: PetscFree(ctx->nconv_loc);
418: PetscMalloc1(ctx->npart,&ctx->nconv_loc);
419: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
420: if (sr->dir<0) off = 1;
421: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
422: PetscMPIIntCast(sr_loc->numEigs,&aux);
423: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
424: MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
425: } else {
426: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
427: if (!rank) {
428: PetscMPIIntCast(sr_loc->numEigs,&aux);
429: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
430: MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
431: }
432: PetscMPIIntCast(ctx->npart,&aux);
433: MPI_Bcast(ctx->nconv_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));
434: MPI_Bcast(ctx->subintervals+off,aux,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
435: }
436: nEigs = 0;
437: for (i=0;i<ctx->npart;i++) nEigs += ctx->nconv_loc[i];
438: } else {
439: nEigs = sr_loc->numEigs;
440: sr->inertia0 = sr_loc->inertia0;
441: }
442: sr->inertia1 = sr->inertia0+sr->dir*nEigs;
443: sr->numEigs = nEigs;
444: eps->nev = nEigs;
445: eps->ncv = nEigs;
446: eps->mpd = nEigs;
447: } else {
448: ctx_glob = (EPS_KRYLOVSCHUR*)ctx->eps->data;
449: sr_glob = ctx_glob->sr;
450: if (ctx->npart>1) {
451: sr->dir = sr_glob->dir;
452: sr->int0 = (sr->dir==1)?eps->inta:eps->intb;
453: sr->int1 = (sr->dir==1)?eps->intb:eps->inta;
454: if ((sr->dir>0&&ctx->subc->color==ctx->npart-1)||(sr->dir<0&&ctx->subc->color==0)) sr->hasEnd = sr_glob->hasEnd;
455: else sr->hasEnd = PETSC_TRUE;
456: }
458: /* compute inertia0 */
459: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,ctx->detect?&zeros:NULL);
460: PetscOptionsGetInt(NULL,NULL,"-eps_krylovschur_hiteigenvalue",&flg,NULL);
461: if (zeros) { /* error in factorization */
462: if (sr->int0==ctx->eps->inta || sr->int0==ctx->eps->intb) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in the interval endpoint");
463: else if(ctx_glob->subintset && !flg) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
464: else {
465: if (flg==1) { /* idle subgroup */
466: sr->inertia0 = -1;
467: } else { /* perturb shift */
468: sr->int0 *= (1.0+SLICE_PTOL);
469: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,&zeros);
470: if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",sr->int1);
471: }
472: }
473: }
474: if (ctx->npart>1) {
475: /* inertia1 is received from neighbour */
476: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
477: if (!rank) {
478: if ( sr->inertia0!=-1 && ((sr->dir>0 && ctx->subc->color>0) || (sr->dir<0 && ctx->subc->color<ctx->npart-1)) ) { /* send inertia0 to neighbour0 */
479: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
480: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
481: }
482: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)|| (sr->dir<0 && ctx->subc->color>0)) { /* receive inertia1 from neighbour1 */
483: MPI_Recv(&(sr->inertia1),1,MPIU_INT,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
484: MPI_Recv(&(sr->int1),1,MPIU_REAL,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
485: }
486: if ( sr->inertia0==-1 && !(sr->dir>0 && ctx->subc->color==ctx->npart-1) && !(sr->dir<0 && ctx->subc->color==0)) {
487: sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
488: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
489: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
490: }
491: }
492: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)||(sr->dir<0 && ctx->subc->color>0)) {
493: MPI_Bcast(&sr->inertia1,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
494: MPI_Bcast(&sr->int1,1,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
495: } else sr_glob->inertia1 = sr->inertia1;
496: }
498: /* last process in eps comm computes inertia1 */
499: if (ctx->npart==1 || ((sr->dir>0 && ctx->subc->color==ctx->npart-1) || (sr->dir<0 && ctx->subc->color==0))) {
500: EPSSliceGetInertia(eps,sr->int1,&sr->inertia1,ctx->detect?&zeros:NULL);
501: if (zeros) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
502: if (!rank && sr->inertia0==-1) {
503: sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
504: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
505: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
506: }
507: if (sr->hasEnd) {
508: sr->dir = -sr->dir; r = sr->int0; sr->int0 = sr->int1; sr->int1 = r;
509: i = sr->inertia0; sr->inertia0 = sr->inertia1; sr->inertia1 = i;
510: }
511: }
513: /* number of eigenvalues in interval */
514: sr->numEigs = (sr->dir)*(sr->inertia1 - sr->inertia0);
515: if (ctx->npart>1) {
516: /* memory allocate for subinterval eigenpairs */
517: EPSSliceAllocateSolution(eps,1);
518: }
519: dssz = eps->ncv+1;
520: if (sr->numEigs>0) {
521: DSGetSlepcSC(eps->ds,&sc);
522: sc->rg = NULL;
523: sc->comparison = SlepcCompareLargestMagnitude;
524: sc->comparisonctx = NULL;
525: sc->map = NULL;
526: sc->mapobj = NULL;
527: }
528: DSGetParallel(ctx->eps->ds,&ptype);
529: DSSetParallel(eps->ds,ptype);
530: }
531: DSSetType(eps->ds,DSHEP);
532: DSSetCompact(eps->ds,PETSC_TRUE);
533: DSAllocate(eps->ds,dssz);
534: /* keep state of subcomm matrices to check that the user does not modify them */
535: EPSGetOperators(eps,&A,&B);
536: PetscObjectStateGet((PetscObject)A,&ctx->Astate);
537: PetscObjectGetId((PetscObject)A,&ctx->Aid);
538: if (B) {
539: PetscObjectStateGet((PetscObject)B,&ctx->Bstate);
540: PetscObjectGetId((PetscObject)B,&ctx->Bid);
541: } else {
542: ctx->Bstate=0;
543: ctx->Bid=0;
544: }
545: return(0);
546: }
548: static PetscErrorCode EPSSliceGatherEigenVectors(EPS eps)
549: {
550: PetscErrorCode ierr;
551: Vec v,vg,v_loc;
552: IS is1,is2;
553: VecScatter vec_sc;
554: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
555: PetscInt nloc,m0,n0,i,si,idx,*idx1,*idx2,j;
556: PetscScalar *array;
557: EPS_SR sr_loc;
558: BV V_loc;
561: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
562: V_loc = sr_loc->V;
564: /* Gather parallel eigenvectors */
565: BVGetColumn(eps->V,0,&v);
566: VecGetOwnershipRange(v,&n0,&m0);
567: BVRestoreColumn(eps->V,0,&v);
568: BVGetColumn(ctx->eps->V,0,&v);
569: VecGetLocalSize(v,&nloc);
570: BVRestoreColumn(ctx->eps->V,0,&v);
571: PetscMalloc2(m0-n0,&idx1,m0-n0,&idx2);
572: VecCreateMPI(PetscObjectComm((PetscObject)eps),nloc,PETSC_DECIDE,&vg);
573: idx = -1;
574: for (si=0;si<ctx->npart;si++) {
575: j = 0;
576: for (i=n0;i<m0;i++) {
577: idx1[j] = i;
578: idx2[j++] = i+eps->n*si;
579: }
580: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx1,PETSC_COPY_VALUES,&is1);
581: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx2,PETSC_COPY_VALUES,&is2);
582: BVGetColumn(eps->V,0,&v);
583: VecScatterCreate(v,is1,vg,is2,&vec_sc);
584: BVRestoreColumn(eps->V,0,&v);
585: ISDestroy(&is1);
586: ISDestroy(&is2);
587: for (i=0;i<ctx->nconv_loc[si];i++) {
588: BVGetColumn(eps->V,++idx,&v);
589: if (ctx->subc->color==si) {
590: BVGetColumn(V_loc,i,&v_loc);
591: VecGetArray(v_loc,&array);
592: VecPlaceArray(vg,array);
593: }
594: VecScatterBegin(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
595: VecScatterEnd(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
596: if (ctx->subc->color==si) {
597: VecResetArray(vg);
598: VecRestoreArray(v_loc,&array);
599: BVRestoreColumn(V_loc,i,&v_loc);
600: }
601: BVRestoreColumn(eps->V,idx,&v);
602: }
603: VecScatterDestroy(&vec_sc);
604: }
605: PetscFree2(idx1,idx2);
606: VecDestroy(&vg);
607: return(0);
608: }
610: /*
611: EPSComputeVectors_Slice - Recover Eigenvectors from subcomunicators
612: */
613: PetscErrorCode EPSComputeVectors_Slice(EPS eps)
614: {
615: PetscErrorCode ierr;
616: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
619: if (ctx->global && ctx->npart>1) {
620: EPSComputeVectors(ctx->eps);
621: EPSSliceGatherEigenVectors(eps);
622: }
623: return(0);
624: }
626: #define SWAP(a,b,t) {t=a;a=b;b=t;}
628: static PetscErrorCode EPSSliceGetInertias(EPS eps,PetscInt *n,PetscReal **shifts,PetscInt **inertias)
629: {
630: PetscErrorCode ierr;
631: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
632: PetscInt i=0,j,tmpi;
633: PetscReal v,tmpr;
634: EPS_shift s;
637: if (!eps->state) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Must call EPSSetUp() first");
638: if (!ctx->sr) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Only available in interval computations, see EPSSetInterval()");
639: if (!ctx->sr->s0) { /* EPSSolve not called yet */
640: *n = 2;
641: } else {
642: *n = 1;
643: s = ctx->sr->s0;
644: while (s) {
645: (*n)++;
646: s = s->neighb[1];
647: }
648: }
649: PetscMalloc1(*n,shifts);
650: PetscMalloc1(*n,inertias);
651: if (!ctx->sr->s0) { /* EPSSolve not called yet */
652: (*shifts)[0] = ctx->sr->int0;
653: (*shifts)[1] = ctx->sr->int1;
654: (*inertias)[0] = ctx->sr->inertia0;
655: (*inertias)[1] = ctx->sr->inertia1;
656: } else {
657: s = ctx->sr->s0;
658: while (s) {
659: (*shifts)[i] = s->value;
660: (*inertias)[i++] = s->inertia;
661: s = s->neighb[1];
662: }
663: (*shifts)[i] = ctx->sr->int1;
664: (*inertias)[i] = ctx->sr->inertia1;
665: }
666: /* remove possible duplicate in last position */
667: if ((*shifts)[(*n)-1]==(*shifts)[(*n)-2]) (*n)--;
668: /* sort result */
669: for (i=0;i<*n;i++) {
670: v = (*shifts)[i];
671: for (j=i+1;j<*n;j++) {
672: if (v > (*shifts)[j]) {
673: SWAP((*shifts)[i],(*shifts)[j],tmpr);
674: SWAP((*inertias)[i],(*inertias)[j],tmpi);
675: v = (*shifts)[i];
676: }
677: }
678: }
679: return(0);
680: }
682: static PetscErrorCode EPSSliceGatherSolution(EPS eps)
683: {
684: PetscErrorCode ierr;
685: PetscMPIInt rank,nproc;
686: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
687: PetscInt i,idx,j;
688: PetscInt *perm_loc,off=0,*inertias_loc,ns;
689: PetscScalar *eigr_loc;
690: EPS_SR sr_loc;
691: PetscReal *shifts_loc;
692: PetscMPIInt *disp,*ns_loc,aux;
695: eps->nconv = 0;
696: for (i=0;i<ctx->npart;i++) eps->nconv += ctx->nconv_loc[i];
697: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
699: /* Gather the shifts used and the inertias computed */
700: EPSSliceGetInertias(ctx->eps,&ns,&shifts_loc,&inertias_loc);
701: if (ctx->sr->dir>0 && shifts_loc[ns-1]==sr_loc->int1 && ctx->subc->color<ctx->npart-1) ns--;
702: if (ctx->sr->dir<0 && shifts_loc[ns-1]==sr_loc->int0 && ctx->subc->color>0) {
703: ns--;
704: for (i=0;i<ns;i++) {
705: inertias_loc[i] = inertias_loc[i+1];
706: shifts_loc[i] = shifts_loc[i+1];
707: }
708: }
709: PetscMalloc1(ctx->npart,&ns_loc);
710: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
711: PetscMPIIntCast(ns,&aux);
712: if (!rank) { MPI_Allgather(&aux,1,MPI_INT,ns_loc,1,MPI_INT,ctx->commrank); }
713: PetscMPIIntCast(ctx->npart,&aux);
714: MPI_Bcast(ns_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));
715: ctx->nshifts = 0;
716: for (i=0;i<ctx->npart;i++) ctx->nshifts += ns_loc[i];
717: PetscFree(ctx->inertias);
718: PetscFree(ctx->shifts);
719: PetscMalloc1(ctx->nshifts,&ctx->inertias);
720: PetscMalloc1(ctx->nshifts,&ctx->shifts);
722: /* Gather eigenvalues (same ranks have fully set of eigenvalues)*/
723: eigr_loc = sr_loc->eigr;
724: perm_loc = sr_loc->perm;
725: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
726: PetscMalloc1(ctx->npart,&disp);
727: disp[0] = 0;
728: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ctx->nconv_loc[i-1];
729: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
730: PetscMPIIntCast(sr_loc->numEigs,&aux);
731: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
732: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
733: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
734: PetscMPIIntCast(ns,&aux);
735: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
736: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
737: MPI_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
738: } else { /* subcommunicators with different size */
739: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
740: if (!rank) {
741: PetscMPIIntCast(sr_loc->numEigs,&aux);
742: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
743: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
744: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
745: PetscMPIIntCast(ns,&aux);
746: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
747: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
748: MPI_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
749: }
750: PetscMPIIntCast(eps->nconv,&aux);
751: MPI_Bcast(eps->eigr,aux,MPIU_SCALAR,0,PetscSubcommChild(ctx->subc));
752: MPI_Bcast(eps->perm,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));
753: MPI_Bcast(ctx->shifts,ctx->nshifts,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
754: PetscMPIIntCast(ctx->nshifts,&aux);
755: MPI_Bcast(ctx->inertias,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));
756: MPI_Bcast(&eps->its,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
757: }
758: /* Update global array eps->perm */
759: idx = ctx->nconv_loc[0];
760: for (i=1;i<ctx->npart;i++) {
761: off += ctx->nconv_loc[i-1];
762: for (j=0;j<ctx->nconv_loc[i];j++) eps->perm[idx++] += off;
763: }
765: /* Gather parallel eigenvectors */
766: PetscFree(ns_loc);
767: PetscFree(disp);
768: PetscFree(shifts_loc);
769: PetscFree(inertias_loc);
770: return(0);
771: }
773: /*
774: Fills the fields of a shift structure
775: */
776: static PetscErrorCode EPSCreateShift(EPS eps,PetscReal val,EPS_shift neighb0,EPS_shift neighb1)
777: {
778: PetscErrorCode ierr;
779: EPS_shift s,*pending2;
780: PetscInt i;
781: EPS_SR sr;
782: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
785: sr = ctx->sr;
786: PetscNewLog(eps,&s);
787: s->value = val;
788: s->neighb[0] = neighb0;
789: if (neighb0) neighb0->neighb[1] = s;
790: s->neighb[1] = neighb1;
791: if (neighb1) neighb1->neighb[0] = s;
792: s->comp[0] = PETSC_FALSE;
793: s->comp[1] = PETSC_FALSE;
794: s->index = -1;
795: s->neigs = 0;
796: s->nconv[0] = s->nconv[1] = 0;
797: s->nsch[0] = s->nsch[1]=0;
798: /* Inserts in the stack of pending shifts */
799: /* If needed, the array is resized */
800: if (sr->nPend >= sr->maxPend) {
801: sr->maxPend *= 2;
802: PetscMalloc1(sr->maxPend,&pending2);
803: PetscLogObjectMemory((PetscObject)eps,sizeof(EPS_shift));
804: for (i=0;i<sr->nPend;i++) pending2[i] = sr->pending[i];
805: PetscFree(sr->pending);
806: sr->pending = pending2;
807: }
808: sr->pending[sr->nPend++]=s;
809: return(0);
810: }
812: /* Prepare for Rational Krylov update */
813: static PetscErrorCode EPSPrepareRational(EPS eps)
814: {
815: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
816: PetscErrorCode ierr;
817: PetscInt dir,i,k,ld,nv;
818: PetscScalar *A;
819: EPS_SR sr = ctx->sr;
820: Vec v;
823: DSGetLeadingDimension(eps->ds,&ld);
824: dir = (sr->sPres->neighb[0] == sr->sPrev)?1:-1;
825: dir*=sr->dir;
826: k = 0;
827: for (i=0;i<sr->nS;i++) {
828: if (dir*PetscRealPart(sr->S[i])>0.0) {
829: sr->S[k] = sr->S[i];
830: sr->S[sr->nS+k] = sr->S[sr->nS+i];
831: BVGetColumn(sr->Vnext,k,&v);
832: BVCopyVec(eps->V,eps->nconv+i,v);
833: BVRestoreColumn(sr->Vnext,k,&v);
834: k++;
835: if (k>=sr->nS/2)break;
836: }
837: }
838: /* Copy to DS */
839: DSGetArray(eps->ds,DS_MAT_A,&A);
840: PetscMemzero(A,ld*ld*sizeof(PetscScalar));
841: for (i=0;i<k;i++) {
842: A[i*(1+ld)] = sr->S[i];
843: A[k+i*ld] = sr->S[sr->nS+i];
844: }
845: sr->nS = k;
846: DSRestoreArray(eps->ds,DS_MAT_A,&A);
847: DSGetDimensions(eps->ds,&nv,NULL,NULL,NULL,NULL);
848: DSSetDimensions(eps->ds,nv,0,0,k);
849: /* Append u to V */
850: BVGetColumn(sr->Vnext,sr->nS,&v);
851: BVCopyVec(eps->V,sr->nv,v);
852: BVRestoreColumn(sr->Vnext,sr->nS,&v);
853: return(0);
854: }
856: /* Provides next shift to be computed */
857: static PetscErrorCode EPSExtractShift(EPS eps)
858: {
859: PetscErrorCode ierr;
860: PetscInt iner,zeros=0;
861: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
862: EPS_SR sr;
863: PetscReal newShift;
864: EPS_shift sPres;
867: sr = ctx->sr;
868: if (sr->nPend > 0) {
869: sr->sPrev = sr->sPres;
870: sr->sPres = sr->pending[--sr->nPend];
871: sPres = sr->sPres;
872: EPSSliceGetInertia(eps,sPres->value,&iner,ctx->detect?&zeros:NULL);
873: if (zeros) {
874: newShift = sPres->value*(1.0+SLICE_PTOL);
875: if (sr->dir*(sPres->neighb[0] && newShift-sPres->neighb[0]->value) < 0) newShift = (sPres->value+sPres->neighb[0]->value)/2;
876: else if (sPres->neighb[1] && sr->dir*(sPres->neighb[1]->value-newShift) < 0) newShift = (sPres->value+sPres->neighb[1]->value)/2;
877: EPSSliceGetInertia(eps,newShift,&iner,&zeros);
878: if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",newShift);
879: sPres->value = newShift;
880: }
881: sr->sPres->inertia = iner;
882: eps->target = sr->sPres->value;
883: eps->reason = EPS_CONVERGED_ITERATING;
884: eps->its = 0;
885: } else sr->sPres = NULL;
886: return(0);
887: }
889: /*
890: Symmetric KrylovSchur adapted to spectrum slicing:
891: Allows searching an specific amount of eigenvalues in the subintervals left and right.
892: Returns whether the search has succeeded
893: */
894: static PetscErrorCode EPSKrylovSchur_Slice(EPS eps)
895: {
896: PetscErrorCode ierr;
897: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
898: PetscInt i,k,l,ld,nv,*iwork,j;
899: Mat U;
900: PetscScalar *Q,*A;
901: PetscReal *a,*b,beta;
902: PetscBool breakdown;
903: PetscInt count0,count1;
904: PetscReal lambda;
905: EPS_shift sPres;
906: PetscBool complIterating;
907: PetscBool sch0,sch1;
908: PetscInt iterCompl=0,n0,n1;
909: EPS_SR sr = ctx->sr;
912: /* Spectrum slicing data */
913: sPres = sr->sPres;
914: complIterating =PETSC_FALSE;
915: sch1 = sch0 = PETSC_TRUE;
916: DSGetLeadingDimension(eps->ds,&ld);
917: PetscMalloc1(2*ld,&iwork);
918: count0=0;count1=0; /* Found on both sides */
919: if (sr->nS > 0 && (sPres->neighb[0] == sr->sPrev || sPres->neighb[1] == sr->sPrev)) {
920: /* Rational Krylov */
921: DSTranslateRKS(eps->ds,sr->sPrev->value-sPres->value);
922: DSGetDimensions(eps->ds,NULL,NULL,NULL,&l,NULL);
923: DSSetDimensions(eps->ds,l+1,0,0,0);
924: BVSetActiveColumns(eps->V,0,l+1);
925: DSGetMat(eps->ds,DS_MAT_Q,&U);
926: BVMultInPlace(eps->V,U,0,l+1);
927: MatDestroy(&U);
928: } else {
929: /* Get the starting Lanczos vector */
930: EPSGetStartVector(eps,0,NULL);
931: l = 0;
932: }
933: /* Restart loop */
934: while (eps->reason == EPS_CONVERGED_ITERATING) {
935: eps->its++; sr->itsKs++;
936: /* Compute an nv-step Lanczos factorization */
937: nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
938: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
939: b = a + ld;
940: EPSFullLanczos(eps,a,b,eps->nconv+l,&nv,&breakdown);
941: sr->nv = nv;
942: beta = b[nv-1];
943: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
944: DSSetDimensions(eps->ds,nv,0,eps->nconv,eps->nconv+l);
945: if (l==0) {
946: DSSetState(eps->ds,DS_STATE_INTERMEDIATE);
947: } else {
948: DSSetState(eps->ds,DS_STATE_RAW);
949: }
950: BVSetActiveColumns(eps->V,eps->nconv,nv);
952: /* Solve projected problem and compute residual norm estimates */
953: if (eps->its == 1 && l > 0) {/* After rational update */
954: DSGetArray(eps->ds,DS_MAT_A,&A);
955: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
956: b = a + ld;
957: k = eps->nconv+l;
958: A[k*ld+k-1] = A[(k-1)*ld+k];
959: A[k*ld+k] = a[k];
960: for (j=k+1; j< nv; j++) {
961: A[j*ld+j] = a[j];
962: A[j*ld+j-1] = b[j-1] ;
963: A[(j-1)*ld+j] = b[j-1];
964: }
965: DSRestoreArray(eps->ds,DS_MAT_A,&A);
966: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
967: DSSolve(eps->ds,eps->eigr,NULL);
968: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
969: DSSetCompact(eps->ds,PETSC_TRUE);
970: } else { /* Restart */
971: DSSolve(eps->ds,eps->eigr,NULL);
972: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
973: }
974: DSSynchronize(eps->ds,eps->eigr,NULL);
976: /* Residual */
977: EPSKrylovConvergence(eps,PETSC_TRUE,eps->nconv,nv-eps->nconv,beta,1.0,&k);
978: /* Checking values obtained for completing */
979: for (i=0;i<k;i++) {
980: sr->back[i]=eps->eigr[i];
981: }
982: STBackTransform(eps->st,k,sr->back,eps->eigi);
983: count0=count1=0;
984: for (i=0;i<k;i++) {
985: lambda = PetscRealPart(sr->back[i]);
986: if (((sr->dir)*(sPres->value - lambda) > 0) && ((sr->dir)*(lambda - sPres->ext[0]) > 0)) count0++;
987: if (((sr->dir)*(lambda - sPres->value) > 0) && ((sr->dir)*(sPres->ext[1] - lambda) > 0)) count1++;
988: }
989: if (k>eps->nev && eps->ncv-k<5) eps->reason = EPS_CONVERGED_TOL;
990: else {
991: /* Checks completion */
992: if ((!sch0||count0 >= sPres->nsch[0]) && (!sch1 ||count1 >= sPres->nsch[1])) {
993: eps->reason = EPS_CONVERGED_TOL;
994: } else {
995: if (!complIterating && eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
996: if (complIterating) {
997: if (--iterCompl <= 0) eps->reason = EPS_DIVERGED_ITS;
998: } else if (k >= eps->nev) {
999: n0 = sPres->nsch[0]-count0;
1000: n1 = sPres->nsch[1]-count1;
1001: if (sr->iterCompl>0 && ((n0>0 && n0<= sr->nMAXCompl)||(n1>0&&n1<=sr->nMAXCompl))) {
1002: /* Iterating for completion*/
1003: complIterating = PETSC_TRUE;
1004: if (n0 >sr->nMAXCompl)sch0 = PETSC_FALSE;
1005: if (n1 >sr->nMAXCompl)sch1 = PETSC_FALSE;
1006: iterCompl = sr->iterCompl;
1007: } else eps->reason = EPS_CONVERGED_TOL;
1008: }
1009: }
1010: }
1011: /* Update l */
1012: if (eps->reason == EPS_CONVERGED_ITERATING) l = PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
1013: else l = nv-k;
1014: if (breakdown) l=0;
1015: if (!ctx->lock && l>0 && eps->reason == EPS_CONVERGED_ITERATING) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */
1017: if (eps->reason == EPS_CONVERGED_ITERATING) {
1018: if (breakdown) {
1019: /* Start a new Lanczos factorization */
1020: PetscInfo2(eps,"Breakdown in Krylov-Schur method (it=%D norm=%g)\n",eps->its,(double)beta);
1021: EPSGetStartVector(eps,k,&breakdown);
1022: if (breakdown) {
1023: eps->reason = EPS_DIVERGED_BREAKDOWN;
1024: PetscInfo(eps,"Unable to generate more start vectors\n");
1025: }
1026: } else {
1027: /* Prepare the Rayleigh quotient for restart */
1028: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
1029: DSGetArray(eps->ds,DS_MAT_Q,&Q);
1030: b = a + ld;
1031: for (i=k;i<k+l;i++) {
1032: a[i] = PetscRealPart(eps->eigr[i]);
1033: b[i] = PetscRealPart(Q[nv-1+i*ld]*beta);
1034: }
1035: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
1036: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1037: }
1038: }
1039: /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
1040: DSGetMat(eps->ds,DS_MAT_Q,&U);
1041: BVMultInPlace(eps->V,U,eps->nconv,k+l);
1042: MatDestroy(&U);
1044: /* Normalize u and append it to V */
1045: if (eps->reason == EPS_CONVERGED_ITERATING && !breakdown) {
1046: BVCopyColumn(eps->V,nv,k+l);
1047: }
1048: eps->nconv = k;
1049: if (eps->reason != EPS_CONVERGED_ITERATING) {
1050: /* Store approximated values for next shift */
1051: DSGetArray(eps->ds,DS_MAT_Q,&Q);
1052: sr->nS = l;
1053: for (i=0;i<l;i++) {
1054: sr->S[i] = eps->eigr[i+k];/* Diagonal elements */
1055: sr->S[i+l] = Q[nv-1+(i+k)*ld]*beta; /* Out of diagonal elements */
1056: }
1057: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1058: }
1059: }
1060: /* Check for completion */
1061: for (i=0;i< eps->nconv; i++) {
1062: if ((sr->dir)*PetscRealPart(eps->eigr[i])>0) sPres->nconv[1]++;
1063: else sPres->nconv[0]++;
1064: }
1065: sPres->comp[0] = PetscNot(count0 < sPres->nsch[0]);
1066: sPres->comp[1] = PetscNot(count1 < sPres->nsch[1]);
1067: if (count0 > sPres->nsch[0] || count1 > sPres->nsch[1]) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1068: PetscFree(iwork);
1069: return(0);
1070: }
1072: /*
1073: Obtains value of subsequent shift
1074: */
1075: static PetscErrorCode EPSGetNewShiftValue(EPS eps,PetscInt side,PetscReal *newS)
1076: {
1077: PetscReal lambda,d_prev;
1078: PetscInt i,idxP;
1079: EPS_SR sr;
1080: EPS_shift sPres,s;
1081: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1084: sr = ctx->sr;
1085: sPres = sr->sPres;
1086: if (sPres->neighb[side]) {
1087: /* Completing a previous interval */
1088: if (!sPres->neighb[side]->neighb[side] && sPres->neighb[side]->nconv[side]==0) { /* One of the ends might be too far from eigenvalues */
1089: if (side) *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[sr->indexEig-1]]))/2;
1090: else *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[0]]))/2;
1091: } else *newS=(sPres->value + sPres->neighb[side]->value)/2;
1092: } else { /* (Only for side=1). Creating a new interval. */
1093: if (sPres->neigs==0) {/* No value has been accepted*/
1094: if (sPres->neighb[0]) {
1095: /* Multiplying by 10 the previous distance */
1096: *newS = sPres->value + 10*(sr->dir)*PetscAbsReal(sPres->value - sPres->neighb[0]->value);
1097: sr->nleap++;
1098: /* Stops when the interval is open and no values are found in the last 5 shifts (there might be infinite eigenvalues) */
1099: if (!sr->hasEnd && sr->nleap > 5) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unable to compute the wanted eigenvalues with open interval");
1100: } else { /* First shift */
1101: if (eps->nconv != 0) {
1102: /* Unaccepted values give information for next shift */
1103: idxP=0;/* Number of values left from shift */
1104: for (i=0;i<eps->nconv;i++) {
1105: lambda = PetscRealPart(eps->eigr[i]);
1106: if ((sr->dir)*(lambda - sPres->value) <0) idxP++;
1107: else break;
1108: }
1109: /* Avoiding subtraction of eigenvalues (might be the same).*/
1110: if (idxP>0) {
1111: d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[0]))/(idxP+0.3);
1112: } else {
1113: d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[eps->nconv-1]))/(eps->nconv+0.3);
1114: }
1115: *newS = sPres->value + ((sr->dir)*d_prev*eps->nev)/2;
1116: } else { /* No values found, no information for next shift */
1117: SETERRQ(PetscObjectComm((PetscObject)eps),1,"First shift renders no information");
1118: }
1119: }
1120: } else { /* Accepted values found */
1121: sr->nleap = 0;
1122: /* Average distance of values in previous subinterval */
1123: s = sPres->neighb[0];
1124: while (s && PetscAbs(s->inertia - sPres->inertia)==0) {
1125: s = s->neighb[0];/* Looking for previous shifts with eigenvalues within */
1126: }
1127: if (s) {
1128: d_prev = PetscAbsReal((sPres->value - s->value)/(sPres->inertia - s->inertia));
1129: } else { /* First shift. Average distance obtained with values in this shift */
1130: /* first shift might be too far from first wanted eigenvalue (no values found outside the interval)*/
1131: if ((sr->dir)*(PetscRealPart(sr->eigr[0])-sPres->value)>0 && PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0]))/PetscRealPart(sr->eigr[0])) > PetscSqrtReal(eps->tol)) {
1132: d_prev = PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0])))/(sPres->neigs+0.3);
1133: } else {
1134: d_prev = PetscAbsReal(PetscRealPart(sr->eigr[sr->indexEig-1]) - sPres->value)/(sPres->neigs+0.3);
1135: }
1136: }
1137: /* Average distance is used for next shift by adding it to value on the right or to shift */
1138: if ((sr->dir)*(PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1]) - sPres->value)>0) {
1139: *newS = PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1])+ ((sr->dir)*d_prev*(eps->nev))/2;
1140: } else { /* Last accepted value is on the left of shift. Adding to shift */
1141: *newS = sPres->value + ((sr->dir)*d_prev*(eps->nev))/2;
1142: }
1143: }
1144: /* End of interval can not be surpassed */
1145: if ((sr->dir)*(sr->int1 - *newS) < 0) *newS = sr->int1;
1146: }/* of neighb[side]==null */
1147: return(0);
1148: }
1150: /*
1151: Function for sorting an array of real values
1152: */
1153: static PetscErrorCode sortRealEigenvalues(PetscScalar *r,PetscInt *perm,PetscInt nr,PetscBool prev,PetscInt dir)
1154: {
1155: PetscReal re;
1156: PetscInt i,j,tmp;
1159: if (!prev) for (i=0;i<nr;i++) perm[i] = i;
1160: /* Insertion sort */
1161: for (i=1;i<nr;i++) {
1162: re = PetscRealPart(r[perm[i]]);
1163: j = i-1;
1164: while (j>=0 && dir*(re - PetscRealPart(r[perm[j]])) <= 0) {
1165: tmp = perm[j]; perm[j] = perm[j+1]; perm[j+1] = tmp; j--;
1166: }
1167: }
1168: return(0);
1169: }
1171: /* Stores the pairs obtained since the last shift in the global arrays */
1172: static PetscErrorCode EPSStoreEigenpairs(EPS eps)
1173: {
1174: PetscErrorCode ierr;
1175: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1176: PetscReal lambda,err,norm;
1177: PetscInt i,count;
1178: PetscBool iscayley;
1179: EPS_SR sr = ctx->sr;
1180: EPS_shift sPres;
1181: Vec v,w;
1184: sPres = sr->sPres;
1185: sPres->index = sr->indexEig;
1186: count = sr->indexEig;
1187: /* Back-transform */
1188: STBackTransform(eps->st,eps->nconv,eps->eigr,eps->eigi);
1189: PetscObjectTypeCompare((PetscObject)eps->st,STCAYLEY,&iscayley);
1190: /* Sort eigenvalues */
1191: sortRealEigenvalues(eps->eigr,eps->perm,eps->nconv,PETSC_FALSE,sr->dir);
1192: /* Values stored in global array */
1193: for (i=0;i<eps->nconv;i++) {
1194: lambda = PetscRealPart(eps->eigr[eps->perm[i]]);
1195: err = eps->errest[eps->perm[i]];
1197: if ((sr->dir)*(lambda - sPres->ext[0]) > 0 && (sr->dir)*(sPres->ext[1] - lambda) > 0) {/* Valid value */
1198: if (count>=sr->numEigs) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unexpected error in Spectrum Slicing");
1199: sr->eigr[count] = lambda;
1200: sr->errest[count] = err;
1201: /* Explicit purification */
1202: BVGetColumn(eps->V,eps->perm[i],&w);
1203: if (eps->purify) {
1204: BVGetColumn(sr->V,count,&v);
1205: STApply(eps->st,w,v);
1206: BVRestoreColumn(sr->V,count,&v);
1207: } else {
1208: BVInsertVec(sr->V,count,w);
1209: }
1210: BVRestoreColumn(eps->V,eps->perm[i],&w);
1211: BVNormColumn(sr->V,count,NORM_2,&norm);
1212: BVScaleColumn(sr->V,count,1.0/norm);
1213: count++;
1214: }
1215: }
1216: sPres->neigs = count - sr->indexEig;
1217: sr->indexEig = count;
1218: /* Global ordering array updating */
1219: sortRealEigenvalues(sr->eigr,sr->perm,count,PETSC_TRUE,sr->dir);
1220: return(0);
1221: }
1223: static PetscErrorCode EPSLookForDeflation(EPS eps)
1224: {
1225: PetscErrorCode ierr;
1226: PetscReal val;
1227: PetscInt i,count0=0,count1=0;
1228: EPS_shift sPres;
1229: PetscInt ini,fin,k,idx0,idx1;
1230: EPS_SR sr;
1231: Vec v;
1232: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1235: sr = ctx->sr;
1236: sPres = sr->sPres;
1238: if (sPres->neighb[0]) ini = (sr->dir)*(sPres->neighb[0]->inertia - sr->inertia0);
1239: else ini = 0;
1240: fin = sr->indexEig;
1241: /* Selection of ends for searching new values */
1242: if (!sPres->neighb[0]) sPres->ext[0] = sr->int0;/* First shift */
1243: else sPres->ext[0] = sPres->neighb[0]->value;
1244: if (!sPres->neighb[1]) {
1245: if (sr->hasEnd) sPres->ext[1] = sr->int1;
1246: else sPres->ext[1] = (sr->dir > 0)?PETSC_MAX_REAL:PETSC_MIN_REAL;
1247: } else sPres->ext[1] = sPres->neighb[1]->value;
1248: /* Selection of values between right and left ends */
1249: for (i=ini;i<fin;i++) {
1250: val=PetscRealPart(sr->eigr[sr->perm[i]]);
1251: /* Values to the right of left shift */
1252: if ((sr->dir)*(val - sPres->ext[1]) < 0) {
1253: if ((sr->dir)*(val - sPres->value) < 0) count0++;
1254: else count1++;
1255: } else break;
1256: }
1257: /* The number of values on each side are found */
1258: if (sPres->neighb[0]) {
1259: sPres->nsch[0] = (sr->dir)*(sPres->inertia - sPres->neighb[0]->inertia)-count0;
1260: if (sPres->nsch[0]<0) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1261: } else sPres->nsch[0] = 0;
1263: if (sPres->neighb[1]) {
1264: sPres->nsch[1] = (sr->dir)*(sPres->neighb[1]->inertia - sPres->inertia) - count1;
1265: if (sPres->nsch[1]<0) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1266: } else sPres->nsch[1] = (sr->dir)*(sr->inertia1 - sPres->inertia);
1268: /* Completing vector of indexes for deflation */
1269: idx0 = ini;
1270: idx1 = ini+count0+count1;
1271: k=0;
1272: for (i=idx0;i<idx1;i++) sr->idxDef[k++]=sr->perm[i];
1273: BVDuplicateResize(eps->V,k+eps->ncv+1,&sr->Vnext);
1274: BVSetNumConstraints(sr->Vnext,k);
1275: for (i=0;i<k;i++) {
1276: BVGetColumn(sr->Vnext,-i-1,&v);
1277: BVCopyVec(sr->V,sr->idxDef[i],v);
1278: BVRestoreColumn(sr->Vnext,-i-1,&v);
1279: }
1281: /* For rational Krylov */
1282: if (sr->nS>0 && (sr->sPrev == sr->sPres->neighb[0] || sr->sPrev == sr->sPres->neighb[1])) {
1283: EPSPrepareRational(eps);
1284: }
1285: eps->nconv = 0;
1286: /* Get rid of temporary Vnext */
1287: BVDestroy(&eps->V);
1288: eps->V = sr->Vnext;
1289: sr->Vnext = NULL;
1290: return(0);
1291: }
1293: PetscErrorCode EPSSolve_KrylovSchur_Slice(EPS eps)
1294: {
1295: PetscErrorCode ierr;
1296: PetscInt i,lds,ti;
1297: PetscReal newS;
1298: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1299: EPS_SR sr=ctx->sr;
1300: Mat A,B=NULL;
1301: PetscObjectState Astate,Bstate=0;
1302: PetscObjectId Aid,Bid=0;
1305: PetscCitationsRegister(citation,&cited);
1306: if (ctx->global) {
1307: EPSSolve_KrylovSchur_Slice(ctx->eps);
1308: ctx->eps->state = EPS_STATE_SOLVED;
1309: eps->reason = EPS_CONVERGED_TOL;
1310: if (ctx->npart>1) {
1311: /* Gather solution from subsolvers */
1312: EPSSliceGatherSolution(eps);
1313: } else {
1314: eps->nconv = sr->numEigs;
1315: eps->its = ctx->eps->its;
1316: PetscFree(ctx->inertias);
1317: PetscFree(ctx->shifts);
1318: EPSSliceGetInertias(ctx->eps,&ctx->nshifts,&ctx->shifts,&ctx->inertias);
1319: }
1320: } else {
1321: if (ctx->npart==1) {
1322: sr->eigr = ctx->eps->eigr;
1323: sr->eigi = ctx->eps->eigi;
1324: sr->perm = ctx->eps->perm;
1325: sr->errest = ctx->eps->errest;
1326: sr->V = ctx->eps->V;
1327: }
1328: /* Check that the user did not modify subcomm matrices */
1329: EPSGetOperators(eps,&A,&B);
1330: PetscObjectStateGet((PetscObject)A,&Astate);
1331: PetscObjectGetId((PetscObject)A,&Aid);
1332: if (B) {
1333: PetscObjectStateGet((PetscObject)B,&Bstate);
1334: PetscObjectGetId((PetscObject)B,&Bid);
1335: }
1336: if (Astate!=ctx->Astate || (B && Bstate!=ctx->Bstate) || Aid!=ctx->Aid || (B && Bid!=ctx->Bid)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcomm matrices have been modified by user");
1337: /* Only with eigenvalues present in the interval ...*/
1338: if (sr->numEigs==0) {
1339: eps->reason = EPS_CONVERGED_TOL;
1340: return(0);
1341: }
1342: /* Array of pending shifts */
1343: sr->maxPend = 100; /* Initial size */
1344: sr->nPend = 0;
1345: PetscMalloc1(sr->maxPend,&sr->pending);
1346: PetscLogObjectMemory((PetscObject)eps,(sr->maxPend)*sizeof(EPS_shift));
1347: EPSCreateShift(eps,sr->int0,NULL,NULL);
1348: /* extract first shift */
1349: sr->sPrev = NULL;
1350: sr->sPres = sr->pending[--sr->nPend];
1351: sr->sPres->inertia = sr->inertia0;
1352: eps->target = sr->sPres->value;
1353: sr->s0 = sr->sPres;
1354: sr->indexEig = 0;
1355: /* Memory reservation for auxiliary variables */
1356: lds = PetscMin(eps->mpd,eps->ncv);
1357: PetscCalloc1(lds*lds,&sr->S);
1358: PetscMalloc1(eps->ncv,&sr->back);
1359: PetscLogObjectMemory((PetscObject)eps,(sr->numEigs+2*eps->ncv)*sizeof(PetscScalar));
1360: for (i=0;i<sr->numEigs;i++) {
1361: sr->eigr[i] = 0.0;
1362: sr->eigi[i] = 0.0;
1363: sr->errest[i] = 0.0;
1364: sr->perm[i] = i;
1365: }
1366: /* Vectors for deflation */
1367: PetscMalloc1(sr->numEigs,&sr->idxDef);
1368: PetscLogObjectMemory((PetscObject)eps,sr->numEigs*sizeof(PetscInt));
1369: sr->indexEig = 0;
1370: /* Main loop */
1371: while (sr->sPres) {
1372: /* Search for deflation */
1373: EPSLookForDeflation(eps);
1374: /* KrylovSchur */
1375: EPSKrylovSchur_Slice(eps);
1377: EPSStoreEigenpairs(eps);
1378: /* Select new shift */
1379: if (!sr->sPres->comp[1]) {
1380: EPSGetNewShiftValue(eps,1,&newS);
1381: EPSCreateShift(eps,newS,sr->sPres,sr->sPres->neighb[1]);
1382: }
1383: if (!sr->sPres->comp[0]) {
1384: /* Completing earlier interval */
1385: EPSGetNewShiftValue(eps,0,&newS);
1386: EPSCreateShift(eps,newS,sr->sPres->neighb[0],sr->sPres);
1387: }
1388: /* Preparing for a new search of values */
1389: EPSExtractShift(eps);
1390: }
1392: /* Updating eps values prior to exit */
1393: PetscFree(sr->S);
1394: PetscFree(sr->idxDef);
1395: PetscFree(sr->pending);
1396: PetscFree(sr->back);
1397: BVDuplicateResize(eps->V,eps->ncv+1,&sr->Vnext);
1398: BVSetNumConstraints(sr->Vnext,0);
1399: BVDestroy(&eps->V);
1400: eps->V = sr->Vnext;
1401: eps->nconv = sr->indexEig;
1402: eps->reason = EPS_CONVERGED_TOL;
1403: eps->its = sr->itsKs;
1404: eps->nds = 0;
1405: if (sr->dir<0) {
1406: for (i=0;i<eps->nconv/2;i++) {
1407: ti = sr->perm[i]; sr->perm[i] = sr->perm[eps->nconv-1-i]; sr->perm[eps->nconv-1-i] = ti;
1408: }
1409: }
1410: }
1411: return(0);
1412: }