Actual source code: ciss.c

slepc-3.8.0 2017-10-20
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2017, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    SLEPc eigensolver: "ciss"

 13:    Method: Contour Integral Spectral Slicing

 15:    Algorithm:

 17:        Contour integral based on Sakurai-Sugiura method to construct a
 18:        subspace, with various eigenpair extractions (Rayleigh-Ritz,
 19:        explicit moment).

 21:    Based on code contributed by Y. Maeda, T. Sakurai.

 23:    References:

 25:        [1] T. Sakurai and H. Sugiura, "A projection method for generalized
 26:            eigenvalue problems", J. Comput. Appl. Math. 159:119-128, 2003.

 28:        [2] T. Sakurai and H. Tadano, "CIRR: a Rayleigh-Ritz type method with
 29:            contour integral for generalized eigenvalue problems", Hokkaido
 30:            Math. J. 36:745-757, 2007.
 31: */

 33: #include <slepc/private/epsimpl.h>                /*I "slepceps.h" I*/
 34: #include <slepcblaslapack.h>

 36: typedef struct {
 37:   /* parameters */
 38:   PetscInt          N;          /* number of integration points (32) */
 39:   PetscInt          L;          /* block size (16) */
 40:   PetscInt          M;          /* moment degree (N/4 = 4) */
 41:   PetscReal         delta;      /* threshold of singular value (1e-12) */
 42:   PetscInt          L_max;      /* maximum number of columns of the source matrix V */
 43:   PetscReal         spurious_threshold; /* discard spurious eigenpairs */
 44:   PetscBool         isreal;     /* A and B are real */
 45:   PetscInt          refine_inner;
 46:   PetscInt          refine_blocksize;
 47:   /* private data */
 48:   PetscReal         *sigma;     /* threshold for numerical rank */
 49:   PetscInt          num_subcomm;
 50:   PetscInt          subcomm_id;
 51:   PetscInt          num_solve_point;
 52:   PetscScalar       *weight;
 53:   PetscScalar       *omega;
 54:   PetscScalar       *pp;
 55:   BV                V;
 56:   BV                S;
 57:   BV                pV;
 58:   BV                Y;
 59:   Vec               xsub;
 60:   Vec               xdup;
 61:   KSP               *ksp;
 62:   PetscBool         useconj;
 63:   PetscReal         est_eig;
 64:   VecScatter        scatterin;
 65:   Mat               pA,pB;
 66:   PetscSubcomm      subcomm;
 67:   PetscBool         usest;
 68:   PetscBool         usest_set;  /* whether the user set the usest flag or not */
 69:   EPSCISSQuadRule   quad;
 70:   EPSCISSExtraction extraction;
 71: } EPS_CISS;

 73: static PetscErrorCode SetSolverComm(EPS eps)
 74: {
 76:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
 77:   PetscInt       N = ctx->N;

 80:   if (ctx->useconj) N = N/2;
 81:   PetscSubcommDestroy(&ctx->subcomm);
 82:   PetscSubcommCreate(PetscObjectComm((PetscObject)eps),&ctx->subcomm);
 83:   PetscSubcommSetNumber(ctx->subcomm,ctx->num_subcomm);
 84:   PetscSubcommSetType(ctx->subcomm,PETSC_SUBCOMM_INTERLACED);
 85:   PetscLogObjectMemory((PetscObject)eps,sizeof(PetscSubcomm));
 86:   PetscSubcommSetFromOptions(ctx->subcomm);
 87:   ctx->subcomm_id = ctx->subcomm->color;
 88:   ctx->num_solve_point = N / ctx->num_subcomm;
 89:   if ((N%ctx->num_subcomm) > ctx->subcomm_id) ctx->num_solve_point+=1;
 90:   return(0);
 91: }

 93: static PetscErrorCode CISSRedundantMat(EPS eps)
 94: {
 96:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
 97:   Mat            A,B;
 98:   PetscInt       nmat;

101:   STGetNumMatrices(eps->st,&nmat);
102:   if (ctx->subcomm->n != 1) {
103:     STGetMatrix(eps->st,0,&A);
104:     MatDestroy(&ctx->pA);
105:     MatCreateRedundantMatrix(A,ctx->subcomm->n,PetscSubcommChild(ctx->subcomm),MAT_INITIAL_MATRIX,&ctx->pA);
106:     if (nmat>1) {
107:       STGetMatrix(eps->st,1,&B);
108:       MatDestroy(&ctx->pB);
109:       MatCreateRedundantMatrix(B,ctx->subcomm->n,PetscSubcommChild(ctx->subcomm),MAT_INITIAL_MATRIX,&ctx->pB);
110:     } else ctx->pB = NULL;
111:   } else {
112:     ctx->pA = NULL;
113:     ctx->pB = NULL;
114:   }
115:   return(0);
116: }

118: static PetscErrorCode CISSScatterVec(EPS eps)
119: {
121:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
122:   IS             is1,is2;
123:   Vec            v0;
124:   PetscInt       i,j,k,mstart,mend,mlocal;
125:   PetscInt       *idx1,*idx2,mloc_sub;

128:   VecDestroy(&ctx->xsub);
129:   MatCreateVecs(ctx->pA,&ctx->xsub,NULL);

131:   VecDestroy(&ctx->xdup);
132:   MatGetLocalSize(ctx->pA,&mloc_sub,NULL);
133:   VecCreateMPI(PetscSubcommContiguousParent(ctx->subcomm),mloc_sub,PETSC_DECIDE,&ctx->xdup);

135:   VecScatterDestroy(&ctx->scatterin);
136:   BVGetColumn(ctx->V,0,&v0);
137:   VecGetOwnershipRange(v0,&mstart,&mend);
138:   mlocal = mend - mstart;
139:   PetscMalloc2(ctx->subcomm->n*mlocal,&idx1,ctx->subcomm->n*mlocal,&idx2);
140:   j = 0;
141:   for (k=0;k<ctx->subcomm->n;k++) {
142:     for (i=mstart;i<mend;i++) {
143:       idx1[j]   = i;
144:       idx2[j++] = i + eps->n*k;
145:     }
146:   }
147:   ISCreateGeneral(PetscObjectComm((PetscObject)eps),ctx->subcomm->n*mlocal,idx1,PETSC_COPY_VALUES,&is1);
148:   ISCreateGeneral(PetscObjectComm((PetscObject)eps),ctx->subcomm->n*mlocal,idx2,PETSC_COPY_VALUES,&is2);
149:   VecScatterCreate(v0,is1,ctx->xdup,is2,&ctx->scatterin);
150:   ISDestroy(&is1);
151:   ISDestroy(&is2);
152:   PetscFree2(idx1,idx2);
153:   BVRestoreColumn(ctx->V,0,&v0);
154:   return(0);
155: }

157: static PetscErrorCode SetPathParameter(EPS eps)
158: {
160:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
161:   PetscInt       i,j;
162:   PetscScalar    center=0.0,tmp,tmp2,*omegai;
163:   PetscReal      theta,radius=1.0,vscale,a,b,c,d,max_w=0.0,rgscale;
164: #if defined(PETSC_USE_COMPLEX)
165:   PetscReal      start_ang,end_ang;
166: #endif
167:   PetscBool      isring=PETSC_FALSE,isellipse=PETSC_FALSE,isinterval=PETSC_FALSE;

170:   PetscObjectTypeCompare((PetscObject)eps->rg,RGELLIPSE,&isellipse);
171:   PetscObjectTypeCompare((PetscObject)eps->rg,RGRING,&isring);
172:   PetscObjectTypeCompare((PetscObject)eps->rg,RGINTERVAL,&isinterval);
173:   RGGetScale(eps->rg,&rgscale);
174:   PetscMalloc1(ctx->N+1l,&omegai);
175:   RGComputeContour(eps->rg,ctx->N,ctx->omega,omegai);
176:   if (isellipse) {
177:     RGEllipseGetParameters(eps->rg,&center,&radius,&vscale);
178:     for (i=0;i<ctx->N;i++) {
179: #if defined(PETSC_USE_COMPLEX)
180:       theta = 2.0*PETSC_PI*(i+0.5)/ctx->N;
181:       ctx->pp[i] = PetscCosReal(theta)+vscale*PetscSinReal(theta)*PETSC_i;
182:       ctx->weight[i] = rgscale*radius*(vscale*PetscCosReal(theta)+PetscSinReal(theta)*PETSC_i)/(PetscReal)ctx->N;
183: #else
184:       theta = (PETSC_PI/ctx->N)*(i+0.5);
185:       ctx->pp[i] = PetscCosReal(theta);
186:       ctx->weight[i] = PetscCosReal((ctx->N-1)*theta)/ctx->N;
187:       ctx->omega[i] = rgscale*(center + radius*ctx->pp[i]);
188: #endif
189:     }
190:   } else if (ctx->quad == EPS_CISS_QUADRULE_CHEBYSHEV) {
191:     for (i=0;i<ctx->N;i++) {
192:       theta = (PETSC_PI/ctx->N)*(i+0.5);
193:       ctx->pp[i] = PetscCosReal(theta);
194:       ctx->weight[i] = PetscCosReal((ctx->N-1)*theta)/ctx->N;
195:     }
196:     if (isinterval) {
197:       RGIntervalGetEndpoints(eps->rg,&a,&b,&c,&d);
198:       if ((c!=d || c!=0.0) && (a!=b || a!=0.0)) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Endpoints of the imaginary axis or the real axis must be both zero");
199:       for (i=0;i<ctx->N;i++) {
200:         if (c==d) ctx->omega[i] = ((b-a)*(ctx->pp[i]+1.0)/2.0+a)*rgscale;
201:         if (a==b) {
202: #if defined(PETSC_USE_COMPLEX)
203:           ctx->omega[i] = ((d-c)*(ctx->pp[i]+1.0)/2.0+c)*rgscale*PETSC_i;
204: #else
205:           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Integration points on a vertical line require complex arithmetic");
206: #endif
207:         }
208:       }
209:     }
210:     if (isring) {  /* only supported in complex scalars */
211: #if defined(PETSC_USE_COMPLEX)
212:       RGRingGetParameters(eps->rg,&center,&radius,&vscale,&start_ang,&end_ang,NULL);
213:       for (i=0;i<ctx->N;i++) {
214:         theta = (start_ang*2.0+(end_ang-start_ang)*(PetscRealPart(ctx->pp[i])+1.0))*PETSC_PI;
215:         ctx->omega[i] = rgscale*(center + radius*(PetscCosReal(theta)+PETSC_i*vscale*PetscSinReal(theta)));
216:       }
217: #endif
218:     }
219:   } else {
220:     if (isinterval) {
221:       RGIntervalGetEndpoints(eps->rg,&a,&b,&c,&d);
222:       center = rgscale*((b+a)/2.0+(d+c)/2.0*PETSC_PI);
223:       radius = PetscSqrtReal(PetscPowRealInt(rgscale*(b-a)/2.0,2)+PetscPowRealInt(rgscale*(d-c)/2.0,2));
224:     } else if (isring) {
225:       RGRingGetParameters(eps->rg,&center,&radius,NULL,NULL,NULL,NULL);
226:       center *= rgscale;
227:       radius *= rgscale;
228:     }
229:     for (i=0;i<ctx->N;i++) {
230:       ctx->pp[i] = (ctx->omega[i]-center)/radius;
231:       tmp = 1; tmp2 = 1;
232:       for (j=0;j<ctx->N;j++) {
233:         tmp *= ctx->omega[j];
234:         if (i != j) tmp2 *= ctx->omega[j]-ctx->omega[i];
235:       }
236:       ctx->weight[i] = tmp/tmp2;
237:       max_w = PetscMax(PetscAbsScalar(ctx->weight[i]),max_w);
238:     }
239:     for (i=0;i<ctx->N;i++) ctx->weight[i] /= (PetscScalar)max_w;
240:   }
241:   PetscFree(omegai);
242:   return(0);
243: }

245: static PetscErrorCode CISSVecSetRandom(BV V,PetscInt i0,PetscInt i1)
246: {
248:   PetscInt       i,j,nlocal;
249:   PetscScalar    *vdata;
250:   Vec            x;

253:   BVGetSizes(V,&nlocal,NULL,NULL);
254:   for (i=i0;i<i1;i++) {
255:     BVSetRandomColumn(V,i);
256:     BVGetColumn(V,i,&x);
257:     VecGetArray(x,&vdata);
258:     for (j=0;j<nlocal;j++) {
259:       vdata[j] = PetscRealPart(vdata[j]);
260:       if (PetscRealPart(vdata[j]) < 0.5) vdata[j] = -1.0;
261:       else vdata[j] = 1.0;
262:     }
263:     VecRestoreArray(x,&vdata);
264:     BVRestoreColumn(V,i,&x);
265:   }
266:   return(0);
267: }

269: static PetscErrorCode VecScatterVecs(EPS eps,BV Vin,PetscInt n)
270: {
271:   PetscErrorCode    ierr;
272:   EPS_CISS          *ctx = (EPS_CISS*)eps->data;
273:   PetscInt          i;
274:   Vec               vi,pvi;
275:   const PetscScalar *array;

278:   for (i=0;i<n;i++) {
279:     BVGetColumn(Vin,i,&vi);
280:     VecScatterBegin(ctx->scatterin,vi,ctx->xdup,INSERT_VALUES,SCATTER_FORWARD);
281:     VecScatterEnd(ctx->scatterin,vi,ctx->xdup,INSERT_VALUES,SCATTER_FORWARD);
282:     BVRestoreColumn(Vin,i,&vi);
283:     VecGetArrayRead(ctx->xdup,&array);
284:     VecPlaceArray(ctx->xsub,array);
285:     BVGetColumn(ctx->pV,i,&pvi);
286:     VecCopy(ctx->xsub,pvi);
287:     BVRestoreColumn(ctx->pV,i,&pvi);
288:     VecResetArray(ctx->xsub);
289:     VecRestoreArrayRead(ctx->xdup,&array);
290:   }
291:   return(0);
292: }

294: static PetscErrorCode SolveLinearSystem(EPS eps,Mat A,Mat B,BV V,PetscInt L_start,PetscInt L_end,PetscBool initksp)
295: {
297:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
298:   PetscInt       i,j,p_id;
299:   Mat            Fz,kspMat;
300:   PC             pc;
301:   Vec            Bvj,vj,yj;
302:   KSP            ksp;

305:   BVCreateVec(V,&Bvj);
306:   if (ctx->usest) {
307:     MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&Fz);
308:   }
309:   for (i=0;i<ctx->num_solve_point;i++) {
310:     p_id = i*ctx->subcomm->n + ctx->subcomm_id;
311:     if (!ctx->usest && initksp == PETSC_TRUE) {
312:       MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&kspMat);
313:       MatCopy(A,kspMat,DIFFERENT_NONZERO_PATTERN);
314:       if (B) {
315:         MatAXPY(kspMat,-ctx->omega[p_id],B,DIFFERENT_NONZERO_PATTERN);
316:       } else {
317:         MatShift(kspMat,-ctx->omega[p_id]);
318:       }
319:       KSPSetOperators(ctx->ksp[i],kspMat,kspMat);
320:       MatDestroy(&kspMat);
321:       KSPSetType(ctx->ksp[i],KSPPREONLY);
322:       KSPGetPC(ctx->ksp[i],&pc);
323:       PCSetType(pc,PCLU);
324:       KSPSetFromOptions(ctx->ksp[i]);
325:     } else if (ctx->usest) {
326:       STSetShift(eps->st,ctx->omega[p_id]);
327:       STGetKSP(eps->st,&ksp);
328:     }
329:     for (j=L_start;j<L_end;j++) {
330:       BVGetColumn(V,j,&vj);
331:       BVGetColumn(ctx->Y,i*ctx->L_max+j,&yj);
332:       if (B) {
333:         MatMult(B,vj,Bvj);
334:         if (ctx->usest) {
335:           KSPSolve(ksp,Bvj,yj);
336:         } else {
337:           KSPSolve(ctx->ksp[i],Bvj,yj);
338:         }
339:       } else {
340:         if (ctx->usest) {
341:           KSPSolve(ksp,vj,yj);
342:         } else {
343:           KSPSolve(ctx->ksp[i],vj,yj);
344:         }
345:       }
346:       BVRestoreColumn(V,j,&vj);
347:       BVRestoreColumn(ctx->Y,i*ctx->L_max+j,&yj);
348:     }
349:     if (ctx->usest && i<ctx->num_solve_point-1) { KSPReset(ksp); }
350:   }
351:   if (ctx->usest) { MatDestroy(&Fz); }
352:   VecDestroy(&Bvj);
353:   return(0);
354: }

356: #if defined(PETSC_USE_COMPLEX)
357: static PetscErrorCode EstimateNumberEigs(EPS eps,PetscInt *L_add)
358: {
360:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
361:   PetscInt       i,j,p_id;
362:   PetscScalar    tmp,m = 1,sum = 0.0;
363:   PetscReal      eta;
364:   Vec            v,vtemp,vj,yj;

367:   BVGetColumn(ctx->Y,0,&yj);
368:   VecDuplicate(yj,&v);
369:   BVRestoreColumn(ctx->Y,0,&yj);
370:   BVCreateVec(ctx->V,&vtemp);
371:   for (j=0;j<ctx->L;j++) {
372:     VecSet(v,0);
373:     for (i=0;i<ctx->num_solve_point; i++) {
374:       p_id = i*ctx->subcomm->n + ctx->subcomm_id;
375:       BVSetActiveColumns(ctx->Y,i*ctx->L_max+j,i*ctx->L_max+j+1);
376:       BVMultVec(ctx->Y,ctx->weight[p_id],1,v,&m);
377:     }
378:     BVGetColumn(ctx->V,j,&vj);
379:     if (ctx->pA) {
380:       VecSet(vtemp,0);
381:       VecScatterBegin(ctx->scatterin,v,vtemp,ADD_VALUES,SCATTER_REVERSE);
382:       VecScatterEnd(ctx->scatterin,v,vtemp,ADD_VALUES,SCATTER_REVERSE);
383:       VecDot(vj,vtemp,&tmp);
384:     } else {
385:       VecDot(vj,v,&tmp);
386:     }
387:     BVRestoreColumn(ctx->V,j,&vj);
388:     if (ctx->useconj) sum += PetscRealPart(tmp)*2;
389:     else sum += tmp;
390:   }
391:   ctx->est_eig = PetscAbsScalar(sum/(PetscReal)ctx->L);
392:   eta = PetscPowReal(10.0,-PetscLog10Real(eps->tol)/ctx->N);
393:   PetscInfo1(eps,"Estimation_#Eig %f\n",(double)ctx->est_eig);
394:   *L_add = (PetscInt)PetscCeilReal((ctx->est_eig*eta)/ctx->M) - ctx->L;
395:   if (*L_add < 0) *L_add = 0;
396:   if (*L_add>ctx->L_max-ctx->L) {
397:     PetscInfo(eps,"Number of eigenvalues around the contour path may be too large\n");
398:     *L_add = ctx->L_max-ctx->L;
399:   }
400:   VecDestroy(&v);
401:   VecDestroy(&vtemp);
402:   return(0);
403: }
404: #endif

406: static PetscErrorCode CalcMu(EPS eps,PetscScalar *Mu)
407: {
409:   PetscMPIInt    sub_size,len;
410:   PetscInt       i,j,k,s;
411:   PetscScalar    *m,*temp,*temp2,*ppk,alp;
412:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
413:   Mat            M;

416:   MPI_Comm_size(PetscSubcommChild(ctx->subcomm),&sub_size);
417:   PetscMalloc3(ctx->num_solve_point*ctx->L*(ctx->L+1),&temp,2*ctx->M*ctx->L*ctx->L,&temp2,ctx->num_solve_point,&ppk);
418:   MatCreateSeqDense(PETSC_COMM_SELF,ctx->L,ctx->L_max*ctx->num_solve_point,NULL,&M);
419:   for (i=0;i<2*ctx->M*ctx->L*ctx->L;i++) temp2[i] = 0;
420:   BVSetActiveColumns(ctx->Y,0,ctx->L_max*ctx->num_solve_point);
421:   if (ctx->pA) {
422:     BVSetActiveColumns(ctx->pV,0,ctx->L);
423:     BVDot(ctx->Y,ctx->pV,M);
424:   } else {
425:     BVSetActiveColumns(ctx->V,0,ctx->L);
426:     BVDot(ctx->Y,ctx->V,M);
427:   }
428:   MatDenseGetArray(M,&m);
429:   for (i=0;i<ctx->num_solve_point;i++) {
430:     for (j=0;j<ctx->L;j++) {
431:       for (k=0;k<ctx->L;k++) {
432:         temp[k+j*ctx->L+i*ctx->L*ctx->L]=m[k+j*ctx->L+i*ctx->L*ctx->L_max];
433:       }
434:     }
435:   }
436:   MatDenseRestoreArray(M,&m);
437:   for (i=0;i<ctx->num_solve_point;i++) ppk[i] = 1;
438:   for (k=0;k<2*ctx->M;k++) {
439:     for (j=0;j<ctx->L;j++) {
440:       for (i=0;i<ctx->num_solve_point;i++) {
441:         alp = ppk[i]*ctx->weight[i*ctx->subcomm->n + ctx->subcomm_id];
442:         for (s=0;s<ctx->L;s++) {
443:           if (ctx->useconj) temp2[s+(j+k*ctx->L)*ctx->L] += PetscRealPart(alp*temp[s+(j+i*ctx->L)*ctx->L])*2;
444:           else temp2[s+(j+k*ctx->L)*ctx->L] += alp*temp[s+(j+i*ctx->L)*ctx->L];
445:         }
446:       }
447:     }
448:     for (i=0;i<ctx->num_solve_point;i++)
449:       ppk[i] *= ctx->pp[i*ctx->subcomm->n + ctx->subcomm_id];
450:   }
451:   for (i=0;i<2*ctx->M*ctx->L*ctx->L;i++) temp2[i] /= sub_size;
452:   PetscMPIIntCast(2*ctx->M*ctx->L*ctx->L,&len);
453:   MPI_Allreduce(temp2,Mu,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)eps));
454:   PetscFree3(temp,temp2,ppk);
455:   MatDestroy(&M);
456:   return(0);
457: }

459: static PetscErrorCode BlockHankel(EPS eps,PetscScalar *Mu,PetscInt s,PetscScalar *H)
460: {
461:   EPS_CISS *ctx = (EPS_CISS*)eps->data;
462:   PetscInt i,j,k,L=ctx->L,M=ctx->M;

465:   for (k=0;k<L*M;k++)
466:     for (j=0;j<M;j++)
467:       for (i=0;i<L;i++)
468:         H[j*L+i+k*L*M] = Mu[i+k*L+(j+s)*L*L];
469:   return(0);
470: }

472: static PetscErrorCode SVD_H0(EPS eps,PetscScalar *S,PetscInt *K)
473: {
474: #if defined(PETSC_MISSING_LAPACK_GESVD)
476:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
477: #else
479:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
480:   PetscInt       i,ml=ctx->L*ctx->M;
481:   PetscBLASInt   m,n,lda,ldu,ldvt,lwork,info;
482:   PetscScalar    *work;
483: #if defined(PETSC_USE_COMPLEX)
484:   PetscReal      *rwork;
485: #endif

488:   PetscMalloc1(5*ml,&work);
489: #if defined(PETSC_USE_COMPLEX)
490:   PetscMalloc1(5*ml,&rwork);
491: #endif
492:   PetscBLASIntCast(ml,&m);
493:   n = m; lda = m; ldu = m; ldvt = m; lwork = 5*m;
494:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
495: #if defined(PETSC_USE_COMPLEX)
496:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&m,&n,S,&lda,ctx->sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
497: #else
498:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&m,&n,S,&lda,ctx->sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
499: #endif
500:   SlepcCheckLapackInfo("gesvd",info);
501:   PetscFPTrapPop();
502:   (*K) = 0;
503:   for (i=0;i<ml;i++) {
504:     if (ctx->sigma[i]/PetscMax(ctx->sigma[0],1)>ctx->delta) (*K)++;
505:   }
506:   PetscFree(work);
507: #if defined(PETSC_USE_COMPLEX)
508:   PetscFree(rwork);
509: #endif
510:   return(0);
511: #endif
512: }

514: static PetscErrorCode ConstructS(EPS eps)
515: {
517:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
518:   PetscInt       i,j,k,vec_local_size,p_id;
519:   Vec            v,sj,yj;
520:   PetscScalar    *ppk, *v_data, m = 1;

523:   BVGetSizes(ctx->Y,&vec_local_size,NULL,NULL);
524:   PetscMalloc1(ctx->num_solve_point,&ppk);
525:   for (i=0;i<ctx->num_solve_point;i++) ppk[i] = 1;
526:   BVGetColumn(ctx->Y,0,&yj);
527:   VecDuplicate(yj,&v);
528:   BVRestoreColumn(ctx->Y,0,&yj);
529:   for (k=0;k<ctx->M;k++) {
530:     for (j=0;j<ctx->L;j++) {
531:       VecSet(v,0);
532:       for (i=0;i<ctx->num_solve_point;i++) {
533:         p_id = i*ctx->subcomm->n + ctx->subcomm_id;
534:         BVSetActiveColumns(ctx->Y,i*ctx->L_max+j,i*ctx->L_max+j+1);
535:         BVMultVec(ctx->Y,ppk[i]*ctx->weight[p_id],1.0,v,&m);
536:       }
537:       if (ctx->useconj) {
538:         VecGetArray(v,&v_data);
539:         for (i=0;i<vec_local_size;i++) v_data[i] = PetscRealPart(v_data[i])*2;
540:         VecRestoreArray(v,&v_data);
541:       }
542:       BVGetColumn(ctx->S,k*ctx->L+j,&sj);
543:       if (ctx->pA) {
544:         VecSet(sj,0);
545:         VecScatterBegin(ctx->scatterin,v,sj,ADD_VALUES,SCATTER_REVERSE);
546:         VecScatterEnd(ctx->scatterin,v,sj,ADD_VALUES,SCATTER_REVERSE);
547:       } else {
548:         VecCopy(v,sj);
549:       }
550:       BVRestoreColumn(ctx->S,k*ctx->L+j,&sj);
551:     }
552:     for (i=0;i<ctx->num_solve_point;i++) {
553:       p_id = i*ctx->subcomm->n + ctx->subcomm_id;
554:       ppk[i] *= ctx->pp[p_id];
555:     }
556:   }
557:   PetscFree(ppk);
558:   VecDestroy(&v);
559:   return(0);
560: }

562: static PetscErrorCode SVD_S(BV S,PetscInt ml,PetscReal delta,PetscReal *sigma,PetscInt *K)
563: {
564: #if defined(PETSC_MISSING_LAPACK_GESVD)
566:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
567: #else
569:   PetscInt       i,j,k,local_size;
570:   PetscMPIInt    len;
571:   PetscScalar    *work,*temp,*B,*tempB,*s_data,*Q1,*Q2,*temp2,alpha=1,beta=0;
572:   PetscBLASInt   l,m,n,lda,ldu,ldvt,lwork,info,ldb,ldc;
573: #if defined(PETSC_USE_COMPLEX)
574:   PetscReal      *rwork;
575: #endif

578:   BVGetSizes(S,&local_size,NULL,NULL);
579:   BVGetArray(S,&s_data);
580:   PetscMalloc7(ml*ml,&temp,ml*ml,&temp2,local_size*ml,&Q1,local_size*ml,&Q2,ml*ml,&B,ml*ml,&tempB,5*ml,&work);
581:   PetscMemzero(B,ml*ml*sizeof(PetscScalar));
582: #if defined(PETSC_USE_COMPLEX)
583:   PetscMalloc1(5*ml,&rwork);
584: #endif
585:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);

587:   for (i=0;i<ml;i++) B[i*ml+i]=1;

589:   for (k=0;k<2;k++) {
590:     PetscBLASIntCast(local_size,&m);
591:     PetscBLASIntCast(ml,&l);
592:     n = l; lda = m; ldb = m; ldc = l;
593:     if (k == 0) {
594:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,s_data,&lda,s_data,&ldb,&beta,temp,&ldc));
595:     } else if ((k%2)==1) {
596:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,Q1,&lda,Q1,&ldb,&beta,temp,&ldc));
597:     } else {
598:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,Q2,&lda,Q2,&ldb,&beta,temp,&ldc));
599:     }
600:     PetscMemzero(temp2,ml*ml*sizeof(PetscScalar));
601:     PetscMPIIntCast(ml*ml,&len);
602:     MPI_Allreduce(temp,temp2,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)S));

604:     PetscBLASIntCast(ml,&m);
605:     n = m; lda = m; lwork = 5*m, ldu = 1; ldvt = 1;
606: #if defined(PETSC_USE_COMPLEX)
607:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
608: #else
609:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
610: #endif
611:     SlepcCheckLapackInfo("gesvd",info);

613:     PetscBLASIntCast(local_size,&l);
614:     PetscBLASIntCast(ml,&n);
615:     m = n; lda = l; ldb = m; ldc = l;
616:     if (k==0) {
617:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,s_data,&lda,temp2,&ldb,&beta,Q1,&ldc));
618:     } else if ((k%2)==1) {
619:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,Q1,&lda,temp2,&ldb,&beta,Q2,&ldc));
620:     } else {
621:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,Q2,&lda,temp2,&ldb,&beta,Q1,&ldc));
622:     }

624:     PetscBLASIntCast(ml,&l);
625:     m = l; n = l; lda = l; ldb = m; ldc = l;
626:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,B,&lda,temp2,&ldb,&beta,tempB,&ldc));
627:     for (i=0;i<ml;i++) {
628:       sigma[i] = sqrt(sigma[i]);
629:       for (j=0;j<local_size;j++) {
630:         if ((k%2)==1) Q2[j+i*local_size]/=sigma[i];
631:         else Q1[j+i*local_size]/=sigma[i];
632:       }
633:       for (j=0;j<ml;j++) {
634:         B[j+i*ml]=tempB[j+i*ml]*sigma[i];
635:       }
636:     }
637:   }

639:   PetscBLASIntCast(ml,&m);
640:   n = m; lda = m; ldu=1; ldvt=1;
641: #if defined(PETSC_USE_COMPLEX)
642:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
643: #else
644:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
645: #endif
646:   SlepcCheckLapackInfo("gesvd",info);

648:   PetscBLASIntCast(local_size,&l);
649:   PetscBLASIntCast(ml,&n);
650:   m = n; lda = l; ldb = m; ldc = l;
651:   if ((k%2)==1) {
652:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q1,&lda,B,&ldb,&beta,s_data,&ldc));
653:   } else {
654:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q2,&lda,B,&ldb,&beta,s_data,&ldc));
655:   }

657:   PetscFPTrapPop();
658:   BVRestoreArray(S,&s_data);

660:   (*K) = 0;
661:   for (i=0;i<ml;i++) {
662:     if (sigma[i]/PetscMax(sigma[0],1)>delta) (*K)++;
663:   }
664:   PetscFree7(temp,temp2,Q1,Q2,B,tempB,work);
665: #if defined(PETSC_USE_COMPLEX)
666:   PetscFree(rwork);
667: #endif
668:   return(0);
669: #endif
670: }

672: static PetscErrorCode isGhost(EPS eps,PetscInt ld,PetscInt nv,PetscBool *fl)
673: {
675:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
676:   PetscInt       i,j;
677:   PetscScalar    *pX;
678:   PetscReal      *tau,s1,s2,tau_max=0.0;

681:   PetscMalloc1(nv,&tau);
682:   DSVectors(eps->ds,DS_MAT_X,NULL,NULL);
683:   DSGetArray(eps->ds,DS_MAT_X,&pX);

685:   for (i=0;i<nv;i++) {
686:     s1 = 0;
687:     s2 = 0;
688:     for (j=0;j<nv;j++) {
689:       s1 += PetscAbsScalar(PetscPowScalarInt(pX[i*ld+j],2));
690:       s2 += PetscPowRealInt(PetscAbsScalar(pX[i*ld+j]),2)/ctx->sigma[j];
691:     }
692:     tau[i] = s1/s2;
693:     tau_max = PetscMax(tau_max,tau[i]);
694:   }
695:   DSRestoreArray(eps->ds,DS_MAT_X,&pX);
696:   for (i=0;i<nv;i++) {
697:     tau[i] /= tau_max;
698:   }
699:   for (i=0;i<nv;i++) {
700:     if (tau[i]>=ctx->spurious_threshold) fl[i] = PETSC_TRUE;
701:     else fl[i] = PETSC_FALSE;
702:   }
703:   PetscFree(tau);
704:   return(0);
705: }

707: static PetscErrorCode rescale_eig(EPS eps,PetscInt nv)
708: {
710:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
711:   PetscInt       i;
712:   PetscScalar    center;
713:   PetscReal      radius,a,b,c,d,rgscale;
714: #if defined(PETSC_USE_COMPLEX)
715:   PetscReal      start_ang,end_ang,vscale,theta;
716: #endif
717:   PetscBool      isring,isellipse,isinterval;

720:   PetscObjectTypeCompare((PetscObject)eps->rg,RGELLIPSE,&isellipse);
721:   PetscObjectTypeCompare((PetscObject)eps->rg,RGRING,&isring);
722:   PetscObjectTypeCompare((PetscObject)eps->rg,RGINTERVAL,&isinterval);
723:   RGGetScale(eps->rg,&rgscale);
724:   if (isinterval) {
725:     RGIntervalGetEndpoints(eps->rg,NULL,NULL,&c,&d);
726:     if (c==d) {
727:       for (i=0;i<nv;i++) {
728: #if defined(PETSC_USE_COMPLEX)
729:         eps->eigr[i] = PetscRealPart(eps->eigr[i]);
730: #else
731:         eps->eigi[i] = 0;
732: #endif
733:       }
734:     }
735:   }
736:   if (ctx->extraction == EPS_CISS_EXTRACTION_HANKEL) {
737:     if (isellipse) {
738:       RGEllipseGetParameters(eps->rg,&center,&radius,NULL);
739:       for (i=0;i<nv;i++) eps->eigr[i] = rgscale*(center + radius*eps->eigr[i]);
740:     } else if (isinterval) {
741:       RGIntervalGetEndpoints(eps->rg,&a,&b,&c,&d);
742:       if (ctx->quad == EPS_CISS_QUADRULE_CHEBYSHEV) {
743:         for (i=0;i<nv;i++) {
744:           if (c==d) eps->eigr[i] = ((b-a)*(eps->eigr[i]+1.0)/2.0+a)*rgscale;
745:           if (a==b) {
746: #if defined(PETSC_USE_COMPLEX)
747:             eps->eigr[i] = ((d-c)*(eps->eigr[i]+1.0)/2.0+c)*rgscale*PETSC_i;
748: #else
749:             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Integration points on a vertical line require complex arithmetic");
750: #endif
751:           }
752:         }
753:       } else {
754:         center = (b+a)/2.0+(d+c)/2.0*PETSC_PI;
755:         radius = PetscSqrtReal(PetscPowRealInt((b-a)/2.0,2)+PetscPowRealInt((d-c)/2.0,2));
756:         for (i=0;i<nv;i++) eps->eigr[i] = center + radius*eps->eigr[i];
757:       }
758:     } else if (isring) {  /* only supported in complex scalars */
759: #if defined(PETSC_USE_COMPLEX)
760:       RGRingGetParameters(eps->rg,&center,&radius,&vscale,&start_ang,&end_ang,NULL);
761:       if (ctx->quad == EPS_CISS_QUADRULE_CHEBYSHEV) {
762:         for (i=0;i<nv;i++) {
763:           theta = (start_ang*2.0+(end_ang-start_ang)*(PetscRealPart(eps->eigr[i])+1.0))*PETSC_PI;
764:           eps->eigr[i] = rgscale*center + (rgscale*radius+PetscImaginaryPart(eps->eigr[i]))*(PetscCosReal(theta)+PETSC_i*vscale*PetscSinReal(theta));
765:         }
766:       } else {
767:         for (i=0;i<nv;i++) eps->eigr[i] = rgscale*(center + radius*eps->eigr[i]);
768:       }
769: #endif
770:     }
771:   }
772:   return(0);
773: }

775: PetscErrorCode EPSSetUp_CISS(EPS eps)
776: {
778:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
779:   PetscInt       i;
780:   PetscBool      issinvert,istrivial,isring,isellipse,isinterval,flg;
781:   PetscScalar    center;
782:   PetscReal      c,d;
783:   Mat            A;

786:   if (!eps->ncv) eps->ncv = ctx->L_max*ctx->M;
787:   else {
788:     EPSSetDimensions_Default(eps,eps->nev,&eps->ncv,&eps->mpd);
789:     ctx->L_max = eps->ncv/ctx->M;
790:     if (ctx->L_max == 0) {
791:       ctx->L_max = 1;
792:       eps->ncv = ctx->L_max*ctx->M;
793:     }
794:     if (ctx->L > ctx->L_max) ctx->L = ctx->L_max;
795:   }
796:   if (!eps->max_it) eps->max_it = 1;
797:   if (!eps->mpd) eps->mpd = eps->ncv;
798:   if (!eps->which) eps->which = EPS_ALL;
799:   if (!eps->extraction) { EPSSetExtraction(eps,EPS_RITZ); }
800:   else if (eps->extraction!=EPS_RITZ) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Unsupported extraction type");
801:   if (eps->arbitrary) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Arbitrary selection of eigenpairs not supported in this solver");
802:   if (eps->stopping!=EPSStoppingBasic) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"This solver does not support user-defined stopping test");
803:   /* check region */
804:   RGIsTrivial(eps->rg,&istrivial);
805:   if (istrivial) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"CISS requires a nontrivial region, e.g. -rg_type ellipse ...");
806:   RGGetComplement(eps->rg,&flg);
807:   if (flg) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"A region with complement flag set is not allowed");
808:   PetscObjectTypeCompare((PetscObject)eps->rg,RGELLIPSE,&isellipse);
809:   PetscObjectTypeCompare((PetscObject)eps->rg,RGRING,&isring);
810:   PetscObjectTypeCompare((PetscObject)eps->rg,RGINTERVAL,&isinterval);
811:   if (!isellipse && !isring && !isinterval) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Currently only implemented for interval, elliptic or ring regions");
812:   if (isring) {
813: #if !defined(PETSC_USE_COMPLEX)
814:     SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Ring region only supported for complex scalars");
815: #endif
816:     ctx->useconj = PETSC_FALSE;
817:   }
818:   if (isellipse) {
819:     RGEllipseGetParameters(eps->rg,&center,NULL,NULL);
820: #if defined(PETSC_USE_COMPLEX)
821:     if (ctx->isreal && PetscImaginaryPart(center) == 0.0) ctx->useconj = PETSC_TRUE;
822:     else ctx->useconj = PETSC_FALSE;
823: #else
824:     ctx->useconj = PETSC_FALSE;
825: #endif
826:   }
827:   if (isinterval) {
828:     RGIntervalGetEndpoints(eps->rg,NULL,NULL,&c,&d);
829: #if defined(PETSC_USE_COMPLEX)
830:     if (ctx->isreal && c==d) ctx->useconj = PETSC_TRUE;
831:     else ctx->useconj = PETSC_FALSE;
832: #else
833:     if (c!=d || c!=0.0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"In real scalars, endpoints of the imaginary axis must be both zero");
834:     ctx->useconj = PETSC_FALSE;
835: #endif
836:     if (!ctx->quad && c==d) ctx->quad = EPS_CISS_QUADRULE_CHEBYSHEV;
837:   }
838:   if (!ctx->quad) ctx->quad = EPS_CISS_QUADRULE_TRAPEZOIDAL;
839:   /* create split comm */
840:   SetSolverComm(eps);

842:   EPSAllocateSolution(eps,0);
843:   if (ctx->weight) { PetscFree4(ctx->weight,ctx->omega,ctx->pp,ctx->sigma); }
844:   PetscMalloc4(ctx->N,&ctx->weight,ctx->N+1,&ctx->omega,ctx->N,&ctx->pp,ctx->L_max*ctx->M,&ctx->sigma);
845:   PetscLogObjectMemory((PetscObject)eps,3*ctx->N*sizeof(PetscScalar)+ctx->L_max*ctx->N*sizeof(PetscReal));

847:   /* allocate basis vectors */
848:   BVDestroy(&ctx->S);
849:   BVDuplicateResize(eps->V,ctx->L_max*ctx->M,&ctx->S);
850:   PetscLogObjectParent((PetscObject)eps,(PetscObject)ctx->S);
851:   BVDestroy(&ctx->V);
852:   BVDuplicateResize(eps->V,ctx->L_max,&ctx->V);
853:   PetscLogObjectParent((PetscObject)eps,(PetscObject)ctx->V);

855:   STGetMatrix(eps->st,0,&A);
856:   PetscObjectTypeCompare((PetscObject)A,MATSHELL,&flg);
857:   if (flg) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Matrix type shell is not supported in this solver");

859:   if (!ctx->usest_set) ctx->usest = (ctx->num_subcomm>1)? PETSC_FALSE: PETSC_TRUE;
860:   if (ctx->usest && ctx->num_subcomm>1) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The usest flag is not supported when partitions > 1");

862:   CISSRedundantMat(eps);
863:   if (ctx->pA) {
864:     CISSScatterVec(eps);
865:     BVDestroy(&ctx->pV);
866:     BVCreate(PetscObjectComm((PetscObject)ctx->xsub),&ctx->pV);
867:     BVSetSizesFromVec(ctx->pV,ctx->xsub,eps->n);
868:     BVSetFromOptions(ctx->pV);
869:     BVResize(ctx->pV,ctx->L_max,PETSC_FALSE);
870:     PetscLogObjectParent((PetscObject)eps,(PetscObject)ctx->pV);
871:   }

873:   if (ctx->usest) {
874:     PetscObjectTypeCompare((PetscObject)eps->st,STSINVERT,&issinvert);
875:     if (!issinvert) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"If the usest flag is set, you must select the STSINVERT spectral transformation");
876:   } else {
877:     if (ctx->ksp) {
878:       for (i=0;i<ctx->num_solve_point;i++) {
879:         KSPDestroy(&ctx->ksp[i]);
880:       }
881:       PetscFree(ctx->ksp);
882:     }
883:     PetscMalloc1(ctx->num_solve_point,&ctx->ksp);
884:     PetscLogObjectMemory((PetscObject)eps,ctx->num_solve_point*sizeof(KSP));
885:     for (i=0;i<ctx->num_solve_point;i++) {
886:       KSPCreate(PetscSubcommChild(ctx->subcomm),&ctx->ksp[i]);
887:       PetscObjectIncrementTabLevel((PetscObject)ctx->ksp[i],(PetscObject)eps,1);
888:       PetscLogObjectParent((PetscObject)eps,(PetscObject)ctx->ksp[i]);
889:       KSPSetOptionsPrefix(ctx->ksp[i],((PetscObject)eps)->prefix);
890:       KSPAppendOptionsPrefix(ctx->ksp[i],"eps_ciss_");
891:       KSPSetErrorIfNotConverged(ctx->ksp[i],PETSC_TRUE);
892:       KSPSetTolerances(ctx->ksp[i],SLEPC_DEFAULT_TOL,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
893:     }
894:   }

896:   BVDestroy(&ctx->Y);
897:   if (ctx->pA) {
898:     BVCreate(PetscObjectComm((PetscObject)ctx->xsub),&ctx->Y);
899:     BVSetSizesFromVec(ctx->Y,ctx->xsub,eps->n);
900:     BVSetFromOptions(ctx->Y);
901:     BVResize(ctx->Y,ctx->num_solve_point*ctx->L_max,PETSC_FALSE);
902:   } else {
903:     BVDuplicateResize(eps->V,ctx->num_solve_point*ctx->L_max,&ctx->Y);
904:   }
905:   PetscLogObjectParent((PetscObject)eps,(PetscObject)ctx->Y);

907:   if (ctx->extraction == EPS_CISS_EXTRACTION_HANKEL) {
908:     DSSetType(eps->ds,DSGNHEP);
909:   } else if (eps->isgeneralized) {
910:     if (eps->ishermitian && eps->ispositive) {
911:       DSSetType(eps->ds,DSGHEP);
912:     } else {
913:       DSSetType(eps->ds,DSGNHEP);
914:     }
915:   } else {
916:     if (eps->ishermitian) {
917:       DSSetType(eps->ds,DSHEP);
918:     } else {
919:       DSSetType(eps->ds,DSNHEP);
920:     }
921:   }
922:   DSAllocate(eps->ds,eps->ncv);
923:   EPSSetWorkVecs(eps,2);

925: #if !defined(PETSC_USE_COMPLEX)
926:   if (!eps->ishermitian) { PetscInfo(eps,"Warning: complex eigenvalues are not calculated exactly without --with-scalar-type=complex in PETSc\n"); }
927: #endif
928:   return(0);
929: }

931: PetscErrorCode EPSSolve_CISS(EPS eps)
932: {
934:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
935:   Mat            A,B,X,M,pA,pB;
936:   PetscInt       i,j,ld,nmat,L_add=0,nv=0,L_base=ctx->L,inner,nlocal,*inside;
937:   PetscScalar    *Mu,*H0,*H1=NULL,*rr,*temp;
938:   PetscReal      error,max_error;
939:   PetscBool      *fl1;
940:   Vec            si,w[3];
941:   SlepcSC        sc;
942:   PetscRandom    rand;
943: #if defined(PETSC_USE_COMPLEX)
944:   PetscBool      isellipse;
945: #endif

948:   w[0] = eps->work[0];
949:   w[1] = NULL;
950:   w[2] = eps->work[1];
951:   /* override SC settings */
952:   DSGetSlepcSC(eps->ds,&sc);
953:   sc->comparison    = SlepcCompareLargestMagnitude;
954:   sc->comparisonctx = NULL;
955:   sc->map           = NULL;
956:   sc->mapobj        = NULL;
957:   VecGetLocalSize(w[0],&nlocal);
958:   DSGetLeadingDimension(eps->ds,&ld);
959:   STGetNumMatrices(eps->st,&nmat);
960:   STGetMatrix(eps->st,0,&A);
961:   if (nmat>1) { STGetMatrix(eps->st,1,&B); }
962:   else B = NULL;
963:   SetPathParameter(eps);
964:   CISSVecSetRandom(ctx->V,0,ctx->L);
965:   BVGetRandomContext(ctx->V,&rand);

967:   if (ctx->pA) {
968:     VecScatterVecs(eps,ctx->V,ctx->L);
969:     SolveLinearSystem(eps,ctx->pA,ctx->pB,ctx->pV,0,ctx->L,PETSC_TRUE);
970:   } else {
971:     SolveLinearSystem(eps,A,B,ctx->V,0,ctx->L,PETSC_TRUE);
972:   }
973: #if defined(PETSC_USE_COMPLEX)
974:   PetscObjectTypeCompare((PetscObject)eps->rg,RGELLIPSE,&isellipse);
975:   if (isellipse) {
976:     EstimateNumberEigs(eps,&L_add);
977:   } else {
978:     L_add = 0;
979:   }
980: #else
981:   L_add = 0;
982: #endif
983:   if (L_add>0) {
984:     PetscInfo2(eps,"Changing L %D -> %D by Estimate #Eig\n",ctx->L,ctx->L+L_add);
985:     CISSVecSetRandom(ctx->V,ctx->L,ctx->L+L_add);
986:     if (ctx->pA) {
987:       VecScatterVecs(eps,ctx->V,ctx->L+L_add);
988:       SolveLinearSystem(eps,ctx->pA,ctx->pB,ctx->pV,ctx->L,ctx->L+L_add,PETSC_FALSE);
989:     } else {
990:       SolveLinearSystem(eps,A,B,ctx->V,ctx->L,ctx->L+L_add,PETSC_FALSE);
991:     }
992:     ctx->L += L_add;
993:   }
994:   PetscMalloc2(ctx->L*ctx->L*ctx->M*2,&Mu,ctx->L*ctx->M*ctx->L*ctx->M,&H0);
995:   for (i=0;i<ctx->refine_blocksize;i++) {
996:     CalcMu(eps,Mu);
997:     BlockHankel(eps,Mu,0,H0);
998:     SVD_H0(eps,H0,&nv);
999:     if (ctx->sigma[0]<=ctx->delta || nv < ctx->L*ctx->M || ctx->L == ctx->L_max) break;
1000:     L_add = L_base;
1001:     if (ctx->L+L_add>ctx->L_max) L_add = ctx->L_max-ctx->L;
1002:     PetscInfo2(eps,"Changing L %D -> %D by SVD(H0)\n",ctx->L,ctx->L+L_add);
1003:     CISSVecSetRandom(ctx->V,ctx->L,ctx->L+L_add);
1004:     if (ctx->pA) {
1005:       VecScatterVecs(eps,ctx->V,ctx->L+L_add);
1006:       SolveLinearSystem(eps,ctx->pA,ctx->pB,ctx->pV,ctx->L,ctx->L+L_add,PETSC_FALSE);
1007:     } else {
1008:       SolveLinearSystem(eps,A,B,ctx->V,ctx->L,ctx->L+L_add,PETSC_FALSE);
1009:     }
1010:     ctx->L += L_add;
1011:   }
1012:   if (ctx->extraction == EPS_CISS_EXTRACTION_HANKEL) {
1013:     PetscMalloc1(ctx->L*ctx->M*ctx->L*ctx->M,&H1);
1014:   }

1016:   while (eps->reason == EPS_CONVERGED_ITERATING) {
1017:     eps->its++;
1018:     for (inner=0;inner<=ctx->refine_inner;inner++) {
1019:       if (ctx->extraction == EPS_CISS_EXTRACTION_HANKEL) {
1020:         CalcMu(eps,Mu);
1021:         BlockHankel(eps,Mu,0,H0);
1022:         SVD_H0(eps,H0,&nv);
1023:         break;
1024:       } else {
1025:         ConstructS(eps);
1026:         BVSetActiveColumns(ctx->S,0,ctx->L);
1027:         BVCopy(ctx->S,ctx->V);
1028:         SVD_S(ctx->S,ctx->L*ctx->M,ctx->delta,ctx->sigma,&nv);
1029:         if (ctx->sigma[0]>ctx->delta && nv==ctx->L*ctx->M && inner!=ctx->refine_inner) {
1030:           if (ctx->pA) {
1031:             VecScatterVecs(eps,ctx->V,ctx->L);
1032:             SolveLinearSystem(eps,ctx->pA,ctx->pB,ctx->pV,0,ctx->L,PETSC_FALSE);
1033:           } else {
1034:             SolveLinearSystem(eps,A,B,ctx->V,0,ctx->L,PETSC_FALSE);
1035:           }
1036:         } else break;
1037:       }
1038:     }
1039:     eps->nconv = 0;
1040:     if (nv == 0) eps->reason = EPS_CONVERGED_TOL;
1041:     else {
1042:       DSSetDimensions(eps->ds,nv,0,0,0);
1043:       DSSetState(eps->ds,DS_STATE_RAW);

1045:       if (ctx->extraction == EPS_CISS_EXTRACTION_HANKEL) {
1046:         BlockHankel(eps,Mu,0,H0);
1047:         BlockHankel(eps,Mu,1,H1);
1048:         DSGetArray(eps->ds,DS_MAT_A,&temp);
1049:         for (j=0;j<nv;j++) {
1050:           for (i=0;i<nv;i++) {
1051:             temp[i+j*ld] = H1[i+j*ctx->L*ctx->M];
1052:           }
1053:         }
1054:         DSRestoreArray(eps->ds,DS_MAT_A,&temp);
1055:         DSGetArray(eps->ds,DS_MAT_B,&temp);
1056:         for (j=0;j<nv;j++) {
1057:           for (i=0;i<nv;i++) {
1058:             temp[i+j*ld] = H0[i+j*ctx->L*ctx->M];
1059:           }
1060:         }
1061:         DSRestoreArray(eps->ds,DS_MAT_B,&temp);
1062:       } else {
1063:         BVSetActiveColumns(ctx->S,0,nv);
1064:         DSGetMat(eps->ds,DS_MAT_A,&pA);
1065:         MatZeroEntries(pA);
1066:         BVMatProject(ctx->S,A,ctx->S,pA);
1067:         DSRestoreMat(eps->ds,DS_MAT_A,&pA);
1068:         if (B) {
1069:           DSGetMat(eps->ds,DS_MAT_B,&pB);
1070:           MatZeroEntries(pB);
1071:           BVMatProject(ctx->S,B,ctx->S,pB);
1072:           DSRestoreMat(eps->ds,DS_MAT_B,&pB);
1073:         }
1074:       }

1076:       DSSolve(eps->ds,eps->eigr,eps->eigi);
1077:       DSSynchronize(eps->ds,eps->eigr,eps->eigi);
1078:       DSVectors(eps->ds,DS_MAT_X,NULL,NULL);

1080:       PetscMalloc3(nv,&fl1,nv,&inside,nv,&rr);
1081:       rescale_eig(eps,nv);
1082:       isGhost(eps,ld,nv,fl1);
1083:       RGCheckInside(eps->rg,nv,eps->eigr,eps->eigi,inside);
1084:       for (i=0;i<nv;i++) {
1085:         if (fl1[i] && inside[i]>=0) {
1086:           rr[i] = 1.0;
1087:           eps->nconv++;
1088:         } else rr[i] = 0.0;
1089:       }
1090:       DSSort(eps->ds,eps->eigr,eps->eigi,rr,NULL,&eps->nconv);
1091:       DSSynchronize(eps->ds,eps->eigr,eps->eigi);
1092:       rescale_eig(eps,nv);
1093:       PetscFree3(fl1,inside,rr);
1094:       BVSetActiveColumns(eps->V,0,nv);
1095:       if (ctx->extraction == EPS_CISS_EXTRACTION_HANKEL) {
1096:         ConstructS(eps);
1097:         BVSetActiveColumns(ctx->S,0,ctx->L);
1098:         BVCopy(ctx->S,ctx->V);
1099:         BVSetActiveColumns(ctx->S,0,nv);
1100:       }
1101:       BVCopy(ctx->S,eps->V);

1103:       DSVectors(eps->ds,DS_MAT_X,NULL,NULL);
1104:       DSGetMat(eps->ds,DS_MAT_X,&X);
1105:       BVMultInPlace(ctx->S,X,0,eps->nconv);
1106:       if (eps->ishermitian) {
1107:         BVMultInPlace(eps->V,X,0,eps->nconv);
1108:       }
1109:       MatDestroy(&X);
1110:       max_error = 0.0;
1111:       for (i=0;i<eps->nconv;i++) {
1112:         BVGetColumn(ctx->S,i,&si);
1113:         EPSComputeResidualNorm_Private(eps,eps->eigr[i],eps->eigi[i],si,NULL,w,&error);
1114:         (*eps->converged)(eps,eps->eigr[i],eps->eigi[i],error,&error,eps->convergedctx);
1115:         BVRestoreColumn(ctx->S,i,&si);
1116:         max_error = PetscMax(max_error,error);
1117:       }

1119:       if (max_error <= eps->tol) eps->reason = EPS_CONVERGED_TOL;
1120:       else if (eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
1121:       else {
1122:         if (eps->nconv > ctx->L) {
1123:           MatCreateSeqDense(PETSC_COMM_SELF,eps->nconv,ctx->L,NULL,&M);
1124:           MatDenseGetArray(M,&temp);
1125:           for (i=0;i<ctx->L*eps->nconv;i++) {
1126:             PetscRandomGetValue(rand,&temp[i]);
1127:             temp[i] = PetscRealPart(temp[i]);
1128:           }
1129:           MatDenseRestoreArray(M,&temp);
1130:           BVSetActiveColumns(ctx->S,0,eps->nconv);
1131:           BVMultInPlace(ctx->S,M,0,ctx->L);
1132:           MatDestroy(&M);
1133:           BVSetActiveColumns(ctx->S,0,ctx->L);
1134:           BVCopy(ctx->S,ctx->V);
1135:         }
1136:         if (ctx->pA) {
1137:           VecScatterVecs(eps,ctx->V,ctx->L);
1138:           SolveLinearSystem(eps,ctx->pA,ctx->pB,ctx->pV,0,ctx->L,PETSC_FALSE);
1139:         } else {
1140:           SolveLinearSystem(eps,A,B,ctx->V,0,ctx->L,PETSC_FALSE);
1141:         }
1142:       }
1143:     }
1144:   }
1145:   if (ctx->extraction == EPS_CISS_EXTRACTION_HANKEL) {
1146:     PetscFree(H1);
1147:   }
1148:   PetscFree2(Mu,H0);
1149:   return(0);
1150: }

1152: static PetscErrorCode EPSCISSSetSizes_CISS(EPS eps,PetscInt ip,PetscInt bs,PetscInt ms,PetscInt npart,PetscInt bsmax,PetscBool realmats)
1153: {
1154:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1157:   if (ip == PETSC_DECIDE || ip == PETSC_DEFAULT) {
1158:     if (ctx->N!=32) { ctx->N =32; ctx->M = ctx->N/4; }
1159:   } else {
1160:     if (ip<1) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The ip argument must be > 0");
1161:     if (ip%2) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The ip argument must be an even number");
1162:     if (ctx->N!=ip) { ctx->N = ip; ctx->M = ctx->N/4; }
1163:   }
1164:   if (bs == PETSC_DECIDE || bs == PETSC_DEFAULT) {
1165:     ctx->L = 16;
1166:   } else {
1167:     if (bs<1) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The bs argument must be > 0");
1168:     if (bs>ctx->L_max) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The bs argument must be less than or equal to the maximum number of block size");
1169:     ctx->L = bs;
1170:   }
1171:   if (ms == PETSC_DECIDE || ms == PETSC_DEFAULT) {
1172:     ctx->M = ctx->N/4;
1173:   } else {
1174:     if (ms<1) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The ms argument must be > 0");
1175:     if (ms>ctx->N) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The ms argument must be less than or equal to the number of integration points");
1176:     ctx->M = ms;
1177:   }
1178:   if (npart == PETSC_DECIDE || npart == PETSC_DEFAULT) {
1179:     ctx->num_subcomm = 1;
1180:   } else {
1181:     if (npart<1) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The npart argument must be > 0");
1182:     ctx->num_subcomm = npart;
1183:   }
1184:   if (bsmax == PETSC_DECIDE || bsmax == PETSC_DEFAULT) {
1185:     ctx->L = 256;
1186:   } else {
1187:     if (bsmax<1) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The bsmax argument must be > 0");
1188:     if (bsmax<ctx->L) ctx->L_max = ctx->L;
1189:     else ctx->L_max = bsmax;
1190:   }
1191:   ctx->isreal = realmats;
1192:   eps->state = EPS_STATE_INITIAL;
1193:   return(0);
1194: }

1196: /*@
1197:    EPSCISSSetSizes - Sets the values of various size parameters in the CISS solver.

1199:    Logically Collective on EPS

1201:    Input Parameters:
1202: +  eps   - the eigenproblem solver context
1203: .  ip    - number of integration points
1204: .  bs    - block size
1205: .  ms    - moment size
1206: .  npart - number of partitions when splitting the communicator
1207: .  bsmax - max block size
1208: -  realmats - A and B are real

1210:    Options Database Keys:
1211: +  -eps_ciss_integration_points - Sets the number of integration points
1212: .  -eps_ciss_blocksize - Sets the block size
1213: .  -eps_ciss_moments - Sets the moment size
1214: .  -eps_ciss_partitions - Sets the number of partitions
1215: .  -eps_ciss_maxblocksize - Sets the maximum block size
1216: -  -eps_ciss_realmats - A and B are real

1218:    Note:
1219:    The default number of partitions is 1. This means the internal KSP object is shared
1220:    among all processes of the EPS communicator. Otherwise, the communicator is split
1221:    into npart communicators, so that npart KSP solves proceed simultaneously.

1223:    Level: advanced

1225: .seealso: EPSCISSGetSizes()
1226: @*/
1227: PetscErrorCode EPSCISSSetSizes(EPS eps,PetscInt ip,PetscInt bs,PetscInt ms,PetscInt npart,PetscInt bsmax,PetscBool realmats)
1228: {

1239:   PetscTryMethod(eps,"EPSCISSSetSizes_C",(EPS,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscBool),(eps,ip,bs,ms,npart,bsmax,realmats));
1240:   return(0);
1241: }

1243: static PetscErrorCode EPSCISSGetSizes_CISS(EPS eps,PetscInt *ip,PetscInt *bs,PetscInt *ms,PetscInt *npart,PetscInt *bsmax,PetscBool *realmats)
1244: {
1245:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1248:   if (ip) *ip = ctx->N;
1249:   if (bs) *bs = ctx->L;
1250:   if (ms) *ms = ctx->M;
1251:   if (npart) *npart = ctx->num_subcomm;
1252:   if (bsmax) *bsmax = ctx->L_max;
1253:   if (realmats) *realmats = ctx->isreal;
1254:   return(0);
1255: }

1257: /*@
1258:    EPSCISSGetSizes - Gets the values of various size parameters in the CISS solver.

1260:    Not Collective

1262:    Input Parameter:
1263: .  eps - the eigenproblem solver context

1265:    Output Parameters:
1266: +  ip    - number of integration points
1267: .  bs    - block size
1268: .  ms    - moment size
1269: .  npart - number of partitions when splitting the communicator
1270: .  bsmax - max block size
1271: -  realmats - A and B are real

1273:    Level: advanced

1275: .seealso: EPSCISSSetSizes()
1276: @*/
1277: PetscErrorCode EPSCISSGetSizes(EPS eps,PetscInt *ip,PetscInt *bs,PetscInt *ms,PetscInt *npart,PetscInt *bsmax,PetscBool *realmats)
1278: {

1283:   PetscUseMethod(eps,"EPSCISSGetSizes_C",(EPS,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscBool*),(eps,ip,bs,ms,npart,bsmax,realmats));
1284:   return(0);
1285: }

1287: static PetscErrorCode EPSCISSSetThreshold_CISS(EPS eps,PetscReal delta,PetscReal spur)
1288: {
1289:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1292:   if (delta == PETSC_DEFAULT) {
1293:     ctx->delta = 1e-12;
1294:   } else {
1295:     if (delta<=0.0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The delta argument must be > 0.0");
1296:     ctx->delta = delta;
1297:   }
1298:   if (spur == PETSC_DEFAULT) {
1299:     ctx->spurious_threshold = 1e-4;
1300:   } else {
1301:     if (spur<=0.0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The spurious threshold argument must be > 0.0");
1302:     ctx->spurious_threshold = spur;
1303:   }
1304:   return(0);
1305: }

1307: /*@
1308:    EPSCISSSetThreshold - Sets the values of various threshold parameters in
1309:    the CISS solver.

1311:    Logically Collective on EPS

1313:    Input Parameters:
1314: +  eps   - the eigenproblem solver context
1315: .  delta - threshold for numerical rank
1316: -  spur  - spurious threshold (to discard spurious eigenpairs)

1318:    Options Database Keys:
1319: +  -eps_ciss_delta - Sets the delta
1320: -  -eps_ciss_spurious_threshold - Sets the spurious threshold

1322:    Level: advanced

1324: .seealso: EPSCISSGetThreshold()
1325: @*/
1326: PetscErrorCode EPSCISSSetThreshold(EPS eps,PetscReal delta,PetscReal spur)
1327: {

1334:   PetscTryMethod(eps,"EPSCISSSetThreshold_C",(EPS,PetscReal,PetscReal),(eps,delta,spur));
1335:   return(0);
1336: }

1338: static PetscErrorCode EPSCISSGetThreshold_CISS(EPS eps,PetscReal *delta,PetscReal *spur)
1339: {
1340:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1343:   if (delta) *delta = ctx->delta;
1344:   if (spur)  *spur = ctx->spurious_threshold;
1345:   return(0);
1346: }

1348: /*@
1349:    EPSCISSGetThreshold - Gets the values of various threshold parameters
1350:    in the CISS solver.

1352:    Not Collective

1354:    Input Parameter:
1355: .  eps - the eigenproblem solver context

1357:    Output Parameters:
1358: +  delta - threshold for numerical rank
1359: -  spur  - spurious threshold (to discard spurious eigenpairs)

1361:    Level: advanced

1363: .seealso: EPSCISSSetThreshold()
1364: @*/
1365: PetscErrorCode EPSCISSGetThreshold(EPS eps,PetscReal *delta,PetscReal *spur)
1366: {

1371:   PetscUseMethod(eps,"EPSCISSGetThreshold_C",(EPS,PetscReal*,PetscReal*),(eps,delta,spur));
1372:   return(0);
1373: }

1375: static PetscErrorCode EPSCISSSetRefinement_CISS(EPS eps,PetscInt inner,PetscInt blsize)
1376: {
1377:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1380:   if (inner == PETSC_DEFAULT) {
1381:     ctx->refine_inner = 0;
1382:   } else {
1383:     if (inner<0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The refine inner argument must be >= 0");
1384:     ctx->refine_inner = inner;
1385:   }
1386:   if (blsize == PETSC_DEFAULT) {
1387:     ctx->refine_blocksize = 0;
1388:   } else {
1389:     if (blsize<0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The refine blocksize argument must be >= 0");
1390:     ctx->refine_blocksize = blsize;
1391:   }
1392:   return(0);
1393: }

1395: /*@
1396:    EPSCISSSetRefinement - Sets the values of various refinement parameters
1397:    in the CISS solver.

1399:    Logically Collective on EPS

1401:    Input Parameters:
1402: +  eps    - the eigenproblem solver context
1403: .  inner  - number of iterative refinement iterations (inner loop)
1404: -  blsize - number of iterative refinement iterations (blocksize loop)

1406:    Options Database Keys:
1407: +  -eps_ciss_refine_inner - Sets number of inner iterations
1408: -  -eps_ciss_refine_blocksize - Sets number of blocksize iterations

1410:    Level: advanced

1412: .seealso: EPSCISSGetRefinement()
1413: @*/
1414: PetscErrorCode EPSCISSSetRefinement(EPS eps,PetscInt inner,PetscInt blsize)
1415: {

1422:   PetscTryMethod(eps,"EPSCISSSetRefinement_C",(EPS,PetscInt,PetscInt),(eps,inner,blsize));
1423:   return(0);
1424: }

1426: static PetscErrorCode EPSCISSGetRefinement_CISS(EPS eps,PetscInt *inner,PetscInt *blsize)
1427: {
1428:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1431:   if (inner)  *inner = ctx->refine_inner;
1432:   if (blsize) *blsize = ctx->refine_blocksize;
1433:   return(0);
1434: }

1436: /*@
1437:    EPSCISSGetRefinement - Gets the values of various refinement parameters
1438:    in the CISS solver.

1440:    Not Collective

1442:    Input Parameter:
1443: .  eps - the eigenproblem solver context

1445:    Output Parameters:
1446: +  inner  - number of iterative refinement iterations (inner loop)
1447: -  blsize - number of iterative refinement iterations (blocksize loop)

1449:    Level: advanced

1451: .seealso: EPSCISSSetRefinement()
1452: @*/
1453: PetscErrorCode EPSCISSGetRefinement(EPS eps, PetscInt *inner, PetscInt *blsize)
1454: {

1459:   PetscUseMethod(eps,"EPSCISSGetRefinement_C",(EPS,PetscInt*,PetscInt*),(eps,inner,blsize));
1460:   return(0);
1461: }

1463: static PetscErrorCode EPSCISSSetUseST_CISS(EPS eps,PetscBool usest)
1464: {
1465:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1468:   ctx->usest     = usest;
1469:   ctx->usest_set = PETSC_TRUE;
1470:   eps->state     = EPS_STATE_INITIAL;
1471:   return(0);
1472: }

1474: /*@
1475:    EPSCISSSetUseST - Sets a flag indicating that the CISS solver will
1476:    use the ST object for the linear solves.

1478:    Logically Collective on EPS

1480:    Input Parameters:
1481: +  eps    - the eigenproblem solver context
1482: -  usest  - boolean flag to use the ST object or not

1484:    Options Database Keys:
1485: .  -eps_ciss_usest <bool> - whether the ST object will be used or not

1487:    Level: advanced

1489: .seealso: EPSCISSGetUseST()
1490: @*/
1491: PetscErrorCode EPSCISSSetUseST(EPS eps,PetscBool usest)
1492: {

1498:   PetscTryMethod(eps,"EPSCISSSetUseST_C",(EPS,PetscBool),(eps,usest));
1499:   return(0);
1500: }

1502: static PetscErrorCode EPSCISSGetUseST_CISS(EPS eps,PetscBool *usest)
1503: {
1504:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1507:   *usest = ctx->usest;
1508:   return(0);
1509: }

1511: /*@
1512:    EPSCISSGetUseST - Gets the flag for using the ST object
1513:    in the CISS solver.

1515:    Not Collective

1517:    Input Parameter:
1518: .  eps - the eigenproblem solver context

1520:    Output Parameters:
1521: .  usest - boolean flag indicating if the ST object is being used

1523:    Level: advanced

1525: .seealso: EPSCISSSetUseST()
1526: @*/
1527: PetscErrorCode EPSCISSGetUseST(EPS eps,PetscBool *usest)
1528: {

1534:   PetscUseMethod(eps,"EPSCISSGetUseST_C",(EPS,PetscBool*),(eps,usest));
1535:   return(0);
1536: }

1538: static PetscErrorCode EPSCISSSetQuadRule_CISS(EPS eps,EPSCISSQuadRule quad)
1539: {
1540:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1543:   ctx->quad = quad;
1544:   return(0);
1545: }

1547: /*@
1548:    EPSCISSSetQuadRule - Sets the quadrature rule used in the CISS solver.

1550:    Logically Collective on EPS

1552:    Input Parameters:
1553: +  eps  - the eigenproblem solver context
1554: -  quad - the quadrature rule

1556:    Options Database Key:
1557: .  -eps_ciss_quadrule - Sets the quadrature rule (either 'trapezoidal' or
1558:                            'chebyshev')

1560:    Notes:
1561:    By default, the trapezoidal rule is used (EPS_CISS_QUADRULE_TRAPEZOIDAL).

1563:    If the 'chebyshev' option is specified (EPS_CISS_QUADRULE_CHEBYSHEV), then
1564:    Chebyshev points are used as quadrature points.

1566:    Level: advanced

1568: .seealso: EPSCISSGetQuadRule(), EPSCISSQuadRule
1569: @*/
1570: PetscErrorCode EPSCISSSetQuadRule(EPS eps,EPSCISSQuadRule quad)
1571: {

1577:   PetscTryMethod(eps,"EPSCISSSetQuadRule_C",(EPS,EPSCISSQuadRule),(eps,quad));
1578:   return(0);
1579: }

1581: static PetscErrorCode EPSCISSGetQuadRule_CISS(EPS eps,EPSCISSQuadRule *quad)
1582: {
1583:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1586:   *quad = ctx->quad;
1587:   return(0);
1588: }

1590: /*@
1591:    EPSCISSGetQuadRule - Gets the quadrature rule used in the CISS solver.

1593:    Not Collective

1595:    Input Parameter:
1596: .  eps - the eigenproblem solver context

1598:    Output Parameters:
1599: .  quad - quadrature rule

1601:    Level: advanced

1603: .seealso: EPSCISSSetQuadRule() EPSCISSQuadRule
1604: @*/
1605: PetscErrorCode EPSCISSGetQuadRule(EPS eps, EPSCISSQuadRule *quad)
1606: {

1612:   PetscUseMethod(eps,"EPSCISSGetQuadRule_C",(EPS,EPSCISSQuadRule*),(eps,quad));
1613:   return(0);
1614: }

1616: static PetscErrorCode EPSCISSSetExtraction_CISS(EPS eps,EPSCISSExtraction extraction)
1617: {
1618:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1621:   ctx->extraction = extraction;
1622:   return(0);
1623: }

1625: /*@
1626:    EPSCISSSetExtraction - Sets the extraction technique used in the CISS solver.

1628:    Logically Collective on EPS

1630:    Input Parameters:
1631: +  eps        - the eigenproblem solver context
1632: -  extraction - the extraction technique

1634:    Options Database Key:
1635: .  -eps_ciss_extraction - Sets the extraction technique (either 'ritz' or
1636:                            'hankel')

1638:    Notes:
1639:    By default, the Rayleigh-Ritz extraction is used (EPS_CISS_EXTRACTION_RITZ).

1641:    If the 'hankel' option is specified (EPS_CISS_EXTRACTION_HANKEL), then
1642:    the Block Hankel method is used for extracting eigenpairs.

1644:    Level: advanced

1646: .seealso: EPSCISSGetExtraction(), EPSCISSExtraction
1647: @*/
1648: PetscErrorCode EPSCISSSetExtraction(EPS eps,EPSCISSExtraction extraction)
1649: {

1655:   PetscTryMethod(eps,"EPSCISSSetExtraction_C",(EPS,EPSCISSExtraction),(eps,extraction));
1656:   return(0);
1657: }

1659: static PetscErrorCode EPSCISSGetExtraction_CISS(EPS eps,EPSCISSExtraction *extraction)
1660: {
1661:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1664:   *extraction = ctx->extraction;
1665:   return(0);
1666: }

1668: /*@
1669:    EPSCISSGetExtraction - Gets the extraction technique used in the CISS solver.

1671:    Not Collective

1673:    Input Parameter:
1674: .  eps - the eigenproblem solver context

1676:    Output Parameters:
1677: +  extraction - extraction technique

1679:    Level: advanced

1681: .seealso: EPSCISSSetExtraction() EPSCISSExtraction
1682: @*/
1683: PetscErrorCode EPSCISSGetExtraction(EPS eps,EPSCISSExtraction *extraction)
1684: {

1690:   PetscUseMethod(eps,"EPSCISSGetExtraction_C",(EPS,EPSCISSExtraction*),(eps,extraction));
1691:   return(0);
1692: }

1694: PetscErrorCode EPSReset_CISS(EPS eps)
1695: {
1697:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
1698:   PetscInt       i;

1701:   BVDestroy(&ctx->S);
1702:   BVDestroy(&ctx->V);
1703:   BVDestroy(&ctx->Y);
1704:   if (!ctx->usest) {
1705:     for (i=0;i<ctx->num_solve_point;i++) {
1706:       KSPDestroy(&ctx->ksp[i]);
1707:     }
1708:     PetscFree(ctx->ksp);
1709:   }
1710:   VecScatterDestroy(&ctx->scatterin);
1711:   VecDestroy(&ctx->xsub);
1712:   VecDestroy(&ctx->xdup);
1713:   if (ctx->pA) {
1714:     MatDestroy(&ctx->pA);
1715:     MatDestroy(&ctx->pB);
1716:     BVDestroy(&ctx->pV);
1717:   }
1718:   return(0);
1719: }

1721: PetscErrorCode EPSSetFromOptions_CISS(PetscOptionItems *PetscOptionsObject,EPS eps)
1722: {
1723:   PetscErrorCode    ierr;
1724:   PetscReal         r3,r4;
1725:   PetscInt          i1,i2,i3,i4,i5,i6,i7;
1726:   PetscBool         b1,b2,flg;
1727:   EPS_CISS          *ctx = (EPS_CISS*)eps->data;
1728:   EPSCISSQuadRule   quad;
1729:   EPSCISSExtraction extraction;

1732:   PetscOptionsHead(PetscOptionsObject,"EPS CISS Options");

1734:     EPSCISSGetSizes(eps,&i1,&i2,&i3,&i4,&i5,&b1);
1735:     PetscOptionsInt("-eps_ciss_integration_points","Number of integration points","EPSCISSSetSizes",i1,&i1,NULL);
1736:     PetscOptionsInt("-eps_ciss_blocksize","Block size","EPSCISSSetSizes",i2,&i2,NULL);
1737:     PetscOptionsInt("-eps_ciss_moments","Moment size","EPSCISSSetSizes",i3,&i3,NULL);
1738:     PetscOptionsInt("-eps_ciss_partitions","Number of partitions","EPSCISSSetSizes",i4,&i4,NULL);
1739:     PetscOptionsInt("-eps_ciss_maxblocksize","Maximum block size","EPSCISSSetSizes",i5,&i5,NULL);
1740:     PetscOptionsBool("-eps_ciss_realmats","True if A and B are real","EPSCISSSetSizes",b1,&b1,NULL);
1741:     EPSCISSSetSizes(eps,i1,i2,i3,i4,i5,b1);

1743:     EPSCISSGetThreshold(eps,&r3,&r4);
1744:     PetscOptionsReal("-eps_ciss_delta","Threshold for numerical rank","EPSCISSSetThreshold",r3,&r3,NULL);
1745:     PetscOptionsReal("-eps_ciss_spurious_threshold","Threshold for the spurious eigenpairs","EPSCISSSetThreshold",r4,&r4,NULL);
1746:     EPSCISSSetThreshold(eps,r3,r4);

1748:     EPSCISSGetRefinement(eps,&i6,&i7);
1749:     PetscOptionsInt("-eps_ciss_refine_inner","Number of inner iterative refinement iterations","EPSCISSSetRefinement",i6,&i6,NULL);
1750:     PetscOptionsInt("-eps_ciss_refine_blocksize","Number of blocksize iterative refinement iterations","EPSCISSSetRefinement",i7,&i7,NULL);
1751:     EPSCISSSetRefinement(eps,i6,i7);

1753:     EPSCISSGetUseST(eps,&b2);
1754:     PetscOptionsBool("-eps_ciss_usest","Use ST for linear solves","EPSCISSSetUseST",b2,&b2,&flg);
1755:     if (flg) { EPSCISSSetUseST(eps,b2); }

1757:     PetscOptionsEnum("-eps_ciss_quadrule","Quadrature rule","EPSCISSSetQuadRule",EPSCISSQuadRules,(PetscEnum)ctx->quad,(PetscEnum*)&quad,&flg);
1758:     if (flg) { EPSCISSSetQuadRule(eps,quad); }

1760:     PetscOptionsEnum("-eps_ciss_extraction","Extraction technique","EPSCISSSetExtraction",EPSCISSExtractions,(PetscEnum)ctx->extraction,(PetscEnum*)&extraction,&flg);
1761:     if (flg) { EPSCISSSetExtraction(eps,extraction); }

1763:   PetscOptionsTail();
1764:   return(0);
1765: }

1767: PetscErrorCode EPSDestroy_CISS(EPS eps)
1768: {
1770:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;

1773:   PetscSubcommDestroy(&ctx->subcomm);
1774:   PetscFree4(ctx->weight,ctx->omega,ctx->pp,ctx->sigma);
1775:   PetscFree(eps->data);
1776:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetSizes_C",NULL);
1777:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetSizes_C",NULL);
1778:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetThreshold_C",NULL);
1779:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetThreshold_C",NULL);
1780:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetRefinement_C",NULL);
1781:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetRefinement_C",NULL);
1782:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetUseST_C",NULL);
1783:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetUseST_C",NULL);
1784:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetQuadRule_C",NULL);
1785:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetQuadRule_C",NULL);
1786:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetExtraction_C",NULL);
1787:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetExtraction_C",NULL);
1788:   return(0);
1789: }

1791: PetscErrorCode EPSView_CISS(EPS eps,PetscViewer viewer)
1792: {
1794:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
1795:   PetscBool      isascii;

1798:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
1799:   if (isascii) {
1800:     PetscViewerASCIIPrintf(viewer,"  sizes { integration points: %D, block size: %D, moment size: %D, partitions: %D, maximum block size: %D }\n",ctx->N,ctx->L,ctx->M,ctx->num_subcomm,ctx->L_max);
1801:     if (ctx->isreal) {
1802:       PetscViewerASCIIPrintf(viewer,"  exploiting symmetry of integration points\n");
1803:     }
1804:     PetscViewerASCIIPrintf(viewer,"  threshold { delta: %g, spurious threshold: %g }\n",(double)ctx->delta,(double)ctx->spurious_threshold);
1805:     PetscViewerASCIIPrintf(viewer,"  iterative refinement  { inner: %D, blocksize: %D }\n",ctx->refine_inner, ctx->refine_blocksize);
1806:     if (ctx->usest) {
1807:       PetscViewerASCIIPrintf(viewer,"  using ST for linear solves\n");
1808:     }
1809:     PetscViewerASCIIPrintf(viewer,"  extraction: %s\n",EPSCISSExtractions[ctx->extraction]);
1810:     PetscViewerASCIIPrintf(viewer,"  quadrature rule: %s\n",EPSCISSQuadRules[ctx->quad]);
1811:     PetscViewerASCIIPushTab(viewer);

1813:     if (!ctx->usest && ctx->ksp[0]) { KSPView(ctx->ksp[0],viewer); }
1814:     PetscViewerASCIIPopTab(viewer);
1815:   }
1816:   return(0);
1817: }

1819: PetscErrorCode EPSSetDefaultST_CISS(EPS eps)
1820: {
1822:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
1823:   PetscBool      usest = ctx->usest;

1826:   if (!((PetscObject)eps->st)->type_name) {
1827:     if (!ctx->usest_set) usest = (ctx->num_subcomm>1)? PETSC_FALSE: PETSC_TRUE;
1828:     if (usest) {
1829:       STSetType(eps->st,STSINVERT);
1830:     } else {
1831:       /* we are not going to use ST, so avoid factorizing the matrix */
1832:       STSetType(eps->st,STSHIFT);
1833:     }
1834:   }
1835:   return(0);
1836: }

1838: PETSC_EXTERN PetscErrorCode EPSCreate_CISS(EPS eps)
1839: {
1841:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;

1844:   PetscNewLog(eps,&ctx);
1845:   eps->data = ctx;

1847:   eps->useds = PETSC_TRUE;
1848:   eps->categ = EPS_CATEGORY_CONTOUR;

1850:   eps->ops->solve          = EPSSolve_CISS;
1851:   eps->ops->setup          = EPSSetUp_CISS;
1852:   eps->ops->setfromoptions = EPSSetFromOptions_CISS;
1853:   eps->ops->destroy        = EPSDestroy_CISS;
1854:   eps->ops->reset          = EPSReset_CISS;
1855:   eps->ops->view           = EPSView_CISS;
1856:   eps->ops->computevectors = EPSComputeVectors_Schur;
1857:   eps->ops->setdefaultst   = EPSSetDefaultST_CISS;

1859:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetSizes_C",EPSCISSSetSizes_CISS);
1860:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetSizes_C",EPSCISSGetSizes_CISS);
1861:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetThreshold_C",EPSCISSSetThreshold_CISS);
1862:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetThreshold_C",EPSCISSGetThreshold_CISS);
1863:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetRefinement_C",EPSCISSSetRefinement_CISS);
1864:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetRefinement_C",EPSCISSGetRefinement_CISS);
1865:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetUseST_C",EPSCISSSetUseST_CISS);
1866:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetUseST_C",EPSCISSGetUseST_CISS);
1867:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetQuadRule_C",EPSCISSSetQuadRule_CISS);
1868:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetQuadRule_C",EPSCISSGetQuadRule_CISS);
1869:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetExtraction_C",EPSCISSSetExtraction_CISS);
1870:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetExtraction_C",EPSCISSGetExtraction_CISS);
1871:   /* set default values of parameters */
1872:   ctx->N                  = 32;
1873:   ctx->L                  = 16;
1874:   ctx->M                  = ctx->N/4;
1875:   ctx->delta              = 1e-12;
1876:   ctx->L_max              = 64;
1877:   ctx->spurious_threshold = 1e-4;
1878:   ctx->usest              = PETSC_TRUE;
1879:   ctx->usest_set          = PETSC_FALSE;
1880:   ctx->isreal             = PETSC_FALSE;
1881:   ctx->refine_inner       = 0;
1882:   ctx->refine_blocksize   = 0;
1883:   ctx->num_subcomm        = 1;
1884:   ctx->quad               = (EPSCISSQuadRule)0;
1885:   ctx->extraction         = EPS_CISS_EXTRACTION_RITZ;
1886:   return(0);
1887: }