Actual source code: stoar.c

slepc-3.8.0 2017-10-20
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2017, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    SLEPc polynomial eigensolver: "stoar"

 13:    Method: S-TOAR

 15:    Algorithm:

 17:        Symmetric Two-Level Orthogonal Arnoldi.

 19:    References:

 21:        [1] C. Campos and J.E. Roman, "Restarted Q-Arnoldi-type methods
 22:            exploiting symmetry in quadratic eigenvalue problems", BIT
 23:            Numer. Math. (in press), 2016.
 24: */

 26: #include <slepc/private/pepimpl.h>         /*I "slepcpep.h" I*/
 27: #include "../src/pep/impls/krylov/pepkrylov.h"
 28: #include <slepcblaslapack.h>

 30: static PetscBool  cited = PETSC_FALSE;
 31: static const char citation[] =
 32:   "@Article{slepc-stoar,\n"
 33:   "   author = \"C. Campos and J. E. Roman\",\n"
 34:   "   title = \"Restarted {Q-Arnoldi-type} methods exploiting symmetry in quadratic eigenvalue problems\",\n"
 35:   "   journal = \"{BIT} Numer. Math.\",\n"
 36:   "   volume = \"56\",\n"
 37:   "   number = \"4\",\n"
 38:   "   pages = \"1213--1236\",\n"
 39:   "   year = \"2016,\"\n"
 40:   "   doi = \"https://doi.org/10.1007/s10543-016-0601-5\"\n"
 41:   "}\n";

 43: /*
 44:   Compute B-norm of v=[v1;v2] whith  B=diag(-pep->T[0],pep->T[2])
 45: */
 46: static PetscErrorCode PEPSTOARNorm(PEP pep,PetscInt j,PetscReal *norm)
 47: {
 49:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
 50:   PetscBLASInt   n_,one=1,ld_;
 51:   PetscScalar    sone=1.0,szero=0.0,*sp,*sq,*w1,*w2,*qK,*qM;
 52:   PetscInt       n,i,lds=ctx->d*ctx->ld;

 55:   qK = ctx->qB;
 56:   qM = ctx->qB+ctx->ld*ctx->ld;
 57:   n = j+2;
 58:   PetscMalloc2(n,&w1,n,&w2);
 59:   sp = ctx->S+lds*j;
 60:   sq = sp+ctx->ld;
 61:   PetscBLASIntCast(n,&n_);
 62:   PetscBLASIntCast(ctx->ld,&ld_);
 63:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qK,&ld_,sp,&one,&szero,w1,&one));
 64:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qM,&ld_,sq,&one,&szero,w2,&one));
 65:   *norm = 0.0;
 66:   for (i=0;i<n;i++) *norm += PetscRealPart(w1[i]*PetscConj(sp[i])+w2[i]*PetscConj(sq[i]));
 67:   *norm = (*norm>0.0)?PetscSqrtReal(*norm):-PetscSqrtReal(-*norm);
 68:   PetscFree2(w1,w2);
 69:   return(0);
 70: }

 72: static PetscErrorCode PEPSTOARqKqMupdates(PEP pep,PetscInt j,Vec *wv)
 73: {
 75:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
 76:   PetscInt       i,ld=ctx->ld;
 77:   PetscScalar    *qK,*qM;
 78:   Vec            vj,v1,v2;

 81:   qK = ctx->qB;
 82:   qM = ctx->qB+ctx->ld*ctx->ld;
 83:   v1 = wv[0];
 84:   v2 = wv[1];
 85:   BVGetColumn(pep->V,j,&vj);
 86:   STMatMult(pep->st,0,vj,v1);
 87:   STMatMult(pep->st,2,vj,v2);
 88:   BVRestoreColumn(pep->V,j,&vj);
 89:   for (i=0;i<=j;i++) {
 90:     BVGetColumn(pep->V,i,&vj);
 91:     VecDot(v1,vj,qK+j*ld+i);
 92:     VecDot(v2,vj,qM+j*ld+i);
 93:     *(qM+j*ld+i) *= pep->sfactor*pep->sfactor;
 94:     BVRestoreColumn(pep->V,i,&vj);
 95:   }
 96:   for (i=0;i<j;i++) {
 97:     qK[i+j*ld] = -qK[i+ld*j];
 98:     qK[j+i*ld] = PetscConj(qK[i+j*ld]);
 99:     qM[j+i*ld] = PetscConj(qM[i+j*ld]);
100:   }
101:   qK[j+j*ld] = -PetscRealPart(qK[j+ld*j]);
102:   qM[j+j*ld] = PetscRealPart(qM[j+ld*j]);
103:   return(0);
104: }

106: PetscErrorCode PEPSetUp_STOAR(PEP pep)
107: {
109:   PetscBool      shift,sinv,flg;
110:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
111:   PetscInt       ld;

114:   pep->lineariz = PETSC_TRUE;
115:   PEPSetDimensions_Default(pep,pep->nev,&pep->ncv,&pep->mpd);
116:   if (!ctx->lock && pep->mpd<pep->ncv) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Should not use mpd parameter in non-locking variant");
117:   if (!pep->max_it) pep->max_it = PetscMax(100,2*(pep->nmat-1)*pep->n/pep->ncv);

119:   PetscObjectTypeCompare((PetscObject)pep->st,STSHIFT,&shift);
120:   PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
121:   if (!shift && !sinv) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Only STSHIFT and STSINVERT spectral transformations can be used");
122:   if (!pep->which) {
123:     if (sinv) pep->which = PEP_TARGET_MAGNITUDE;
124:     else pep->which = PEP_LARGEST_MAGNITUDE;
125:   }
126:   if (pep->problem_type!=PEP_HERMITIAN) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Requested method is only available for Hermitian problems");

128:   if (pep->nmat!=3) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Solver only available for quadratic problems");
129:   if (pep->basis!=PEP_BASIS_MONOMIAL) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Solver not implemented for non-monomial bases");
130:   STGetTransform(pep->st,&flg);
131:   if (!flg) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Solver requires the ST transformation flag set, see STSetTransform()");

133:   PEPAllocateSolution(pep,2);
134:   PEPSetWorkVecs(pep,4);
135:   ld = pep->ncv+2;
136:   DSSetType(pep->ds,DSGHIEP);
137:   DSSetCompact(pep->ds,PETSC_TRUE);
138:   DSAllocate(pep->ds,ld);
139:   STGetNumMatrices(pep->st,&ctx->d);
140:   ctx->d--;
141:   ctx->ld = ld;
142:   if (ctx->S) { PetscFree2(ctx->S,ctx->qB); }
143:   PetscCalloc2(ctx->d*ld*ld,&ctx->S,2*ld*ld,&ctx->qB);
144:   return(0);
145: }

147: /*
148:   Computes GS orthogonalization  x = [z;x] - [Sp;Sq]*y,
149:   where y = Omega\([Sp;Sq]'*[qK zeros(size(qK,1)) ;zeros(size(qK,1)) qM]*[z;x]).
150:   n: Column from S to be orthogonalized against previous columns.
151: */
152: static PetscErrorCode PEPSTOAROrth2(PEP pep,PetscInt k,PetscReal *Omega,PetscScalar *y)
153: {
155:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
156:   PetscBLASInt   n_,lds_,k_,one=1,ld_;
157:   PetscScalar    *S=ctx->S,sonem=-1.0,sone=1.0,szero=0.0,*tp,*tq,*xp,*xq,*c,*qK,*qM;
158:   PetscInt       i,lds=ctx->d*ctx->ld,n,j;

161:   qK = ctx->qB;
162:   qM = ctx->qB+ctx->ld*ctx->ld;
163:   n = k+2;
164:   PetscMalloc3(n,&tp,n,&tq,k,&c);
165:   PetscBLASIntCast(n,&n_); /* Size of qK and qM */
166:   PetscBLASIntCast(ctx->ld,&ld_);
167:   PetscBLASIntCast(lds,&lds_);
168:   PetscBLASIntCast(k,&k_); /* Number of vectors to orthogonalize against */
169:   xp = S+k*lds;
170:   xq = S+ctx->ld+k*lds;
171:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qK,&ld_,xp,&one,&szero,tp,&one));
172:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qM,&ld_,xq,&one,&szero,tq,&one));
173:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,ctx->S,&lds_,tp,&one,&szero,y,&one));
174:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S+ctx->ld,&lds_,tq,&one,&sone,y,&one));
175:   for (i=0;i<k;i++) y[i] /= Omega[i];
176:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S,&lds_,y,&one,&sone,xp,&one));
177:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S+ctx->ld,&lds_,y,&one,&sone,xq,&one));
178:   /* three times */
179:   for (j=0;j<2;j++) {
180:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qK,&ld_,xp,&one,&szero,tp,&one));
181:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,qM,&ld_,xq,&one,&szero,tq,&one));
182:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,ctx->S,&lds_,tp,&one,&szero,c,&one));
183:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S+ctx->ld,&lds_,tq,&one,&sone,c,&one));
184:     for (i=0;i<k;i++) c[i] /= Omega[i];
185:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S,&lds_,c,&one,&sone,xp,&one));
186:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S+ctx->ld,&lds_,c,&one,&sone,xq,&one));
187:     for (i=0;i<k;i++) y[i] += c[i];
188:   }
189:   PetscFree3(tp,tq,c);
190:   return(0);
191: }

193: /*
194:   Compute a run of Lanczos iterations. dim(work)=(ctx->ld)*4
195: */
196: static PetscErrorCode PEPSTOARrun(PEP pep,PetscReal *a,PetscReal *b,PetscReal *omega,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscBool *symmlost,PetscScalar *work,Vec *t_)
197: {
199:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
200:   PetscInt       i,j,m=*M,l;
201:   PetscInt       lds=ctx->d*ctx->ld,offq=ctx->ld;
202:   Vec            v=t_[0],t=t_[1],q=t_[2];
203:   PetscReal      norm,sym=0.0,fro=0.0,*f;
204:   PetscScalar    *y,*S=ctx->S;
205:   PetscBLASInt   j_,one=1;
206:   PetscBool      lindep;

209:   *breakdown = PETSC_FALSE; /* ----- */
210:   DSGetDimensions(pep->ds,NULL,NULL,&l,NULL,NULL);
211:   y = work;
212:   for (j=k;j<m;j++) {
213:     /* apply operator */
214:     BVSetActiveColumns(pep->V,0,j+2);
215:     BVMultVec(pep->V,1.0,0.0,v,S+j*lds);
216:     STMatMult(pep->st,0,v,t);
217:     BVMultVec(pep->V,1.0,0.0,v,S+offq+j*lds);
218:     STMatMult(pep->st,1,v,q);
219:     VecAXPY(t,pep->sfactor,q);
220:     STMatSolve(pep->st,t,q);
221:     VecScale(q,-1.0/(pep->sfactor*pep->sfactor));

223:     /* orthogonalize */
224:     BVOrthogonalizeVec(pep->V,q,S+offq+(j+1)*lds,&norm,&lindep);
225:     if (lindep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STOAR does not support detection of linearly dependent TOAR vectors");
226:     *(S+offq+(j+1)*lds+j+2) = norm;
227:     VecScale(q,1.0/norm);
228:     BVInsertVec(pep->V,j+2,q);
229:     for (i=0;i<=j+1;i++) *(S+(j+1)*lds+i) = *(S+offq+j*lds+i);

231:     /* update qK and qM */
232:     PEPSTOARqKqMupdates(pep,j+2,t_);

234:     /* level-2 orthogonalization */
235:     PEPSTOAROrth2(pep,j+1,omega,y);
236:     a[j] = PetscRealPart(y[j])/omega[j];
237:     PEPSTOARNorm(pep,j+1,&norm);
238:     omega[j+1] = (norm > 0)?1.0:-1.0;
239:     for (i=0;i<=j+2;i++) {
240:       S[i+(j+1)*lds] /= norm;
241:       S[i+offq+(j+1)*lds] /= norm;
242:     }
243:     b[j] = PetscAbsReal(norm);

245:     /* check symmetry */
246:     DSGetArrayReal(pep->ds,DS_MAT_T,&f);
247:     if (j==k) {
248:       for (i=l;i<j-1;i++) y[i] = PetscAbsScalar(y[i])-PetscAbsReal(f[2*ctx->ld+i]);
249:       for (i=0;i<l;i++) y[i] = 0.0;
250:     }
251:     DSRestoreArrayReal(pep->ds,DS_MAT_T,&f);
252:     if (j>0) y[j-1] = PetscAbsScalar(y[j-1])-PetscAbsReal(b[j-1]);
253:     PetscBLASIntCast(j,&j_);
254:     sym = SlepcAbs(BLASnrm2_(&j_,y,&one),sym);
255:     fro = SlepcAbs(fro,SlepcAbs(a[j],b[j]));
256:     if (j>0) fro = SlepcAbs(fro,b[j-1]);
257:     if (sym/fro>PetscMax(PETSC_SQRT_MACHINE_EPSILON,10*pep->tol)) {
258:       *symmlost = PETSC_TRUE;
259:       *M=j+1;
260:       break;
261:     }
262:   }
263:   return(0);
264: }

266: static PetscErrorCode PEPSTOARTrunc(PEP pep,PetscInt rs1,PetscInt cs1,PetscScalar *work,PetscReal *rwork)
267: {
268: #if defined(PETSC_MISSING_LAPACK_GESVD)
270:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
271: #else
273:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
274:   Mat            G;
275:   PetscInt       lwa,nwu=0,nrwu=0;
276:   PetscInt       i,n,lds=2*ctx->ld;
277:   PetscScalar    *M,*V,*U,*S=ctx->S,sone=1.0,zero=0.0,t,*qK,*qM;
278:   PetscReal      *sg;
279:   PetscBLASInt   cs1_,rs1_,cs1t2,cs1p1,n_,info,lw_,lds_,ld_;

282:   qK = ctx->qB;
283:   qM = ctx->qB+ctx->ld*ctx->ld;
284:   n = (rs1>2*cs1)?2*cs1:rs1;
285:   lwa = cs1*rs1*4+n*(rs1+2*cs1)+(cs1+1)*(cs1+2);
286:   M = work+nwu;
287:   nwu += rs1*cs1*2;
288:   U = work+nwu;
289:   nwu += rs1*n;
290:   V = work+nwu;
291:   nwu += 2*cs1*n;
292:   sg = rwork+nrwu;
293:   nrwu += n;
294:   for (i=0;i<cs1;i++) {
295:     PetscMemcpy(M+i*rs1,S+i*lds,rs1*sizeof(PetscScalar));
296:     PetscMemcpy(M+(i+cs1)*rs1,S+i*lds+ctx->ld,rs1*sizeof(PetscScalar));
297:   }
298:   PetscBLASIntCast(n,&n_);
299:   PetscBLASIntCast(cs1,&cs1_);
300:   PetscBLASIntCast(rs1,&rs1_);
301:   PetscBLASIntCast(cs1*2,&cs1t2);
302:   PetscBLASIntCast(cs1+1,&cs1p1);
303:   PetscBLASIntCast(lds,&lds_);
304:   PetscBLASIntCast(ctx->ld,&ld_);
305:   PetscBLASIntCast(lwa-nwu,&lw_);
306: #if !defined(PETSC_USE_COMPLEX)
307:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&rs1_,&cs1t2,M,&rs1_,sg,U,&rs1_,V,&n_,work+nwu,&lw_,&info));
308: #else
309:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&rs1_,&cs1t2,M,&rs1_,sg,U,&rs1_,V,&n_,work+nwu,&lw_,rwork+nrwu,&info));
310: #endif
311:   SlepcCheckLapackInfo("gesvd",info);

313:   /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
314:   MatCreateSeqDense(PETSC_COMM_SELF,rs1,2*cs1,U,&G);
315:   BVSetActiveColumns(pep->V,0,rs1);
316:   BVMultInPlace(pep->V,G,0,cs1+1);
317:   MatDestroy(&G);

319:   /* Update S */
320:   PetscMemzero(S,lds*ctx->ld*sizeof(PetscScalar));

322:   for (i=0;i<cs1+1;i++) {
323:     t = sg[i];
324:     PetscStackCallBLAS("BLASscal",BLASscal_(&cs1t2,&t,V+i,&n_));
325:   }
326:   for (i=0;i<cs1;i++) {
327:     PetscMemcpy(S+i*lds,V+i*n,(cs1+1)*sizeof(PetscScalar));
328:     PetscMemcpy(S+ctx->ld+i*lds,V+(cs1+i)*n,(cs1+1)*sizeof(PetscScalar));
329:   }

331:   /* Update qM and qK */
332:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&rs1_,&cs1p1,&rs1_,&sone,qK,&ld_,U,&rs1_,&zero,work+nwu,&rs1_));
333:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&cs1p1,&cs1p1,&rs1_,&sone,U,&rs1_,work+nwu,&rs1_,&zero,qK,&ld_));
334:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&rs1_,&cs1p1,&rs1_,&sone,qM,&ld_,U,&rs1_,&zero,work+nwu,&rs1_));
335:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&cs1p1,&cs1p1,&rs1_,&sone,U,&rs1_,work+nwu,&rs1_,&zero,qM,&ld_));
336:   return(0);
337: #endif
338: }

340: /*
341:   S <- S*Q
342:   columns s-s+ncu of S
343:   rows 0-sr of S
344:   size(Q) qr x ncu
345:   dim(work)=sr*ncu;
346: */
347: static PetscErrorCode PEPSTOARSupdate(PetscScalar *S,PetscInt ld,PetscInt sr,PetscInt s,PetscInt ncu,PetscInt qr,PetscScalar *Q,PetscInt ldq,PetscScalar *work)
348: {
350:   PetscScalar    a=1.0,b=0.0;
351:   PetscBLASInt   sr_,ncu_,ldq_,lds_,qr_;
352:   PetscInt       j,lds=2*ld;

355:   PetscBLASIntCast(sr,&sr_);
356:   PetscBLASIntCast(qr,&qr_);
357:   PetscBLASIntCast(ncu,&ncu_);
358:   PetscBLASIntCast(lds,&lds_);
359:   PetscBLASIntCast(ldq,&ldq_);
360:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sr_,&ncu_,&qr_,&a,S,&lds_,Q,&ldq_,&b,work,&sr_));
361:   for (j=0;j<ncu;j++) {
362:     PetscMemcpy(S+lds*(s+j),work+j*sr,sr*sizeof(PetscScalar));
363:   }
364:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sr_,&ncu_,&qr_,&a,S+ld,&lds_,Q,&ldq_,&b,work,&sr_));
365:   for (j=0;j<ncu;j++) {
366:     PetscMemcpy(S+lds*(s+j)+ld,work+j*sr,sr*sizeof(PetscScalar));
367:   }
368:   return(0);
369: }

371: #if 0
372: static PetscErrorCode PEPSTOARpreKConvergence(PEP pep,PetscInt nv,PetscReal *norm,Vec *w)
373: {
375:   PEP_TOAR      *ctx = (PEP_TOAR*)pep->data;
376:   PetscBLASInt   n_,one=1;
377:   PetscInt       lds=2*ctx->ld;
378:   PetscReal      t1,t2;
379:   PetscScalar    *S=ctx->S;

382:   PetscBLASIntCast(nv+2,&n_);
383:   t1 = BLASnrm2_(&n_,S+nv*2*ctx->ld,&one);
384:   t2 = BLASnrm2_(&n_,S+(nv*2+1)*ctx->ld,&one);
385:   *norm = SlepcAbs(t1,t2);
386:   BVSetActiveColumns(pep->V,0,nv+2);
387:   BVMultVec(pep->V,1.0,0.0,w[1],S+nv*lds);
388:   STMatMult(pep->st,0,w[1],w[2]);
389:   VecNorm(w[2],NORM_2,&t1);
390:   BVMultVec(pep->V,1.0,0.0,w[1],S+ctx->ld+nv*lds);
391:   STMatMult(pep->st,2,w[1],w[2]);
392:   VecNorm(w[2],NORM_2,&t2);
393:   t2 *= pep->sfactor*pep->sfactor;
394:   *norm = PetscMax(*norm,SlepcAbs(t1,t2));
395:   return(0);
396: }
397: #endif

399: PetscErrorCode PEPSolve_STOAR(PEP pep)
400: {
402:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
403:   PetscInt       j,k,l,nv=0,ld=ctx->ld,lds=ctx->d*ctx->ld,off,ldds,t,nq=0;
404:   PetscInt       lwa,lrwa,nwu=0,nrwu=0,nconv=0;
405:   PetscScalar    *S=ctx->S,*Q,*work;
406:   PetscReal      beta,norm=1.0,*omega,*a,*b,*r,*rwork;
407:   PetscBool      breakdown,symmlost=PETSC_FALSE,sinv;

410:   PetscCitationsRegister(citation,&cited);
411:   BVSetMatrix(pep->V,NULL,PETSC_FALSE);
412:   lwa = 9*ld*ld+5*ld;
413:   lrwa = 8*ld;
414:   PetscMalloc2(lwa,&work,lrwa,&rwork); /* REVIEW */
415:   PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
416:   RGPushScale(pep->rg,sinv?pep->sfactor:1.0/pep->sfactor);
417:   STScaleShift(pep->st,sinv?pep->sfactor:1.0/pep->sfactor);

419:   /* Get the starting Arnoldi vector */
420:   for (j=0;j<ctx->d;j++) {
421:     if (j>=pep->nini) {
422:       BVSetRandomColumn(pep->V,nq);
423:     }else {
424:       BVCopyColumn(pep->V,j,nq);
425:     }
426:     BVOrthogonalizeColumn(pep->V,nq,ctx->S+j*ctx->ld,&norm,&breakdown);
427:     if (!breakdown) {
428:       BVScaleColumn(pep->V,nq,1.0/norm);
429:       ctx->S[nq+j*ctx->ld] = norm;
430:       PEPSTOARqKqMupdates(pep,nq,pep->work);
431:       nq++;
432:     }
433:   }
434:   if (nq<2) SETERRQ(PetscObjectComm((PetscObject)pep),1,"PEP: Problem with initial vector");
435:   PEPSTOARNorm(pep,0,&norm);
436:   for (j=0;j<2;j++) { ctx->S[j+ld] /= norm; ctx->S[j] /= norm; }
437:   DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
438:   omega[0] = (norm>0)?1.0:-1.0;
439:   DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);

441:   /* Restart loop */
442:   l = 0;
443:   DSGetLeadingDimension(pep->ds,&ldds);
444:   while (pep->reason == PEP_CONVERGED_ITERATING) {
445:     pep->its++;
446:     DSGetArrayReal(pep->ds,DS_MAT_T,&a);
447:     b = a+ldds;
448:     DSGetArrayReal(pep->ds,DS_MAT_D,&omega);

450:     /* Compute an nv-step Lanczos factorization */
451:     nv = PetscMin(pep->nconv+pep->mpd,pep->ncv);
452:     PEPSTOARrun(pep,a,b,omega,pep->nconv+l,&nv,&breakdown,&symmlost,work+nwu,pep->work);
453:     beta = b[nv-1];
454:     if (symmlost) {
455:       pep->reason = PEP_DIVERGED_SYMMETRY_LOST;
456:       if (nv==pep->nconv+l+1) { pep->nconv = nconv; break; }
457:     }
458:     DSRestoreArrayReal(pep->ds,DS_MAT_T,&a);
459:     DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);
460:     DSSetDimensions(pep->ds,nv,0,pep->nconv,pep->nconv+l);
461:     if (l==0) {
462:       DSSetState(pep->ds,DS_STATE_INTERMEDIATE);
463:     } else {
464:       DSSetState(pep->ds,DS_STATE_RAW);
465:     }

467:     /* Solve projected problem */
468:     DSSolve(pep->ds,pep->eigr,pep->eigi);
469:     DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);
470:     DSSynchronize(pep->ds,pep->eigr,pep->eigi);

472:     /* Check convergence */
473:     /* PEPSTOARpreKConvergence(pep,nv,&norm,pep->work);*/
474:     norm = 1.0;
475:     DSGetDimensions(pep->ds,NULL,NULL,NULL,NULL,&t);
476:     PEPKrylovConvergence(pep,PETSC_FALSE,pep->nconv,t-pep->nconv,PetscAbsReal(beta)*norm,&k);
477:     nconv = k;
478:     (*pep->stopping)(pep,pep->its,pep->max_it,k,pep->nev,&pep->reason,pep->stoppingctx);

480:     /* Update l */
481:     if (pep->reason != PEP_CONVERGED_ITERATING || breakdown) l = 0;
482:     else {
483:       l = PetscMax(1,(PetscInt)((nv-k)/2));
484:       l = PetscMin(l,t);
485:       DSGetArrayReal(pep->ds,DS_MAT_T,&a);
486:       if (*(a+ldds+k+l-1)!=0) {
487:         if (k+l<nv-1) l = l+1;
488:         else l = l-1;
489:       }
490:       DSRestoreArrayReal(pep->ds,DS_MAT_T,&a);
491:     }
492:     if (!ctx->lock && l>0) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */

494:     /* Update S */
495:     off = pep->nconv*ldds;
496:     DSGetArray(pep->ds,DS_MAT_Q,&Q);
497:     PEPSTOARSupdate(S,ld,nv+2,pep->nconv,k+l-pep->nconv,nv,Q+off,ldds,work+nwu);
498:     DSRestoreArray(pep->ds,DS_MAT_Q,&Q);

500:     /* Copy last column of S */
501:     PetscMemcpy(S+lds*(k+l),S+lds*nv,lds*sizeof(PetscScalar));

503:     if (pep->reason == PEP_CONVERGED_ITERATING) {
504:       if (breakdown) {
505:         /* Stop if breakdown */
506:         PetscInfo2(pep,"Breakdown STOAR method (it=%D norm=%g)\n",pep->its,(double)beta);
507:         pep->reason = PEP_DIVERGED_BREAKDOWN;
508:       } else {
509:         /* Prepare the Rayleigh quotient for restart */
510:         DSGetArray(pep->ds,DS_MAT_Q,&Q);
511:         DSGetArrayReal(pep->ds,DS_MAT_T,&a);
512:         DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
513:         r = a + 2*ldds;
514:         for (j=k;j<k+l;j++) {
515:           r[j] = PetscRealPart(Q[nv-1+j*ldds]*beta);
516:         }
517:         b = a+ldds;
518:         b[k+l-1] = r[k+l-1];
519:         omega[k+l] = omega[nv];
520:         DSRestoreArray(pep->ds,DS_MAT_Q,&Q);
521:         DSRestoreArrayReal(pep->ds,DS_MAT_T,&a);
522:         DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);
523:         /* Truncate S */
524:         DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
525:         PEPSTOARTrunc(pep,nv+2,k+l+1,work+nwu,rwork+nrwu);
526:         DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);
527:       }
528:     }


531:     pep->nconv = k;
532:     PEPMonitor(pep,pep->its,pep->nconv,pep->eigr,pep->eigi,pep->errest,nv);
533:   }

535:   if (pep->nconv>0) {
536:     /* Truncate S */
537:     DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
538:     PEPSTOARTrunc(pep,nv+2,pep->nconv,work+nwu,rwork+nrwu);
539:     DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);

541:     /* Extraction */
542:     DSSetDimensions(pep->ds,pep->nconv,0,0,0);
543:     DSSetState(pep->ds,DS_STATE_RAW);

545:     for (j=0;j<pep->nconv;j++) {
546:       pep->eigr[j] *= pep->sfactor;
547:       pep->eigi[j] *= pep->sfactor;
548:     }
549:   }
550:   STScaleShift(pep->st,sinv?1.0/pep->sfactor:pep->sfactor);
551:   RGPopScale(pep->rg);

553:   /* truncate Schur decomposition and change the state to raw so that
554:      DSVectors() computes eigenvectors from scratch */
555:   DSSetDimensions(pep->ds,pep->nconv,0,0,0);
556:   DSSetState(pep->ds,DS_STATE_RAW);
557:   PetscFree2(work,rwork);
558:   return(0);
559: }

561: PetscErrorCode PEPSetFromOptions_STOAR(PetscOptionItems *PetscOptionsObject,PEP pep)
562: {
564:   PetscBool      flg,lock;

567:   PetscOptionsHead(PetscOptionsObject,"PEP STOAR Options");

569:     PetscOptionsBool("-pep_stoar_locking","Choose between locking and non-locking variants","PEPSTOARSetLocking",PETSC_FALSE,&lock,&flg);
570:     if (flg) { PEPSTOARSetLocking(pep,lock); }

572:   PetscOptionsTail();
573:   return(0);
574: }

576: static PetscErrorCode PEPSTOARSetLocking_STOAR(PEP pep,PetscBool lock)
577: {
578:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

581:   ctx->lock = lock;
582:   return(0);
583: }

585: /*@
586:    PEPSTOARSetLocking - Choose between locking and non-locking variants of
587:    the STOAR method.

589:    Logically Collective on PEP

591:    Input Parameters:
592: +  pep  - the eigenproblem solver context
593: -  lock - true if the locking variant must be selected

595:    Options Database Key:
596: .  -pep_stoar_locking - Sets the locking flag

598:    Notes:
599:    The default is to lock converged eigenpairs when the method restarts.
600:    This behaviour can be changed so that all directions are kept in the
601:    working subspace even if already converged to working accuracy (the
602:    non-locking variant).

604:    Level: advanced

606: .seealso: PEPSTOARGetLocking()
607: @*/
608: PetscErrorCode PEPSTOARSetLocking(PEP pep,PetscBool lock)
609: {

615:   PetscTryMethod(pep,"PEPSTOARSetLocking_C",(PEP,PetscBool),(pep,lock));
616:   return(0);
617: }

619: static PetscErrorCode PEPSTOARGetLocking_STOAR(PEP pep,PetscBool *lock)
620: {
621:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

624:   *lock = ctx->lock;
625:   return(0);
626: }

628: /*@
629:    PEPSTOARGetLocking - Gets the locking flag used in the STOAR method.

631:    Not Collective

633:    Input Parameter:
634: .  pep - the eigenproblem solver context

636:    Output Parameter:
637: .  lock - the locking flag

639:    Level: advanced

641: .seealso: PEPSTOARSetLocking()
642: @*/
643: PetscErrorCode PEPSTOARGetLocking(PEP pep,PetscBool *lock)
644: {

650:   PetscUseMethod(pep,"PEPSTOARGetLocking_C",(PEP,PetscBool*),(pep,lock));
651:   return(0);
652: }

654: PetscErrorCode PEPView_STOAR(PEP pep,PetscViewer viewer)
655: {
657:   PEP_TOAR      *ctx = (PEP_TOAR*)pep->data;
658:   PetscBool      isascii;

661:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
662:   if (isascii) {
663:     PetscViewerASCIIPrintf(viewer,"  using the %slocking variant\n",ctx->lock?"":"non-");
664:   }
665:   return(0);
666: }

668: PetscErrorCode PEPDestroy_STOAR(PEP pep)
669: {
671:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;

674:   PetscFree2(ctx->S,ctx->qB);
675:   PetscFree(pep->data);
676:   PetscObjectComposeFunction((PetscObject)pep,"PEPSTOARSetLocking_C",NULL);
677:   PetscObjectComposeFunction((PetscObject)pep,"PEPSTOARGetLocking_C",NULL);
678:   return(0);
679: }

681: PETSC_EXTERN PetscErrorCode PEPCreate_STOAR(PEP pep)
682: {
684:   PEP_TOAR      *ctx;

687:   PetscNewLog(pep,&ctx);
688:   pep->data = (void*)ctx;
689:   ctx->lock = PETSC_TRUE;

691:   pep->ops->solve          = PEPSolve_STOAR;
692:   pep->ops->setup          = PEPSetUp_STOAR;
693:   pep->ops->setfromoptions = PEPSetFromOptions_STOAR;
694:   pep->ops->destroy        = PEPDestroy_STOAR;
695:   pep->ops->view           = PEPView_STOAR;
696:   pep->ops->backtransform  = PEPBackTransform_Default;
697:   pep->ops->computevectors = PEPComputeVectors_Default;
698:   pep->ops->extractvectors = PEPExtractVectors_TOAR;
699:   pep->ops->setdefaultst   = PEPSetDefaultST_Transform;

701:   PetscObjectComposeFunction((PetscObject)pep,"PEPSTOARSetLocking_C",PEPSTOARSetLocking_STOAR);
702:   PetscObjectComposeFunction((PetscObject)pep,"PEPSTOARGetLocking_C",PEPSTOARGetLocking_STOAR);
703:   return(0);
704: }