Actual source code: filtlan.c

slepc-3.8.0 2017-10-20
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2017, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    This file is an adaptation of several subroutines from FILTLAN, the
 12:    Filtered Lanczos Package, authored by Haw-ren Fang and Yousef Saad.

 14:    More information at:
 15:    http://www-users.cs.umn.edu/~saad/software/filtlan

 17:    References:

 19:        [1] H. Fang and Y. Saad, "A filtered Lanczos procedure for extreme and interior
 20:            eigenvalue problems", SIAM J. Sci. Comput. 34(4):A2220-A2246, 2012.
 21: */

 23: #include <slepc/private/stimpl.h>
 24: #include "./filter.h"

 26: static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal*,PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal*,PetscInt);
 27: static PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal);
 28: static PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt,PetscReal,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*);

 30: /* ////////////////////////////////////////////////////////////////////////////
 31:    //    Newton - Hermite Polynomial Interpolation
 32:    //////////////////////////////////////////////////////////////////////////// */

 34: /*
 35:    FILTLAN function NewtonPolynomial

 37:    build P(z) by Newton's divided differences in the form
 38:        P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)),
 39:    such that P(x(i)) = y(i) for i=1,...,n, where
 40:        x,y are input vectors of length n, and a is the output vector of length n
 41:    if x(i)==x(j) for some i!=j, then it is assumed that the derivative of P(z) is to be zero at x(i),
 42:        and the Hermite polynomial interpolation is applied
 43:    in general, if there are k x(i)'s with the same value x0, then
 44:        the j-th order derivative of P(z) is zero at z=x0 for j=1,...,k-1
 45: */
 46: PETSC_STATIC_INLINE PetscErrorCode FILTLAN_NewtonPolynomial(PetscInt n,PetscReal *x,PetscReal *y,PetscReal *sa,PetscReal *sf)
 47: {
 49:   PetscReal      d,*sx=x,*sy=y;
 50:   PetscInt       j,k;

 53:   PetscMemcpy(sf,sy,n*sizeof(PetscReal));

 55:   /* apply Newton's finite difference method */
 56:   sa[0] = sf[0];
 57:   for (j=1;j<n;j++) {
 58:     for (k=n-1;k>=j;k--) {
 59:       d = sx[k]-sx[k-j];
 60:       if (d == 0.0) sf[k] = 0.0;  /* assume that the derivative is 0.0 and apply the Hermite interpolation */
 61:       else sf[k] = (sf[k]-sf[k-1]) / d;
 62:     }
 63:     sa[j] = sf[j];
 64:   }
 65:   return(0);
 66: }

 68: /*
 69:    FILTLAN function HermiteBaseFilterInChebyshevBasis

 71:    compute a base filter P(z) which is a continuous, piecewise polynomial P(z) expanded
 72:    in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials in each interval

 74:    The base filter P(z) equals P_j(z) for z in the j-th interval [intv(j), intv(j+1)), where
 75:    P_j(z) a Hermite interpolating polynomial

 77:    input:
 78:    intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
 79:    HiLowFlags determines the shape of the base filter P(z)
 80:    Consider the j-th interval [intv(j), intv(j+1)]
 81:    HighLowFlag[j-1]==1,  P(z)==1 for z in [intv(j), intv(j+1)]
 82:                    ==0,  P(z)==0 for z in [intv(j), intv(j+1)]
 83:                    ==-1, [intv(j), intv(j+1)] is a transition interval;
 84:                          P(intv(j)) and P(intv(j+1)) are defined such that P(z) is continuous
 85:    baseDeg is the degree of smoothness of the Hermite (piecewise polynomial) interpolation
 86:    to be precise, the i-th derivative of P(z) is zero, i.e. d^{i}P(z)/dz^i==0, at all interval
 87:    end points z=intv(j) for i=1,...,baseDeg

 89:    output:
 90:    P(z) expanded in a basis of `translated' (scale-and-shift) Chebyshev polynomials
 91:    to be precise, for z in the j-th interval [intv(j),intv(j+1)), P(z) equals
 92:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
 93:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
 94:        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
 95:    with T_i(z) the Chebyshev polynomial of the first kind,
 96:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
 97:    the return matrix is the matrix of Chebyshev coefficients pp just described

 99:    note that the degree of P(z) in each interval is (at most) 2*baseDeg+1, with 2*baseDeg+2 coefficients
100:    let n be the length of intv; then there are n-1 intervals
101:    therefore the return matrix pp is of size (2*baseDeg+2)-by-(n-1)
102: */
103: static PetscErrorCode FILTLAN_HermiteBaseFilterInChebyshevBasis(PetscReal *baseFilter,PetscReal *intv,PetscInt npoints,const PetscInt *HighLowFlags,PetscInt baseDeg)
104: {
106:   PetscInt       m,ii,jj;
107:   PetscReal      flag,flag0,flag2,aa,bb,*px,*py,*sx,*sy,*pp,*qq,*sq,*sbf,*work,*currentPoint = intv;
108:   const PetscInt *hilo = HighLowFlags;

111:   m = 2*baseDeg+2;
112:   jj = npoints-1;  /* jj is initialized as the number of intervals */
113:   PetscMalloc5(m,&px,m,&py,m,&pp,m,&qq,m,&work);
114:   sbf = baseFilter;

116:   while (jj--) {  /* the main loop to compute the Chebyshev coefficients */

118:     flag = (PetscReal)(*hilo++);  /* get the flag of the current interval */
119:     if (flag == -1.0) {  /* flag == -1 means that the current interval is a transition polynomial */

121:       flag2 = (PetscReal)(*hilo);  /* get flag2, the flag of the next interval */
122:       flag0 = 1.0-flag2;       /* the flag of the previous interval is 1-flag2 */

124:       /* two pointers for traversing x[] and y[] */
125:       sx = px;
126:       sy = py;

128:       /* find the current interval [aa,bb] */
129:       aa = *currentPoint++;
130:       bb = *currentPoint;

132:       /* now left-hand side */
133:       ii = baseDeg+1;
134:       while (ii--) {
135:         *sy++ = flag0;
136:         *sx++ = aa;
137:       }

139:       /* now right-hand side */
140:       ii = baseDeg+1;
141:       while (ii--) {
142:         *sy++ = flag2;
143:         *sx++ = bb;
144:       }

146:       /* build a Newton polynomial (indeed, the generalized Hermite interpolating polynomial) with x[] and y[] */
147:       FILTLAN_NewtonPolynomial(m,px,py,pp,work);

149:       /* pp contains coefficients of the Newton polynomial P(z) in the current interval [aa,bb], where
150:          P(z) = pp(1) + pp(2)*(z-px(1)) + pp(3)*(z-px(1))*(z-px(2)) + ... + pp(n)*(z-px(1))*...*(z-px(n-1)) */

152:       /* translate the Newton coefficients to the Chebyshev coefficients */
153:       FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(m,aa,bb,pp,px,qq,work);
154:       /* qq contains coefficients of the polynomial in [aa,bb] in the `translated' Chebyshev basis */

156:       /* copy the Chebyshev coefficients to baseFilter
157:          OCTAVE/MATLAB: B(:,j) = qq, where j = (npoints-1)-jj and B is the return matrix */
158:       sq = qq;
159:       ii = 2*baseDeg+2;
160:       while (ii--) *sbf++ = *sq++;

162:     } else {

164:       /* a constant polynomial P(z)=flag, where either flag==0 or flag==1
165:        OCTAVE/MATLAB: B(1,j) = flag, where j = (npoints-1)-jj and B is the return matrix */
166:       *sbf++ = flag;

168:       /* the other coefficients are all zero, since P(z) is a constant
169:        OCTAVE/MATLAB: B(1,j) = 0, where j = (npoints-1)-jj and B is the return matrix */
170:       ii = 2*baseDeg+1;
171:       while (ii--) *sbf++ = 0.0;

173:       /* for the next point */
174:       currentPoint++;
175:     }
176:   }
177:   PetscFree5(px,py,pp,qq,work);
178:   return(0);
179: }

181: /* ////////////////////////////////////////////////////////////////////////////
182:    //    Base Filter
183:    //////////////////////////////////////////////////////////////////////////// */

185: /*
186:    FILTLAN function GetIntervals

188:    this routine determines the intervals (including the transition one(s)) by an iterative process

190:    frame is a vector consisting of 4 ordered elements:
191:        [frame(1),frame(4)] is the interval which (tightly) contains all eigenvalues, and
192:        [frame(2),frame(3)] is the interval in which the eigenvalues are sought
193:    baseDeg is the left-and-right degree of the base filter for each interval
194:    polyDeg is the (maximum possible) degree of s(z), with z*s(z) the polynomial filter
195:    intv is the output; the j-th interval is [intv(j),intv(j+1))
196:    opts is a collection of interval options

198:    the base filter P(z) is a piecewise polynomial from Hermite interpolation with degree baseDeg
199:    at each end point of intervals

201:    the polynomial filter Q(z) is in the form z*s(z), i.e. Q(0)==0, such that ||(1-P(z))-z*s(z)||_w is
202:    minimized with s(z) a polynomial of degree up to polyDeg

204:    the resulting polynomial filter Q(z) satisfies Q(x)>=Q(y) for x in [frame[1],frame[2]], and
205:    y in [frame[0],frame[3]] but not in [frame[1],frame[2]]

207:    the routine fills a PolynomialFilterInfo struct which gives some information of the polynomial filter
208: */
209: static PetscErrorCode FILTLAN_GetIntervals(PetscReal *intervals,PetscReal *frame,PetscInt polyDeg,PetscInt baseDeg,FILTLAN_IOP opts,FILTLAN_PFI filterInfo)
210: {
211:   PetscErrorCode  ierr;
212:   PetscReal       intv[6],x,y,z1,z2,c,c1,c2,fc,fc2,halfPlateau,leftDelta,rightDelta,gridSize;
213:   PetscReal       yLimit,ySummit,yLeftLimit,yRightLimit,bottom,qIndex,*baseFilter,*polyFilter;
214:   PetscReal       yLimitGap=0.0,yLeftSummit=0.0,yLeftBottom=0.0,yRightSummit=0.0,yRightBottom=0.0;
215:   PetscInt        i,ii,npoints,numIter,numLeftSteps=1,numRightSteps=1,numMoreLooked=0;
216:   PetscBool       leftBoundaryMet=PETSC_FALSE,rightBoundaryMet=PETSC_FALSE,stepLeft,stepRight;
217:   const PetscReal a=frame[0],a1=frame[1],b1=frame[2],b=frame[3];
218:   const PetscInt  HighLowFlags[5] = { 1, -1, 0, -1, 1 };  /* if filterType is 1, only first 3 elements will be used */
219:   const PetscInt  numLookMore = 2*(PetscInt)(0.5+(PetscLogReal(2.0)/PetscLogReal(opts->shiftStepExpanRate)));

222:   if (a>a1 || a1>b1 || b1>b) SETERRQ(PETSC_COMM_SELF,1,"Values in the frame vector should be non-decreasing");
223:   if (a1 == b1) SETERRQ(PETSC_COMM_SELF,1,"The range of wanted eigenvalues cannot be of size zero");
224:   filterInfo->filterType = 2;      /* mid-pass filter, for interior eigenvalues */
225:   if (b == b1) {
226:     if (a == a1) SETERRQ(PETSC_COMM_SELF,1,"A polynomial filter should not cover all eigenvalues");
227:     filterInfo->filterType = 1;    /* high-pass filter, for largest eigenvalues */
228:   } else if (a == a1) SETERRQ(PETSC_COMM_SELF,1,"filterType==3 for smallest eigenvalues should be pre-converted to filterType==1 for largest eigenvalues");

230:   /* the following recipe follows Yousef Saad (2005, 2006) with a few minor adaptations / enhancements */
231:   halfPlateau = 0.5*(b1-a1)*opts->initialPlateau;    /* half length of the "plateau" of the (dual) base filter */
232:   leftDelta = (b1-a1)*opts->initialShiftStep;        /* initial left shift */
233:   rightDelta = leftDelta;                            /* initial right shift */
234:   opts->numGridPoints = PetscMax(opts->numGridPoints,(PetscInt)(2.0*(b-a)/halfPlateau));
235:   gridSize = (b-a) / (PetscReal)(opts->numGridPoints);

237:   if (filterInfo->filterType == 2) {  /* for interior eigenvalues */
238:     npoints = 6;
239:     intv[0] = a;
240:     intv[5] = b;
241:     /* intv[1], intv[2], intv[3], and intv[4] to be determined */
242:   } else { /* filterType == 1 (or 3 with conversion), for extreme eigenvalues */
243:     npoints = 4;
244:     intv[0] = a;
245:     intv[3] = b;
246:     /* intv[1], and intv[2] to be determined */
247:   }
248:   z1 = a1 - leftDelta;
249:   z2 = b1 + rightDelta;
250:   filterInfo->filterOK = 0;  /* not yet found any OK filter */

252:   /* allocate matrices */
253:   PetscMalloc2((polyDeg+2)*(npoints-1),&polyFilter,(2*baseDeg+2)*(npoints-1),&baseFilter);

255:   /* initialize the intervals, mainly for the case opts->maxOuterIter == 0 */
256:   intervals[0] = intv[0];
257:   intervals[3] = intv[3];
258:   intervals[5] = intv[5];
259:   intervals[1] = z1;
260:   if (filterInfo->filterType == 2) {  /* a mid-pass filter for interior eigenvalues */
261:     intervals[4] = z2;
262:     c = (a1+b1) / 2.0;
263:     intervals[2] = c - halfPlateau;
264:     intervals[3] = c + halfPlateau;
265:   } else {  /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
266:     intervals[2] = z1 + (b1-z1)*opts->transIntervalRatio;
267:   }

269:   /* the main loop */
270:   for (numIter=1;numIter<=opts->maxOuterIter;numIter++) {
271:     if (z1 <= a) {  /* outer loop updates z1 and z2 */
272:       z1 = a;
273:       leftBoundaryMet = PETSC_TRUE;
274:     }
275:     if (filterInfo->filterType == 2) {  /* a <= z1 < (a1) */
276:       if (z2 >= b) {  /* a mid-pass filter for interior eigenvalues */
277:         z2 = b;
278:         rightBoundaryMet = PETSC_TRUE;
279:       }
280:       c = (z1+z2) / 2.0;
281:       /* a <= z1 < c-h < c+h < z2 <= b, where h is halfPlateau */
282:       /* [z1, c-h] and [c+h, z2] are transition interval */
283:       intv[1] = z1;
284:       intv[4] = z2;
285:       c1 = z1 + halfPlateau;
286:       intv[2] = z1;           /* i.e. c1 - halfPlateau */
287:       intv[3] = c1 + halfPlateau;
288:       FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
289:       FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
290:       /* fc1 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); */
291:       c2 = z2 - halfPlateau;
292:       intv[2] = c2 - halfPlateau;
293:       intv[3] = z2;           /* i.e. c2 + halfPlateau */
294:       FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
295:       FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
296:       fc2 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
297:       yLimitGap = PETSC_MAX_REAL;
298:       ii = opts->maxInnerIter;
299:       while (ii-- && !(yLimitGap <= opts->yLimitTol)) {
300:         /* recursive bisection to get c such that p(a1) are p(b1) approximately the same */
301:         c = (c1+c2) / 2.0;
302:         intv[2] = c - halfPlateau;
303:         intv[3] = c + halfPlateau;
304:         FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
305:         FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
306:         fc = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
307:         if (fc*fc2 < 0.0) {
308:           c1 = c;
309:           /* fc1 = fc; */
310:         } else {
311:           c2 = c;
312:           fc2 = fc;
313:         }
314:         yLimitGap = PetscAbsReal(fc);
315:       }
316:     } else {  /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
317:       intv[1] = z1;
318:       intv[2] = z1 + (b1-z1)*opts->transIntervalRatio;
319:       FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,4,HighLowFlags,baseDeg);
320:       FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,4,opts->intervalWeights,polyDeg);
321:     }
322:     /* polyFilter contains the coefficients of the polynomial filter which approximates phi(x)
323:        expanded in the `translated' Chebyshev basis */
324:     /* psi(x) = 1.0 - phi(x) is the dual base filter approximated by a polynomial in the form x*p(x) */
325:     yLeftLimit  = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
326:     yRightLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1);
327:     yLimit  = (yLeftLimit < yRightLimit) ? yLeftLimit : yRightLimit;
328:     ySummit = (yLeftLimit > yRightLimit) ? yLeftLimit : yRightLimit;
329:     x = a1;
330:     while ((x+=gridSize) < b1) {
331:       y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
332:       if (y < yLimit)  yLimit  = y;
333:       if (y > ySummit) ySummit = y;
334:     }
335:     /* now yLimit is the minimum of x*p(x) for x in [a1, b1] */
336:     stepLeft  = PETSC_FALSE;
337:     stepRight = PETSC_FALSE;
338:     if ((yLimit < yLeftLimit && yLimit < yRightLimit) || yLimit < opts->yBottomLine) {
339:       /* very bad, step to see what will happen */
340:       stepLeft = PETSC_TRUE;
341:       if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
342:     } else if (filterInfo->filterType == 2) {
343:       if (yLeftLimit < yRightLimit) {
344:         if (yRightLimit-yLeftLimit > opts->yLimitTol) stepLeft = PETSC_TRUE;
345:       } else if (yLeftLimit-yRightLimit > opts->yLimitTol) stepRight = PETSC_TRUE;
346:     }
347:     if (!stepLeft) {
348:       yLeftBottom = yLeftLimit;
349:       x = a1;
350:       while ((x-=gridSize) >= a) {
351:         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
352:         if (y < yLeftBottom) yLeftBottom = y;
353:         else if (y > yLeftBottom) break;
354:       }
355:       yLeftSummit = yLeftBottom;
356:       while ((x-=gridSize) >= a) {
357:         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
358:         if (y > yLeftSummit) {
359:           yLeftSummit = y;
360:           if (yLeftSummit > yLimit*opts->yRippleLimit) {
361:             stepLeft = PETSC_TRUE;
362:             break;
363:           }
364:         }
365:         if (y < yLeftBottom) yLeftBottom = y;
366:       }
367:     }
368:     if (filterInfo->filterType == 2 && !stepRight) {
369:       yRightBottom = yRightLimit;
370:       x = b1;
371:       while ((x+=gridSize) <= b) {
372:         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
373:         if (y < yRightBottom) yRightBottom = y;
374:         else if (y > yRightBottom) break;
375:       }
376:       yRightSummit = yRightBottom;
377:       while ((x+=gridSize) <= b) {
378:         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
379:         if (y > yRightSummit) {
380:           yRightSummit = y;
381:           if (yRightSummit > yLimit*opts->yRippleLimit) {
382:             stepRight = PETSC_TRUE;
383:             break;
384:           }
385:         }
386:         if (y < yRightBottom) yRightBottom = y;
387:       }
388:     }
389:     if (!stepLeft && !stepRight) {
390:       if (filterInfo->filterType == 2) bottom = PetscMin(yLeftBottom,yRightBottom);
391:       else bottom = yLeftBottom;
392:       qIndex = 1.0 - (yLimit-bottom) / (ySummit-bottom);
393:       if (filterInfo->filterOK == 0 || filterInfo->filterQualityIndex < qIndex) {
394:         /* found the first OK filter or a better filter */
395:         for (i=0;i<6;i++) intervals[i] = intv[i];
396:         filterInfo->filterOK           = 1;
397:         filterInfo->filterQualityIndex = qIndex;
398:         filterInfo->numIter            = numIter;
399:         filterInfo->yLimit             = yLimit;
400:         filterInfo->ySummit            = ySummit;
401:         filterInfo->numLeftSteps       = numLeftSteps;
402:         filterInfo->yLeftSummit        = yLeftSummit;
403:         filterInfo->yLeftBottom        = yLeftBottom;
404:         if (filterInfo->filterType == 2) {
405:           filterInfo->yLimitGap        = yLimitGap;
406:           filterInfo->numRightSteps    = numRightSteps;
407:           filterInfo->yRightSummit     = yRightSummit;
408:           filterInfo->yRightBottom     = yRightBottom;
409:         }
410:         numMoreLooked = 0;
411:       } else if (++numMoreLooked == numLookMore) {
412:         /* filter has been optimal */
413:         filterInfo->filterOK = 2;
414:         break;
415:       }
416:       /* try stepping further to see whether it can improve */
417:       stepLeft = PETSC_TRUE;
418:       if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
419:     }
420:     /* check whether we can really "step" */
421:     if (leftBoundaryMet) {
422:       if (filterInfo->filterType == 1 || rightBoundaryMet) break;  /* cannot step further, so break the loop */
423:       if (stepLeft) {
424:         /* cannot step left, so try stepping right */
425:         stepLeft  = PETSC_FALSE;
426:         stepRight = PETSC_TRUE;
427:       }
428:     }
429:     if (rightBoundaryMet && stepRight) {
430:       /* cannot step right, so try stepping left */
431:       stepRight = PETSC_FALSE;
432:       stepLeft  = PETSC_TRUE;
433:     }
434:     /* now "step" */
435:     if (stepLeft) {
436:       numLeftSteps++;
437:       if (filterInfo->filterType == 2) leftDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
438:       z1 -= leftDelta;
439:     }
440:     if (stepRight) {
441:       numRightSteps++;
442:       rightDelta *= opts->shiftStepExpanRate;  /* expand the step for faster convergence */
443:       z2 += rightDelta;
444:     }
445:     if (filterInfo->filterType == 2) {
446:       /* shrink the "plateau" of the (dual) base filter */
447:       if (stepLeft && stepRight) halfPlateau /= opts->plateauShrinkRate;
448:       else halfPlateau /= PetscSqrtReal(opts->plateauShrinkRate);
449:     }
450:   }
451:   filterInfo->totalNumIter = numIter;
452:   PetscFree2(polyFilter,baseFilter);
453:   return(0);
454: }

456: /* ////////////////////////////////////////////////////////////////////////////
457:    //    Chebyshev Polynomials
458:    //////////////////////////////////////////////////////////////////////////// */

460: /*
461:    FILTLAN function ExpandNewtonPolynomialInChebyshevBasis

463:    translate the coefficients of a Newton polynomial to the coefficients
464:    in a basis of the `translated' (scale-and-shift) Chebyshev polynomials

466:    input:
467:    a Newton polynomial defined by vectors a and x:
468:        P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1))
469:    the interval [aa,bb] defines the `translated' Chebyshev polynomials S_i(z) = T_i((z-c)/h),
470:        where c=(aa+bb)/2 and h=(bb-aa)/2, and T_i is the Chebyshev polynomial of the first kind
471:    note that T_i is defined by T_0(z)=1, T_1(z)=z, and T_i(z)=2*z*T_{i-1}(z)+T_{i-2}(z) for i>=2

473:    output:
474:    a vector q containing the Chebyshev coefficients:
475:        P(z) = q(1)*S_0(z) + q(2)*S_1(z) + ... + q(n)*S_{n-1}(z)
476: */
477: PETSC_STATIC_INLINE PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt n,PetscReal aa,PetscReal bb,PetscReal *a,PetscReal *x,PetscReal *q,PetscReal *q2)
478: {
479:   PetscInt  m,mm;
480:   PetscReal *sa,*sx,*sq,*sq2,c,c2,h,h2;

483:   sa = a+n;    /* pointers for traversing a and x */
484:   sx = x+n-1;
485:   *q = *--sa;  /* set q[0] = a(n) */

487:   c = (aa+bb)/2.0;
488:   h = (bb-aa)/2.0;
489:   h2 = h/2.0;

491:   for (m=1;m<=n-1;m++) {  /* the main loop for translation */

493:     /* compute q2[0:m-1] = (c-x[n-m-1])*q[0:m-1] */
494:     mm = m;
495:     sq = q;
496:     sq2 = q2;
497:     c2 = c-(*--sx);
498:     while (mm--) *(sq2++) = c2*(*sq++);
499:     *sq2 = 0.0;         /* q2[m] = 0.0 */
500:     *(q2+1) += h*(*q);  /* q2[1] = q2[1] + h*q[0] */

502:     /* compute q2[0:m-2] = q2[0:m-2] + h2*q[1:m-1] */
503:     mm = m-1;
504:     sq2 = q2;
505:     sq = q+1;
506:     while (mm--) *(sq2++) += h2*(*sq++);

508:     /* compute q2[2:m] = q2[2:m] + h2*q[1:m-1] */
509:     mm = m-1;
510:     sq2 = q2+2;
511:     sq = q+1;
512:     while (mm--) *(sq2++) += h2*(*sq++);

514:     /* compute q[0:m] = q2[0:m] */
515:     mm = m+1;
516:     sq2 = q2;
517:     sq = q;
518:     while (mm--) *sq++ = *sq2++;
519:     *q += (*--sa);      /* q[0] = q[0] + p[n-m-1] */
520:   }
521:   return(0);
522: }

524: /*
525:    FILTLAN function PolynomialEvaluationInChebyshevBasis

527:    evaluate P(z) at z=z0, where P(z) is a polynomial expanded in a basis of
528:    the `translated' (i.e. scale-and-shift) Chebyshev polynomials

530:    input:
531:    c is a vector of Chebyshev coefficients which defines the polynomial
532:        P(z) = c(1)*S_0(z) + c(2)*S_1(z) + ... + c(n)*S_{n-1}(z),
533:    where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
534:        c = (intv(j)+intv(j+1)) / 2,  h = (intv(j+1)-intv(j)) / 2
535:    note that T_i(z) is the Chebyshev polynomial of the first kind,
536:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2

538:    output:
539:    the evaluated value of P(z) at z=z0
540: */
541: PETSC_STATIC_INLINE PetscReal FILTLAN_PolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscInt idx,PetscReal z0,PetscReal aa,PetscReal bb)
542: {
543:   PetscInt  ii,deg1;
544:   PetscReal y,zz,t0,t1,t2,*sc;

547:   deg1 = m;
548:   if (aa==-1.0 && bb==1.0) zz = z0;  /* treat it as a special case to reduce rounding errors */
549:   else zz = (aa==bb)? 0.0 : -1.0+2.0*(z0-aa)/(bb-aa);

551:   /* compute y = P(z0), where we utilize the Chebyshev recursion */
552:   sc = pp+(idx-1)*m;   /* sc points to column idx of pp */
553:   y = *sc++;
554:   t1 = 1.0; t2 = zz;
555:   ii = deg1-1;
556:   while (ii--) {
557:     /* Chebyshev recursion: T_0(zz)=1, T_1(zz)=zz, and T_{i+1}(zz) = 2*zz*T_i(zz) + T_{i-1}(zz) for i>=2
558:        the values of T_{i+1}(zz), T_i(zz), T_{i-1}(zz) are stored in t0, t1, t2, respectively */
559:     t0 = 2*zz*t1 - t2;
560:     /* it also works for the base case / the first iteration, where t0 equals 2*zz*1-zz == zz which is T_1(zz) */
561:     t2 = t1;
562:     t1 = t0;
563:     y += (*sc++)*t0;
564:   }
565:   PetscFunctionReturn(y);
566: }

568: #define basisTranslated PETSC_TRUE
569: /*
570:    FILTLAN function PiecewisePolynomialEvaluationInChebyshevBasis

572:    evaluate P(z) at z=z0, where P(z) is a piecewise polynomial expanded
573:    in a basis of the (optionally translated, i.e. scale-and-shift) Chebyshev polynomials for each interval

575:    input:
576:    intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
577:    pp is a matrix of Chebyshev coefficients which defines a piecewise polynomial P(z)
578:    in a basis of the `translated' Chebyshev polynomials in each interval
579:    the polynomial P_j(z) in the j-th interval, i.e. when z is in [intv(j), intv(j+1)), is defined by the j-th column of pp:
580:        if basisTranslated == false, then
581:            P_j(z) = pp(1,j)*T_0(z) + pp(2,j)*T_1(z) + ... + pp(n,j)*T_{n-1}(z),
582:        where T_i(z) is the Chebyshev polynomial of the first kind,
583:            T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
584:        if basisTranslated == true, then
585:            P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
586:        where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
587:            c = (intv(j)+intv(j+1)) / 2,  h = (intv(j+1)-intv(j)) / 2

589:    output:
590:    the evaluated value of P(z) at z=z0

592:    note that if z0 falls below the first interval, then the polynomial in the first interval will be used to evaluate P(z0)
593:              if z0 flies over  the last  interval, then the polynomial in the last  interval will be used to evaluate P(z0)
594: */
595: PETSC_STATIC_INLINE PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscReal *intv,PetscInt npoints,PetscReal z0)
596: {
597:   PetscReal *sintv,aa,bb,resul;
598:   PetscInt  idx;

601:   /* determine the interval which contains z0 */
602:   sintv = &intv[1];
603:   idx = 1;
604:   if (npoints>2 && z0 >= *sintv) {
605:     sintv++;
606:     while (++idx < npoints-1) {
607:       if (z0 < *sintv) break;
608:       sintv++;
609:     }
610:   }
611:   /* idx==1 if npoints<=2; otherwise idx satisfies:
612:          intv(idx) <= z0 < intv(idx+1),  if 2 <= idx <= npoints-2
613:          z0 < intv(idx+1),               if idx == 1
614:          intv(idx) <= z0,                if idx == npoints-1
615:      in addition, sintv points to &intv(idx+1) */
616:   if (basisTranslated) {
617:     /* the basis consists of `translated' Chebyshev polynomials */
618:     /* find the interval of concern, [aa,bb] */
619:     aa = *(sintv-1);
620:     bb = *sintv;
621:     resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,aa,bb);
622:   } else {
623:     /* the basis consists of standard Chebyshev polynomials, with interval [-1.0,1.0] for integration */
624:     resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,-1.0,1.0);
625:   }
626:   PetscFunctionReturn(resul);
627: }

629: /*
630:    FILTLAN function PiecewisePolynomialInnerProductInChebyshevBasis

632:    compute the weighted inner product of two piecewise polynomials expanded
633:    in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval

635:    pp and qq are two matrices of Chebyshev coefficients which define the piecewise polynomials P(z) and Q(z), respectively
636:    for z in the j-th interval, P(z) equals
637:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
638:    and Q(z) equals
639:        Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
640:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
641:        S_i((z-c)/h) = T_i(z),  c = (aa+bb)) / 2,  h = (bb-aa) / 2,
642:    with T_i(z) the Chebyshev polynomial of the first kind
643:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2

645:    the (scaled) j-th interval inner product is defined by
646:        <P_j,Q_j> = (Pi/2)*( pp(1,j)*qq(1,j) + sum_{k} pp(k,j)*qq(k,j) ),
647:    which comes from the property
648:        <T_0,T_0>=pi, <T_i,T_i>=pi/2 for i>=1, and <T_i,T_j>=0 for i!=j

650:    the weighted inner product is <P,Q> = sum_{j} intervalWeights(j)*<P_j,Q_j>,
651:    which is the return value

653:    note that for unit weights, pass an empty vector of intervalWeights (i.e. of length 0)
654: */
655: PETSC_STATIC_INLINE PetscReal FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(PetscReal *pp,PetscInt prows,PetscInt pcols,PetscInt ldp,PetscReal *qq,PetscInt qrows,PetscInt qcols,PetscInt ldq,const PetscReal *intervalWeights)
656: {
657:   PetscInt  nintv,deg1,i,k;
658:   PetscReal *sp,*sq,ans=0.0,ans2;

661:   deg1 = PetscMin(prows,qrows);  /* number of effective coefficients, one more than the effective polynomial degree */
662:   if (!deg1) PetscFunctionReturn(0.0);
663:   nintv = PetscMin(pcols,qcols);  /* number of intervals */

665:   /* scaled by intervalWeights(i) in the i-th interval (we assume intervalWeights[] are always provided).
666:      compute ans = sum_{i=1,...,nintv} intervalWeights(i)*[ pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) ] */
667:   for (i=0;i<nintv;i++) {   /* compute ans2 = pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) */
668:     sp = pp+i*ldp;
669:     sq = qq+i*ldq;
670:     ans2 = (*sp) * (*sq);  /* the first term pp(1,i)*qq(1,i) is being added twice */
671:     for (k=0;k<deg1;k++) ans2 += (*sp++)*(*sq++);  /* add pp(k,i)*qq(k,i) */
672:     ans += ans2*intervalWeights[i];  /* compute ans += ans2*intervalWeights(i) */
673:   }
674:   PetscFunctionReturn(ans*PETSC_PI/2.0);
675: }

677: /*
678:    FILTLAN function PiecewisePolynomialInChebyshevBasisMultiplyX

680:    compute Q(z) = z*P(z), where P(z) and Q(z) are piecewise polynomials expanded
681:    in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval

683:    P(z) and Q(z) are stored as matrices of Chebyshev coefficients pp and qq, respectively

685:    For z in the j-th interval, P(z) equals
686:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
687:    and Q(z) equals
688:        Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
689:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
690:        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
691:    with T_i(z) the Chebyshev polynomial of the first kind
692:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2

694:    the returned matrix is qq which represents Q(z) = z*P(z)
695: */
696: PETSC_STATIC_INLINE PetscErrorCode FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(PetscReal *pp,PetscInt deg1,PetscInt ldp,PetscReal *intv,PetscInt nintv,PetscReal *qq,PetscInt ldq)
697: {
698:   PetscInt  i,j;
699:   PetscReal c,h,h2,tmp,*sp,*sq,*sp2,*sq2;

702:   for (j=0;j<nintv;j++) {    /* consider interval between intv(j) and intv(j+1) */
703:     sp = pp+j*ldp;
704:     sq = qq+j*ldq;
705:     sp2 = sp;
706:     sq2 = sq+1;
707:     c = 0.5*(intv[j] + intv[j+1]);   /* compute c = (intv(j) + intv(j+1))/2 */
708:     h = 0.5*(intv[j+1] - intv[j]);   /* compute h = (intv(j+1) - intv(j))/2  and  h2 = h/2 */
709:     h2 = 0.5*h;
710:     i = deg1;
711:     while (i--) *sq++ = c*(*sp++);    /* compute q(1:deg1,j) = c*p(1:deg1,j) */
712:     *sq++ = 0.0;                      /* set q(deg1+1,j) = 0.0 */
713:     *(sq2++) += h*(*sp2++);           /* compute q(2,j) = q(2,j) + h*p(1,j) */
714:     i = deg1-1;
715:     while (i--) {       /* compute q(3:deg1+1,j) += h2*p(2:deg1,j) and then q(1:deg1-1,j) += h2*p(2:deg1,j) */
716:       tmp = h2*(*sp2++);
717:       *(sq2-2) += tmp;
718:       *(sq2++) += tmp;
719:     }
720:   }
721:   return(0);
722: }

724: /* ////////////////////////////////////////////////////////////////////////////
725:    //    Conjugate Residual Method in the Polynomial Space
726:    //////////////////////////////////////////////////////////////////////////// */

728: /*
729:     B := alpha*A + beta*B

731:     A,B are nxk
732: */
733: PETSC_STATIC_INLINE PetscErrorCode Mat_AXPY_BLAS(PetscInt n,PetscInt k,PetscReal alpha,const PetscReal *A,PetscInt lda,PetscReal beta,PetscReal *B,PetscInt ldb)
734: {
736:   PetscInt       i,j;

739:   if (beta==(PetscReal)1.0) {
740:     for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] += alpha*A[i+j*lda];
741:     PetscLogFlops(2.0*n*k);
742:   } else {
743:     for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] = alpha*A[i+j*lda] + beta*B[i+j*ldb];
744:     PetscLogFlops(3.0*n*k);
745:   }
746:   return(0);
747: }

749: /*
750:    FILTLAN function FilteredConjugateResidualPolynomial

752:    ** Conjugate Residual Method in the Polynomial Space

754:    this routine employs a conjugate-residual-type algorithm in polynomial space to minimize ||P(z)-Q(z)||_w,
755:    where P(z), the base filter, is the input piecewise polynomial, and
756:          Q(z) is the output polynomial satisfying Q(0)==1, i.e. the constant term of Q(z) is 1
757:    niter is the number of conjugate-residual iterations; therefore, the degree of Q(z) is up to niter+1
758:    both P(z) and Q(z) are expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
759:    and presented as matrices of Chebyshev coefficients, denoted by pp and qq, respectively

761:    input:
762:    intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
763:    w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
764:        the interval weights define the inner product of two continuous functions and then
765:        the derived w-norm ||P(z)-Q(z)||_w
766:    pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
767:    to be specific, for z in [intv(j), intv(j+1)), P(z) equals
768:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
769:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
770:        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
771:    with T_i(z) the Chebyshev polynomial of the first kind,
772:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2

774:    output:
775:    the return matrix, denoted by qq, represents a polynomial Q(z) with degree up to 1+niter
776:    and satisfying Q(0)==1, such that ||P(z))-Q(z)||_w is minimized
777:    this polynomial Q(z) is expanded in the `translated' Chebyshev basis for each interval
778:    to be precise, considering z in [intv(j), intv(j+1)), Q(z) equals
779:        Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(niter+2,j)*S_{niter+1}(z)

781:    note:
782:    1. since Q(0)==1, P(0)==1 is expected; if P(0)!=1, one can translate P(z)
783:       for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
784:    2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
785:       in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
786: */
787: static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal *cpol,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt m,PetscReal *intervalWeights,PetscInt niter)
788: {
790:   PetscInt       i,j,srpol,scpol,sarpol,sppol,sappol,ld,nintv;
791:   PetscReal      rho,rho0,rho1,den,bet,alp,alp0,*ppol,*rpol,*appol,*arpol;

794:   nintv = m-1;
795:   ld = niter+2;  /* leading dimension */
796:   PetscCalloc4(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&appol,ld*nintv,&arpol);
797:   PetscMemzero(cpol,ld*nintv*sizeof(PetscReal));
798:   /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
799:   sppol = 2;
800:   srpol = 2;
801:   scpol = 2;
802:   for (j=0;j<nintv;j++) {
803:     ppol[j*ld] = 1.0;
804:     rpol[j*ld] = 1.0;
805:     cpol[j*ld] = 1.0;
806:   }
807:   /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
808:      rpol is the r-polynomial (corresponding to the residual vector r in CG)
809:      cpol is the "corrected" residual polynomial (result of this function) */
810:   sappol = 3;
811:   sarpol = 3;
812:   FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld);
813:   for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
814:   rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
815:   for (i=0;i<niter;i++) {
816:     den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
817:     alp0 = rho/den;
818:     rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
819:     alp  = (rho-rho1)/den;
820:     srpol++;
821:     scpol++;
822:     Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld);
823:     Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld);
824:     if (i+1 == niter) break;
825:     sarpol++;
826:     FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld);
827:     rho0 = rho;
828:     rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
829:     bet  = rho / rho0;
830:     sppol++;
831:     sappol++;
832:     Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld);
833:     Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld);
834:   }
835:   PetscFree4(ppol,rpol,appol,arpol);
836:   return(0);
837: }

839: /*
840:    FILTLAN function FilteredConjugateResidualMatrixPolynomialVectorProduct

842:    this routine employs a conjugate-residual-type algorithm in polynomial space to compute
843:    x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, where
844:    P(z) is a given piecewise polynomial, called the base filter,
845:    s(z) is a polynomial of degree up to niter, the number of conjugate-residual iterations,
846:    and b and x0 are given vectors

848:    note that P(z) is expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
849:    and presented as a matrix of Chebyshev coefficients pp

851:    input:
852:    A is a sparse matrix
853:    x0, b are vectors
854:    niter is the number of conjugate-residual iterations
855:    intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
856:    w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
857:        the interval weights define the inner product of two continuous functions and then
858:        the derived w-norm ||P(z)-Q(z)||_w
859:    pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
860:    to be specific, for z in [intv(j), intv(j+1)), P(z) equals
861:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
862:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
863:        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
864:    with T_i(z) the Chebyshev polynomial of the first kind,
865:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
866:    tol is the tolerance; if the residual polynomial in z-norm is dropped by a factor lower
867:        than tol, then stop the conjugate-residual iteration

869:    output:
870:    the return vector is x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized,
871:    subject to that s(z) is a polynomial of degree up to niter, where P(z) is the base filter
872:    in short, z*s(z) approximates 1-P(z)

874:    note:
875:    1. since z*s(z) approximates 1-P(z), P(0)==1 is expected; if P(0)!=1, one can translate P(z)
876:       for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
877:    2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
878:       in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
879:    3. a common application is to compute R(A)*b, where R(z) approximates 1-P(z)
880:       in this case, one can set x0 = 0 and then the return vector is x = s(A)*b, where
881:       z*s(z) approximates 1-P(z); therefore, A*x is the wanted R(A)*b
882: */
883: static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(Mat A,Vec b,Vec x,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work)
884: {
886:   PetscInt       i,j,srpol,scpol,sarpol,sppol,sappol,ld;
887:   PetscReal      rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
888:   Vec            r=work[0],p=work[1],ap=work[2],w=work[3];
889:   PetscScalar    alpha;

892:   ld = niter+3;  /* leading dimension */
893:   PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol);
894:   /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
895:   sppol = 2;
896:   srpol = 2;
897:   scpol = 2;
898:   for (j=0;j<nintv;j++) {
899:     ppol[j*ld] = 1.0;
900:     rpol[j*ld] = 1.0;
901:     cpol[j*ld] = 1.0;
902:   }
903:   /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
904:      rpol is the r-polynomial (corresponding to the residual vector r in CG)
905:      cpol is the "corrected" residual polynomial */
906:   sappol = 3;
907:   sarpol = 3;
908:   FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld);
909:   for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
910:   rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
911:   rho = rho00;

913:   /* corrected CR in vector space */
914:   /* we assume x0 is always 0 */
915:   VecSet(x,0.0);
916:   VecCopy(b,r);     /* initial residual r = b-A*x0 */
917:   VecCopy(r,p);     /* p = r */
918:   MatMult(A,p,ap);  /* ap = A*p */

920:   for (i=0;i<niter;i++) {
921:     /* iteration in the polynomial space */
922:     den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
923:     alp0 = rho/den;
924:     rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
925:     alp  = (rho-rho1)/den;
926:     srpol++;
927:     scpol++;
928:     Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld);
929:     Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld);
930:     sarpol++;
931:     FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld);
932:     rho0 = rho;
933:     rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);

935:     /* update x in the vector space */
936:     alpha = alp;
937:     VecAXPY(x,alpha,p);   /* x += alp*p */
938:     if (rho < tol*rho00) break;

940:     /* finish the iteration in the polynomial space */
941:     bet = rho / rho0;
942:     sppol++;
943:     sappol++;
944:     Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld);
945:     Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld);

947:     /* finish the iteration in the vector space */
948:     alpha = -alp0;
949:     VecAXPY(r,alpha,ap);    /* r -= alp0*ap */
950:     alpha = bet;
951:     VecAYPX(p,alpha,r);     /* p = r + bet*p */
952:     MatMult(A,r,w);         /* ap = A*r + bet*ap */
953:     VecAYPX(ap,alpha,w);
954:   }
955:   PetscFree5(ppol,rpol,cpol,appol,arpol);
956:   return(0);
957: }

959: /*
960:    Gateway to FILTLAN for evaluating y=p(A)*x
961: */
962: PetscErrorCode STFilter_FILTLAN_Apply(ST st,Vec x,Vec y)
963: {
965:   ST_FILTER      *ctx = (ST_FILTER*)st->data;
966:   PetscInt       npoints;

969:   npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
970:   FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(st->T[0],x,y,ctx->baseFilter,2*ctx->baseDegree+2,ctx->intervals,npoints-1,ctx->opts->intervalWeights,ctx->polyDegree,st->work);
971:   return(0);
972: }

974: /*
975:    FILTLAN function PolynomialFilterInterface::setFilter

977:    Creates the shifted (and scaled) matrix and the base filter P(z)
978: */
979: PetscErrorCode STFilter_FILTLAN_setFilter(ST st)
980: {
982:   ST_FILTER      *ctx = (ST_FILTER*)st->data;
983:   PetscInt       i,npoints;
984:   PetscReal      frame2[4];
985:   PetscScalar    alpha;
986:   const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 };

989:   MatDestroy(&st->T[0]);
990:   if (ctx->frame[0] == ctx->frame[1]) {  /* low pass filter, convert it to high pass filter */
991:     /* T = frame[3]*eye(n) - A */
992:     MatDuplicate(st->A[0],MAT_COPY_VALUES,&st->T[0]);
993:     MatScale(st->T[0],-1.0);
994:     alpha = ctx->frame[3];
995:     MatShift(st->T[0],alpha);
996:     for (i=0;i<4;i++) frame2[i] = ctx->frame[3] - ctx->frame[3-i];
997:     FILTLAN_GetIntervals(ctx->intervals,frame2,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo);
998:     /* translate the intervals back */
999:     for (i=0;i<4;i++) ctx->intervals2[i] = ctx->frame[3] - ctx->intervals[3-i];
1000:   } else {  /* it can be a mid-pass filter or a high-pass filter */
1001:       if (ctx->frame[0] == 0.0) {
1002:       PetscObjectReference((PetscObject)st->A[0]);
1003:       st->T[0] = st->A[0];
1004:       FILTLAN_GetIntervals(ctx->intervals,ctx->frame,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo);
1005:       for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i];
1006:     } else {
1007:       /* T = A - frame[0]*eye(n) */
1008:       MatDuplicate(st->A[0],MAT_COPY_VALUES,&st->T[0]);
1009:       alpha = -ctx->frame[0];
1010:       MatShift(st->T[0],alpha);
1011:       for (i=0;i<4;i++) frame2[i] = ctx->frame[i] - ctx->frame[0];
1012:       FILTLAN_GetIntervals(ctx->intervals,frame2,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo);
1013:       /* translate the intervals back */
1014:       for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i] + ctx->frame[0];
1015:     }
1016:   }
1017:   npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
1018:   PetscMalloc1((2*ctx->baseDegree+2)*(npoints-1),&ctx->baseFilter);
1019:   FILTLAN_HermiteBaseFilterInChebyshevBasis(ctx->baseFilter,ctx->intervals,npoints,HighLowFlags,ctx->baseDegree);
1020:   return(0);
1021: }