1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2017, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: This file is an adaptation of several subroutines from FILTLAN, the
12: Filtered Lanczos Package, authored by Haw-ren Fang and Yousef Saad.
14: More information at:
15: http://www-users.cs.umn.edu/~saad/software/filtlan
17: References:
19: [1] H. Fang and Y. Saad, "A filtered Lanczos procedure for extreme and interior
20: eigenvalue problems", SIAM J. Sci. Comput. 34(4):A2220-A2246, 2012.
21: */
23: #include <slepc/private/stimpl.h>
24: #include "./filter.h"
26: static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal*,PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal*,PetscInt);
27: static PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal);
28: static PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt,PetscReal,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*);
30: /* ////////////////////////////////////////////////////////////////////////////
31: // Newton - Hermite Polynomial Interpolation
32: //////////////////////////////////////////////////////////////////////////// */
34: /*
35: FILTLAN function NewtonPolynomial
37: build P(z) by Newton's divided differences in the form
38: P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)),
39: such that P(x(i)) = y(i) for i=1,...,n, where
40: x,y are input vectors of length n, and a is the output vector of length n
41: if x(i)==x(j) for some i!=j, then it is assumed that the derivative of P(z) is to be zero at x(i),
42: and the Hermite polynomial interpolation is applied
43: in general, if there are k x(i)'s with the same value x0, then
44: the j-th order derivative of P(z) is zero at z=x0 for j=1,...,k-1
45: */
46: PETSC_STATIC_INLINE PetscErrorCode FILTLAN_NewtonPolynomial(PetscInt n,PetscReal *x,PetscReal *y,PetscReal *sa,PetscReal *sf) 47: {
49: PetscReal d,*sx=x,*sy=y;
50: PetscInt j,k;
53: PetscMemcpy(sf,sy,n*sizeof(PetscReal));
55: /* apply Newton's finite difference method */
56: sa[0] = sf[0];
57: for (j=1;j<n;j++) {
58: for (k=n-1;k>=j;k--) {
59: d = sx[k]-sx[k-j];
60: if (d == 0.0) sf[k] = 0.0; /* assume that the derivative is 0.0 and apply the Hermite interpolation */
61: else sf[k] = (sf[k]-sf[k-1]) / d;
62: }
63: sa[j] = sf[j];
64: }
65: return(0);
66: }
68: /*
69: FILTLAN function HermiteBaseFilterInChebyshevBasis
71: compute a base filter P(z) which is a continuous, piecewise polynomial P(z) expanded
72: in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials in each interval
74: The base filter P(z) equals P_j(z) for z in the j-th interval [intv(j), intv(j+1)), where
75: P_j(z) a Hermite interpolating polynomial
77: input:
78: intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
79: HiLowFlags determines the shape of the base filter P(z)
80: Consider the j-th interval [intv(j), intv(j+1)]
81: HighLowFlag[j-1]==1, P(z)==1 for z in [intv(j), intv(j+1)]
82: ==0, P(z)==0 for z in [intv(j), intv(j+1)]
83: ==-1, [intv(j), intv(j+1)] is a transition interval;
84: P(intv(j)) and P(intv(j+1)) are defined such that P(z) is continuous
85: baseDeg is the degree of smoothness of the Hermite (piecewise polynomial) interpolation
86: to be precise, the i-th derivative of P(z) is zero, i.e. d^{i}P(z)/dz^i==0, at all interval
87: end points z=intv(j) for i=1,...,baseDeg
89: output:
90: P(z) expanded in a basis of `translated' (scale-and-shift) Chebyshev polynomials
91: to be precise, for z in the j-th interval [intv(j),intv(j+1)), P(z) equals
92: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
93: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
94: S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
95: with T_i(z) the Chebyshev polynomial of the first kind,
96: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
97: the return matrix is the matrix of Chebyshev coefficients pp just described
99: note that the degree of P(z) in each interval is (at most) 2*baseDeg+1, with 2*baseDeg+2 coefficients
100: let n be the length of intv; then there are n-1 intervals
101: therefore the return matrix pp is of size (2*baseDeg+2)-by-(n-1)
102: */
103: static PetscErrorCode FILTLAN_HermiteBaseFilterInChebyshevBasis(PetscReal *baseFilter,PetscReal *intv,PetscInt npoints,const PetscInt *HighLowFlags,PetscInt baseDeg)104: {
106: PetscInt m,ii,jj;
107: PetscReal flag,flag0,flag2,aa,bb,*px,*py,*sx,*sy,*pp,*qq,*sq,*sbf,*work,*currentPoint = intv;
108: const PetscInt *hilo = HighLowFlags;
111: m = 2*baseDeg+2;
112: jj = npoints-1; /* jj is initialized as the number of intervals */
113: PetscMalloc5(m,&px,m,&py,m,&pp,m,&qq,m,&work);
114: sbf = baseFilter;
116: while (jj--) { /* the main loop to compute the Chebyshev coefficients */
118: flag = (PetscReal)(*hilo++); /* get the flag of the current interval */
119: if (flag == -1.0) { /* flag == -1 means that the current interval is a transition polynomial */
121: flag2 = (PetscReal)(*hilo); /* get flag2, the flag of the next interval */
122: flag0 = 1.0-flag2; /* the flag of the previous interval is 1-flag2 */
124: /* two pointers for traversing x[] and y[] */
125: sx = px;
126: sy = py;
128: /* find the current interval [aa,bb] */
129: aa = *currentPoint++;
130: bb = *currentPoint;
132: /* now left-hand side */
133: ii = baseDeg+1;
134: while (ii--) {
135: *sy++ = flag0;
136: *sx++ = aa;
137: }
139: /* now right-hand side */
140: ii = baseDeg+1;
141: while (ii--) {
142: *sy++ = flag2;
143: *sx++ = bb;
144: }
146: /* build a Newton polynomial (indeed, the generalized Hermite interpolating polynomial) with x[] and y[] */
147: FILTLAN_NewtonPolynomial(m,px,py,pp,work);
149: /* pp contains coefficients of the Newton polynomial P(z) in the current interval [aa,bb], where
150: P(z) = pp(1) + pp(2)*(z-px(1)) + pp(3)*(z-px(1))*(z-px(2)) + ... + pp(n)*(z-px(1))*...*(z-px(n-1)) */
152: /* translate the Newton coefficients to the Chebyshev coefficients */
153: FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(m,aa,bb,pp,px,qq,work);
154: /* qq contains coefficients of the polynomial in [aa,bb] in the `translated' Chebyshev basis */
156: /* copy the Chebyshev coefficients to baseFilter
157: OCTAVE/MATLAB: B(:,j) = qq, where j = (npoints-1)-jj and B is the return matrix */
158: sq = qq;
159: ii = 2*baseDeg+2;
160: while (ii--) *sbf++ = *sq++;
162: } else {
164: /* a constant polynomial P(z)=flag, where either flag==0 or flag==1
165: OCTAVE/MATLAB: B(1,j) = flag, where j = (npoints-1)-jj and B is the return matrix */
166: *sbf++ = flag;
168: /* the other coefficients are all zero, since P(z) is a constant
169: OCTAVE/MATLAB: B(1,j) = 0, where j = (npoints-1)-jj and B is the return matrix */
170: ii = 2*baseDeg+1;
171: while (ii--) *sbf++ = 0.0;
173: /* for the next point */
174: currentPoint++;
175: }
176: }
177: PetscFree5(px,py,pp,qq,work);
178: return(0);
179: }
181: /* ////////////////////////////////////////////////////////////////////////////
182: // Base Filter
183: //////////////////////////////////////////////////////////////////////////// */
185: /*
186: FILTLAN function GetIntervals
188: this routine determines the intervals (including the transition one(s)) by an iterative process
190: frame is a vector consisting of 4 ordered elements:
191: [frame(1),frame(4)] is the interval which (tightly) contains all eigenvalues, and
192: [frame(2),frame(3)] is the interval in which the eigenvalues are sought
193: baseDeg is the left-and-right degree of the base filter for each interval
194: polyDeg is the (maximum possible) degree of s(z), with z*s(z) the polynomial filter
195: intv is the output; the j-th interval is [intv(j),intv(j+1))
196: opts is a collection of interval options
198: the base filter P(z) is a piecewise polynomial from Hermite interpolation with degree baseDeg
199: at each end point of intervals
201: the polynomial filter Q(z) is in the form z*s(z), i.e. Q(0)==0, such that ||(1-P(z))-z*s(z)||_w is
202: minimized with s(z) a polynomial of degree up to polyDeg
204: the resulting polynomial filter Q(z) satisfies Q(x)>=Q(y) for x in [frame[1],frame[2]], and
205: y in [frame[0],frame[3]] but not in [frame[1],frame[2]]
207: the routine fills a PolynomialFilterInfo struct which gives some information of the polynomial filter
208: */
209: static PetscErrorCode FILTLAN_GetIntervals(PetscReal *intervals,PetscReal *frame,PetscInt polyDeg,PetscInt baseDeg,FILTLAN_IOP opts,FILTLAN_PFI filterInfo)210: {
211: PetscErrorCode ierr;
212: PetscReal intv[6],x,y,z1,z2,c,c1,c2,fc,fc2,halfPlateau,leftDelta,rightDelta,gridSize;
213: PetscReal yLimit,ySummit,yLeftLimit,yRightLimit,bottom,qIndex,*baseFilter,*polyFilter;
214: PetscReal yLimitGap=0.0,yLeftSummit=0.0,yLeftBottom=0.0,yRightSummit=0.0,yRightBottom=0.0;
215: PetscInt i,ii,npoints,numIter,numLeftSteps=1,numRightSteps=1,numMoreLooked=0;
216: PetscBool leftBoundaryMet=PETSC_FALSE,rightBoundaryMet=PETSC_FALSE,stepLeft,stepRight;
217: const PetscReal a=frame[0],a1=frame[1],b1=frame[2],b=frame[3];
218: const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 }; /* if filterType is 1, only first 3 elements will be used */
219: const PetscInt numLookMore = 2*(PetscInt)(0.5+(PetscLogReal(2.0)/PetscLogReal(opts->shiftStepExpanRate)));
222: if (a>a1 || a1>b1 || b1>b) SETERRQ(PETSC_COMM_SELF,1,"Values in the frame vector should be non-decreasing");
223: if (a1 == b1) SETERRQ(PETSC_COMM_SELF,1,"The range of wanted eigenvalues cannot be of size zero");
224: filterInfo->filterType = 2; /* mid-pass filter, for interior eigenvalues */
225: if (b == b1) {
226: if (a == a1) SETERRQ(PETSC_COMM_SELF,1,"A polynomial filter should not cover all eigenvalues");
227: filterInfo->filterType = 1; /* high-pass filter, for largest eigenvalues */
228: } else if (a == a1) SETERRQ(PETSC_COMM_SELF,1,"filterType==3 for smallest eigenvalues should be pre-converted to filterType==1 for largest eigenvalues");
230: /* the following recipe follows Yousef Saad (2005, 2006) with a few minor adaptations / enhancements */
231: halfPlateau = 0.5*(b1-a1)*opts->initialPlateau; /* half length of the "plateau" of the (dual) base filter */
232: leftDelta = (b1-a1)*opts->initialShiftStep; /* initial left shift */
233: rightDelta = leftDelta; /* initial right shift */
234: opts->numGridPoints = PetscMax(opts->numGridPoints,(PetscInt)(2.0*(b-a)/halfPlateau));
235: gridSize = (b-a) / (PetscReal)(opts->numGridPoints);
237: if (filterInfo->filterType == 2) { /* for interior eigenvalues */
238: npoints = 6;
239: intv[0] = a;
240: intv[5] = b;
241: /* intv[1], intv[2], intv[3], and intv[4] to be determined */
242: } else { /* filterType == 1 (or 3 with conversion), for extreme eigenvalues */
243: npoints = 4;
244: intv[0] = a;
245: intv[3] = b;
246: /* intv[1], and intv[2] to be determined */
247: }
248: z1 = a1 - leftDelta;
249: z2 = b1 + rightDelta;
250: filterInfo->filterOK = 0; /* not yet found any OK filter */
252: /* allocate matrices */
253: PetscMalloc2((polyDeg+2)*(npoints-1),&polyFilter,(2*baseDeg+2)*(npoints-1),&baseFilter);
255: /* initialize the intervals, mainly for the case opts->maxOuterIter == 0 */
256: intervals[0] = intv[0];
257: intervals[3] = intv[3];
258: intervals[5] = intv[5];
259: intervals[1] = z1;
260: if (filterInfo->filterType == 2) { /* a mid-pass filter for interior eigenvalues */
261: intervals[4] = z2;
262: c = (a1+b1) / 2.0;
263: intervals[2] = c - halfPlateau;
264: intervals[3] = c + halfPlateau;
265: } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
266: intervals[2] = z1 + (b1-z1)*opts->transIntervalRatio;
267: }
269: /* the main loop */
270: for (numIter=1;numIter<=opts->maxOuterIter;numIter++) {
271: if (z1 <= a) { /* outer loop updates z1 and z2 */
272: z1 = a;
273: leftBoundaryMet = PETSC_TRUE;
274: }
275: if (filterInfo->filterType == 2) { /* a <= z1 < (a1) */
276: if (z2 >= b) { /* a mid-pass filter for interior eigenvalues */
277: z2 = b;
278: rightBoundaryMet = PETSC_TRUE;
279: }
280: c = (z1+z2) / 2.0;
281: /* a <= z1 < c-h < c+h < z2 <= b, where h is halfPlateau */
282: /* [z1, c-h] and [c+h, z2] are transition interval */
283: intv[1] = z1;
284: intv[4] = z2;
285: c1 = z1 + halfPlateau;
286: intv[2] = z1; /* i.e. c1 - halfPlateau */
287: intv[3] = c1 + halfPlateau;
288: FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
289: FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
290: /* fc1 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); */
291: c2 = z2 - halfPlateau;
292: intv[2] = c2 - halfPlateau;
293: intv[3] = z2; /* i.e. c2 + halfPlateau */
294: FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
295: FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
296: fc2 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
297: yLimitGap = PETSC_MAX_REAL;
298: ii = opts->maxInnerIter;
299: while (ii-- && !(yLimitGap <= opts->yLimitTol)) {
300: /* recursive bisection to get c such that p(a1) are p(b1) approximately the same */
301: c = (c1+c2) / 2.0;
302: intv[2] = c - halfPlateau;
303: intv[3] = c + halfPlateau;
304: FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
305: FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
306: fc = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
307: if (fc*fc2 < 0.0) {
308: c1 = c;
309: /* fc1 = fc; */
310: } else {
311: c2 = c;
312: fc2 = fc;
313: }
314: yLimitGap = PetscAbsReal(fc);
315: }
316: } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
317: intv[1] = z1;
318: intv[2] = z1 + (b1-z1)*opts->transIntervalRatio;
319: FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,4,HighLowFlags,baseDeg);
320: FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,4,opts->intervalWeights,polyDeg);
321: }
322: /* polyFilter contains the coefficients of the polynomial filter which approximates phi(x)
323: expanded in the `translated' Chebyshev basis */
324: /* psi(x) = 1.0 - phi(x) is the dual base filter approximated by a polynomial in the form x*p(x) */
325: yLeftLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
326: yRightLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1);
327: yLimit = (yLeftLimit < yRightLimit) ? yLeftLimit : yRightLimit;
328: ySummit = (yLeftLimit > yRightLimit) ? yLeftLimit : yRightLimit;
329: x = a1;
330: while ((x+=gridSize) < b1) {
331: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
332: if (y < yLimit) yLimit = y;
333: if (y > ySummit) ySummit = y;
334: }
335: /* now yLimit is the minimum of x*p(x) for x in [a1, b1] */
336: stepLeft = PETSC_FALSE;
337: stepRight = PETSC_FALSE;
338: if ((yLimit < yLeftLimit && yLimit < yRightLimit) || yLimit < opts->yBottomLine) {
339: /* very bad, step to see what will happen */
340: stepLeft = PETSC_TRUE;
341: if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
342: } else if (filterInfo->filterType == 2) {
343: if (yLeftLimit < yRightLimit) {
344: if (yRightLimit-yLeftLimit > opts->yLimitTol) stepLeft = PETSC_TRUE;
345: } else if (yLeftLimit-yRightLimit > opts->yLimitTol) stepRight = PETSC_TRUE;
346: }
347: if (!stepLeft) {
348: yLeftBottom = yLeftLimit;
349: x = a1;
350: while ((x-=gridSize) >= a) {
351: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
352: if (y < yLeftBottom) yLeftBottom = y;
353: else if (y > yLeftBottom) break;
354: }
355: yLeftSummit = yLeftBottom;
356: while ((x-=gridSize) >= a) {
357: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
358: if (y > yLeftSummit) {
359: yLeftSummit = y;
360: if (yLeftSummit > yLimit*opts->yRippleLimit) {
361: stepLeft = PETSC_TRUE;
362: break;
363: }
364: }
365: if (y < yLeftBottom) yLeftBottom = y;
366: }
367: }
368: if (filterInfo->filterType == 2 && !stepRight) {
369: yRightBottom = yRightLimit;
370: x = b1;
371: while ((x+=gridSize) <= b) {
372: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
373: if (y < yRightBottom) yRightBottom = y;
374: else if (y > yRightBottom) break;
375: }
376: yRightSummit = yRightBottom;
377: while ((x+=gridSize) <= b) {
378: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
379: if (y > yRightSummit) {
380: yRightSummit = y;
381: if (yRightSummit > yLimit*opts->yRippleLimit) {
382: stepRight = PETSC_TRUE;
383: break;
384: }
385: }
386: if (y < yRightBottom) yRightBottom = y;
387: }
388: }
389: if (!stepLeft && !stepRight) {
390: if (filterInfo->filterType == 2) bottom = PetscMin(yLeftBottom,yRightBottom);
391: else bottom = yLeftBottom;
392: qIndex = 1.0 - (yLimit-bottom) / (ySummit-bottom);
393: if (filterInfo->filterOK == 0 || filterInfo->filterQualityIndex < qIndex) {
394: /* found the first OK filter or a better filter */
395: for (i=0;i<6;i++) intervals[i] = intv[i];
396: filterInfo->filterOK = 1;
397: filterInfo->filterQualityIndex = qIndex;
398: filterInfo->numIter = numIter;
399: filterInfo->yLimit = yLimit;
400: filterInfo->ySummit = ySummit;
401: filterInfo->numLeftSteps = numLeftSteps;
402: filterInfo->yLeftSummit = yLeftSummit;
403: filterInfo->yLeftBottom = yLeftBottom;
404: if (filterInfo->filterType == 2) {
405: filterInfo->yLimitGap = yLimitGap;
406: filterInfo->numRightSteps = numRightSteps;
407: filterInfo->yRightSummit = yRightSummit;
408: filterInfo->yRightBottom = yRightBottom;
409: }
410: numMoreLooked = 0;
411: } else if (++numMoreLooked == numLookMore) {
412: /* filter has been optimal */
413: filterInfo->filterOK = 2;
414: break;
415: }
416: /* try stepping further to see whether it can improve */
417: stepLeft = PETSC_TRUE;
418: if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
419: }
420: /* check whether we can really "step" */
421: if (leftBoundaryMet) {
422: if (filterInfo->filterType == 1 || rightBoundaryMet) break; /* cannot step further, so break the loop */
423: if (stepLeft) {
424: /* cannot step left, so try stepping right */
425: stepLeft = PETSC_FALSE;
426: stepRight = PETSC_TRUE;
427: }
428: }
429: if (rightBoundaryMet && stepRight) {
430: /* cannot step right, so try stepping left */
431: stepRight = PETSC_FALSE;
432: stepLeft = PETSC_TRUE;
433: }
434: /* now "step" */
435: if (stepLeft) {
436: numLeftSteps++;
437: if (filterInfo->filterType == 2) leftDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
438: z1 -= leftDelta;
439: }
440: if (stepRight) {
441: numRightSteps++;
442: rightDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
443: z2 += rightDelta;
444: }
445: if (filterInfo->filterType == 2) {
446: /* shrink the "plateau" of the (dual) base filter */
447: if (stepLeft && stepRight) halfPlateau /= opts->plateauShrinkRate;
448: else halfPlateau /= PetscSqrtReal(opts->plateauShrinkRate);
449: }
450: }
451: filterInfo->totalNumIter = numIter;
452: PetscFree2(polyFilter,baseFilter);
453: return(0);
454: }
456: /* ////////////////////////////////////////////////////////////////////////////
457: // Chebyshev Polynomials
458: //////////////////////////////////////////////////////////////////////////// */
460: /*
461: FILTLAN function ExpandNewtonPolynomialInChebyshevBasis
463: translate the coefficients of a Newton polynomial to the coefficients
464: in a basis of the `translated' (scale-and-shift) Chebyshev polynomials
466: input:
467: a Newton polynomial defined by vectors a and x:
468: P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1))
469: the interval [aa,bb] defines the `translated' Chebyshev polynomials S_i(z) = T_i((z-c)/h),
470: where c=(aa+bb)/2 and h=(bb-aa)/2, and T_i is the Chebyshev polynomial of the first kind
471: note that T_i is defined by T_0(z)=1, T_1(z)=z, and T_i(z)=2*z*T_{i-1}(z)+T_{i-2}(z) for i>=2
473: output:
474: a vector q containing the Chebyshev coefficients:
475: P(z) = q(1)*S_0(z) + q(2)*S_1(z) + ... + q(n)*S_{n-1}(z)
476: */
477: PETSC_STATIC_INLINE PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt n,PetscReal aa,PetscReal bb,PetscReal *a,PetscReal *x,PetscReal *q,PetscReal *q2)478: {
479: PetscInt m,mm;
480: PetscReal *sa,*sx,*sq,*sq2,c,c2,h,h2;
483: sa = a+n; /* pointers for traversing a and x */
484: sx = x+n-1;
485: *q = *--sa; /* set q[0] = a(n) */
487: c = (aa+bb)/2.0;
488: h = (bb-aa)/2.0;
489: h2 = h/2.0;
491: for (m=1;m<=n-1;m++) { /* the main loop for translation */
493: /* compute q2[0:m-1] = (c-x[n-m-1])*q[0:m-1] */
494: mm = m;
495: sq = q;
496: sq2 = q2;
497: c2 = c-(*--sx);
498: while (mm--) *(sq2++) = c2*(*sq++);
499: *sq2 = 0.0; /* q2[m] = 0.0 */
500: *(q2+1) += h*(*q); /* q2[1] = q2[1] + h*q[0] */
502: /* compute q2[0:m-2] = q2[0:m-2] + h2*q[1:m-1] */
503: mm = m-1;
504: sq2 = q2;
505: sq = q+1;
506: while (mm--) *(sq2++) += h2*(*sq++);
508: /* compute q2[2:m] = q2[2:m] + h2*q[1:m-1] */
509: mm = m-1;
510: sq2 = q2+2;
511: sq = q+1;
512: while (mm--) *(sq2++) += h2*(*sq++);
514: /* compute q[0:m] = q2[0:m] */
515: mm = m+1;
516: sq2 = q2;
517: sq = q;
518: while (mm--) *sq++ = *sq2++;
519: *q += (*--sa); /* q[0] = q[0] + p[n-m-1] */
520: }
521: return(0);
522: }
524: /*
525: FILTLAN function PolynomialEvaluationInChebyshevBasis
527: evaluate P(z) at z=z0, where P(z) is a polynomial expanded in a basis of
528: the `translated' (i.e. scale-and-shift) Chebyshev polynomials
530: input:
531: c is a vector of Chebyshev coefficients which defines the polynomial
532: P(z) = c(1)*S_0(z) + c(2)*S_1(z) + ... + c(n)*S_{n-1}(z),
533: where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
534: c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2
535: note that T_i(z) is the Chebyshev polynomial of the first kind,
536: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
538: output:
539: the evaluated value of P(z) at z=z0
540: */
541: PETSC_STATIC_INLINE PetscReal FILTLAN_PolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscInt idx,PetscReal z0,PetscReal aa,PetscReal bb)542: {
543: PetscInt ii,deg1;
544: PetscReal y,zz,t0,t1,t2,*sc;
547: deg1 = m;
548: if (aa==-1.0 && bb==1.0) zz = z0; /* treat it as a special case to reduce rounding errors */
549: else zz = (aa==bb)? 0.0 : -1.0+2.0*(z0-aa)/(bb-aa);
551: /* compute y = P(z0), where we utilize the Chebyshev recursion */
552: sc = pp+(idx-1)*m; /* sc points to column idx of pp */
553: y = *sc++;
554: t1 = 1.0; t2 = zz;
555: ii = deg1-1;
556: while (ii--) {
557: /* Chebyshev recursion: T_0(zz)=1, T_1(zz)=zz, and T_{i+1}(zz) = 2*zz*T_i(zz) + T_{i-1}(zz) for i>=2
558: the values of T_{i+1}(zz), T_i(zz), T_{i-1}(zz) are stored in t0, t1, t2, respectively */
559: t0 = 2*zz*t1 - t2;
560: /* it also works for the base case / the first iteration, where t0 equals 2*zz*1-zz == zz which is T_1(zz) */
561: t2 = t1;
562: t1 = t0;
563: y += (*sc++)*t0;
564: }
565: PetscFunctionReturn(y);
566: }
568: #define basisTranslated PETSC_TRUE569: /*
570: FILTLAN function PiecewisePolynomialEvaluationInChebyshevBasis
572: evaluate P(z) at z=z0, where P(z) is a piecewise polynomial expanded
573: in a basis of the (optionally translated, i.e. scale-and-shift) Chebyshev polynomials for each interval
575: input:
576: intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
577: pp is a matrix of Chebyshev coefficients which defines a piecewise polynomial P(z)
578: in a basis of the `translated' Chebyshev polynomials in each interval
579: the polynomial P_j(z) in the j-th interval, i.e. when z is in [intv(j), intv(j+1)), is defined by the j-th column of pp:
580: if basisTranslated == false, then
581: P_j(z) = pp(1,j)*T_0(z) + pp(2,j)*T_1(z) + ... + pp(n,j)*T_{n-1}(z),
582: where T_i(z) is the Chebyshev polynomial of the first kind,
583: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
584: if basisTranslated == true, then
585: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
586: where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
587: c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2
589: output:
590: the evaluated value of P(z) at z=z0
592: note that if z0 falls below the first interval, then the polynomial in the first interval will be used to evaluate P(z0)
593: if z0 flies over the last interval, then the polynomial in the last interval will be used to evaluate P(z0)
594: */
595: PETSC_STATIC_INLINE PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscReal *intv,PetscInt npoints,PetscReal z0)596: {
597: PetscReal *sintv,aa,bb,resul;
598: PetscInt idx;
601: /* determine the interval which contains z0 */
602: sintv = &intv[1];
603: idx = 1;
604: if (npoints>2 && z0 >= *sintv) {
605: sintv++;
606: while (++idx < npoints-1) {
607: if (z0 < *sintv) break;
608: sintv++;
609: }
610: }
611: /* idx==1 if npoints<=2; otherwise idx satisfies:
612: intv(idx) <= z0 < intv(idx+1), if 2 <= idx <= npoints-2
613: z0 < intv(idx+1), if idx == 1
614: intv(idx) <= z0, if idx == npoints-1
615: in addition, sintv points to &intv(idx+1) */
616: if (basisTranslated) {
617: /* the basis consists of `translated' Chebyshev polynomials */
618: /* find the interval of concern, [aa,bb] */
619: aa = *(sintv-1);
620: bb = *sintv;
621: resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,aa,bb);
622: } else {
623: /* the basis consists of standard Chebyshev polynomials, with interval [-1.0,1.0] for integration */
624: resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,-1.0,1.0);
625: }
626: PetscFunctionReturn(resul);
627: }
629: /*
630: FILTLAN function PiecewisePolynomialInnerProductInChebyshevBasis
632: compute the weighted inner product of two piecewise polynomials expanded
633: in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
635: pp and qq are two matrices of Chebyshev coefficients which define the piecewise polynomials P(z) and Q(z), respectively
636: for z in the j-th interval, P(z) equals
637: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
638: and Q(z) equals
639: Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
640: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
641: S_i((z-c)/h) = T_i(z), c = (aa+bb)) / 2, h = (bb-aa) / 2,
642: with T_i(z) the Chebyshev polynomial of the first kind
643: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
645: the (scaled) j-th interval inner product is defined by
646: <P_j,Q_j> = (Pi/2)*( pp(1,j)*qq(1,j) + sum_{k} pp(k,j)*qq(k,j) ),
647: which comes from the property
648: <T_0,T_0>=pi, <T_i,T_i>=pi/2 for i>=1, and <T_i,T_j>=0 for i!=j
650: the weighted inner product is <P,Q> = sum_{j} intervalWeights(j)*<P_j,Q_j>,
651: which is the return value
653: note that for unit weights, pass an empty vector of intervalWeights (i.e. of length 0)
654: */
655: PETSC_STATIC_INLINE PetscReal FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(PetscReal *pp,PetscInt prows,PetscInt pcols,PetscInt ldp,PetscReal *qq,PetscInt qrows,PetscInt qcols,PetscInt ldq,const PetscReal *intervalWeights)656: {
657: PetscInt nintv,deg1,i,k;
658: PetscReal *sp,*sq,ans=0.0,ans2;
661: deg1 = PetscMin(prows,qrows); /* number of effective coefficients, one more than the effective polynomial degree */
662: if (!deg1) PetscFunctionReturn(0.0);
663: nintv = PetscMin(pcols,qcols); /* number of intervals */
665: /* scaled by intervalWeights(i) in the i-th interval (we assume intervalWeights[] are always provided).
666: compute ans = sum_{i=1,...,nintv} intervalWeights(i)*[ pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) ] */
667: for (i=0;i<nintv;i++) { /* compute ans2 = pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) */
668: sp = pp+i*ldp;
669: sq = qq+i*ldq;
670: ans2 = (*sp) * (*sq); /* the first term pp(1,i)*qq(1,i) is being added twice */
671: for (k=0;k<deg1;k++) ans2 += (*sp++)*(*sq++); /* add pp(k,i)*qq(k,i) */
672: ans += ans2*intervalWeights[i]; /* compute ans += ans2*intervalWeights(i) */
673: }
674: PetscFunctionReturn(ans*PETSC_PI/2.0);
675: }
677: /*
678: FILTLAN function PiecewisePolynomialInChebyshevBasisMultiplyX
680: compute Q(z) = z*P(z), where P(z) and Q(z) are piecewise polynomials expanded
681: in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
683: P(z) and Q(z) are stored as matrices of Chebyshev coefficients pp and qq, respectively
685: For z in the j-th interval, P(z) equals
686: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
687: and Q(z) equals
688: Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
689: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
690: S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
691: with T_i(z) the Chebyshev polynomial of the first kind
692: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
694: the returned matrix is qq which represents Q(z) = z*P(z)
695: */
696: PETSC_STATIC_INLINE PetscErrorCode FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(PetscReal *pp,PetscInt deg1,PetscInt ldp,PetscReal *intv,PetscInt nintv,PetscReal *qq,PetscInt ldq)697: {
698: PetscInt i,j;
699: PetscReal c,h,h2,tmp,*sp,*sq,*sp2,*sq2;
702: for (j=0;j<nintv;j++) { /* consider interval between intv(j) and intv(j+1) */
703: sp = pp+j*ldp;
704: sq = qq+j*ldq;
705: sp2 = sp;
706: sq2 = sq+1;
707: c = 0.5*(intv[j] + intv[j+1]); /* compute c = (intv(j) + intv(j+1))/2 */
708: h = 0.5*(intv[j+1] - intv[j]); /* compute h = (intv(j+1) - intv(j))/2 and h2 = h/2 */
709: h2 = 0.5*h;
710: i = deg1;
711: while (i--) *sq++ = c*(*sp++); /* compute q(1:deg1,j) = c*p(1:deg1,j) */
712: *sq++ = 0.0; /* set q(deg1+1,j) = 0.0 */
713: *(sq2++) += h*(*sp2++); /* compute q(2,j) = q(2,j) + h*p(1,j) */
714: i = deg1-1;
715: while (i--) { /* compute q(3:deg1+1,j) += h2*p(2:deg1,j) and then q(1:deg1-1,j) += h2*p(2:deg1,j) */
716: tmp = h2*(*sp2++);
717: *(sq2-2) += tmp;
718: *(sq2++) += tmp;
719: }
720: }
721: return(0);
722: }
724: /* ////////////////////////////////////////////////////////////////////////////
725: // Conjugate Residual Method in the Polynomial Space
726: //////////////////////////////////////////////////////////////////////////// */
728: /*
729: B := alpha*A + beta*B
731: A,B are nxk
732: */
733: PETSC_STATIC_INLINE PetscErrorCode Mat_AXPY_BLAS(PetscInt n,PetscInt k,PetscReal alpha,const PetscReal *A,PetscInt lda,PetscReal beta,PetscReal *B,PetscInt ldb)734: {
736: PetscInt i,j;
739: if (beta==(PetscReal)1.0) {
740: for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] += alpha*A[i+j*lda];
741: PetscLogFlops(2.0*n*k);
742: } else {
743: for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] = alpha*A[i+j*lda] + beta*B[i+j*ldb];
744: PetscLogFlops(3.0*n*k);
745: }
746: return(0);
747: }
749: /*
750: FILTLAN function FilteredConjugateResidualPolynomial
752: ** Conjugate Residual Method in the Polynomial Space
754: this routine employs a conjugate-residual-type algorithm in polynomial space to minimize ||P(z)-Q(z)||_w,
755: where P(z), the base filter, is the input piecewise polynomial, and
756: Q(z) is the output polynomial satisfying Q(0)==1, i.e. the constant term of Q(z) is 1
757: niter is the number of conjugate-residual iterations; therefore, the degree of Q(z) is up to niter+1
758: both P(z) and Q(z) are expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
759: and presented as matrices of Chebyshev coefficients, denoted by pp and qq, respectively
761: input:
762: intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
763: w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
764: the interval weights define the inner product of two continuous functions and then
765: the derived w-norm ||P(z)-Q(z)||_w
766: pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
767: to be specific, for z in [intv(j), intv(j+1)), P(z) equals
768: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
769: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
770: S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
771: with T_i(z) the Chebyshev polynomial of the first kind,
772: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
774: output:
775: the return matrix, denoted by qq, represents a polynomial Q(z) with degree up to 1+niter
776: and satisfying Q(0)==1, such that ||P(z))-Q(z)||_w is minimized
777: this polynomial Q(z) is expanded in the `translated' Chebyshev basis for each interval
778: to be precise, considering z in [intv(j), intv(j+1)), Q(z) equals
779: Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(niter+2,j)*S_{niter+1}(z)
781: note:
782: 1. since Q(0)==1, P(0)==1 is expected; if P(0)!=1, one can translate P(z)
783: for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
784: 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
785: in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
786: */
787: static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal *cpol,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt m,PetscReal *intervalWeights,PetscInt niter)788: {
790: PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld,nintv;
791: PetscReal rho,rho0,rho1,den,bet,alp,alp0,*ppol,*rpol,*appol,*arpol;
794: nintv = m-1;
795: ld = niter+2; /* leading dimension */
796: PetscCalloc4(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&appol,ld*nintv,&arpol);
797: PetscMemzero(cpol,ld*nintv*sizeof(PetscReal));
798: /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
799: sppol = 2;
800: srpol = 2;
801: scpol = 2;
802: for (j=0;j<nintv;j++) {
803: ppol[j*ld] = 1.0;
804: rpol[j*ld] = 1.0;
805: cpol[j*ld] = 1.0;
806: }
807: /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
808: rpol is the r-polynomial (corresponding to the residual vector r in CG)
809: cpol is the "corrected" residual polynomial (result of this function) */
810: sappol = 3;
811: sarpol = 3;
812: FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld);
813: for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
814: rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
815: for (i=0;i<niter;i++) {
816: den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
817: alp0 = rho/den;
818: rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
819: alp = (rho-rho1)/den;
820: srpol++;
821: scpol++;
822: Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld);
823: Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld);
824: if (i+1 == niter) break;
825: sarpol++;
826: FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld);
827: rho0 = rho;
828: rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
829: bet = rho / rho0;
830: sppol++;
831: sappol++;
832: Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld);
833: Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld);
834: }
835: PetscFree4(ppol,rpol,appol,arpol);
836: return(0);
837: }
839: /*
840: FILTLAN function FilteredConjugateResidualMatrixPolynomialVectorProduct
842: this routine employs a conjugate-residual-type algorithm in polynomial space to compute
843: x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, where
844: P(z) is a given piecewise polynomial, called the base filter,
845: s(z) is a polynomial of degree up to niter, the number of conjugate-residual iterations,
846: and b and x0 are given vectors
848: note that P(z) is expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
849: and presented as a matrix of Chebyshev coefficients pp
851: input:
852: A is a sparse matrix
853: x0, b are vectors
854: niter is the number of conjugate-residual iterations
855: intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
856: w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
857: the interval weights define the inner product of two continuous functions and then
858: the derived w-norm ||P(z)-Q(z)||_w
859: pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
860: to be specific, for z in [intv(j), intv(j+1)), P(z) equals
861: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
862: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
863: S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
864: with T_i(z) the Chebyshev polynomial of the first kind,
865: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
866: tol is the tolerance; if the residual polynomial in z-norm is dropped by a factor lower
867: than tol, then stop the conjugate-residual iteration
869: output:
870: the return vector is x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized,
871: subject to that s(z) is a polynomial of degree up to niter, where P(z) is the base filter
872: in short, z*s(z) approximates 1-P(z)
874: note:
875: 1. since z*s(z) approximates 1-P(z), P(0)==1 is expected; if P(0)!=1, one can translate P(z)
876: for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
877: 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
878: in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
879: 3. a common application is to compute R(A)*b, where R(z) approximates 1-P(z)
880: in this case, one can set x0 = 0 and then the return vector is x = s(A)*b, where
881: z*s(z) approximates 1-P(z); therefore, A*x is the wanted R(A)*b
882: */
883: static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(Mat A,Vec b,Vec x,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work)884: {
886: PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld;
887: PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
888: Vec r=work[0],p=work[1],ap=work[2],w=work[3];
889: PetscScalar alpha;
892: ld = niter+3; /* leading dimension */
893: PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol);
894: /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
895: sppol = 2;
896: srpol = 2;
897: scpol = 2;
898: for (j=0;j<nintv;j++) {
899: ppol[j*ld] = 1.0;
900: rpol[j*ld] = 1.0;
901: cpol[j*ld] = 1.0;
902: }
903: /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
904: rpol is the r-polynomial (corresponding to the residual vector r in CG)
905: cpol is the "corrected" residual polynomial */
906: sappol = 3;
907: sarpol = 3;
908: FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld);
909: for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
910: rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
911: rho = rho00;
913: /* corrected CR in vector space */
914: /* we assume x0 is always 0 */
915: VecSet(x,0.0);
916: VecCopy(b,r); /* initial residual r = b-A*x0 */
917: VecCopy(r,p); /* p = r */
918: MatMult(A,p,ap); /* ap = A*p */
920: for (i=0;i<niter;i++) {
921: /* iteration in the polynomial space */
922: den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
923: alp0 = rho/den;
924: rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
925: alp = (rho-rho1)/den;
926: srpol++;
927: scpol++;
928: Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld);
929: Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld);
930: sarpol++;
931: FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld);
932: rho0 = rho;
933: rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
935: /* update x in the vector space */
936: alpha = alp;
937: VecAXPY(x,alpha,p); /* x += alp*p */
938: if (rho < tol*rho00) break;
940: /* finish the iteration in the polynomial space */
941: bet = rho / rho0;
942: sppol++;
943: sappol++;
944: Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld);
945: Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld);
947: /* finish the iteration in the vector space */
948: alpha = -alp0;
949: VecAXPY(r,alpha,ap); /* r -= alp0*ap */
950: alpha = bet;
951: VecAYPX(p,alpha,r); /* p = r + bet*p */
952: MatMult(A,r,w); /* ap = A*r + bet*ap */
953: VecAYPX(ap,alpha,w);
954: }
955: PetscFree5(ppol,rpol,cpol,appol,arpol);
956: return(0);
957: }
959: /*
960: Gateway to FILTLAN for evaluating y=p(A)*x
961: */
962: PetscErrorCode STFilter_FILTLAN_Apply(ST st,Vec x,Vec y)963: {
965: ST_FILTER *ctx = (ST_FILTER*)st->data;
966: PetscInt npoints;
969: npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
970: FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(st->T[0],x,y,ctx->baseFilter,2*ctx->baseDegree+2,ctx->intervals,npoints-1,ctx->opts->intervalWeights,ctx->polyDegree,st->work);
971: return(0);
972: }
974: /*
975: FILTLAN function PolynomialFilterInterface::setFilter
977: Creates the shifted (and scaled) matrix and the base filter P(z)
978: */
979: PetscErrorCode STFilter_FILTLAN_setFilter(ST st)980: {
982: ST_FILTER *ctx = (ST_FILTER*)st->data;
983: PetscInt i,npoints;
984: PetscReal frame2[4];
985: PetscScalar alpha;
986: const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 };
989: MatDestroy(&st->T[0]);
990: if (ctx->frame[0] == ctx->frame[1]) { /* low pass filter, convert it to high pass filter */
991: /* T = frame[3]*eye(n) - A */
992: MatDuplicate(st->A[0],MAT_COPY_VALUES,&st->T[0]);
993: MatScale(st->T[0],-1.0);
994: alpha = ctx->frame[3];
995: MatShift(st->T[0],alpha);
996: for (i=0;i<4;i++) frame2[i] = ctx->frame[3] - ctx->frame[3-i];
997: FILTLAN_GetIntervals(ctx->intervals,frame2,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo);
998: /* translate the intervals back */
999: for (i=0;i<4;i++) ctx->intervals2[i] = ctx->frame[3] - ctx->intervals[3-i];
1000: } else { /* it can be a mid-pass filter or a high-pass filter */
1001: if (ctx->frame[0] == 0.0) {
1002: PetscObjectReference((PetscObject)st->A[0]);
1003: st->T[0] = st->A[0];
1004: FILTLAN_GetIntervals(ctx->intervals,ctx->frame,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo);
1005: for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i];
1006: } else {
1007: /* T = A - frame[0]*eye(n) */
1008: MatDuplicate(st->A[0],MAT_COPY_VALUES,&st->T[0]);
1009: alpha = -ctx->frame[0];
1010: MatShift(st->T[0],alpha);
1011: for (i=0;i<4;i++) frame2[i] = ctx->frame[i] - ctx->frame[0];
1012: FILTLAN_GetIntervals(ctx->intervals,frame2,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo);
1013: /* translate the intervals back */
1014: for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i] + ctx->frame[0];
1015: }
1016: }
1017: npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
1018: PetscMalloc1((2*ctx->baseDegree+2)*(npoints-1),&ctx->baseFilter);
1019: FILTLAN_HermiteBaseFilterInChebyshevBasis(ctx->baseFilter,ctx->intervals,npoints,HighLowFlags,ctx->baseDegree);
1020: return(0);
1021: }