Actual source code: dsnhep.c

slepc-3.12.2 2020-01-13
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/dsimpl.h>
 12: #include <slepcblaslapack.h>

 14: PetscErrorCode DSAllocate_NHEP(DS ds,PetscInt ld)
 15: {

 19:   DSAllocateMat_Private(ds,DS_MAT_A);
 20:   DSAllocateMat_Private(ds,DS_MAT_Q);
 21:   PetscFree(ds->perm);
 22:   PetscMalloc1(ld,&ds->perm);
 23:   PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
 24:   return(0);
 25: }

 27: PetscErrorCode DSView_NHEP(DS ds,PetscViewer viewer)
 28: {
 29:   PetscErrorCode    ierr;
 30:   PetscViewerFormat format;

 33:   PetscViewerGetFormat(viewer,&format);
 34:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) return(0);
 35:   DSViewMat(ds,viewer,DS_MAT_A);
 36:   if (ds->state>DS_STATE_INTERMEDIATE) { DSViewMat(ds,viewer,DS_MAT_Q); }
 37:   if (ds->mat[DS_MAT_X]) { DSViewMat(ds,viewer,DS_MAT_X); }
 38:   if (ds->mat[DS_MAT_Y]) { DSViewMat(ds,viewer,DS_MAT_Y); }
 39:   return(0);
 40: }

 42: static PetscErrorCode DSVectors_NHEP_Refined_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
 43: {
 44: #if defined(PETSC_MISSING_LAPACK_GESVD)
 46:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
 47: #else
 49:   PetscInt       i,j;
 50:   PetscBLASInt   info,ld,n,n1,lwork,inc=1;
 51:   PetscScalar    sdummy,done=1.0,zero=0.0;
 52:   PetscReal      *sigma;
 53:   PetscBool      iscomplex = PETSC_FALSE;
 54:   PetscScalar    *A = ds->mat[DS_MAT_A];
 55:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
 56:   PetscScalar    *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
 57:   PetscScalar    *W;

 60:   if (left) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented for left vectors");
 61:   PetscBLASIntCast(ds->n,&n);
 62:   PetscBLASIntCast(ds->ld,&ld);
 63:   n1 = n+1;
 64:   if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
 65:   if (iscomplex) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complex eigenvalues yet");
 66:   DSAllocateWork_Private(ds,5*ld,6*ld,0);
 67:   DSAllocateMat_Private(ds,DS_MAT_W);
 68:   W = ds->mat[DS_MAT_W];
 69:   lwork = 5*ld;
 70:   sigma = ds->rwork+5*ld;

 72:   /* build A-w*I in W */
 73:   for (j=0;j<n;j++)
 74:     for (i=0;i<=n;i++)
 75:       W[i+j*ld] = A[i+j*ld];
 76:   for (i=0;i<n;i++)
 77:     W[i+i*ld] -= A[(*k)+(*k)*ld];

 79:   /* compute SVD of W */
 80: #if !defined(PETSC_USE_COMPLEX)
 81:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,&info));
 82: #else
 83:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,ds->rwork,&info));
 84: #endif
 85:   SlepcCheckLapackInfo("gesvd",info);

 87:   /* the smallest singular value is the new error estimate */
 88:   if (rnorm) *rnorm = sigma[n-1];

 90:   /* update vector with right singular vector associated to smallest singular value,
 91:      accumulating the transformation matrix Q */
 92:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,W+n-1,&ld,&zero,X+(*k)*ld,&inc));
 93:   return(0);
 94: #endif
 95: }

 97: static PetscErrorCode DSVectors_NHEP_Refined_All(DS ds,PetscBool left)
 98: {
100:   PetscInt       i;

103:   for (i=0;i<ds->n;i++) {
104:     DSVectors_NHEP_Refined_Some(ds,&i,NULL,left);
105:   }
106:   return(0);
107: }

109: static PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
110: {
111: #if defined(SLEPC_MISSING_LAPACK_TREVC)
113:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
114: #else
116:   PetscInt       i;
117:   PetscBLASInt   mm=1,mout,info,ld,n,*select,inc = 1;
118:   PetscScalar    tmp,done=1.0,zero=0.0;
119:   PetscReal      norm;
120:   PetscBool      iscomplex = PETSC_FALSE;
121:   PetscScalar    *A = ds->mat[DS_MAT_A];
122:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
123:   PetscScalar    *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
124:   PetscScalar    *Y;

127:   PetscBLASIntCast(ds->n,&n);
128:   PetscBLASIntCast(ds->ld,&ld);
129:   DSAllocateWork_Private(ds,0,0,ld);
130:   select = ds->iwork;
131:   for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;

133:   /* compute k-th eigenvector Y of A */
134:   Y = X+(*k)*ld;
135:   select[*k] = (PetscBLASInt)PETSC_TRUE;
136: #if !defined(PETSC_USE_COMPLEX)
137:   if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
138:   mm = iscomplex? 2: 1;
139:   if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
140:   DSAllocateWork_Private(ds,3*ld,0,0);
141:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,&info));
142: #else
143:   DSAllocateWork_Private(ds,2*ld,ld,0);
144:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,ds->rwork,&info));
145: #endif
146:   SlepcCheckLapackInfo("trevc",info);
147:   if (mout != mm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments");

149:   /* accumulate and normalize eigenvectors */
150:   if (ds->state>=DS_STATE_CONDENSED) {
151:     PetscArraycpy(ds->work,Y,mout*ld);
152:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work,&inc,&zero,Y,&inc));
153: #if !defined(PETSC_USE_COMPLEX)
154:     if (iscomplex) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work+ld,&inc,&zero,Y+ld,&inc));
155: #endif
156:     norm = BLASnrm2_(&n,Y,&inc);
157: #if !defined(PETSC_USE_COMPLEX)
158:     if (iscomplex) {
159:       tmp = BLASnrm2_(&n,Y+ld,&inc);
160:       norm = SlepcAbsEigenvalue(norm,tmp);
161:     }
162: #endif
163:     tmp = 1.0 / norm;
164:     PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y,&inc));
165: #if !defined(PETSC_USE_COMPLEX)
166:     if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y+ld,&inc));
167: #endif
168:   }

170:   /* set output arguments */
171:   if (iscomplex) (*k)++;
172:   if (rnorm) {
173:     if (iscomplex) *rnorm = SlepcAbsEigenvalue(Y[n-1],Y[n-1+ld]);
174:     else *rnorm = PetscAbsScalar(Y[n-1]);
175:   }
176:   return(0);
177: #endif
178: }

180: static PetscErrorCode DSVectors_NHEP_Eigen_All(DS ds,PetscBool left)
181: {
182: #if defined(SLEPC_MISSING_LAPACK_TREVC)
184:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
185: #else
187:   PetscInt       i;
188:   PetscBLASInt   n,ld,mout,info,inc = 1;
189:   PetscBool      iscomplex;
190:   PetscScalar    *X,*Y,*Z,*A = ds->mat[DS_MAT_A],tmp;
191:   PetscReal      norm;
192:   const char     *side,*back;

195:   PetscBLASIntCast(ds->n,&n);
196:   PetscBLASIntCast(ds->ld,&ld);
197:   if (left) {
198:     X = NULL;
199:     Y = ds->mat[DS_MAT_Y];
200:     side = "L";
201:   } else {
202:     X = ds->mat[DS_MAT_X];
203:     Y = NULL;
204:     side = "R";
205:   }
206:   Z = left? Y: X;
207:   if (ds->state>=DS_STATE_CONDENSED) {
208:     /* DSSolve() has been called, backtransform with matrix Q */
209:     back = "B";
210:     PetscArraycpy(Z,ds->mat[DS_MAT_Q],ld*ld);
211:   } else back = "A";
212: #if !defined(PETSC_USE_COMPLEX)
213:   DSAllocateWork_Private(ds,3*ld,0,0);
214:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
215: #else
216:   DSAllocateWork_Private(ds,2*ld,ld,0);
217:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
218: #endif
219:   SlepcCheckLapackInfo("trevc",info);

221:   /* normalize eigenvectors */
222:   for (i=0;i<n;i++) {
223:     iscomplex = (i<n-1 && A[i+1+i*ld]!=0.0)? PETSC_TRUE: PETSC_FALSE;
224:     norm = BLASnrm2_(&n,Z+i*ld,&inc);
225: #if !defined(PETSC_USE_COMPLEX)
226:     if (iscomplex) {
227:       tmp = BLASnrm2_(&n,Z+(i+1)*ld,&inc);
228:       norm = SlepcAbsEigenvalue(norm,tmp);
229:     }
230: #endif
231:     tmp = 1.0 / norm;
232:     PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+i*ld,&inc));
233: #if !defined(PETSC_USE_COMPLEX)
234:     if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+(i+1)*ld,&inc));
235: #endif
236:     if (iscomplex) i++;
237:   }
238:   return(0);
239: #endif
240: }

242: PetscErrorCode DSVectors_NHEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
243: {

247:   switch (mat) {
248:     case DS_MAT_X:
249:       if (ds->refined) {
250:         if (!ds->extrarow) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Refined vectors require activating the extra row");
251:         if (j) {
252:           DSVectors_NHEP_Refined_Some(ds,j,rnorm,PETSC_FALSE);
253:         } else {
254:           DSVectors_NHEP_Refined_All(ds,PETSC_FALSE);
255:         }
256:       } else {
257:         if (j) {
258:           DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_FALSE);
259:         } else {
260:           DSVectors_NHEP_Eigen_All(ds,PETSC_FALSE);
261:         }
262:       }
263:       break;
264:     case DS_MAT_Y:
265:       if (ds->refined) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
266:       if (j) {
267:         DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_TRUE);
268:       } else {
269:         DSVectors_NHEP_Eigen_All(ds,PETSC_TRUE);
270:       }
271:       break;
272:     case DS_MAT_U:
273:     case DS_MAT_VT:
274:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
275:       break;
276:     default:
277:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
278:   }
279:   if (ds->state < DS_STATE_CONDENSED) {
280:     DSSetState(ds,DS_STATE_CONDENSED);
281:   }
282:   return(0);
283: }

285: static PetscErrorCode DSSort_NHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
286: {
287: #if defined(PETSC_MISSING_LAPACK_TRSEN)
289:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable");
290: #else
292:   PetscInt       i;
293:   PetscBLASInt   info,n,ld,mout,lwork,*selection;
294:   PetscScalar    *T = ds->mat[DS_MAT_A],*Q = ds->mat[DS_MAT_Q],*work;
295: #if !defined(PETSC_USE_COMPLEX)
296:   PetscBLASInt   *iwork,liwork;
297: #endif

300:   if (!k) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_WRONG,"Must supply argument k");
301:   PetscBLASIntCast(ds->n,&n);
302:   PetscBLASIntCast(ds->ld,&ld);
303: #if !defined(PETSC_USE_COMPLEX)
304:   lwork = n;
305:   liwork = 1;
306:   DSAllocateWork_Private(ds,lwork,0,liwork+n);
307:   work = ds->work;
308:   lwork = ds->lwork;
309:   selection = ds->iwork;
310:   iwork = ds->iwork + n;
311:   liwork = ds->liwork - n;
312: #else
313:   lwork = 1;
314:   DSAllocateWork_Private(ds,lwork,0,n);
315:   work = ds->work;
316:   selection = ds->iwork;
317: #endif
318:   /* Compute the selected eigenvalue to be in the leading position */
319:   DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE);
320:   PetscArrayzero(selection,n);
321:   for (i=0;i<*k;i++) selection[ds->perm[i]] = 1;
322: #if !defined(PETSC_USE_COMPLEX)
323:   PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,wi,&mout,NULL,NULL,work,&lwork,iwork,&liwork,&info));
324: #else
325:   PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,&mout,NULL,NULL,work,&lwork,&info));
326: #endif
327:   SlepcCheckLapackInfo("trsen",info);
328:   *k = mout;
329:   return(0);
330: #endif
331: }

333: static PetscErrorCode DSSort_NHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
334: {
335: #if defined(SLEPC_MISSING_LAPACK_TREXC)
337:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
338: #else
340:   PetscScalar    re;
341:   PetscInt       i,j,pos,result;
342:   PetscBLASInt   ifst,ilst,info,n,ld;
343:   PetscScalar    *T = ds->mat[DS_MAT_A];
344:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
345: #if !defined(PETSC_USE_COMPLEX)
346:   PetscScalar    *work,im;
347: #endif

350:   PetscBLASIntCast(ds->n,&n);
351:   PetscBLASIntCast(ds->ld,&ld);
352: #if !defined(PETSC_USE_COMPLEX)
353:   DSAllocateWork_Private(ds,ld,0,0);
354:   work = ds->work;
355: #endif
356:   /* selection sort */
357:   for (i=ds->l;i<n-1;i++) {
358:     re = wr[i];
359: #if !defined(PETSC_USE_COMPLEX)
360:     im = wi[i];
361: #endif
362:     pos = 0;
363:     j=i+1; /* j points to the next eigenvalue */
364: #if !defined(PETSC_USE_COMPLEX)
365:     if (im != 0) j=i+2;
366: #endif
367:     /* find minimum eigenvalue */
368:     for (;j<n;j++) {
369: #if !defined(PETSC_USE_COMPLEX)
370:       SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result);
371: #else
372:       SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result);
373: #endif
374:       if (result > 0) {
375:         re = wr[j];
376: #if !defined(PETSC_USE_COMPLEX)
377:         im = wi[j];
378: #endif
379:         pos = j;
380:       }
381: #if !defined(PETSC_USE_COMPLEX)
382:       if (wi[j] != 0) j++;
383: #endif
384:     }
385:     if (pos) {
386:       /* interchange blocks */
387:       PetscBLASIntCast(pos+1,&ifst);
388:       PetscBLASIntCast(i+1,&ilst);
389: #if !defined(PETSC_USE_COMPLEX)
390:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
391: #else
392:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
393: #endif
394:       SlepcCheckLapackInfo("trexc",info);
395:       /* recover original eigenvalues from T matrix */
396:       for (j=i;j<n;j++) {
397:         wr[j] = T[j+j*ld];
398: #if !defined(PETSC_USE_COMPLEX)
399:         if (j<n-1 && T[j+1+j*ld] != 0.0) {
400:           /* complex conjugate eigenvalue */
401:           wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) *
402:                   PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
403:           wr[j+1] = wr[j];
404:           wi[j+1] = -wi[j];
405:           j++;
406:         } else {
407:           wi[j] = 0.0;
408:         }
409: #endif
410:       }
411:     }
412: #if !defined(PETSC_USE_COMPLEX)
413:     if (wi[i] != 0) i++;
414: #endif
415:   }
416:   return(0);
417: #endif
418: }

420: PetscErrorCode DSSort_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
421: {

425:   if (!rr || wr == rr) {
426:     DSSort_NHEP_Total(ds,wr,wi);
427:   } else {
428:     DSSort_NHEP_Arbitrary(ds,wr,wi,rr,ri,k);
429:   }
430:   return(0);
431: }

433: static PetscErrorCode DSSortWithPermutation_NHEP(DS ds,PetscInt *perm,PetscScalar *wr,PetscScalar *wi)
434: {
435: #if defined(SLEPC_MISSING_LAPACK_TREXC)
437:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
438: #else
440:   PetscInt       i,j,pos,inc=1;
441:   PetscBLASInt   ifst,ilst,info,n,ld;
442:   PetscScalar    *T = ds->mat[DS_MAT_A];
443:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
444: #if !defined(PETSC_USE_COMPLEX)
445:   PetscScalar    *work;
446: #endif

449:   PetscBLASIntCast(ds->n,&n);
450:   PetscBLASIntCast(ds->ld,&ld);
451: #if !defined(PETSC_USE_COMPLEX)
452:   DSAllocateWork_Private(ds,ld,0,0);
453:   work = ds->work;
454: #endif
455:   for (i=ds->l;i<n-1;i++) {
456:     pos = perm[i];
457: #if !defined(PETSC_USE_COMPLEX)
458:     inc = (pos<n-1 && T[pos+1+pos*ld] != 0.0)? 2: 1;
459: #endif
460:     if (pos!=i) {
461: #if !defined(PETSC_USE_COMPLEX)
462:       if ((T[pos+(pos-1)*ld] != 0.0 && perm[i+1]!=pos-1) || (T[pos+1+pos*ld] != 0.0 && perm[i+1]!=pos+1))
463:  SETERRQ1(PETSC_COMM_SELF,1,"Invalid permutation due to a 2x2 block at position %D",pos);
464: #endif

466:       /* interchange blocks */
467:       PetscBLASIntCast(pos+1,&ifst);
468:       PetscBLASIntCast(i+1,&ilst);
469: #if !defined(PETSC_USE_COMPLEX)
470:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
471: #else
472:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
473: #endif
474:       SlepcCheckLapackInfo("trexc",info);
475:       for (j=i+1;j<n;j++) {
476:         if (perm[j]>=i && perm[j]<pos) perm[j]+=inc;
477:       }
478:       perm[i] = i;
479:       if (inc==2) perm[i+1] = i+1;
480:     }
481:     if (inc==2) i++;
482:   }
483:   /* recover original eigenvalues from T matrix */
484:   for (j=ds->l;j<n;j++) {
485:     wr[j] = T[j+j*ld];
486: #if !defined(PETSC_USE_COMPLEX)
487:     if (j<n-1 && T[j+1+j*ld] != 0.0) {
488:       /* complex conjugate eigenvalue */
489:       wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) * PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
490:       wr[j+1] = wr[j];
491:       wi[j+1] = -wi[j];
492:       j++;
493:     } else {
494:       wi[j] = 0.0;
495:     }
496: #endif
497:   }
498:   return(0);
499: #endif
500: }

502: PetscErrorCode DSUpdateExtraRow_NHEP(DS ds)
503: {
505:   PetscInt       i;
506:   PetscBLASInt   n,ld,incx=1;
507:   PetscScalar    *A,*Q,*x,*y,one=1.0,zero=0.0;

510:   PetscBLASIntCast(ds->n,&n);
511:   PetscBLASIntCast(ds->ld,&ld);
512:   A  = ds->mat[DS_MAT_A];
513:   Q  = ds->mat[DS_MAT_Q];
514:   DSAllocateWork_Private(ds,2*ld,0,0);
515:   x = ds->work;
516:   y = ds->work+ld;
517:   for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
518:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
519:   for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
520:   ds->k = n;
521:   return(0);
522: }

524: /*
525:    Reduce a matrix A to upper Hessenberg form Q'*A*Q, where Q is an orthogonal matrix.
526:    The result overwrites A. Matrix A has the form

528:          [ S | * ]
529:      A = [-------]
530:          [ R | H ]

532:   where S is an upper (quasi-)triangular matrix of order k, H is an upper Hessenberg
533:   matrix of order n-k, and R is all zeros except the first row (the arrow).
534:   The algorithm uses elementary reflectors to annihilate entries in the arrow, and
535:   then proceeds upwards.
536:   If ilo>1, then it is assumed that the first ilo-1 entries of the arrow are zero, and
537:   hence the first ilo-1 rows and columns of Q are set to the identity matrix.

539:   Required workspace is 2*n.
540: */
541: static PetscErrorCode ArrowHessenberg(PetscBLASInt n,PetscBLASInt k,PetscBLASInt ilo,PetscScalar *A,PetscBLASInt lda,PetscScalar *Q,PetscBLASInt ldq,PetscScalar *work)
542: {
543: #if defined(SLEPC_MISSING_LAPACK_LARFG) || defined(SLEPC_MISSING_LAPACK_LARF)
545:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
546: #else
547:   PetscBLASInt i,j,n0,m,inc=1,incn=-1;
548:   PetscScalar  t,*v=work,*w=work+n,tau,tauc;

551:   m = n-ilo+1;
552:   for (i=k;i>ilo;i--) {
553:     for (j=0;j<=i-ilo;j++) v[j] = A[i+(i-j-1)*lda];  /* _larfg does not allow negative inc, so use buffer */
554:     n0 = i-ilo+1;
555:     PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,v,v+1,&inc,&tau));
556:     for (j=1;j<=i-ilo;j++) v[j] = PetscConj(v[j]);
557:     tauc = PetscConj(tau);
558:     A[i+(i-1)*lda] = v[0];
559:     v[0] = 1.0;
560:     PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&i,&n0,v,&incn,&tauc,A+(ilo-1)*lda,&lda,w));
561:     for (j=1;j<=i-ilo;j++) A[i+(i-j-1)*lda] = 0.0;
562:     PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,v,&incn,&tau,A+ilo-1+(ilo-1)*lda,&lda,w));
563:     PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,v,&incn,&tau,Q+ilo-1+(ilo-1)*ldq,&ldq,w));
564:   }
565:   /* trivial in-place transposition of Q */
566:   for (j=ilo-1;j<k;j++) {
567:     for (i=j;i<k;i++) {
568:       t = Q[i+j*ldq];
569:       if (i!=j) Q[i+j*ldq] = PetscConj(Q[j+i*ldq]);
570:       Q[j+i*ldq] = PetscConj(t);
571:     }
572:   }
573:   return(0);
574: #endif
575: }

577: PetscErrorCode DSSolve_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
578: {
579: #if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR)
581:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable");
582: #else
584:   PetscScalar    *work,*tau;
585:   PetscInt       i,j;
586:   PetscBLASInt   ilo,lwork,info,n,k,ld;
587:   PetscScalar    *A = ds->mat[DS_MAT_A];
588:   PetscScalar    *Q = ds->mat[DS_MAT_Q];

591: #if !defined(PETSC_USE_COMPLEX)
593: #endif
594:   PetscBLASIntCast(ds->n,&n);
595:   PetscBLASIntCast(ds->ld,&ld);
596:   PetscBLASIntCast(ds->l+1,&ilo);
597:   PetscBLASIntCast(ds->k,&k);
598:   DSAllocateWork_Private(ds,ld+6*ld,0,0);
599:   tau  = ds->work;
600:   work = ds->work+ld;
601:   lwork = 6*ld;

603:   /* initialize orthogonal matrix */
604:   PetscArrayzero(Q,ld*ld);
605:   for (i=0;i<n;i++) Q[i+i*ld] = 1.0;
606:   if (n==1) { /* quick return */
607:     wr[0] = A[0];
608:     wi[0] = 0.0;
609:     return(0);
610:   }

612:   /* reduce to upper Hessenberg form */
613:   if (ds->state<DS_STATE_INTERMEDIATE) {
614:     if (PETSC_FALSE && k>0) {
615:       ArrowHessenberg(n,k,ilo,A,ld,Q,ld,work);
616:     } else {
617:       PetscStackCallBLAS("LAPACKgehrd",LAPACKgehrd_(&n,&ilo,&n,A,&ld,tau,work,&lwork,&info));
618:       SlepcCheckLapackInfo("gehrd",info);
619:       for (j=0;j<n-1;j++) {
620:         for (i=j+2;i<n;i++) {
621:           Q[i+j*ld] = A[i+j*ld];
622:           A[i+j*ld] = 0.0;
623:         }
624:       }
625:       PetscStackCallBLAS("LAPACKorghr",LAPACKorghr_(&n,&ilo,&n,Q,&ld,tau,work,&lwork,&info));
626:       SlepcCheckLapackInfo("orghr",info);
627:     }
628:   }

630:   /* compute the (real) Schur form */
631: #if !defined(PETSC_USE_COMPLEX)
632:   PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,wi,Q,&ld,work,&lwork,&info));
633:   for (j=0;j<ds->l;j++) {
634:     if (j==n-1 || A[j+1+j*ld] == 0.0) {
635:       /* real eigenvalue */
636:       wr[j] = A[j+j*ld];
637:       wi[j] = 0.0;
638:     } else {
639:       /* complex eigenvalue */
640:       wr[j] = A[j+j*ld];
641:       wr[j+1] = A[j+j*ld];
642:       wi[j] = PetscSqrtReal(PetscAbsReal(A[j+1+j*ld])) *
643:               PetscSqrtReal(PetscAbsReal(A[j+(j+1)*ld]));
644:       wi[j+1] = -wi[j];
645:       j++;
646:     }
647:   }
648: #else
649:   PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,Q,&ld,work,&lwork,&info));
650:   if (wi) for (i=ds->l;i<n;i++) wi[i] = 0.0;
651: #endif
652:   SlepcCheckLapackInfo("hseqr",info);
653:   return(0);
654: #endif
655: }

657: PetscErrorCode DSSynchronize_NHEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
658: {
660:   PetscInt       ld=ds->ld,l=ds->l,k;
661:   PetscMPIInt    n,rank,off=0,size,ldn;

664:   k = (ds->n-l)*ld;
665:   if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
666:   if (eigr) k += ds->n-l;
667:   if (eigi) k += ds->n-l;
668:   DSAllocateWork_Private(ds,k,0,0);
669:   PetscMPIIntCast(k*sizeof(PetscScalar),&size);
670:   PetscMPIIntCast(ds->n-l,&n);
671:   PetscMPIIntCast(ld*(ds->n-l),&ldn);
672:   MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
673:   if (!rank) {
674:     MPI_Pack(ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
675:     if (ds->state>DS_STATE_RAW) {
676:       MPI_Pack(ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
677:     }
678:     if (eigr) {
679:       MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
680:     }
681:     if (eigi) {
682:       MPI_Pack(eigi+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
683:     }
684:   }
685:   MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
686:   if (rank) {
687:     MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
688:     if (ds->state>DS_STATE_RAW) {
689:       MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
690:     }
691:     if (eigr) {
692:       MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
693:     }
694:     if (eigi) {
695:       MPI_Unpack(ds->work,size,&off,eigi+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
696:     }
697:   }
698:   return(0);
699: }

701: PetscErrorCode DSTruncate_NHEP(DS ds,PetscInt n)
702: {
703:   PetscInt    i,newn,ld=ds->ld,l=ds->l;
704:   PetscScalar *A;

707:   if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
708:   A = ds->mat[DS_MAT_A];
709:   /* be careful not to break a diagonal 2x2 block */
710:   if (A[n+(n-1)*ld]==0.0) newn = n;
711:   else {
712:     if (n<ds->n-1) newn = n+1;
713:     else newn = n-1;
714:   }
715:   if (ds->extrarow && ds->k==ds->n) {
716:     /* copy entries of extra row to the new position, then clean last row */
717:     for (i=l;i<newn;i++) A[newn+i*ld] = A[ds->n+i*ld];
718:     for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
719:   }
720:   ds->k = 0;
721:   ds->n = newn;
722:   return(0);
723: }

725: PetscErrorCode DSCond_NHEP(DS ds,PetscReal *cond)
726: {
727: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE) || defined(SLEPC_MISSING_LAPACK_LANHS)
729:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE/LANHS - Lapack routines are unavailable");
730: #else
732:   PetscScalar    *work;
733:   PetscReal      *rwork;
734:   PetscBLASInt   *ipiv;
735:   PetscBLASInt   lwork,info,n,ld;
736:   PetscReal      hn,hin;
737:   PetscScalar    *A;

740:   PetscBLASIntCast(ds->n,&n);
741:   PetscBLASIntCast(ds->ld,&ld);
742:   lwork = 8*ld;
743:   DSAllocateWork_Private(ds,lwork,ld,ld);
744:   work  = ds->work;
745:   rwork = ds->rwork;
746:   ipiv  = ds->iwork;

748:   /* use workspace matrix W to avoid overwriting A */
749:   DSAllocateMat_Private(ds,DS_MAT_W);
750:   A = ds->mat[DS_MAT_W];
751:   PetscArraycpy(A,ds->mat[DS_MAT_A],ds->ld*ds->ld);

753:   /* norm of A */
754:   if (ds->state<DS_STATE_INTERMEDIATE) hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
755:   else hn = LAPACKlanhs_("I",&n,A,&ld,rwork);

757:   /* norm of inv(A) */
758:   PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
759:   SlepcCheckLapackInfo("getrf",info);
760:   PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
761:   SlepcCheckLapackInfo("getri",info);
762:   hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);

764:   *cond = hn*hin;
765:   return(0);
766: #endif
767: }

769: PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma)
770: {
771: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS)
773:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable");
774: #else
776:   PetscInt       i,j;
777:   PetscBLASInt   *ipiv,info,n,ld,one=1,ncol;
778:   PetscScalar    *A,*B,*Q,*g=gin,*ghat;
779:   PetscScalar    done=1.0,dmone=-1.0,dzero=0.0;
780:   PetscReal      gnorm;

783:   PetscBLASIntCast(ds->n,&n);
784:   PetscBLASIntCast(ds->ld,&ld);
785:   A  = ds->mat[DS_MAT_A];

787:   if (!recover) {

789:     DSAllocateWork_Private(ds,0,0,ld);
790:     ipiv = ds->iwork;
791:     if (!g) {
792:       DSAllocateWork_Private(ds,ld,0,0);
793:       g = ds->work;
794:     }
795:     /* use workspace matrix W to factor A-tau*eye(n) */
796:     DSAllocateMat_Private(ds,DS_MAT_W);
797:     B = ds->mat[DS_MAT_W];
798:     PetscArraycpy(B,A,ld*ld);

800:     /* Vector g initialy stores b = beta*e_n^T */
801:     PetscArrayzero(g,n);
802:     g[n-1] = beta;

804:     /* g = (A-tau*eye(n))'\b */
805:     for (i=0;i<n;i++) B[i+i*ld] -= tau;
806:     PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info));
807:     SlepcCheckLapackInfo("getrf",info);
808:     PetscLogFlops(2.0*n*n*n/3.0);
809:     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info));
810:     SlepcCheckLapackInfo("getrs",info);
811:     PetscLogFlops(2.0*n*n-n);

813:     /* A = A + g*b' */
814:     for (i=0;i<n;i++) A[i+(n-1)*ld] += g[i]*beta;

816:   } else { /* recover */

818:     DSAllocateWork_Private(ds,ld,0,0);
819:     ghat = ds->work;
820:     Q    = ds->mat[DS_MAT_Q];

822:     /* g^ = -Q(:,idx)'*g */
823:     PetscBLASIntCast(ds->l+ds->k,&ncol);
824:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one));

826:     /* A = A + g^*b' */
827:     for (i=0;i<ds->l+ds->k;i++)
828:       for (j=ds->l;j<ds->l+ds->k;j++)
829:         A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta;

831:     /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
832:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one));
833:   }

835:   /* Compute gamma factor */
836:   if (gamma) {
837:     gnorm = 0.0;
838:     for (i=0;i<n;i++) gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
839:     *gamma = PetscSqrtReal(1.0+gnorm);
840:   }
841:   return(0);
842: #endif
843: }

845: SLEPC_EXTERN PetscErrorCode DSCreate_NHEP(DS ds)
846: {
848:   ds->ops->allocate      = DSAllocate_NHEP;
849:   ds->ops->view          = DSView_NHEP;
850:   ds->ops->vectors       = DSVectors_NHEP;
851:   ds->ops->solve[0]      = DSSolve_NHEP;
852:   ds->ops->sort          = DSSort_NHEP;
853:   ds->ops->sortperm      = DSSortWithPermutation_NHEP;
854:   ds->ops->synchronize   = DSSynchronize_NHEP;
855:   ds->ops->truncate      = DSTruncate_NHEP;
856:   ds->ops->update        = DSUpdateExtraRow_NHEP;
857:   ds->ops->cond          = DSCond_NHEP;
858:   ds->ops->transharm     = DSTranslateHarmonic_NHEP;
859:   return(0);
860: }