MechSys: linalg/scalpk.h Source File

00001 /*************************************************************************************
00002  * MechSys - A C++ library to simulate (Continuum) Mechanical Systems                *
00003  * Copyright (C) 2005 Dorival de Moraes Pedroso <dorival.pedroso at gmail.com>       *
00004  * Copyright (C) 2005 Raul Dario Durand Farfan  <raul.durand at gmail.com>           *
00005  *                                                                                   *
00006  * This file is part of MechSys.                                                     *
00007  *                                                                                   *
00008  * MechSys is free software; you can redistribute it and/or modify it under the      *
00009  * terms of the GNU General Public License as published by the Free Software         *
00010  * Foundation; either version 2 of the License, or (at your option) any later        *
00011  * version.                                                                          *
00012  *                                                                                   *
00013  * MechSys is distributed in the hope that it will be useful, but WITHOUT ANY        *
00014  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A   *
00015  * PARTICULAR PURPOSE. See the GNU General Public License for more details.          *
00016  *                                                                                   *
00017  * You should have received a copy of the GNU General Public License along with      *
00018  * MechSys; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, *
00019  * Fifth Floor, Boston, MA 02110-1301, USA                                           *
00020  *************************************************************************************/
00021 
00022 #ifndef MECHSYS_SCLAPK_H
00023 #define MECHSYS_SCLAPK_H
00024 
00025 // MechSys
00026 #include "linalg/matrix.h"
00027 #include "util/util.h"
00028 
// Prototypes of the external BLACS / ScaLAPACK / PBLAS routines used by MechSys.
// C linkage keeps the C++ compiler from mangling the names.
extern "C" // {{{
{
    // ---- BLACS, C interface: scalars go by value -------------------------------------------

    // Process / grid management
    void Cblacs_pinfo    (int *mypnum, int *nprocs);
    void Cblacs_get      (int context, int request, int *value);
    int  Cblacs_gridinit (int *context, char *order, int np_row, int np_col);
    void Cblacs_gridinfo (int context, int *np_row, int *np_col, int *my_row, int *my_col);
    void Cblacs_gridexit (int context);
    void Cblacs_exit     (int error_code);
    void Cblacs_barrier  (int context, char *scope);

    // Broadcast of an m-by-n array: *bs2d is called by the sender, *br2d by the receivers,
    // which also name the sender's grid coordinates (rsrc,csrc).
    void Cigebs2d (int context, char *scope, char *top, int m, int n, int    *A, int lda);
    void Cigebr2d (int context, char *scope, char *top, int m, int n, int    *A, int lda, int rsrc, int csrc);
    void Cdgebs2d (int context, char *scope, char *top, int m, int n, double *A, int lda);
    void Cdgebr2d (int context, char *scope, char *top, int m, int n, double *A, int lda, int rsrc, int csrc);

    // Copies a submatrix of A onto a submatrix of B (the two may be distributed differently)
    void Cpdgemr2d (int M, int N,
                    double *A, int IA, int JA, int *ADESC,
                    double *B, int IB, int JB, int *BDESC,
                    int CTXT);

    // ---- FORTRAN routines (trailing underscore): every argument goes by address ------------

    // Index / size tools
    int numroc_  (int *n,        int *nb, int *iproc, int *isrcproc, int *nprocs); // number of local rows/columns
    int indxg2p_ (int *indxglob, int *nb, int *iproc, int *isrcproc, int *nprocs);
    int indxl2g_ (int *indxloc,  int *nb, int *iproc, int *isrcproc, int *nprocs); // local index -> global index (both 1-based)

    // Array descriptors
    void descinit_ (int *desc, int *m, int *n, int *mb, int *nb, int *irsrc, int *icsrc, int *ictxt, int *lld, int *info);
    // NOTE(review): DESCSET appears to be a SUBROUTINE (no return value) in ScaLAPACK's TOOLS;
    // the 'int' return type below is kept as it was -- confirm before relying on it.
    int  descset_  (int *desc, int *m, int *n, int *mb, int *nb, int *irsrc, int *icsrc, int *ictxt, int *lld);

    // Auxiliary routines
    double pdlamch_ (int *ictxt, char *cmach);
    double pdlange_ (char *norm, int *m, int *n, double *A, int *ia, int *ja, int *desca, double *work);
    void   pdlacpy_ (char *uplo, int *m, int *n,
                     double *a, int *ia, int *ja, int *desca,
                     double *b, int *ib, int *jb, int *descb);

    // Linear solver: sub(A) * X = sub(B), X overwrites sub(B)
    void pdgesv_ (int *n, int *nrhs,
                  double *A, int *ia, int *ja, int *desca,
                  int *ipiv,
                  double *B, int *ib, int *jb, int *descb,
                  int *info);

    // PBLAS
    void pdgemm_  (char *TRANSA, char *TRANSB, int *M, int *N, int *K,
                   double *ALPHA,
                   double *A, int *IA, int *JA, int *DESCA,
                   double *B, int *IB, int *JB, int *DESCB,
                   double *BETA,
                   double *C, int *IC, int *JC, int *DESCC);
    // sub(C) := beta*sub(C) + alpha*op(sub(A))
    void pdgeadd_ (char *TRANS, int *M, int *N,
                   double *ALPHA,
                   double *A, int *IA, int *JA, int *DESCA,
                   double *BETA,
                   double *C, int *IC, int *JC, int *DESCC);
} // }}}
00064 
00065 inline int IndxL2G(int idxloc, int nb, int iproc, int isrcproc, int nprocs)
00066 { // {{{
00067 
00068     // (FORTRAN) indxl2g_ function uses indexes which starts at 1 (one)
00069     int fortran_idxloc = idxloc + 1;
00070     return indxl2g_(&fortran_idxloc, &nb, &iproc, &isrcproc, &nprocs) - 1;
00071 
00072 } // }}}
00073 
00074 inline void PDLAFill(LinAlg::Matrix<double> const & AA, double *A, int *DescA, int iRread, int iCread, double *Work)
00075 { // {{{
00076 
00077     /*
00078     This function was based in a function inside file pdlaread.f
00079     That function (PDLAREAD) has the following caption:
00080     
00081      -- ScaLAPACK auxiliary routine (version 2.0) --
00082         University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00083         and University of California, Berkeley.
00084         August 12, 2001 
00085     
00086     Purpose
00087     =======
00088     
00089      PDLAREAD reads from a file named FILNAM a matrix and distribute
00090      it to the process grid.
00091     
00092      Only the process of coordinates {IRREAD, ICREAD} read the file.
00093     
00094      WORK must be of size >= MB_ = DESCA( MB_ ).
00095     
00096     Further Details
00097     ===============
00098     
00099     Contributed by Song Jin, University of Tennessee, 1996.
00100     */
00101     // Parameters
00102     //int block_cyclic_2d = 1;
00103     //int dtype_          = 1;
00104     int ctxt_           = 2;
00105     //int m_              = 3;
00106     //int n_              = 4;
00107     int mb_             = 5;
00108     //int nb_             = 6;
00109     //int rsrc_           = 7;
00110     //int csrc_           = 8;
00111     //int lld_            = 9;
00112     int one = 1;
00113 
00114     // Local scalars
00115     bool isioprocessor = false;
00116     int  ictxt         = DescA[ctxt_-1];
00117     int  lwork         = DescA[mb_-1];
00118     int  mycol         = 0;
00119     int  myrow         = 0;
00120     int  npcol         = 0;
00121     int  nprow         = 0;
00122 
00123     // Check if this is the IO processor
00124     Cblacs_gridinfo (ictxt, &nprow, &npcol, &myrow, &mycol);
00125     isioprocessor = ((myrow==iRread)&&(mycol==iCread));
00126 
00127     // Get number of rows and columns
00128     int iwork[2];
00129     if (isioprocessor)
00130     {
00131         iwork[0] = AA.Rows();
00132         iwork[1] = AA.Cols();
00133         //igebs2d_(&ictxt, "All", " ", 2, 1, &iwork, 2);
00134         Cigebs2d(ictxt, "All", " ", 2, 1, iwork, 2);
00135     }
00136     else
00137     {
00138         //igebr2d_(&ictxt, "All", " ", 2, 1, &iwork, 2, &iRread, &iCread);
00139         Cigebr2d(ictxt, "All", " ", 2, 1, iwork, 2, iRread, iCread);
00140     }
00141     int m = iwork[0];
00142     int n = iwork[1];
00143 
00144     // DESCSET initializes a descriptor vector 
00145     int descwork[9];
00146     int mm   = Util::Max(1, Util::Min(m, lwork));
00147     int nn   = Util::Max(1, static_cast<int>(lwork/mm));
00148     int mb   = mm;
00149     int nb   = nn;
00150     int rsrc = iRread;
00151     int csrc = iCread;
00152     int ldd  = Util::Max(1, mm);
00153     descset_(descwork, &mm, &nn, &mb, &nb, &rsrc, &csrc, &ictxt, &ldd);
00154 
00155     // Fill matrix
00156     for (int jstart=0; jstart<n; jstart+=nn)
00157     {
00158         int jend  = Util::Min(n, jstart+nn);
00159         int jsize = jend - jstart;
00160         for (int istart=0; istart<m; istart+=mm)
00161         {
00162             int    iend  = Util::Min(m, istart+mm);
00163             int    isize = iend - istart;
00164             double alpha = 1.0;
00165             double beta  = 0.0;
00166             if (isioprocessor)
00167             {
00168                 for (int j=0; j<jsize; j++)
00169                 for (int i=0; i<isize; i++)
00170                     Work[i+j*ldd] = AA(i,j);
00171             }
00172             pdgeadd_("N", &isize, &jsize,
00173                      &alpha,
00174                      Work, &one, &one, descwork,
00175                      &beta,
00176                      A, &istart, &jstart, DescA);
00177         }
00178     }
00179 
00180     // Flag (?)
00181     Work[0] = DescA[mb_];
00182 
00183 } // }}}
00184 
00185 #endif // MECHSYS_SCLAPK_H
00186 
00187 /* {{{ MANUAL
00188  
00189 
00190    These notes were 'borrowed' from the ScaLAPACK manual:
00191 
00192   -- ScaLAPACK routine (version 1.7) --
00193      University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00194      and University of California, Berkeley.
00195      Jan 30, 2006
00196 
00197  
00198   ======================================================================
00199 
00200   INTEGER FUNCTION INDXL2G( INDXLOC, NB, IPROC, ISRCPROC, NPROCS )
00201 
00202   Purpose
00203   =======
00204 
00205   INDXL2G computes the global index of a distributed matrix entry
00206   pointed to by the local index INDXLOC of the process indicated by
00207   IPROC.
00208 
00209   Arguments
00210   =========
00211 
00212   INDXLOC   (global input) INTEGER
00213             The local index of the distributed matrix entry.
00214 
00215   NB        (global input) INTEGER
00216             Block size, size of the blocks the distributed matrix is
00217             split into.
00218 
00219   IPROC     (local input) INTEGER
00220             The coordinate of the process whose local array row or
00221             column is to be determined.
00222 
00223   ISRCPROC  (global input) INTEGER
00224             The coordinate of the process that possesses the first
00225             row/column of the distributed matrix.
00226 
00227   NPROCS    (global input) INTEGER
00228             The total number processes over which the distributed
00229             matrix is distributed.
00230 
00231   ======================================================================
00232 
00233 
00234   ======================================================================
00235 
00236   INTEGER FUNCTION NUMROC(N, NB, IPROC, ISRCPROC, NPROCS)
00237 
00238   NUMROC computes the NUMber of Rows Or Columns of a distributed
00239   matrix owned by the process indicated by IPROC.
00240 
00241   Arguments
00242   =========
00243 
00244   N         (global input) INTEGER
00245             The number of rows/columns in distributed matrix.
00246 
00247   NB        (global input) INTEGER
00248             Block size, size of the blocks the distributed matrix is
00249             split into.
00250 
00251   IPROC     (local input) INTEGER
00252             The coordinate of the process whose local array row or
00253             column is to be determined.
00254 
00255   ISRCPROC  (global input) INTEGER
00256             The coordinate of the process that possesses the first
00257             row or column of the distributed matrix.
00258 
00259   NPROCS    (global input) INTEGER
00260             The total number processes over which the matrix is
00261             distributed.
00262 
00263   ======================================================================
00264 
00265 
00266   ======================================================================
00267 
00268   SUBROUTINE
00269   DESCINIT(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
00270 
00271   DESCINIT initializes the descriptor vector with the 8 input arguments
00272   M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD.
00273 
00274   Notes
00275   =====
00276 
00277   Each global data object is described by an associated description
00278   vector.  This vector stores the information required to establish
00279   the mapping between an object element and its corresponding process
00280   and memory location.
00281 
00282   Let A be a generic term for any 2D block cyclicly distributed array.
00283   Such a global array has an associated description vector DESCA.
00284   In the following comments, the character _ should be read as
00285   "of the global array".
00286 
00287   NOTATION        STORED IN      EXPLANATION
00288   --------------- -------------- --------------------------------------
00289   DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00290                                  DTYPE_A = 1.
00291   CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00292                                  the BLACS process grid A is distribu-
00293                                  ted over. The context itself is glo-
00294                                  bal, but the handle (the integer
00295                                  value) may vary.
00296   M_A    (global) DESCA( M_ )    The number of rows in the global
00297                                  array A.
00298   N_A    (global) DESCA( N_ )    The number of columns in the global
00299                                  array A.
00300   MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00301                                  the rows of the array.
00302   NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00303                                  the columns of the array.
00304   RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00305                                  row of the array A is distributed.
00306   CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00307                                  first column of the array A is
00308                                  distributed.
00309   LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00310                                  array.  LLD_A >= MAX(1,LOCr(M_A)).
00311 
00312   Let K be the number of rows or columns of a distributed matrix,
00313   and assume that its process grid has dimension p x q.
00314   LOCr( K ) denotes the number of elements of K that a process
00315   would receive if K were distributed over the p processes of its
00316   process column.
00317   Similarly, LOCc( K ) denotes the number of elements of K that a
00318   process would receive if K were distributed over the q processes of
00319   its process row.
00320   The values of LOCr() and LOCc() may be determined via a call to the
00321   ScaLAPACK tool function, NUMROC:
00322           LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00323           LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00324   An upper bound for these quantities may be computed by:
00325           LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00326           LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00327 
00328   Arguments
00329   =========
00330 
00331   DESC    (output) INTEGER array of dimension DLEN_.
00332           The array descriptor of a distributed matrix to be set.
00333 
00334   M       (global input) INTEGER
00335           The number of rows in the distributed matrix. M >= 0.
00336 
00337   N       (global input) INTEGER
00338           The number of columns in the distributed matrix. N >= 0.
00339 
00340   MB      (global input) INTEGER
00341           The blocking factor used to distribute the rows of the
00342           matrix. MB >= 1.
00343 
00344   NB      (global input) INTEGER
00345           The blocking factor used to distribute the columns of the
00346           matrix. NB >= 1.
00347 
00348   IRSRC   (global input) INTEGER
00349           The process row over which the first row of the matrix is
00350           distributed. 0 <= IRSRC < NPROW.
00351 
00352   ICSRC   (global input) INTEGER
00353           The process column over which the first column of the
00354           matrix is distributed. 0 <= ICSRC < NPCOL.
00355 
00356   ICTXT   (global input) INTEGER
00357           The BLACS context handle, indicating the global context of
00358           the operation on the matrix. The context itself is global.
00359 
00360   LLD     (local input)  INTEGER
00361           The leading dimension of the local array storing the local
00362           blocks of the distributed matrix. LLD >= MAX(1,LOCr(M)).
00363 
00364   INFO    (output) INTEGER
00365           = 0: successful exit
00366           < 0: if INFO = -i, the i-th argument had an illegal value
00367 
00368   Note
00369   ====
00370 
00371   If the routine can recover from an erroneous input argument, it will
00372   return an acceptable descriptor vector.  For example, if LLD = 0 on
00373   input, DESC(LLD_) will contain the smallest leading dimension
00374   required to store the specified M-by-N distributed matrix, INFO
00375   will be set to -9 in that case.
00376 
00377   ======================================================================
00378 
00379 
00380   ======================================================================
00381 
00382   SUBROUTINE
00383   PDGEADD (TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC)
00384 
00385   Purpose
00386   =======
00387 
00388   PDGEADD  adds a matrix to another
00389 
00390      sub( C ) := beta*sub( C ) + alpha*op( sub( A ) )
00391 
00392   where
00393 
00394      sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1),  and, op( X )  is one  of
00395 
00396      op( X ) = X   or   op( X ) = X'.
00397 
00398   Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1)   if TRANS = 'N',
00399                                A(IA:IA+N-1,JA:JA+M-1)'  if TRANS = 'T',
00400                                A(IA:IA+N-1,JA:JA+M-1)'  if TRANS = 'C'.
00401 
00402   Alpha  and  beta  are scalars, sub( C ) and op( sub( A ) ) are m by n
00403   submatrices.
00404 
00405   Notes
00406   =====
00407 
00408   A description  vector  is associated with each 2D block-cyclicly dis-
00409   tributed matrix.  This  vector  stores  the  information  required to
00410   establish the  mapping  between a  matrix entry and its corresponding
00411   process and memory location.
00412 
00413   In  the  following  comments,   the character _  should  be  read  as
00414   "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
00415   block cyclicly distributed matrix.  Its description vector is DESC_A:
00416 
00417   NOTATION         STORED IN       EXPLANATION
00418   ---------------- --------------- ------------------------------------
00419   DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
00420   CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
00421                                    the NPROW x NPCOL BLACS process grid
00422                                    A  is  distributed over. The context
00423                                    itself  is  global,  but  the handle
00424                                    (the integer value) may vary.
00425   M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
00426                                    ted matrix A, M_A >= 0.
00427   N_A     (global) DESCA[ N_     ] The number of columns in the distri-
00428                                    buted matrix A, N_A >= 0.
00429   IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
00430                                    block of the matrix A, IMB_A > 0.
00431   INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
00432                                    left   block   of   the  matrix   A,
00433                                    INB_A > 0.
00434   MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
00435                                    bute the last  M_A-IMB_A  rows of A,
00436                                    MB_A > 0.
00437   NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
00438                                    bute the last  N_A-INB_A  columns of
00439                                    A, NB_A > 0.
00440   RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
00441                                    row of the matrix  A is distributed,
00442                                    NPROW > RSRC_A >= 0.
00443   CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
00444                                    first column of  A  is  distributed.
00445                                    NPCOL > CSRC_A >= 0.
00446   LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
00447                                    array  storing  the  local blocks of
00448                                    the distributed matrix A,
00449                                    IF( Lc( 1, N_A ) > 0 )
00450                                       LLD_A >= MAX( 1, Lr( 1, M_A ) )
00451                                    ELSE
00452                                       LLD_A >= 1.
00453 
00454   Let K be the number of  rows of a matrix A starting at the global in-
00455   dex IA, i.e., A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
00456   that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
00457   receive if these K rows were distributed over NPROW processes.  If  K
00458   is the number of columns of a matrix  A  starting at the global index
00459   JA, i.e., A( :, JA:JA+K-1 ), Lc( JA, K ) denotes the number  of  co-
00460   lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would  receive if
00461   these K columns were distributed over NPCOL processes.
00462 
00463   The values of Lr() and Lc() may be determined via a call to the func-
00464   tion PB_Cnumroc:
00465   Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
00466   Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
00467 
00468   Arguments
00469   =========
00470 
00471   TRANS   (global input) CHARACTER*1
00472           On entry,  TRANS   specifies the form of op( sub( A ) ) to be
00473           used in the matrix addition as follows:
00474 
00475              TRANS = 'N' or 'n'   op( sub( A ) ) = sub( A ),
00476 
00477              TRANS = 'T' or 't'   op( sub( A ) ) = sub( A )',
00478 
00479              TRANS = 'C' or 'c'   op( sub( A ) ) = sub( A )'.
00480 
00481   M       (global input) INTEGER
00482           On entry,  M  specifies the number of rows of  the  submatrix
00483           sub( C ) and the number of columns of the submatrix sub( A ).
00484           M  must be at least zero.
00485 
00486   N       (global input) INTEGER
00487           On entry, N  specifies the number of columns of the submatrix
00488           sub( C ) and the number of rows of the submatrix sub( A ).  N
00489           must be at least zero.
00490 
00491   ALPHA   (global input) DOUBLE PRECISION
00492           On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
00493           supplied  as  zero  then  the  local entries of  the array  A
00494           corresponding to the entries of the submatrix  sub( A )  need
00495           not be set on input.
00496 
00497   A       (local input) DOUBLE PRECISION array
00498           On entry, A is an array of dimension (LLD_A, Ka), where Ka is
00499           at least Lc( 1, JA+M-1 ).  Before  entry, this array contains
00500           the local entries of the matrix A.
00501 
00502   IA      (global input) INTEGER
00503           On entry, IA  specifies A's global row index, which points to
00504           the beginning of the submatrix sub( A ).
00505 
00506   JA      (global input) INTEGER
00507           On entry, JA  specifies A's global column index, which points
00508           to the beginning of the submatrix sub( A ).
00509 
00510   DESCA   (global and local input) INTEGER array
00511           On entry, DESCA  is an integer array of dimension DLEN_. This
00512           is the array descriptor for the matrix A.
00513 
00514   BETA    (global input) DOUBLE PRECISION
00515           On entry,  BETA  specifies the scalar  beta.   When  BETA  is
00516           supplied  as  zero  then  the  local entries of  the array  C
00517           corresponding to the entries of the submatrix  sub( C )  need
00518           not be set on input.
00519 
00520   C       (local input/local output) DOUBLE PRECISION array
00521           On entry, C is an array of dimension (LLD_C, Kc), where Kc is
00522           at least Lc( 1, JC+N-1 ).  Before  entry, this array contains
00523           the local entries of the matrix C.
00524           On exit, the entries of this array corresponding to the local
00525           entries of the submatrix  sub( C )  are  overwritten  by  the
00526           local entries of the m by n updated submatrix.
00527 
00528   IC      (global input) INTEGER
00529           On entry, IC  specifies C's global row index, which points to
00530           the beginning of the submatrix sub( C ).
00531 
00532   JC      (global input) INTEGER
00533           On entry, JC  specifies C's global column index, which points
00534           to the beginning of the submatrix sub( C ).
00535 
00536   DESCC   (global and local input) INTEGER array
00537           On entry, DESCC  is an integer array of dimension DLEN_. This
00538           is the array descriptor for the matrix C.
00539 
00540   -- Written on April 1, 1998 by
00541      Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
00542 
00543   ======================================================================
00544 
00545 
00546   ======================================================================
00547 
00548   SUBROUTINE PDGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, DESCB, INFO )
00549 
00550   Purpose
00551   =======
00552 
00553   PDGESV computes the solution to a real system of linear equations
00554 
00555                         sub( A ) * X = sub( B ),
00556 
00557   where sub( A ) = A(IA:IA+N-1,JA:JA+N-1) is an N-by-N distributed
00558   matrix and X and sub( B ) = B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS
00559   distributed matrices.
00560 
00561   The LU decomposition with partial pivoting and row interchanges is
00562   used to factor sub( A ) as sub( A ) = P * L * U, where P is a permu-
00563   tation matrix, L is unit lower triangular, and U is upper triangular.
00564   L and U are stored in sub( A ). The factored form of sub( A ) is then
00565   used to solve the system of equations sub( A ) * X = sub( B ).
00566 
00567   Notes
00568   =====
00569 
00570   Each global data object is described by an associated description
00571   vector.  This vector stores the information required to establish
00572   the mapping between an object element and its corresponding process
00573   and memory location.
00574 
00575   Let A be a generic term for any 2D block cyclicly distributed array.
00576   Such a global array has an associated description vector DESCA.
00577   In the following comments, the character _ should be read as
00578   "of the global array".
00579 
00580   NOTATION        STORED IN      EXPLANATION
00581   --------------- -------------- --------------------------------------
00582   DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00583                                  DTYPE_A = 1.
00584   CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00585                                  the BLACS process grid A is distribu-
00586                                  ted over. The context itself is glo-
00587                                  bal, but the handle (the integer
00588                                  value) may vary.
00589   M_A    (global) DESCA( M_ )    The number of rows in the global
00590                                  array A.
00591   N_A    (global) DESCA( N_ )    The number of columns in the global
00592                                  array A.
00593   MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00594                                  the rows of the array.
00595   NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00596                                  the columns of the array.
00597   RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00598                                  row of the array A is distributed.
00599   CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00600                                  first column of the array A is
00601                                  distributed.
00602   LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00603                                  array.  LLD_A >= MAX(1,LOCr(M_A)).
00604 
00605   Let K be the number of rows or columns of a distributed matrix,
00606   and assume that its process grid has dimension p x q.
00607   LOCr( K ) denotes the number of elements of K that a process
00608   would receive if K were distributed over the p processes of its
00609   process column.
00610   Similarly, LOCc( K ) denotes the number of elements of K that a
00611   process would receive if K were distributed over the q processes of
00612   its process row.
00613   The values of LOCr() and LOCc() may be determined via a call to the
00614   ScaLAPACK tool function, NUMROC:
00615           LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00616           LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00617   An upper bound for these quantities may be computed by:
00618           LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00619           LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00620 
00621   This routine requires square block decomposition ( MB_A = NB_A ).
00622 
00623   Arguments
00624   =========
00625 
00626   N       (global input) INTEGER
00627           The number of rows and columns to be operated on, i.e. the
00628           order of the distributed submatrix sub( A ). N >= 0.
00629 
00630   NRHS    (global input) INTEGER
00631           The number of right hand sides, i.e., the number of columns
00632           of the distributed submatrix sub( B ). NRHS >= 0.
00633 
00634   A       (local input/local output) DOUBLE PRECISION pointer into the
00635           local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
00636           On entry, the local pieces of the N-by-N distributed matrix
00637           sub( A ) to be factored. On exit, this array contains the
00638           local pieces of the factors L and U from the factorization
00639           sub( A ) = P*L*U; the unit diagonal elements of L are not
00640           stored.
00641 
00642   IA      (global input) INTEGER
00643           The row index in the global array A indicating the first
00644           row of sub( A ).
00645 
00646   JA      (global input) INTEGER
00647           The column index in the global array A indicating the
00648           first column of sub( A ).
00649 
00650   DESCA   (global and local input) INTEGER array of dimension DLEN_.
00651           The array descriptor for the distributed matrix A.
00652 
00653   IPIV    (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A )
00654           This array contains the pivoting information.
00655           IPIV(i) -> The global row local row i was swapped with.
00656           This array is tied to the distributed matrix A.
00657 
00658   B       (local input/local output) DOUBLE PRECISION pointer into the
00659           local memory to an array of dimension
00660           (LLD_B,LOCc(JB+NRHS-1)).  On entry, the right hand side
00661           distributed matrix sub( B ). On exit, if INFO = 0, sub( B )
00662           is overwritten by the solution distributed matrix X.
00663 
00664   IB      (global input) INTEGER
00665           The row index in the global array B indicating the first
00666           row of sub( B ).
00667 
00668   JB      (global input) INTEGER
00669           The column index in the global array B indicating the
00670           first column of sub( B ).
00671 
00672   DESCB   (global and local input) INTEGER array of dimension DLEN_.
00673           The array descriptor for the distributed matrix B.
00674 
00675   INFO    (global output) INTEGER
00676           = 0:  successful exit
00677           < 0:  If the i-th argument is an array and the j-entry had
00678                 an illegal value, then INFO = -(i*100+j), if the i-th
00679                 argument is a scalar and had an illegal value, then
00680                 INFO = -i.
00681           > 0:  If INFO = K, U(IA+K-1,JA+K-1) is exactly zero.
00682                 The factorization has been completed, but the factor U
00683                 is exactly singular, so the solution could not be
00684                 computed.
00685 
00686   =====================================================================
00687 
00688 
00689   =====================================================================
00690 
00691      SUBROUTINE PDGEMR2D( M, N,
00692                          A, IA, JA, ADESC,
00693                          B, IB, JB, BDESC,
00694                          CTXT)
00695    Purpose
00696    =======
00697 
00698    PDGEMR2D copies a submatrix of A on a submatrix of B.
00699    A and B can have different distributions: they can be on different
00700    processor grids, they can have different blocksizes, the beginning
00701    of the area to be copied can be at different places in A and B.
00702 
00703    The parameters can be confusing when the grids of A and B are
00704    partially or completely disjoint. If a processor calls this
00705    routine but is not in the A context or not in the B context, then
00706    ADESC[CTXT] or BDESC[CTXT], respectively, must be equal to -1 so
00707    that the routine recognises this situation.
00708    To summarize the rule:
00709    - If a processor is in A context, all parameters related to A must be valid.
00710    - If a processor is in B context, all parameters related to B must be valid.
00711    -  ADESC[CTXT] and BDESC[CTXT] must be either valid contexts or equal to -1.
00712    - M and N must be valid for everyone.
00713    - other parameters are not examined.
00714 
00715    Notes
00716    =====
00717 
00718    A description vector is associated with each 2D block-cyclicly dis-
00719    tributed matrix.  This vector stores the information required to
00720    establish the mapping between a matrix entry and its corresponding
00721    process and memory location.
00722 
00723    In the following comments, the character _ should be read as
00724    "of the distributed matrix".  Let A be a generic term for any 2D
00725    block cyclicly distributed matrix.  Its description vector is DESC_A:
00726 
00727   NOTATION        STORED IN      EXPLANATION
00728   --------------- -------------- --------------------------------------
00729   DT_A   (global) DESCA( DT_ )   The descriptor type.
00730   CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00731                                  the BLACS process grid A is distribu-
00732                                  ted over. The context itself is glo-
00733                                  bal, but the handle (the integer
00734                                  value) may vary.
00735   M_A    (global) DESCA( M_ )    The number of rows in the distributed
00736                                  matrix A.
00737   N_A    (global) DESCA( N_ )    The number of columns in the distri-
00738                                  buted matrix A.
00739   MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00740                                  the rows of A.
00741   NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00742                                  the columns of A.
00743   RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00744                                  row of the matrix A is distributed.
00745   CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00746                                  first column of A is distributed.
00747   LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00748                                  array storing the local blocks of the
00749                                  distributed matrix A.
00750                                  LLD_A >= MAX(1,LOCp(M_A)).
00751    Important notice
00752    ================
00753     The parameters of the routine changed in April 1996:
00754     there is a new last argument. It must be a context encompassing
00755     all processors involved in the initial and final distributions.
00756 
00757     Be aware that all processors included in this
00758     context must call the redistribution routine.
00759 
00760    Parameters
00761    ==========
00762 
00763 
00764    M        (input) INTEGER.
00765             On entry, M specifies the number of rows of the
00766             submatrix to be copied.  M must be at least zero.
00767             Unchanged on exit.
00768 
00769    N        (input) INTEGER.
00770             On entry, N specifies the number of columns of the
00771             submatrix to be copied.  N must be at least zero.
00772             Unchanged on exit.
00773 
00774    A        (input) DOUBLE PRECISION
00775             On entry, the source matrix.
00776             Unchanged on exit.
00777 
00778    IA,JA    (input) INTEGER
00779             On entry, the coordinates of the beginning of the submatrix
00780             of A to copy.
00781             1 <= IA <= M_A - M + 1, 1 <= JA <= N_A - N + 1.
00782             Unchanged on exit.
00783 
00784    ADESC    (input) A description vector (see Notes above)
00785             If the current processor is not part of the context of A,
00786             then ADESC[CTXT] must be equal to -1.
00787 
00788 
00789    B        (output) DOUBLE PRECISION
00790             On entry, the destination matrix.
00791             The portion corresponding to the defined submatrix is updated.
00792 
00793    IB,JB    (input) INTEGER
00794             On entry, the coordinates of the beginning of the submatrix
00795             of B that will be updated.
00796             1 <= IB <= M_B - M + 1, 1 <= JB <= N_B - N + 1.
00797             Unchanged on exit.
00798 
00799    BDESC    (input) B description vector (see Notes above)
00800             For processors not part of the context of B
00801             BDESC[CTXT] must be equal to -1.
00802 
00803    CTXT     (input) a context encompassing at least all processors
00804                included in either the A context or the B context
00805 
00806   Memory requirement :
00807   ====================
00808 
00809   for the processors belonging to grid 0, one buffer of size block 0
00810   and for the processors belonging to grid 1, also one buffer of size
00811   block 1.
00812 
00813   =====================================================================
00814 
00815 }}} */
00816 
00817 // vim:fdm=marker