SuperLU_DIST  4.0
superlu_dist on CPU and GPU clusters
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Classes | Macros | Typedefs | Functions
superlu_defs.h File Reference

Definitions which are precision-neutral. More...

#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include "superlu_enum_consts.h"
#include "Cnames.h"
#include "supermatrix.h"
#include "util_dist.h"
#include "psymbfact.h"

Go to the source code of this file.

Classes

struct  superlu_scope_t
 
struct  gridinfo_t
 
struct  Glu_persist_t
 
struct  Glu_freeable_t
 
struct  ScalePermstruct_t
 
struct  superlu_options_t
 
struct  mem_usage_t
 
struct  Ublock_info_t
 
struct  Remain_info_t
 
struct  etree_node
 
struct  pair
 

Macros

#define mpi_int_t   MPI_INT
 
#define BC_HEADER   2
 
#define LB_DESCRIPTOR   2
 
#define BR_HEADER   3
 
#define UB_DESCRIPTOR   2
 
#define NBUFFERS   5
 
#define SLU_MPI_TAG(id, num)   ( (5*(num)+id) % tag_ub )
 
#define NTAGS   INT_MAX
 
#define UjROW   10
 
#define UkSUB   11
 
#define UkVAL   12
 
#define LkSUB   13
 
#define LkVAL   14
 
#define LkkDIAG   15
 
#define XK_H   2 /* The header preceeding each X block. */
 
#define LSUM_H   2 /* The header preceeding each MOD block. */
 
#define GSUM   20
 
#define Xk   21
 
#define Yk   22
 
#define LSUM   23
 
#define COMM_ALL   100
 
#define COMM_COLUMN   101
 
#define COMM_ROW   102
 
#define SUPER_LINEAR   11
 
#define SUPER_BLOCK   12
 
#define NO_MARKER   3
 
#define IAM(comm)   { int rank; MPI_Comm_rank ( comm, &rank ); rank};
 
#define MYROW(iam, grid)   ( (iam) / grid->npcol )
 
#define MYCOL(iam, grid)   ( (iam) % grid->npcol )
 
#define BlockNum(i)   ( supno[i] )
 
#define FstBlockC(bnum)   ( xsup[bnum] )
 
#define SuperSize(bnum)   ( xsup[bnum+1]-xsup[bnum] )
 
#define LBi(bnum, grid)   ( (bnum)/grid->nprow )/* Global to local block rowwise */
 
#define LBj(bnum, grid)   ( (bnum)/grid->npcol )/* Global to local block columnwise*/
 
#define PROW(bnum, grid)   ( (bnum) % grid->nprow )
 
#define PCOL(bnum, grid)   ( (bnum) % grid->npcol )
 
#define PNUM(i, j, grid)   ( (i)*grid->npcol + j ) /* Process number at coord(i,j) */
 
#define CEILING(a, b)   ( ((a)%(b)) ? ((a)/(b) + 1) : ((a)/(b)) )
 
#define RHS_ITERATE(i)   for (i = 0; i < nrhs; ++i)
 
#define X_BLK(i)   ilsum[i] * nrhs + (i+1) * XK_H
 
#define LSUM_BLK(i)   ilsum[i] * nrhs + (i+1) * LSUM_H
 
#define SuperLU_timer_   SuperLU_timer_dist_
 
#define LOG2(x)   (log10((double) x) / log10(2.0))
 
#define VT_TRACEON
 
#define VT_TRACEOFF
 

Typedefs

typedef int int_t
 

Functions

void set_default_options_dist (superlu_options_t *)
 Set the default values for the options argument. More...
 
void superlu_gridinit (MPI_Comm, int_t, int_t, gridinfo_t *)
 All processes in the MPI communicator must call this routine. More...
 
void superlu_gridmap (MPI_Comm, int_t, int_t, int_t[], int_t, gridinfo_t *)
 All processes in the MPI communicator must call this routine. More...
 
void superlu_gridexit (gridinfo_t *)
 
void print_options_dist (superlu_options_t *)
 Print the options setting. More...
 
void print_sp_ienv_dist (superlu_options_t *)
 Print the blocking parameters. More...
 
void Destroy_CompCol_Matrix_dist (SuperMatrix *)
 
void Destroy_SuperNode_Matrix_dist (SuperMatrix *)
 
void Destroy_SuperMatrix_Store_dist (SuperMatrix *)
 Deallocate the structure pointing to the actual storage of the matrix. More...
 
void Destroy_CompCol_Permuted_dist (SuperMatrix *)
 A is of type Stype==NCP. More...
 
void Destroy_CompRowLoc_Matrix_dist (SuperMatrix *)
 
void Destroy_CompRow_Matrix_dist (SuperMatrix *)
 
void sp_colorder (superlu_options_t *, SuperMatrix *, int_t *, int_t *, SuperMatrix *)
 
int sp_symetree_dist (int_t *, int_t *, int_t *, int_t, int_t *)
 Symmetric elimination tree. More...
 
int sp_coletree_dist (int_t *, int_t *, int_t *, int_t, int_t, int_t *)
 Nonsymmetric elimination tree. More...
 
void get_perm_c_dist (int_t, int_t, SuperMatrix *, int_t *)
 
void at_plus_a_dist (const int_t, const int_t, int_t *, int_t *, int_t *, int_t **, int_t **)
 
int genmmd_dist_ (int_t *, int_t *, int_t *a, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *)
 
void bcast_tree (void *, int, MPI_Datatype, int, int, gridinfo_t *, int, int *)
 
int_t symbfact (superlu_options_t *, int, SuperMatrix *, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *)
 
int_t symbfact_SubInit (fact_t, void *, int_t, int_t, int_t, int_t, Glu_persist_t *, Glu_freeable_t *)
 
int_t symbfact_SubXpand (int_t, int_t, int_t, MemType, int_t *, Glu_freeable_t *)
 
int_t symbfact_SubFree (Glu_freeable_t *)
 
void countnz_dist (const int_t, int_t *, long long int *, long long int *, Glu_persist_t *, Glu_freeable_t *)
 
long long int fixupL_dist (const int_t, const int_t *, Glu_persist_t *, Glu_freeable_t *)
 
int_tTreePostorder_dist (int_t, int_t *)
 
float slamch_ (const char *)
 
double dlamch_ (const char *)
 
void * superlu_malloc_dist (size_t)
 
void superlu_free_dist (void *)
 
int_tintMalloc_dist (int_t)
 
int_tintCalloc_dist (int_t)
 
int_t mc64id_dist (int_t *)
 
double SuperLU_timer_ ()
 
void superlu_abort_and_exit_dist (char *)
 
int_t sp_ienv_dist (int_t)
 

Purpose

More...
 
int lsame_ (char *, char *)
 
int xerbla_ (char *, int *)
 
void ifill_dist (int_t *, int_t, int_t)
 Fills an integer array with a given value. More...
 
void super_stats_dist (int_t, int_t *)
 
void ScalePermstructInit (const int_t, const int_t, ScalePermstruct_t *)
 Allocate storage in ScalePermstruct. More...
 
void ScalePermstructFree (ScalePermstruct_t *)
 Deallocate ScalePermstruct. More...
 
void get_diag_procs (int_t, Glu_persist_t *, gridinfo_t *, int_t *, int_t **, int_t **)
 
int_t QuerySpace_dist (int_t, int_t, Glu_freeable_t *, mem_usage_t *)
 
void pxerbla (char *, gridinfo_t *, int_t)
 
void PStatInit (SuperLUStat_t *)
 
void PStatFree (SuperLUStat_t *)
 
void PStatPrint (superlu_options_t *, SuperLUStat_t *, gridinfo_t *)
 
void log_memory (long long, SuperLUStat_t *)
 
void print_memorylog (SuperLUStat_t *, char *)
 
float symbfact_dist (int, int, SuperMatrix *, int_t *, int_t *, int_t *, int_t *, Pslu_freeable_t *, MPI_Comm *, MPI_Comm *, mem_usage_t *)
 
float get_perm_c_parmetis (SuperMatrix *, int_t *, int_t *, int, int, int_t **, int_t **, gridinfo_t *, MPI_Comm *)
 
int_t psymbfact_LUXpandMem (int_t, int_t, int_t, int_t, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *)
 
int_t psymbfact_LUXpand (int_t, int_t, int_t, int_t, int_t *, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *)
 
int_t psymbfact_LUXpand_RL (int_t, int_t, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *)
 
int_t psymbfact_prLUXpand (int_t, int_t, int, Llu_symbfact_t *, psymbfact_stat_t *)
 
void print_panel_seg_dist (int_t, int_t, int_t, int_t, int_t *, int_t *)
 Diagnostic print of segment info after panel_dfs(). More...
 
void check_repfnz_dist (int_t, int_t, int_t, int_t *)
 Check whether repfnz[] == EMPTY after reset. More...
 
int_t CheckZeroDiagonal (int_t, int_t *, int_t *, int_t *)
 
void PrintDouble5 (char *, int_t, double *)
 
void PrintInt10 (char *, int_t, int_t *)
 
int file_PrintInt10 (FILE *, char *, int, int *)
 
int file_PrintLong10 (FILE *, char *, int_t, int_t *)
 

Detailed Description

Definitions which are precision-neutral.

– Distributed SuperLU routine (version 2.2) –
Lawrence Berkeley National Lab, Univ. of California Berkeley.
November 1, 2007
Feburary 20, 2008

Macro Definition Documentation

#define BC_HEADER   2
#define BlockNum (   i)    ( supno[i] )
#define BR_HEADER   3
#define CEILING (   a,
 
)    ( ((a)%(b)) ? ((a)/(b) + 1) : ((a)/(b)) )
#define COMM_ALL   100
#define COMM_COLUMN   101
#define COMM_ROW   102
#define FstBlockC (   bnum)    ( xsup[bnum] )
#define GSUM   20
#define IAM (   comm)    { int rank; MPI_Comm_rank ( comm, &rank ); rank};
#define LB_DESCRIPTOR   2
#define LBi (   bnum,
  grid 
)    ( (bnum)/grid->nprow )/* Global to local block rowwise */
#define LBj (   bnum,
  grid 
)    ( (bnum)/grid->npcol )/* Global to local block columnwise*/
#define LkkDIAG   15
#define LkSUB   13
#define LkVAL   14
#define LOG2 (   x)    (log10((double) x) / log10(2.0))
#define LSUM   23
#define LSUM_BLK (   i)    ilsum[i] * nrhs + (i+1) * LSUM_H
#define LSUM_H   2 /* The header preceeding each MOD block. */
#define mpi_int_t   MPI_INT
#define MYCOL (   iam,
  grid 
)    ( (iam) % grid->npcol )
#define MYROW (   iam,
  grid 
)    ( (iam) / grid->npcol )
#define NBUFFERS   5
#define NO_MARKER   3
#define NTAGS   INT_MAX
#define PCOL (   bnum,
  grid 
)    ( (bnum) % grid->npcol )
#define PNUM (   i,
  j,
  grid 
)    ( (i)*grid->npcol + j ) /* Process number at coord(i,j) */
#define PROW (   bnum,
  grid 
)    ( (bnum) % grid->nprow )
#define RHS_ITERATE (   i)    for (i = 0; i < nrhs; ++i)
#define SLU_MPI_TAG (   id,
  num 
)    ( (5*(num)+id) % tag_ub )
#define SUPER_BLOCK   12
#define SUPER_LINEAR   11
#define SuperLU_timer_   SuperLU_timer_dist_
#define SuperSize (   bnum)    ( xsup[bnum+1]-xsup[bnum] )
#define UB_DESCRIPTOR   2
#define UjROW   10
#define UkSUB   11
#define UkVAL   12
#define VT_TRACEOFF
#define VT_TRACEON
#define X_BLK (   i)    ilsum[i] * nrhs + (i+1) * XK_H
#define Xk   21
#define XK_H   2 /* The header preceeding each X block. */
#define Yk   22

Typedef Documentation

typedef int int_t

Function Documentation

void at_plus_a_dist ( const int_t  n,
const int_t  nz,
int_t colptr,
int_t rowind,
int_t bnz,
int_t **  b_colptr,
int_t **  b_rowind 
)

Purpose

Form the structure of A'+A. A is an n-by-n matrix in column oriented
format represented by (colptr, rowind). The output A'+A is in column
oriented format (symmetrically, also row oriented), represented by
(b_colptr, b_rowind).
void bcast_tree ( void *  buf,
int  count,
MPI_Datatype  dtype,
int  root,
int  tag,
gridinfo_t grid,
int  scope,
int *  recvcnt 
)

Purpose

  Broadcast an array of dtype numbers. The communication pattern
  is a tree with number of branches equal to NBRANCHES.
  The process ranks are between 0 and Np-1.
  The following two pairs of graphs give different ways of viewing the same
  algorithm.  The first pair shows the trees as they should be visualized
  when examining the algorithm.  The second pair are isomorphic graphs of
  of the first, which show the actual pattern of data movement.
  Note that a tree broadcast with NBRANCHES = 2 is isomorphic with a
  hypercube broadcast (however, it does not require the nodes be a
  power of two to work).
   TREE BROADCAST, NBRANCHES = 2     *    TREE BROADCAST, NBRANCHES = 3
    root=2
i=4   &______________                *
      |              \               *       root=2
i=2   &______         &______        * i=3     &______________________
      |      \        |      \       *         |          \           \
i=1   &__     &__     &__     &__    * i=1     &______     &______     &__
      |  \    |  \    |  \    |  \   *         |  \   \    |  \   \    |  \
      2   3   4   5   6   7   0   1  *         2   3   4   5   6   7   0   1
     ISOMORPHIC GRAPHS OF ABOVE, SHOWN IN MORE FAMILIAR TERMS:

           2                                           2
  _________|_________                       ___________|____________
 /         |         \                     /           |      |     \
6          4          3                   5            0      3      4

/ \ | / \ | 0 7 5 6 7 1 | 1

Arguments

scope
void check_repfnz_dist ( int_t  ,
int_t  ,
int_t  ,
int_t  
)

Check whether repfnz[] == EMPTY after reset.

int_t CheckZeroDiagonal ( int_t  ,
int_t ,
int_t ,
int_t  
)
void countnz_dist ( const int_t  n,
int_t xprune,
long long int *  nnzL,
long long int *  nnzU,
Glu_persist_t Glu_persist,
Glu_freeable_t Glu_freeable 
)
Count the total number of nonzeros in factors L and U,  and in the 
symmetrically reduced L. 
void Destroy_CompCol_Matrix_dist ( SuperMatrix )
void Destroy_CompCol_Permuted_dist ( SuperMatrix )

A is of type Stype==NCP.

void Destroy_CompRow_Matrix_dist ( SuperMatrix )
void Destroy_CompRowLoc_Matrix_dist ( SuperMatrix )
void Destroy_SuperMatrix_Store_dist ( SuperMatrix )

Deallocate the structure pointing to the actual storage of the matrix.

void Destroy_SuperNode_Matrix_dist ( SuperMatrix )
double dlamch_ ( const char *  )
int file_PrintInt10 ( FILE *  ,
char *  ,
int  ,
int *   
)
int file_PrintLong10 ( FILE *  ,
char *  ,
int_t  ,
int_t  
)
long long int fixupL_dist ( const int_t  n,
const int_t perm_r,
Glu_persist_t Glu_persist,
Glu_freeable_t Glu_freeable 
)
Fix up the data storage lsub for L-subscripts. It removes the subscript
sets for structural pruning,    and applies permuation to the remaining
subscripts.
int genmmd_dist_ ( int_t ,
int_t ,
int_t a,
int_t ,
int_t ,
int_t ,
int_t ,
int_t ,
int_t ,
int_t ,
int_t ,
int_t  
)
void get_diag_procs ( int_t  ,
Glu_persist_t ,
gridinfo_t ,
int_t ,
int_t **  ,
int_t **   
)
void get_perm_c_dist ( int_t  pnum,
int_t  ispec,
SuperMatrix A,
int_t perm_c 
)

Purpose

GET_PERM_C_DIST obtains a permutation matrix Pc, by applying the multiple
minimum degree ordering code by Joseph Liu to matrix A'*A or A+A',
or using approximate minimum degree column ordering by Davis et. al.
The LU factorization of A*Pc tends to have less fill than the LU 
factorization of A.

Arguments

ispec   (input) colperm_t
        Specifies what type of column permutation to use to reduce fill.
        = NATURAL: natural ordering (i.e., Pc = I)
        = MMD_AT_PLUS_A: minimum degree ordering on structure of A'+A
        = MMD_ATA: minimum degree ordering on structure of A'*A
        = METIS_AT_PLUS_A: MeTis on A'+A
A       (input) SuperMatrix*
        Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
        of the linear equations is A->nrow. Currently, the type of A 
        can be: Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
        In the future, more general A can be handled.
perm_c  (output) int*
    Column permutation vector of size A->ncol, which defines the 
        permutation matrix Pc; perm_c[i] = j means column i of A is 
        in position j in A*Pc.
float get_perm_c_parmetis ( SuperMatrix A,
int_t perm_r,
int_t perm_c,
int  nprocs_i,
int  noDomains,
int_t **  sizes,
int_t **  fstVtxSep,
gridinfo_t grid,
MPI_Comm *  metis_comm 
)

Purpose

GET_PERM_C_PARMETIS obtains a permutation matrix Pc, by applying a
graph partitioning algorithm to the symmetrized graph A+A'.  The
multilevel graph partitioning algorithm used is the
ParMETIS_V3_NodeND routine available in the parallel graph
partitioning package parMETIS.
The number of independent sub-domains noDomains computed by this
algorithm has to be a power of 2.  Hence noDomains is the larger
number power of 2 that is smaller than nprocs_i, where nprocs_i = nprow
* npcol is the number of processors used in SuperLU_DIST.

Arguments

A       (input) SuperMatrix*
        Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
        of the linear equations is A->nrow.  Matrix A is distributed
        in NRformat_loc format.
perm_r  (input) int_t*
        Row permutation vector of size A->nrow, which defines the 
        permutation matrix Pr; perm_r[i] = j means row i of A is in 
        position j in Pr*A.
perm_c  (output) int_t*
    Column permutation vector of size A->ncol, which defines the 
        permutation matrix Pc; perm_c[i] = j means column i of A is 
        in position j in A*Pc.
nprocs_i (input) int*
        Number of processors the input matrix is distributed on in a block
        row format.  It corresponds to number of processors used in
        SuperLU_DIST.
noDomains (input) int*, must be power of 2
        Number of independent domains to be computed by the graph
        partitioning algorithm.  ( noDomains <= nprocs_i )
sizes   (output) int_t**, of size 2 * noDomains
        Returns pointer to an array containing the number of nodes
        for each sub-domain and each separator.  Separators are stored 
        from left to right.
        Memory for the array is allocated in this routine.
fstVtxSep (output) int_t**, of size 2 * noDomains
        Returns pointer to an array containing first node for each
        sub-domain and each separator.
        Memory for the array is allocated in this routine.

Return value

  < 0, number of bytes allocated on return from the symbolic factorization.

0, number of bytes allocated when out of memory.

void ifill_dist ( int_t ,
int_t  ,
int_t   
)

Fills an integer array with a given value.

int_t* intCalloc_dist ( int_t  )
int_t* intMalloc_dist ( int_t  )
void log_memory ( long  long,
SuperLUStat_t  
)
int lsame_ ( char *  ca,
char *  cb 
)

Purpose

    LSAME returns .TRUE. if CA is the same letter as CB regardless of case.

Arguments

    CA      (input) CHARACTER*1   
    CB      (input) CHARACTER*1   
            CA and CB specify the single characters to be compared.   


int_t mc64id_dist ( int_t )
void print_memorylog ( SuperLUStat_t ,
char *   
)
void print_options_dist ( superlu_options_t )

Print the options setting.

void print_panel_seg_dist ( int_t  ,
int_t  ,
int_t  ,
int_t  ,
int_t ,
int_t  
)

Diagnostic print of segment info after panel_dfs().

void print_sp_ienv_dist ( superlu_options_t )

Print the blocking parameters.

void PrintDouble5 ( char *  ,
int_t  ,
double *   
)
void PrintInt10 ( char *  ,
int_t  ,
int_t  
)
void PStatFree ( SuperLUStat_t )
void PStatInit ( SuperLUStat_t )
void PStatPrint ( superlu_options_t ,
SuperLUStat_t ,
gridinfo_t  
)
int_t psymbfact_LUXpand ( int_t  iam,
int_t  n,
int_t  fstVtxLvl_loc,
int_t  vtxXp,
int_t p_next,
int_t  min_new_len,
int_t  mem_type,
int_t  rout_type,
int_t  free_prev_mem,
Pslu_freeable_t Pslu_freeable,
Llu_symbfact_t Llu_symbfact,
vtcsInfo_symbfact_t VInfo,
psymbfact_stat_t PS 
)
Expand the data structures for L and U during the factorization.
Return value: SUCCES_RET - successful return
              ERROR_RET - error due to a memory alocation failure
int_t psymbfact_LUXpand_RL ( int_t  iam,
int_t  n,
int_t  vtxXp,
int_t  next,
int_t  len_texp,
int_t  mem_type,
Pslu_freeable_t Pslu_freeable,
Llu_symbfact_t Llu_symbfact,
vtcsInfo_symbfact_t VInfo,
psymbfact_stat_t PS 
)
Expand the data structures for L and U during the factorization.
Return value:   0 - successful return
              > 0 - number of bytes allocated when run out of space
int_t psymbfact_LUXpandMem ( int_t  iam,
int_t  n,
int_t  vtxXp,
int_t  next,
int_t  min_new_len,
int_t  mem_type,
int_t  rout_type,
int_t  free_prev_mem,
Pslu_freeable_t Pslu_freeable,
Llu_symbfact_t Llu_symbfact,
vtcsInfo_symbfact_t VInfo,
psymbfact_stat_t PS 
)
Expand the data structures for L and U during the factorization.
Return value:   0 - successful return
              > 0 - number of bytes allocated when run out of space
int_t psymbfact_prLUXpand ( int_t  iam,
int_t  min_new_len,
int  mem_type,
Llu_symbfact_t Llu_symbfact,
psymbfact_stat_t PS 
)
Expand the data structures for L and U pruned during the factorization.
Return value: SUCCES_RET - successful return
              ERROR_RET - error when run out of space
void pxerbla ( char *  ,
gridinfo_t ,
int_t   
)
int_t QuerySpace_dist ( int_t  n,
int_t  lsub_size,
Glu_freeable_t Glu_freeable,
mem_usage_t mem_usage 
)
mem_usage consists of the following fields:

  • for_lu (float) The amount of space used in bytes for the L data structures.
  • total (float) The amount of space needed in bytes to perform factorization.
  • expansions (int) Number of memory expansions during the LU factorization.
void ScalePermstructFree ( ScalePermstruct_t )

Deallocate ScalePermstruct.

void ScalePermstructInit ( const int_t  ,
const int_t  ,
ScalePermstruct_t  
)

Allocate storage in ScalePermstruct.

void set_default_options_dist ( superlu_options_t )

Set the default values for the options argument.


float slamch_ ( const char *  )
int sp_coletree_dist ( int_t acolst,
int_t acolend,
int_t arow,
int_t  nr,
int_t  nc,
int_t parent 
)

Nonsymmetric elimination tree.

     Find the elimination tree for A'*A.
     This uses something similar to Liu's algorithm. 
     It runs in time O(nz(A)*log n) and does not form A'*A.
     Input:
       Sparse matrix A.  Numeric values are ignored, so any
       explicit zeros are treated as nonzero.
     Output:
       Integer array of parents representing the elimination
       tree of the symbolic product A'*A.  Each vertex is a
       column of A, and nc means a root of the elimination forest.
     John R. Gilbert, Xerox, 10 Dec 1990
     Based on code by JRG dated 1987, 1988, and 1990.
void sp_colorder ( superlu_options_t options,
SuperMatrix A,
int_t perm_c,
int_t etree,
SuperMatrix AC 
)

Purpose

sp_colorder() permutes the columns of the original matrix. It performs
the following steps:
   1. Apply column permutation perm_c[] to A's column pointers to form AC;
   2. If options->Fact = DOFACT, then
      (1) Compute column elimination tree etree[] of AC'AC;
      (2) Post order etree[] to get a postordered elimination tree etree[],
          and a postorder permutation post[];
      (3) Apply post[] permutation to columns of AC;
      (4) Overwrite perm_c[] with the product perm_c * post.

Arguments

options (input) superlu_options_t*
        Specifies whether or not the elimination tree will be re-used.
        If options->Fact == DOFACT, this means first time factor A, 
        etree is computed and output.
        Otherwise, re-factor A, etree is input, unchanged on exit.
A       (input) SuperMatrix*
        Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
        of the linear equations is A->nrow. Currently, the type of A can be:
        Stype = SLU_NC or SLU_NCP; Dtype = SLU__D; Mtype = SLU_GE.
        In the future, more general A can be handled.
perm_c  (input/output) int*
    Column permutation vector of size A->ncol, which defines the 
        permutation matrix Pc; perm_c[i] = j means column i of A is 
        in position j in A*Pc.
        If options->Fact == DOFACT, perm_c is both input and output.
        On output, it is changed according to a postorder of etree.
        Otherwise, perm_c is input.
etree   (input/output) int*
        Elimination tree of Pc*(A'+A)*Pc', dimension A->ncol.
        If options->Fact == DOFACT, etree is an output argument,
        otherwise it is an input argument.
        Note: etree is a vector of parent pointers for a forest whose
        vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
AC      (output) SuperMatrix*
        The resulting matrix after applied the column permutation
        perm_c[] to matrix A. The type of AC can be:
        Stype = SLU_NCP; Dtype = A->Dtype; Mtype = SLU_GE.
int_t sp_ienv_dist ( int_t  ispec)

Purpose

sp_ienv_dist() is inquired to choose machine-dependent parameters for the
local environment. See ISPEC for a description of the parameters.   

This version provides a set of parameters which should give good,   
but not optimal, performance on many of the currently available   
computers.  Users are encouraged to modify this subroutine to set   
the tuning parameters for their particular machine using the option   
and problem size information in the arguments.   

Arguments   
=========   

ISPEC   (input) int
        Specifies the parameter to be returned as the value of SP_IENV_DIST.   
        = 1: the panel size w; a panel consists of w consecutive
         columns of matrix A in the process of Gaussian elimination.
     The best value depends on machine's cache characters.
        = 2: the relaxation parameter relax; if the number of
         nodes (columns) in a subtree of the elimination tree is less
     than relax, this subtree is considered as one supernode,
     regardless of the their row structures.
        = 3: the maximum size for a supernode, which must be greater
             than or equal to relaxation parameter (see case 2);
    = 4: the minimum row dimension for 2-D blocking to be used;
    = 5: the minimum column dimension for 2-D blocking to be used;
    = 6: the estimated fills factor for the adjacency structures 
         of L and U, compared with A;

(SP_IENV_DIST) (output) int >= 0: the value of the parameter specified by ISPEC < 0: if SP_IENV_DIST = -k, the k-th argument had an illegal value.


Purpose

    sp_ienv_dist() is inquired to choose machine-dependent parameters for the
    local environment. See ISPEC for a description of the parameters.
    This version provides a set of parameters which should give good,   
    but not optimal, performance on many of the currently available   
    computers.  Users are encouraged to modify this subroutine to set   
    the tuning parameters for their particular machine using the option   
    and problem size information in the arguments.

Arguments

    ISPEC   (input) int
            Specifies the parameter to be returned as the value of SP_IENV_DIST.
            = 1: the panel size w; a panel consists of w consecutive
             columns of matrix A in the process of Gaussian elimination.
         The best value depends on machine's cache characters.
            = 2: the relaxation parameter relax; if the number of
             nodes (columns) in a subtree of the elimination tree is less
         than relax, this subtree is considered as one supernode,
         regardless of the their row structures.
            = 3: the maximum size for a supernode, which must be greater
                 than or equal to relaxation parameter (see case 2);
        = 4: the minimum row dimension for 2-D blocking to be used;
        = 5: the minimum column dimension for 2-D blocking to be used;
        = 6: the estimated fills factor for the adjacency structures 
             of L and U, compared with A;
   (SP_IENV_DIST) (output) int
            >= 0: the value of the parameter specified by ISPEC   
            < 0:  if SP_IENV_DIST = -k, the k-th argument had an illegal value.


int sp_symetree_dist ( int_t acolst,
int_t acolend,
int_t arow,
int_t  n,
int_t parent 
)

Symmetric elimination tree.

     p = spsymetree (A);
     Find the elimination tree for symmetric matrix A.
     This uses Liu's algorithm, and runs in time O(nz*log n).
     Input:
       Square sparse matrix A.  No check is made for symmetry;
       elements below and on the diagonal are ignored.
       Numeric values are ignored, so any explicit zeros are 
       treated as nonzero.
     Output:
       Integer array of parents representing the etree, with n
       meaning a root of the elimination forest.
     Note:  
       This routine uses only the upper triangle, while sparse
       Cholesky (as in spchol.c) uses only the lower.  Matlab's
       dense Cholesky uses only the upper.  This routine could
       be modified to use the lower triangle either by transposing
       the matrix or by traversing it by rows with auxiliary
       pointer and link arrays.
     John R. Gilbert, Xerox, 10 Dec 1990
     Based on code by JRG dated 1987, 1988, and 1990.
     Modified by X.S. Li, November 1999.
void super_stats_dist ( int_t  ,
int_t  
)
void superlu_abort_and_exit_dist ( char *  )
void superlu_free_dist ( void *  )
void superlu_gridexit ( gridinfo_t )
void superlu_gridinit ( MPI_Comm  ,
int_t  ,
int_t  ,
gridinfo_t  
)

All processes in the MPI communicator must call this routine.

void superlu_gridmap ( MPI_Comm  ,
int_t  ,
int_t  ,
int_t  [],
int_t  ,
gridinfo_t  
)

All processes in the MPI communicator must call this routine.

void* superlu_malloc_dist ( size_t  )
double SuperLU_timer_ ( )
int_t symbfact ( superlu_options_t options,
int  pnum,
SuperMatrix A,
int_t perm_c,
int_t etree,
Glu_persist_t Glu_persist,
Glu_freeable_t Glu_freeable 
)

Purpose

  symbfact() performs a symbolic factorization on matrix A and sets up 
  the nonzero data structures which are suitable for supernodal Gaussian
  elimination with no pivoting (GENP). This routine features:
       o depth-first search (DFS)
       o supernodes
       o symmetric structure pruning

Return value

  < 0, number of bytes needed for LSUB.
  = 0, matrix dimension is 1.

0, number of bytes allocated when out of memory.

float symbfact_dist ( int  ,
int  ,
SuperMatrix ,
int_t ,
int_t ,
int_t ,
int_t ,
Pslu_freeable_t ,
MPI_Comm *  ,
MPI_Comm *  ,
mem_usage_t  
)
 
Purpose
=======
  symbfact_dist() performs symbolic factorization of matrix A suitable
  for performing the supernodal Gaussian elimination with no pivoting (GEPP). 
  This routine computes the structure of one column of L and one row of U 
  at a time.  It uses:
       o distributed input matrix
       o supernodes
       o symmetric structure pruning
Arguments
=========
nprocs_num (input) int
        Number of processors SuperLU_DIST is executed on, and the input 
        matrix is distributed on.
nprocs_symb (input) int
        Number of processors on which the symbolic factorization is
        performed.  It is equal to the number of independent domains
        idenfied in the graph partitioning algorithm executed
        previously and has to be a power of 2.  It corresponds to
        number of leaves in the separator tree.
A       (input) SuperMatrix*
        Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The
        number of the linear equations is A->nrow.  Matrix A is
        distributed in NRformat_loc format.
        Matrix A is not yet permuted by perm_c.
perm_c  (input) int_t*
    Column permutation vector of size A->ncol, which defines the 
        permutation matrix Pc; perm_c[i] = j means column i of A is 
        in position j in A*Pc.
perm_r  (input) int_t*
    Row permutation vector of size A->nrow, which defines the 
        permutation matrix Pr; perm_r[i] = j means column i of A is 
        in position j in Pr*A.
sizes   (input) int_t*
        Contains the number of vertices in each separator.
fstVtxSep (input) int_t*
        Contains first vertex for each separator.
Pslu_freeable (output) Pslu_freeable_t*
        Returns the local L and U structure, and global to local
        information on the indexing of the vertices.  Contains all
        the information necessary for performing the data
        distribution towards the numeric factorization.
num_comm (input) MPI_Comm*
        Communicator for numerical factorization
symb_comm (input) MPI_Comm*
        Communicator for symbolic factorization
symb_mem_usage (input) mem_usage_t *
        Statistics on memory usage.
Return value
============
  < 0, number of bytes allocated on return from the symbolic factorization.
  > 0, number of bytes allocated when out of memory.
Sketch of the algorithm
=======================
 Distrbute the vertices on the processors using a subtree to
 subcube algorithm.
 Redistribute the structure of the input matrix A according to the
 subtree to subcube computed previously for the symbolic
 factorization routine.  This implies in particular a distribution
 from nprocs_num processors to nprocs_symb processors.
 Perform symbolic factorization guided by the separator tree provided by
 a graph partitioning algorithm.  The symbolic factorization uses a 
 combined left-looking, right-looking approach. 
int_t symbfact_SubFree ( Glu_freeable_t Glu_freeable)
Deallocate storage of the data structures common to symbolic
factorization routines.
int_t symbfact_SubInit ( fact_t  fact,
void *  work,
int_t  lwork,
int_t  m,
int_t  n,
int_t  annz,
Glu_persist_t Glu_persist,
Glu_freeable_t Glu_freeable 
)
Allocate storage for the data structures common to symbolic factorization
routines. For those unpredictable size, make a guess as FILL * nnz(A).
Return value:
    If lwork = -1, return the estimated amount of space required, plus n;
    otherwise, return the amount of space actually allocated when
    memory allocation failure occurred.
int_t symbfact_SubXpand ( int_t  n,
int_t  jcol,
int_t  next,
MemType  mem_type,
int_t maxlen,
Glu_freeable_t Glu_freeable 
)
Expand the data structures for L and U during the factorization.
Return value:   0 - successful return
              > 0 - number of bytes allocated when run out of space
int_t* TreePostorder_dist ( int_t  ,
int_t  
)
int xerbla_ ( char *  srname,
int *  info 
)

Purpose

    XERBLA  is an error handler for the LAPACK routines.   
    It is called by an LAPACK routine if an input parameter has an   
    invalid value.  A message is printed and execution stops.
    Installers may consider modifying the STOP statement in order to   
    call system-specific exception-handling facilities.

Arguments

    SRNAME  (input) CHARACTER*6   
            The name of the routine which called XERBLA.
    INFO    (input) INT   
            The position of the invalid parameter in the parameter list
            of the calling routine.