SuperLU Distributed 8.2.1
Distributed memory sparse direct solver
psymbfact.h
Go to the documentation of this file.
1
21#ifndef __SUPERLU_DIST_PSYMBFACT /* allow multiple inclusions */
22#define __SUPERLU_DIST_PSYMBFACT
23
24/*
25 * File name: psymbfact.h
26 * Purpose: Definitions for parallel symbolic factorization routine
27 */
28
57typedef struct {
58 int_t *xlsub; /* pointer to the beginning of each column of L */
59 int_t *lsub; /* compressed L subscripts, stored by columns */
60 int_t szLsub; /* current max size of lsub */
61
62 int_t *xusub; /* pointer to the beginning of each row of U */
63 int_t *usub; /* compressed U subscripts, stored by rows */
64 int_t szUsub; /* current max size of usub */
65
66 int_t *supno_loc;
67 int_t *xsup_beg_loc;
68 int_t *xsup_end_loc;
69 int_t nvtcs_loc; /* number of local vertices */
70 int_t *globToLoc; /* global to local indexing */
71 int_t maxNvtcsPProc; /* max number of vertices on the processors */
72 int64_t nnzLU; /* number of nonzeros in L+U*/
74
75
106typedef struct {
107 int_t *xlsubPr; /* pointer to pruned structure of L */
108 int_t *lsubPr; /* pruned structure of L */
109 int_t szLsubPr; /* size of lsubPr array */
110 int_t indLsubPr; /* current index in lsubPr */
111 int_t *xusubPr; /* pointer to pruned structure of U */
112 int_t *usubPr; /* pruned structure of U */
113 int_t szUsubPr; /* size of usubPr array */
114 int_t indUsubPr; /* current index in usubPr */
115
116 int_t *xlsub_rcvd;
117 int_t *xlsub; /* pointer to structure of L, stored by columns */
118 int_t *lsub; /* structure of L, stored by columns */
119 int_t szLsub; /* current max size of lsub */
120 int_t nextl; /* pointer to current computation in lsub */
121
122 int_t *xusub_rcvd; /* */
123 int_t *xusub; /* pointer to structure of U, stored by rows */
124 int_t *usub; /* structure of U, stored by rows */
125 int_t szUsub; /* current max size of usub */
126 int_t nextu; /* pointer to current computation in usub */
127
128 int_t *cntelt_vtcs; /* size of column/row for each vertex */
129 int_t *cntelt_vtcsA_lvl; /* size of column/row of A for each vertex at the
130 current level */
131
132 LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
133 int_t no_expand; /* Number of memory expansions */
134 int_t no_expand_pr; /* Number of memory expansions of the pruned structures */
135 int_t no_expcp; /* Number of memory expansions due to the right looking
136 overestimation approach */
138
140typedef struct {
141 int_t maxSzBlk; /* Max no of vertices in a block */
142 int_t maxNvtcsNds_loc; /* Max number of vertices of a node distributed on one
143 processor. The maximum is computed among all the nodes
144 of the separator tree and among all the processors */
145 int_t maxNeltsVtx; /* Max number of elements of a vertex,
146 that is, considering that the matrix is
147 dense */
148 int_t nblks_loc; /* Number of local blocks */
149 int_t *begEndBlks_loc; /* Begin and end vertex of each local block.
150 Array of size 2 * nblks_loc */
151 int_t curblk_loc; /* Index of current block in the level under computation */
152 int_t nvtcs_loc; /* Number of local vertices distributed on a processor */
153 int_t nvtcsLvl_loc; /* Number of local vertices for current
154 level under computation */
155 int filledSep; /* determines if current or all separators are filled */
156 int_t nnz_asup_loc; /* Number of nonzeros in asup not yet consumed. Used during
157 symbolic factorization routine to determine how much
158 of xusub, usub is still used to store the input matrix AS */
159 int_t nnz_ainf_loc; /* Number of nonzeros in ainf. Similar to nnz_asup_loc. */
160 int_t xusub_nextLvl; /* Pointer to usub of the next level */
161 int_t xlsub_nextLvl; /* Pointer to lsub of the next level */
162 int_t fstVtx_nextLvl; /* First vertex of the next level */
164
166typedef struct {
167 int_t *x_ainf; /* pointers to columns of Ainf */
168 int_t *ind_ainf; /* column indices of Ainf */
169 int_t *x_asup; /* pointers to rows of Asup */
170 int_t *ind_asup; /* row indices of Asup */
172
173typedef struct {
174 int_t *rcv_interLvl; /* from which processors iam receives data */
175 int_t *snd_interLvl; /* to which processors iam sends data */
176 int_t *snd_interSz; /* size of data to be sent */
177 int_t *snd_LinterSz; /* size of data in L part to be sent */
178 int_t *snd_vtxinter; /* first vertex from where to send data */
179
180 /* intra level data structures */
181 int_t *snd_intraLvl; /* to which processors iam sends data */
182 int_t snd_intraSz; /* size of data to send */
183 int_t snd_LintraSz; /* size of data to send */
184 int_t *rcv_intraLvl; /* from which processors iam receives data */
185 int_t *rcv_buf; /* buffer to receive data */
186 int_t rcv_bufSz; /* size of the buffer to receive data */
187 int_t *snd_buf; /* buffer to send data */
188 int_t snd_bufSz; /* size of the buffer to send data */
189 int_t *ptr_rcvBuf; /* pointer to rcv_buf, the buffer to receive data */
191
192/* relaxation parameters used in the algorithms - for future release */
194typedef struct {
195 int_t fill_par; /* Estimation of fill. It corresponds to sp_ienv_dist(6) */
196 float relax_seps; /* relaxation parameter -not used in this version */
197 float relax_curSep; /* relaxation parameter -not used in this version */
198 float relax_gen; /* relaxation parameter -not used in this version */
199
200 /* number of operations performed during parallel symbolic factorization */
201 float nops;
202
203 /* no of dense current separators per proc */
204 int_t nDnsCurSep;
205 /* no of dense separators up per proc */
206 int_t nDnsUpSeps;
207
208 float no_shmSnd; /* Number of auxiliary messages for send data */
209 float no_msgsSnd; /* Number of messages sending data */
210 int_t maxsz_msgSnd; /* Max size of messages sending data */
211 float sz_msgsSnd; /* Average size of messages sending data */
212 float no_shmRcvd; /* Number of auxiliary messages for rcvd data */
213 float no_msgsRcvd; /* Number of messages receiving data */
214 int_t maxsz_msgRcvd;/* Max size of messages receiving data */
215 float sz_msgsRcvd; /* Average size of messages receiving data */
216 float no_msgsCol; /* Number of messages sent for estimating size
217 of rows/columns, setup information
218 interLvl_symbfact, */
219 int_t maxsz_msgCol; /* Max size of messages counted in
220 no_msgsCol */
221 float sz_msgsCol; /* Average size of messages counted in no_msgsCol */
222
223 /* statistics on fill-in */
224 float fill_pelt[6];
225 /*
226 0 - average fill per elt added during right-looking factorization
227 1 - max fill per elt added during right-looking factorization
228 2 - number vertices modified during right-looking factorization
229 3 - average fill per elt
230 4 - max fill per elt
231 5 - number vertices computed in upper levels of separator tree
232 */
233
234 /* Memory usage */
235 int_t estimLSz; /* size of lsub due to right looking overestimation */
236 int_t estimUSz; /* size of usub due to right looking overestimation */
237 int_t maxSzLPr; /* maximum size of pruned L */
238 int_t maxSzUPr; /* maximum size of pruned U */
239 int_t maxSzBuf; /* maximum size of the send and receive buffers */
240 int_t szDnsSep; /* size of memory used when there are dense separators */
241 float allocMem; /* size of the total memory allocated (in bytes)
242 This is the working storage, does not include LU data arrays
243 */
245
246/* MACROS */
247
248/*
249 Macros for computing the owner of a vertex and the local index
250 corresponding to a vertex
251*/
252#define OWNER(x) ((x) / maxNvtcsPProc)
253#define LOCAL_IND(x) ((x) % maxNvtcsPProc)
254
255/* Macros for computing the available memory in lsub, usub */
256#define MEM_LSUB(Llu, VInfo) (Llu->szLsub - VInfo->nnz_ainf_loc)
257#define MEM_USUB(Llu, VInfo) (Llu->szUsub - VInfo->nnz_asup_loc)
258
259#define tag_interLvl 2
260#define tag_interLvl_LData 0
261#define tag_interLvl_UData 1
262#define tag_intraLvl_szMsg 1000
263#define tag_intraLvl_LData 1001
264#define tag_intraLvl_UData 1002
265/* tag_intraLvl has to be the last tag number */
266#define tag_intraLvl 1003
267
268/*
269 * Index of diagonal element, number of elements preceding each column/row
270 * of L/U sent to another processor
271 */
272#define DIAG_IND 0
273#define NELTS_IND 1
274#define RCVD_IND 2
275
276#define SUCCES_RET 0 /* successful return from a routine */
277#define ERROR_RET 1 /* error return code from a routine */
278#define FILLED_SEP 2 /* the current separator is dense */
279#define FILLED_SEPS 3 /* all the separators situated on the path from the current
280 separator to the root separator are dense */
281
282/* Code for the type of the memory to expand */
283#define USUB_PR 0
284#define LSUB_PR 1
285/* Sherry: the following are already defined in superlu_enum_consts.h
286#define USUB 0
287#define LSUB 1
288*/
289
290/*
291 * Code for the type of computation - right looking (RL_SYMB); left
292 * looking (LL_SYMB); symbolic factorization of an independent domain
293 * (DOMAIN_SYMB); current separator is dense (DNS_CURSEP); all the
294 * separators from the current one to the root of the tree are dense
295 * (DNS_UPSEPS).
296 */
297#define RL_SYMB 0
298#define DOMAIN_SYMB 1
299#define LL_SYMB 2
300#define DNS_UPSEPS 3
301#define DNS_CURSEP 4
302
303
304#endif /* __SUPERLU_DIST_PSYMBFACT */
int int_t
Definition: superlu_defs.h:114
LU_space_t
Definition: superlu_enum_consts.h:40
int_t no_expand
Definition: memory.c:31
integer, parameter, public lsub
Definition: superlupara.f90:35
integer, parameter, public usub
Definition: superlupara.f90:35
Definition: psymbfact.h:106
Definition: psymbfact.h:57
Definition: psymbfact.h:173
Structure used for redistributing A for the symbolic factorization algorithm.
Definition: psymbfact.h:166
statistics collected during parallel symbolic factorization
Definition: psymbfact.h:194
Local information on vertices distribution.
Definition: psymbfact.h:140