SuperLU_DIST  4.0
superlu_dist on CPU and GPU clusters
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
psymbfact.h
Go to the documentation of this file.
1 
11 #ifndef __SUPERLU_DIST_PSYMBFACT /* allow multiple inclusions */
12 #define __SUPERLU_DIST_PSYMBFACT
13 
14 /*
15  * File name: psymbfact.h
16  * Purpose: Definitions for parallel symbolic factorization routine
17  */
18 
47 typedef struct {
48  int_t *xlsub; /* pointer to the beginning of each column of L */
49  int_t *lsub; /* compressed L subscripts, stored by columns */
50  int_t szLsub; /* current max size of lsub */
51 
52  int_t *xusub; /* pointer to the beginning of each row of U */
53  int_t *usub; /* compressed U subscripts, stored by rows */
54  int_t szUsub; /* current max size of usub */
55 
59  int_t nvtcs_loc; /* number of local vertices */
60  int_t *globToLoc; /* global to local indexing */
61  int_t maxNvtcsPProc; /* max number of vertices on the processors */
63 
64 
95 typedef struct {
96  int_t *xlsubPr; /* pointer to pruned structure of L */
97  int_t *lsubPr; /* pruned structure of L */
98  int_t szLsubPr; /* size of lsubPr array */
99  int_t indLsubPr; /* current index in lsubPr */
100  int_t *xusubPr; /* pointer to pruned structure of U */
101  int_t *usubPr; /* pruned structure of U */
102  int_t szUsubPr; /* size of usubPr array */
103  int_t indUsubPr; /* current index in usubPr */
104 
106  int_t *xlsub; /* pointer to structure of L, stored by columns */
107  int_t *lsub; /* structure of L, stored by columns */
108  int_t szLsub; /* current max size of lsub */
109  int_t nextl; /* pointer to current computation in lsub */
110 
112  int_t *xusub; /* pointer to structure of U, stored by rows */
113  int_t *usub; /* structure of U, stored by rows */
114  int_t szUsub; /* current max size of usub */
115  int_t nextu; /* pointer to current computation in usub */
116 
117  int_t *cntelt_vtcs; /* size of column/row for each vertex */
118  int_t *cntelt_vtcsA_lvl; /* size of column/row of A for each vertex at the
119  current level */
120 
121  LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
122  int_t no_expand; /* Number of memory expansions */
123  int_t no_expand_pr; /* Number of memory expansions of the pruned structures */
124  int_t no_expcp; /* Number of memory expansions due to the right looking
125  overestimation approach */
127 
129 typedef struct {
130  int_t maxSzBlk; /* Max no of vertices in a block */
131  int_t maxNvtcsNds_loc; /* Max number of vertices of a node distributed on one
132  processor. The maximum is computed among all the nodes
133  of the separator tree and among all the processors */
134  int_t maxNeltsVtx; /* Max number of elements of a vertex,
135  that is, considering that the matrix is
136  dense */
137  int_t nblks_loc; /* Number of local blocks */
138  int_t *begEndBlks_loc; /* Begin and end vertex of each local block.
139  Array of size 2 * nblks_loc */
140  int_t curblk_loc; /* Index of current block in the level under computation */
141  int_t nvtcs_loc; /* Number of local vertices distributed on a processor */
142  int_t nvtcsLvl_loc; /* Number of local vertices for current
143  level under computation */
144  int filledSep; /* determines if current or all separators are filled */
145  int_t nnz_asup_loc; /* Number of nonzeros in asup not yet consumed. Used during
146  symbolic factorization routine to determine how much
147  of xusub, usub is still used to store the input matrix AS */
148  int_t nnz_ainf_loc; /* Number of nonzeros in ainf. Similar to nnz_asup_loc. */
149  int_t xusub_nextLvl; /* Pointer to usub of the next level */
150  int_t xlsub_nextLvl; /* Pointer to lsub of the next level */
151  int_t fstVtx_nextLvl; /* First vertex of the next level */
153 
155 typedef struct {
156  int_t *x_ainf; /* pointers to columns of Ainf */
157  int_t *ind_ainf; /* column indices of Ainf */
158  int_t *x_asup; /* pointers to rows of Asup */
159  int_t *ind_asup; /* row indices of Asup */
161 
162 typedef struct {
163  int_t *rcv_interLvl; /* from which processors iam receives data */
164  int_t *snd_interLvl; /* to which processors iam sends data */
165  int_t *snd_interSz; /* size of data to be sent */
166  int_t *snd_LinterSz; /* size of data in L part to be sent */
167  int_t *snd_vtxinter; /* first vertex from where to send data */
168 
169  /* intra level data structures */
170  int_t *snd_intraLvl; /* to which processors iam sends data */
171  int_t snd_intraSz; /* size of data to send */
172  int_t snd_LintraSz; /* size of data in L part to send */
173  int_t *rcv_intraLvl; /* from which processors iam receives data */
174  int_t *rcv_buf; /* buffer to receive data */
175  int_t rcv_bufSz; /* size of the buffer to receive data */
176  int_t *snd_buf; /* buffer to send data */
177  int_t snd_bufSz; /* size of the buffer to send data */
178  int_t *ptr_rcvBuf; /* pointer to rcv_buf, the buffer to receive data */
180 
181 /* relaxation parameters used in the algorithms - for future release */
183 typedef struct {
184  int_t fill_par; /* Estimation of fill. It corresponds to sp_ienv_dist(6) */
185  float relax_seps; /* relaxation parameter -not used in this version */
186  float relax_curSep; /* relaxation parameter -not used in this version */
187  float relax_gen; /* relaxation parameter -not used in this version */
188 
189  /* number of operations performed during parallel symbolic factorization */
190  float nops;
191 
192  /* no of dense current separators per proc */
194  /* no of dense separators up per proc */
196 
197  float no_shmSnd; /* Number of auxiliary messages for send data */
198  float no_msgsSnd; /* Number of messages sending data */
199  int_t maxsz_msgSnd; /* Max size of messages sending data */
200  float sz_msgsSnd; /* Average size of messages sending data */
201  float no_shmRcvd; /* Number of auxiliary messages for rcvd data */
202  float no_msgsRcvd; /* Number of messages receiving data */
203  int_t maxsz_msgRcvd;/* Max size of messages receiving data */
204  float sz_msgsRcvd; /* Average size of messages receiving data */
205  float no_msgsCol; /* Number of messages sent for estimating size
206  of rows/columns, setup information
207  interLvl_symbfact, */
208  int_t maxsz_msgCol; /* Max size of messages counted in
209  no_msgsCol */
210  float sz_msgsCol; /* Average size of messages counted in no_msgsCol */
211 
212  /* statistics on fill-in */
213  float fill_pelt[6];
214  /*
215  0 - average fill per elt added during right-looking factorization
216  1 - max fill per elt added during right-looking factorization
217  2 - number vertices modified during right-looking factorization
218  3 - average fill per elt
219  4 - max fill per elt
220  5 - number vertices computed in upper levels of separator tree
221  */
222 
223  /* Memory usage */
224  int_t estimLSz; /* size of lsub due to right looking overestimation */
225  int_t estimUSz; /* size of usub due to right looking overestimation */
226  int_t maxSzLPr; /* maximum size of pruned L */
227  int_t maxSzUPr; /* maximum size of pruned U */
228  int_t maxSzBuf; /* maximum size of the send and receive buffers */
229  int_t szDnsSep; /* size of memory used when there are dense separators */
230  float allocMem; /* size of the total memory allocated (in bytes) */
232 
233 /* MACROS */
234 
235 /*
236  Macros for computing the owner of a vertex and the local index
237  corresponding to a vertex
238 */
239 #define OWNER(x) ((x) / maxNvtcsPProc)
240 #define LOCAL_IND(x) ((x) % maxNvtcsPProc)
241 
242 /* Macros for computing the available memory in lsub, usub */
243 #define MEM_LSUB(Llu, VInfo) (Llu->szLsub - VInfo->nnz_ainf_loc)
244 #define MEM_USUB(Llu, VInfo) (Llu->szUsub - VInfo->nnz_asup_loc)
245 
246 #define tag_interLvl 2
247 #define tag_interLvl_LData 0
248 #define tag_interLvl_UData 1
249 #define tag_intraLvl_szMsg 1000
250 #define tag_intraLvl_LData 1001
251 #define tag_intraLvl_UData 1002
252 /* tag_intraLvl has to be the last tag number */
253 #define tag_intraLvl 1003
254 
255 /*
256  * Index of diagonal element, no of elements preceding each column/row
257  * of L/U sent to another processor
258  */
259 #define DIAG_IND 0
260 #define NELTS_IND 1
261 #define RCVD_IND 2
262 
263 #define SUCCES_RET 0 /* successful return from a routine */
264 #define ERROR_RET 1 /* error return code from a routine */
265 #define FILLED_SEP 2 /* the current separator is dense */
266 #define FILLED_SEPS 3 /* all the separators situated on the path from the current
267  separator to the root separator are dense */
268 
269 /* Code for the type of the memory to expand */
270 #define USUB_PR 0
271 #define LSUB_PR 1
272 #define USUB 0
273 #define LSUB 1
274 
275 /*
276  * Code for the type of computation - right looking (RL_SYMB); left
277  * looking (LL_SYMB); symbolic factorization of an independent domain
278  * (DOMAIN_SYMB); current separator is dense (DNS_CURSEP); all the
279  * separators from the current one to the root of the tree are dense
280  * (DNS_UPSEPS).
281  */
282 #define RL_SYMB 0
283 #define DOMAIN_SYMB 1
284 #define LL_SYMB 2
285 #define DNS_UPSEPS 3
286 #define DNS_CURSEP 4
287 
288 
289 #endif /* __SUPERLU_DIST_PSYMBFACT */
290 
291 
292 
int_t * usub
Definition: psymbfact.h:53
Definition: psymbfact.h:95
int_t snd_LintraSz
Definition: psymbfact.h:172
Definition: psymbfact.h:47
int_t maxsz_msgCol
Definition: psymbfact.h:208
int_t maxsz_msgRcvd
Definition: psymbfact.h:203
int_t snd_bufSz
Definition: psymbfact.h:177
int_t fstVtx_nextLvl
Definition: psymbfact.h:151
int_t * xsup_end_loc
Definition: psymbfact.h:58
int_t estimLSz
Definition: psymbfact.h:224
float no_msgsRcvd
Definition: psymbfact.h:202
Local information on vertices distribution.
Definition: psymbfact.h:129
int_t szLsub
Definition: psymbfact.h:50
int_t szLsub
Definition: psymbfact.h:108
int_t xlsub_nextLvl
Definition: psymbfact.h:150
int_t * snd_vtxinter
Definition: psymbfact.h:167
int_t maxNeltsVtx
Definition: psymbfact.h:134
statistics collected during parallel symbolic factorization
Definition: psymbfact.h:183
int_t * xusub_rcvd
Definition: psymbfact.h:111
int_t nvtcs_loc
Definition: psymbfact.h:141
int_t maxSzLPr
Definition: psymbfact.h:226
int_t no_expand_pr
Definition: psymbfact.h:123
int_t * cntelt_vtcsA_lvl
Definition: psymbfact.h:118
int_t * rcv_interLvl
Definition: psymbfact.h:163
int_t nblks_loc
Definition: psymbfact.h:137
int_t no_expcp
Definition: psymbfact.h:124
int_t maxNvtcsNds_loc
Definition: psymbfact.h:131
LU_space_t MemModel
Definition: psymbfact.h:121
Definition: psymbfact.h:162
int_t szUsub
Definition: psymbfact.h:114
Structure used for redistributing A for the symbolic factorization algorithm.
Definition: psymbfact.h:155
int_t nDnsCurSep
Definition: psymbfact.h:193
int_t indUsubPr
Definition: psymbfact.h:103
int_t rcv_bufSz
Definition: psymbfact.h:175
float relax_seps
Definition: psymbfact.h:185
float nops
Definition: psymbfact.h:190
int filledSep
Definition: psymbfact.h:144
int_t * cntelt_vtcs
Definition: psymbfact.h:117
float relax_curSep
Definition: psymbfact.h:186
int_t maxsz_msgSnd
Definition: psymbfact.h:199
int_t nnz_ainf_loc
Definition: psymbfact.h:148
int_t * xlsub
Definition: psymbfact.h:48
int_t estimUSz
Definition: psymbfact.h:225
int_t maxNvtcsPProc
Definition: psymbfact.h:61
int_t * supno_loc
Definition: psymbfact.h:56
int_t szDnsSep
Definition: psymbfact.h:229
int_t * xlsub_rcvd
Definition: psymbfact.h:105
int_t * xusub
Definition: psymbfact.h:112
int_t * x_asup
Definition: psymbfact.h:158
int_t * rcv_intraLvl
Definition: psymbfact.h:173
int_t * snd_buf
Definition: psymbfact.h:176
float no_msgsCol
Definition: psymbfact.h:205
int_t * ind_ainf
Definition: psymbfact.h:157
int_t szLsubPr
Definition: psymbfact.h:98
int_t * snd_intraLvl
Definition: psymbfact.h:170
int_t * xusubPr
Definition: psymbfact.h:100
int_t * ptr_rcvBuf
Definition: psymbfact.h:178
int_t * lsub
Definition: psymbfact.h:49
float sz_msgsSnd
Definition: psymbfact.h:200
int_t szUsub
Definition: psymbfact.h:54
float relax_gen
Definition: psymbfact.h:187
int_t * snd_interLvl
Definition: psymbfact.h:164
float no_shmSnd
Definition: psymbfact.h:197
float allocMem
Definition: psymbfact.h:230
int_t * usub
Definition: psymbfact.h:113
int_t szUsubPr
Definition: psymbfact.h:102
int_t * rcv_buf
Definition: psymbfact.h:174
float sz_msgsRcvd
Definition: psymbfact.h:204
int int_t
Definition: superlu_defs.h:37
int_t nnz_asup_loc
Definition: psymbfact.h:145
int_t xusub_nextLvl
Definition: psymbfact.h:149
int_t * xlsub
Definition: psymbfact.h:106
int_t * lsub
Definition: psymbfact.h:107
float sz_msgsCol
Definition: psymbfact.h:210
int_t snd_intraSz
Definition: psymbfact.h:171
int_t * xsup_beg_loc
Definition: psymbfact.h:57
int_t indLsubPr
Definition: psymbfact.h:99
int_t * globToLoc
Definition: psymbfact.h:60
LU_space_t
Definition: superlu_enum_consts.h:26
int_t * usubPr
Definition: psymbfact.h:101
int_t * xusub
Definition: psymbfact.h:52
int_t maxSzBuf
Definition: psymbfact.h:228
int_t * xlsubPr
Definition: psymbfact.h:96
int_t * x_ainf
Definition: psymbfact.h:156
int_t nvtcsLvl_loc
Definition: psymbfact.h:142
int_t nDnsUpSeps
Definition: psymbfact.h:195
int_t * lsubPr
Definition: psymbfact.h:97
int_t nextl
Definition: psymbfact.h:109
int_t * begEndBlks_loc
Definition: psymbfact.h:138
int_t * snd_interSz
Definition: psymbfact.h:165
float no_shmRcvd
Definition: psymbfact.h:201
int_t nvtcs_loc
Definition: psymbfact.h:59
int_t maxSzBlk
Definition: psymbfact.h:130
int_t curblk_loc
Definition: psymbfact.h:140
int_t no_expand
Definition: psymbfact.h:122
int_t * snd_LinterSz
Definition: psymbfact.h:166
int_t maxSzUPr
Definition: psymbfact.h:227
int_t fill_par
Definition: psymbfact.h:184
float no_msgsSnd
Definition: psymbfact.h:198
int_t * ind_asup
Definition: psymbfact.h:159
int_t nextu
Definition: psymbfact.h:115