|
void | psgstrf2_trsm (superlu_dist_options_t *options, int_t k0, int_t k, double thresh, Glu_persist_t *Glu_persist, gridinfo_t *grid, sLocalLU_t *Llu, MPI_Request *U_diag_blk_send_req, int tag_ub, SuperLUStat_t *stat, int *info) |
|
static int_t | LpanelUpdate (int off0, int nsupc, float *ublk_ptr, int ld_ujrow, float *lusup, int nsupr, SCT_t *SCT) |
|
void | psgstrf2_xtrsm (superlu_dist_options_t *options, int_t nsupers, int_t k0, int_t k, double thresh, Glu_persist_t *Glu_persist, gridinfo_t *grid, sLocalLU_t *Llu, MPI_Request *U_diag_blk_send_req, int tag_ub, SuperLUStat_t *stat, int *info, SCT_t *SCT) |
|
int_t | sTrs2_GatherU (int_t iukp, int_t rukp, int_t klst, int_t nsupc, int_t ldu, int_t *usub, float *uval, float *tempv) |
|
int_t | sTrs2_ScatterU (int_t iukp, int_t rukp, int_t klst, int_t nsupc, int_t ldu, int_t *usub, float *uval, float *tempv) |
|
int_t | sTrs2_GatherTrsmScatter (int_t klst, int_t iukp, int_t rukp, int_t *usub, float *uval, float *tempv, int_t knsupc, int nsupr, float *lusup, Glu_persist_t *Glu_persist) |
|
void | psgstrs2_omp (int_t k0, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, sLocalLU_t *Llu, Ublock_info_t *Ublock_info, SuperLUStat_t *stat) |
|
Performs panel LU factorization.
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
All rights reserved.
The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-- Distributed SuperLU routine (version 7.2) --
Lawrence Berkeley National Lab, Univ. of California Berkeley.
August 15, 2014
Modified:
September 30, 2017
May 10, 2019 v7.0.0
December 12, 2021 v7.2.0
Purpose
=======
Panel factorization -- block column k
Factor diagonal and subdiagonal blocks and test for exact singularity.
Only the column processes that own block column *k* participate
in the work.
Arguments
=========
options (input) superlu_dist_options_t* (global)
The structure defines the input parameters to control
how the LU decomposition will be performed.
k0 (input) int (global)
Counter of the next supernode to be factorized.
k (input) int (global)
The column number of the block column to be factorized.
thresh (input) double (global)
The threshold value = s_eps * anorm.
Glu_persist (input) Glu_persist_t*
Global data structures (xsup, supno) replicated on all processes.
grid (input) gridinfo_t*
The 2D process mesh.
Llu (input/output) sLocalLU_t*
Local data structures to store distributed L and U matrices.
U_diag_blk_send_req (input/output) MPI_Request*
List of send requests to send down the diagonal block of U.
tag_ub (input) int
Upper bound of MPI tag values.
stat (output) SuperLUStat_t*
Record the statistics about the factorization.
See SuperLUStat_t structure defined in util.h.
info (output) int*
= 0: successful exit
< 0: if info = -i, the i-th argument had an illegal value
> 0: if info = i, U(i,i) is exactly zero. The factorization has
been completed, but the factor U is exactly singular,
and division by zero will occur if it is used to solve a
system of equations.
void psgstrf2_xtrsm |
( |
superlu_dist_options_t * |
options, |
|
|
int_t |
nsupers, |
|
|
int_t |
k0, |
|
|
int_t |
k, |
|
|
double |
thresh, |
|
|
Glu_persist_t * |
Glu_persist, |
|
|
gridinfo_t * |
grid, |
|
|
sLocalLU_t * |
Llu, |
|
|
MPI_Request * |
U_diag_blk_send_req, |
|
|
int |
tag_ub, |
|
|
SuperLUStat_t * |
stat, |
|
|
int * |
info, |
|
|
SCT_t * |
SCT |
|
) |
| |
Purpose
=======
Factorize the diagonal block; called from process that owns the (k,k) block
Arguments
=========
info (output) int*
= 0: successful exit
> 0: if info = i, U(i,i) is exactly zero. The factorization has
been completed, but the factor U is exactly singular,
and division by zero will occur if it is used to solve a
system of equations.
*/
void Local_Sgstrf2(superlu_dist_options_t *options, int_t k, double thresh,
float *BlockUFactor, /*factored U is overwritten here*/
Glu_persist_t *Glu_persist, gridinfo_t *grid, sLocalLU_t *Llu,
SuperLUStat_t *stat, int *info, SCT_t* SCT)
{
//double t1 = SuperLU_timer_();
int_t *xsup = Glu_persist->xsup;
float alpha = -1, zero = 0.0;
// printf("Entering dgetrf2 %d \n", k);
/* Initialization. */
int_t lk = LBj (k, grid); /* Local block number */
int_t jfst = FstBlockC (k);
int_t jlst = FstBlockC (k + 1);
float *lusup = Llu->Lnzval_bc_ptr[lk];
int nsupc = SuperSize (k);
int nsupr;
if (Llu->Lrowind_bc_ptr[lk])
nsupr = Llu->Lrowind_bc_ptr[lk][1];
else
nsupr = 0;
float *ublk_ptr = BlockUFactor;
float *ujrow = BlockUFactor;
int_t luptr = 0; /* Point_t to the diagonal entries. */
int cols_left = nsupc; /* supernode size */
int_t u_diag_cnt = 0;
int_t ld_ujrow = nsupc; /* leading dimension of ujrow */
int incx = 1;
int incy = ld_ujrow;
for (int_t j = 0; j < jlst - jfst; ++j) /* for each column in panel */
{
/* Diagonal pivot */
int_t i = luptr;
/* Allow to replace zero pivot. */
//if (options->ReplaceTinyPivot == YES && lusup[i] != 0.0)
if (options->ReplaceTinyPivot == YES)
{
if (fabs (lusup[i]) < thresh) { /* Diagonal */
/* Keep the new diagonal entry with the same sign. */
if (lusup[i] < 0) lusup[i] = -thresh;
else lusup[i] = thresh;
++(stat->TinyPivots);
}
}
for (int_t l = 0; l < cols_left; ++l, i += nsupr, ++u_diag_cnt)
{
int_t st = j * ld_ujrow + j;
ublk_ptr[st + l * ld_ujrow] = lusup[i]; /* copy one row of U */
}
if (ujrow[0] == zero) /* Test for singularity. */
{
*info = j + jfst + 1;
}
else /* Scale the j-th column. */
{
float temp;
temp = 1.0 / ujrow[0];
for (int_t i = luptr + 1; i < luptr - j + nsupc; ++i)
lusup[i] *= temp;
stat->ops[FACT] += nsupc - j - 1;
}
/* Rank-1 update of the trailing submatrix. */
if (--cols_left)
{
/*following must be int*/
int l = nsupc - j - 1;
/* Rank-1 update */
superlu_sger(l, cols_left, alpha, &lusup[luptr + 1], incx,
&ujrow[ld_ujrow], incy, &lusup[luptr + nsupr + 1], nsupr);
stat->ops[FACT] += 2 * l * cols_left;
}
ujrow = ujrow + ld_ujrow + 1; /* move to next row of U */
luptr += nsupr + 1; /* move to next column */
} /* for column j ... first loop */
//int_t thread_id = omp_get_thread_num();
// SCT->Local_Dgstrf2_Thread_tl[thread_id * CACHE_LINE_SIZE] += (double) ( SuperLU_timer_() - t1);
} /* end Local_Sgstrf2 */
/************************************************************************/
/*!
Purpose
=======
Panel factorization -- block column k
Factor diagonal and subdiagonal blocks and test for exact singularity.
Only the column processes that own block column *k* participate
in the work.
Arguments
=========
options (input) superlu_dist_options_t* (global)
The structure defines the input parameters to control
how the LU decomposition will be performed.
nsupers (input) int_t (global)
Number of supernodes.
k0 (input) int (global)
Counter of the next supernode to be factorized.
k (input) int (global)
The column number of the block column to be factorized.
thresh (input) double (global)
The threshold value = s_eps * anorm.
Glu_persist (input) Glu_persist_t*
Global data structures (xsup, supno) replicated on all processes.
grid (input) gridinfo_t*
The 2D process mesh.
Llu (input/output) sLocalLU_t*
Local data structures to store distributed L and U matrices.
U_diag_blk_send_req (input/output) MPI_Request*
List of send requests to send down the diagonal block of U.
tag_ub (input) int
Upper bound of MPI tag values.
stat (output) SuperLUStat_t*
Record the statistics about the factorization.
See SuperLUStat_t structure defined in util.h.
info (output) int*
= 0: successful exit
< 0: if info = -i, the i-th argument had an illegal value
> 0: if info = i, U(i,i) is exactly zero. The factorization has
been completed, but the factor U is exactly singular,
and division by zero will occur if it is used to solve a
system of equations.
SCT (output) SCT_t*
Additional statistics used in the 3D algorithm.