SuperLU Distributed 8.2.1
Distributed memory sparse direct solver
|
Macros | |
#define | CLAMP(x, low, high) (((x) > (high)) ? (high) : (((x) < (low)) ? (low) : (x))) |
#define | MAX_DIM 12800 |
#define | MAX_IN_DIM 256 |
#define | LOG_2_MAX_IN_DIM 8 |
#define | LOG_2_MAX_DIM 7 |
Functions | |
static double | load_imb (double *A, int nthreads) |
double | get_acc_async_cost () |
static unsigned int | next_power_2 (unsigned int v) |
static unsigned int | previous_power_2 (unsigned int v) |
static uint32_t | my_log2 (const uint32_t x) |
static unsigned int | nearst_2_100 (unsigned int v) |
static unsigned int | nearst_k (unsigned int v) |
double | estimate_acc_time (int m, int n, int k) |
double | estimate_acc_gemm_time (int m, int n, int k) |
double | estimate_acc_scatter_time (int m, int n, int k) |
double | estimate_cpu_time (int m, int n, int k) |
double | acc_data_send_time (size_t sz) |
void | LookUpTableInit (int my_rank) |
double | estimate_acc_scatter_time_strat1 (Ublock_info_t *Ublock_info, int_t nub, Remain_info_t *Lblock_info, int_t nlb) |
int_t | fixed_cpu_acc_partition (Ublock_info_t *Ublock_info_Phi, int_t num_u_blks_Phi, int_t Rnbrow, int_t ldu_Phi) |
int_t | tuned_partition (int_t num_u_blks_Phi, Ublock_info_t *Ublock_info_Phi, Remain_info_t *Remain_info, int_t RemainBlk, double cpu_time_0, int_t Rnbrow, int_t ldu_Phi) |
Variables | |
double | cpu_bandwidth |
int | communication_overlap |
double | acc_async_cost |
int_t | fixed_partition |
double | frac |
double | CpuDgemmLookUp [8][8][9] |
double | PhiDgemmLookUp [8][8][9] |
double | PhiBWLookUp [8] |
double | MicPciBandwidth [18] |
double | MicScatterBW [24][24] |
double | l_count [24] |
double | u_count [24] |
#define CLAMP(x, low, high) (((x) > (high)) ? (high) : (((x) < (low)) ? (low) : (x)))
#define LOG_2_MAX_DIM 7 |
#define LOG_2_MAX_IN_DIM 8 |
#define MAX_DIM 12800 |
#define MAX_IN_DIM 256 |
double acc_data_send_time (size_t sz)
double estimate_acc_gemm_time (int m, int n, int k)
double estimate_acc_scatter_time (int m, int n, int k)
double estimate_acc_scatter_time_strat1 (Ublock_info_t *Ublock_info, int_t nub, Remain_info_t *Lblock_info, int_t nlb)
double estimate_acc_time (int m, int n, int k)
double estimate_cpu_time (int m, int n, int k)
int_t fixed_cpu_acc_partition (Ublock_info_t *Ublock_info_Phi, int_t num_u_blks_Phi, int_t Rnbrow, int_t ldu_Phi)
double get_acc_async_cost ()
static double load_imb (double *A, int nthreads)
[inline, static]
void LookUpTableInit (int my_rank)
static uint32_t my_log2 (const uint32_t x)
[inline, static]
static unsigned int nearst_2_100 (unsigned int v)
[inline, static]
static unsigned int nearst_k (unsigned int v)
[inline, static]
static unsigned int next_power_2 (unsigned int v)
[inline, static]
static unsigned int previous_power_2 (unsigned int v)
[inline, static]
int_t tuned_partition (int_t num_u_blks_Phi, Ublock_info_t *Ublock_info_Phi, Remain_info_t *Remain_info, int_t RemainBlk, double cpu_time_0, int_t Rnbrow, int_t ldu_Phi)
double acc_async_cost |
int communication_overlap |
double cpu_bandwidth |
double CpuDgemmLookUp[8][8][9] |
int_t fixed_partition |
double frac |
double l_count[24] |
double MicPciBandwidth[18] |
double MicScatterBW[24][24] |
double PhiBWLookUp[8] |
double PhiDgemmLookUp[8][8][9] |
double u_count[24] |