21#include "gpu_api_utils.h"
/* Dimension of the per-stream arrays declared in this file
 * (scubufs, funCallStreams, gpublasHandles, lastOffloadStream). */
#define MAX_NGPU_STREAMS 32
32void check(gpuError_t result,
char const *
const func,
const char *
const file,
int const line)
36 fprintf(stderr,
"GPU error at file %s: line %d code=(%s) \"%s\" \n",
37 file, line, gpuGetErrorString(result), func);
/* Wrap a GPU call: forwards its value to check() together with the
 * stringified expression and the file/line of the call site. */
#define checkGPUErrors(val) check ( (val), #val, __FILE__, __LINE__ )
56 float *Remain_L_buff_host;
61 int_t *lsub_buf, *usub_buf;
68 int_t* usub_IndirectJ3;
69 int_t* usub_IndirectJ3_host;
81 int_t *LnzvalPtr_host;
85 int_t *UrowindPtr_host;
86 int_t *UnzvalPtr_host;
93 int_t *local_l_blk_infoPtr;
98 int_t *local_u_blk_infoPtr;
100 int_t *ijb_lookupPtr;
103 sSCUbuf_gpu_t scubufs[MAX_NGPU_STREAMS];
104 float *acc_L_buff, *acc_U_buff;
112 double ScatterMOPCounter;
113 double ScatterMOPTimer;
114 double GemmFLOPCounter;
115 double GemmFLOPTimer;
119 double tHost_PCIeH2D;
120 double tHost_PCIeD2H;
124 gpuEvent_t *GemmStart, *GemmEnd, *ScatterEnd;
125 gpuEvent_t *ePCIeH2D;
126 gpuEvent_t *ePCIeD2H_Start;
127 gpuEvent_t *ePCIeD2H_End;
131 int_t first_l_block_gpu, first_u_block_gpu;
137 sLUstruct_gpu_t *A_gpu, *dA_gpu;
138 gpuStream_t funCallStreams[MAX_NGPU_STREAMS], CopyStream;
139 gpublasHandle_t gpublasHandles[MAX_NGPU_STREAMS];
140 int_t lastOffloadStream[MAX_NGPU_STREAMS];
151extern int ssparseTreeFactor_ASYNC_GPU(
164 int_t *gIperm_c_supno,
171 double thresh,
SCT_t *SCT,
int tag_ub,
185extern int sreduceGPUlu(
int last_flag,
d2Hreduce_t* d2Hred,
189extern int swaitGPUscu(
int streamId, ssluGPU_t *sluGPU,
SCT_t *SCT);
190extern int ssendLUpanelGPU2HOST(
int_t k0,
d2Hreduce_t* d2Hred, ssluGPU_t *sluGPU);
191extern int ssendSCUdataHost2GPU(
193 int_t Remain_lbuf_send_size, ssluGPU_t *sluGPU,
HyP_t* HyP
196extern int sinitSluGPU3D_t(
202int sSchurCompUpdate_GPU(
206 int_t Remain_lbuf_send_size,
215extern void sCopyLUToGPU3D (
int* isNodeInMyGrid,
sLocalLU_t *A_host,
219extern int sreduceAllAncestors3d_GPU(
int_t ilvl,
int_t* myNodeCount,
225extern void ssyncAllfunCallStreams(ssluGPU_t* sluGPU,
SCT_t* SCT);
226extern int sfree_LUstruct_gpu (sLUstruct_gpu_t *A_gpu);
230extern void sPrint_matrix(
char *desc,
int_t m,
int_t n,
float *dA,
int_t lda );
233void sprintGPUStats(sLUstruct_gpu_t *A_gpu);
int int_t
Definition: superlu_defs.h:114
double acc_async_cost
Definition: acc_aux.c:56
integer, parameter, public lsub
Definition: superlupara.f90:35
integer, parameter, public usub
Definition: superlupara.f90:35
Definition: superlu_defs.h:435
Definition: superlu_ddefs.h:329
Definition: superlu_defs.h:770
Definition: util_dist.h:172
Definition: util_dist.h:95
Definition: superlu_defs.h:760
Definition: superlu_defs.h:924
Definition: superlu_defs.h:852
Definition: superlu_defs.h:937
Definition: superlu_defs.h:839
Definition: superlu_defs.h:890
Definition: superlu_defs.h:398
Definition: superlu_defs.h:388
Definition: superlu_defs.h:815
Definition: superlu_defs.h:822
Definition: superlu_defs.h:947
Definition: superlu_ddefs.h:397
Definition: superlu_defs.h:901
Definition: superlu_sdefs.h:357
Definition: superlu_sdefs.h:254
Definition: superlu_sdefs.h:97
Definition: superlu_sdefs.h:391
Definition: superlu_sdefs.h:385
Definition: superlu_defs.h:712
Distributed SuperLU data types and function prototypes.