SuperLU Distributed
8.2.1
Distributed memory sparse direct solver
gpu_wrapper.h
Go to the documentation of this file.
1
21
#ifndef __SUPERLU_GPUWRAPPER
/* allow multiple inclusions */
22
#define __SUPERLU_GPUWRAPPER
23
24
#ifdef HAVE_CUDA
25
#include <cublas_v2.h>
26
#include "cuda.h"
27
#include "cuda_runtime_api.h"
28
#include "cuda_runtime.h"
29
#include <cusparse.h>
30
#include <cuda_profiler_api.h>
31
32
/*
 * CUDA backend: map the backend-neutral gpu* / gpublas* / GPUBLAS_* names
 * used by the rest of the library onto the CUDA runtime and cuBLAS symbols.
 * Each alias is a plain object-like macro, so argument lists are passed
 * through to the underlying symbol unchanged.
 */

/* Device query and selection */
#define gpuDeviceProp                     cudaDeviceProp
#define gpuGetDeviceCount                 cudaGetDeviceCount
#define gpuGetDeviceProperties            cudaGetDeviceProperties
#define gpuSetDevice                      cudaSetDevice
#define gpuGetDevice                      cudaGetDevice

/* Runtime status type and helpers */
#define gpuError_t                        cudaError_t
#define gpuSuccess                        cudaSuccess
#define gpuGetErrorString                 cudaGetErrorString

/* Memory, streams and transfers */
#define gpuMalloc                         cudaMalloc
#define gpuHostMalloc                     cudaHostAlloc
#define gpuHostMallocDefault              cudaHostAllocDefault
#define gpuMallocManaged                  cudaMallocManaged
#define gpuStream_t                       cudaStream_t
#define gpuStreamCreate                   cudaStreamCreate
#define gpuStreamDestroy                  cudaStreamDestroy
#define gpuMemcpyAsync                    cudaMemcpyAsync
#define gpuMemcpy2DAsync                  cudaMemcpy2DAsync
#define gpuFreeHost                       cudaFreeHost
#define gpuFree                           cudaFree
#define gpuMemPrefetchAsync               cudaMemPrefetchAsync
#define gpuStreamSynchronize              cudaStreamSynchronize
#define gpuDeviceSynchronize              cudaDeviceSynchronize
#define gpuMemcpy                         cudaMemcpy
#define gpuMemAttachGlobal                cudaMemAttachGlobal
#define gpuMemcpyHostToDevice             cudaMemcpyHostToDevice
#define gpuMemcpyDeviceToHost             cudaMemcpyDeviceToHost
#define gpuStreamCreateWithFlags          cudaStreamCreateWithFlags
/* NOTE(review): the CUDA runtime does not appear to provide
 * cudaStreamDestroyWithFlags (streams are released with cudaStreamDestroy).
 * The alias is kept so the set of macro names is unchanged, but any use of
 * it would fail to compile -- confirm and consider removing. */
#define gpuStreamDestroyWithFlags         cudaStreamDestroyWithFlags
#define gpuStreamDefault                  cudaStreamDefault
#define gpublasStatus_t                   cublasStatus_t
#define gpuEventCreate                    cudaEventCreate
#define gpuEventRecord                    cudaEventRecord
#define gpuMemGetInfo                     cudaMemGetInfo
#define gpuOccupancyMaxPotentialBlockSize cudaOccupancyMaxPotentialBlockSize
#define gpuEventElapsedTime               cudaEventElapsedTime
#define gpuDeviceReset                    cudaDeviceReset
#define gpuMallocHost                     cudaMallocHost
#define gpuEvent_t                        cudaEvent_t
#define gpuMemset                         cudaMemset

/* BLAS status codes */
#define GPUBLAS_STATUS_SUCCESS            CUBLAS_STATUS_SUCCESS
#define GPUBLAS_STATUS_NOT_INITIALIZED    CUBLAS_STATUS_NOT_INITIALIZED
#define GPUBLAS_STATUS_ALLOC_FAILED       CUBLAS_STATUS_ALLOC_FAILED
#define GPUBLAS_STATUS_INVALID_VALUE      CUBLAS_STATUS_INVALID_VALUE
#define GPUBLAS_STATUS_ARCH_MISMATCH      CUBLAS_STATUS_ARCH_MISMATCH
#define GPUBLAS_STATUS_MAPPING_ERROR      CUBLAS_STATUS_MAPPING_ERROR
#define GPUBLAS_STATUS_EXECUTION_FAILED   CUBLAS_STATUS_EXECUTION_FAILED
#define GPUBLAS_STATUS_INTERNAL_ERROR     CUBLAS_STATUS_INTERNAL_ERROR
#define GPUBLAS_STATUS_LICENSE_ERROR      CUBLAS_STATUS_LICENSE_ERROR
#define GPUBLAS_STATUS_NOT_SUPPORTED      CUBLAS_STATUS_NOT_SUPPORTED

/* BLAS handle management and GEMM entry points */
#define gpublasCreate                     cublasCreate
#define gpublasDestroy                    cublasDestroy
#define gpublasHandle_t                   cublasHandle_t
#define gpublasSetStream                  cublasSetStream
#define gpublasDgemm                      cublasDgemm
#define gpublasSgemm                      cublasSgemm
#define gpublasZgemm                      cublasZgemm
#define gpublasCgemm                      cublasCgemm
#define GPUBLAS_OP_N                      CUBLAS_OP_N
#define gpuDoubleComplex                  cuDoubleComplex
#define gpuRuntimeGetVersion              cudaRuntimeGetVersion

/* Kernel index built-ins, spelled with '_' so the same identifier can be
 * aliased by either backend. */
#define threadIdx_x                       threadIdx.x
#define threadIdx_y                       threadIdx.y
#define blockIdx_x                        blockIdx.x
#define blockIdx_y                        blockIdx.y
#define blockDim_x                        blockDim.x
#define blockDim_y                        blockDim.y
#define gridDim_x                         gridDim.x
#define gridDim_y                         gridDim.y
100
101
102
103
104
#elif defined(HAVE_HIP)
105
106
#ifndef __HIP_PLATFORM_AMD__
107
#define __HIP_PLATFORM_AMD__
108
#endif
109
110
#include "hip/hip_runtime_api.h"
111
#include "hip/hip_runtime.h"
112
#include <hipblas/hipblas.h>
113
114
// #include "roctracer_ext.h" // need to pass the include dir directly to HIP_HIPCC_FLAGS
115
// // roctx header file
116
// #include <roctx.h>
117
118
/*
 * HIP backend: map the backend-neutral gpu* / gpublas* / GPUBLAS_* names
 * used by the rest of the library onto the HIP runtime and hipBLAS symbols.
 * Each alias is a plain object-like macro, so argument lists are passed
 * through to the underlying symbol unchanged.
 */

/* Device query and selection */
#define gpuDeviceProp                     hipDeviceProp_t
#define gpuGetDeviceCount                 hipGetDeviceCount
#define gpuGetDeviceProperties            hipGetDeviceProperties
#define gpuSetDevice                      hipSetDevice
#define gpuGetDevice                      hipGetDevice

/* Runtime status type and helpers */
#define gpuError_t                        hipError_t
#define gpuSuccess                        hipSuccess
#define gpuGetErrorString                 hipGetErrorString

/* Memory, streams and transfers */
#define gpuMalloc                         hipMalloc
#define gpuHostMalloc                     hipHostMalloc
#define gpuHostMallocDefault              hipHostMallocDefault
#define gpuMallocManaged                  hipMallocManaged
#define gpuStream_t                       hipStream_t
#define gpuStreamCreate                   hipStreamCreate
#define gpuStreamDestroy                  hipStreamDestroy
#define gpuMemcpyAsync                    hipMemcpyAsync
#define gpuMemcpy2DAsync                  hipMemcpy2DAsync
#define gpuFreeHost                       hipHostFree
#define gpuFree                           hipFree
#define gpuMemPrefetchAsync               hipMemPrefetchAsync // not sure about this
#define gpuStreamSynchronize              hipStreamSynchronize
#define gpuDeviceSynchronize              hipDeviceSynchronize
#define gpuMemcpy                         hipMemcpy
#define gpuMemAttachGlobal                hipMemAttachGlobal
#define gpuMemcpyHostToDevice             hipMemcpyHostToDevice
#define gpuMemcpyDeviceToHost             hipMemcpyDeviceToHost
#define gpuStreamCreateWithFlags          hipStreamCreateWithFlags
/* NOTE(review): the HIP runtime does not appear to provide
 * hipStreamDestroyWithFlags (streams are released with hipStreamDestroy).
 * The alias is kept so the set of macro names is unchanged, but any use of
 * it would fail to compile -- confirm and consider removing. */
#define gpuStreamDestroyWithFlags         hipStreamDestroyWithFlags
#define gpuStreamDefault                  hipStreamDefault
#define gpublasStatus_t                   hipblasStatus_t
#define gpuEventCreate                    hipEventCreate
#define gpuEventRecord                    hipEventRecord
#define gpuMemGetInfo                     hipMemGetInfo
#define gpuOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize
#define gpuEventElapsedTime               hipEventElapsedTime
#define gpuDeviceReset                    hipDeviceReset
/* Same target as gpuHostMalloc above: both names resolve to hipHostMalloc. */
#define gpuMallocHost                     hipHostMalloc
#define gpuEvent_t                        hipEvent_t
#define gpuMemset                         hipMemset

/* BLAS status codes */
#define GPUBLAS_STATUS_SUCCESS            HIPBLAS_STATUS_SUCCESS
#define GPUBLAS_STATUS_NOT_INITIALIZED    HIPBLAS_STATUS_NOT_INITIALIZED
#define GPUBLAS_STATUS_ALLOC_FAILED       HIPBLAS_STATUS_ALLOC_FAILED
#define GPUBLAS_STATUS_INVALID_VALUE      HIPBLAS_STATUS_INVALID_VALUE
#define GPUBLAS_STATUS_ARCH_MISMATCH      HIPBLAS_STATUS_ARCH_MISMATCH
#define GPUBLAS_STATUS_MAPPING_ERROR      HIPBLAS_STATUS_MAPPING_ERROR
#define GPUBLAS_STATUS_EXECUTION_FAILED   HIPBLAS_STATUS_EXECUTION_FAILED
#define GPUBLAS_STATUS_INTERNAL_ERROR     HIPBLAS_STATUS_INTERNAL_ERROR
#define GPUBLAS_STATUS_LICENSE_ERROR      HIPBLAS_STATUS_LICENSE_ERROR
#define GPUBLAS_STATUS_NOT_SUPPORTED      HIPBLAS_STATUS_NOT_SUPPORTED

/* BLAS handle management and GEMM entry points */
#define gpublasCreate                     hipblasCreate
#define gpublasDestroy                    hipblasDestroy
#define gpublasHandle_t                   hipblasHandle_t
#define gpublasSetStream                  hipblasSetStream
#define gpublasDgemm                      hipblasDgemm
#define gpublasSgemm                      hipblasSgemm
#define gpublasZgemm                      hipblasZgemm
#define gpublasCgemm                      hipblasCgemm
#define GPUBLAS_OP_N                      HIPBLAS_OP_N
#define gpuDoubleComplex                  hipblasDoubleComplex
#define gpuRuntimeGetVersion              hipRuntimeGetVersion

/* Kernel index names, spelled with '_' so the same identifier can be
 * aliased by either backend; here they forward to the hip*Idx_* / hip*Dim_*
 * names. */
#define threadIdx_x                       hipThreadIdx_x
#define threadIdx_y                       hipThreadIdx_y
#define blockIdx_x                        hipBlockIdx_x
#define blockIdx_y                        hipBlockIdx_y
#define blockDim_x                        hipBlockDim_x
#define blockDim_y                        hipBlockDim_y
#define gridDim_x                         hipGridDim_x
#define gridDim_y                         hipGridDim_y
186
187
188
#endif
189
190
191
/*
 * gpublasCheckErrors(fn)
 *
 * Evaluates the expression `fn` exactly once and stores the result in a
 * gpublasStatus_t. If the result differs from GPUBLAS_STATUS_SUCCESS, the
 * numeric status and the call site's __FILE__/__LINE__ are written to
 * stderr, followed by an abort banner, and the process ends via exit(1).
 *
 * The expansion is one do { ... } while (0) statement WITHOUT a trailing
 * semicolon: write `gpublasCheckErrors(call);` at the use site. This keeps
 * the macro usable as the body of an unbraced if/else.
 *
 * The argument is parenthesized in the expansion, and the temporary uses a
 * non-reserved name (identifiers starting with "__" belong to the
 * implementation).
 *
 * fprintf/stderr and exit must already be declared where the macro is
 * expanded; this header does not include <stdio.h> or <stdlib.h> itself.
 */
#define gpublasCheckErrors(fn)                                              \
    do {                                                                    \
        gpublasStatus_t gpublas_err_ = (fn);                                \
        if (gpublas_err_ != GPUBLAS_STATUS_SUCCESS) {                       \
            fprintf(stderr, "Fatal gpublas error: %d (at %s:%d)\n",         \
                    (int)(gpublas_err_), __FILE__, __LINE__);               \
            fprintf(stderr, "*** FAILED - ABORTING\n");                     \
            exit(1);                                                        \
        }                                                                   \
    } while (0)
202
203
204
#endif
/* __SUPERLU_GPUWRAPPER */
SRC
gpu_wrapper.h
Generated by
1.9.4