Skip to content

Commit

Permalink
On failure, nlCUDACheck displays the last CUDA error in human-readable form.
Browse files Browse the repository at this point in the history
  • Loading branch information
BrunoLevy committed Nov 19, 2024
1 parent 006e95b commit d0a6f51
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 9 deletions.
13 changes: 8 additions & 5 deletions src/lib/geogram/NL/nl_amgcl.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Interface between Warpdrive and the AMGCL solver.
* Interface between OpenNL and the AMGCL solver.
* Works both on the CPU and the GPU.
*/

#include <geogram/NL/nl.h>
Expand Down Expand Up @@ -521,6 +522,7 @@ namespace amgcl { namespace backend {
if(b != 0.0) {
// tmp <- z
if(M.temp_ == nullptr) {
nl_printf("New temp vector, size=%d\n",N);
M.temp_ = NL_NEW_VECTOR(nlCUDABlas(), NL_DEVICE_MEMORY, N);
}
blas->Dcopy(blas,N,z.data_,1,M.temp_,1);
Expand Down Expand Up @@ -740,10 +742,11 @@ NLboolean nlSolveAMGCL() {
nlSolveAMGCL_generic<GPU>() :
nlSolveAMGCL_generic<CPU>() ;

// (usually I do not like templates, that promise customization, which is
// in general either impossible or one has to pay agonizing pain for it,
// with its well-designed abstraction hierarchy, AMGCL is a noticeable
// exception !)
// Usually I do not like templates, because templates promise customization,
// but in general, when abstraction is done through templates, customization
// is either impossible or one has to pay agonizing pain for it.
// BUT with its well-designed, easy-to-understand, not too deep abstraction
// hierarchy, AMGCL is a notable exception!
}

/******************************************************************************/
Expand Down
29 changes: 25 additions & 4 deletions src/lib/geogram/NL/nl_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,11 @@ typedef cudaError_t (*FUNPTR_cudaMemset)(
);
typedef cudaError_t (*FUNPTR_cudaMemGetInfo)(size_t* free, size_t* total);

/**
 * \brief Function pointer types for the CUDA runtime error-reporting
 *  functions.
 * \details cudaGetLastError() and cudaPeekAtLastError() both take no
 *  argument and return a cudaError_t; in the CUDA runtime the former
 *  reads and clears the last error whereas the latter reads it without
 *  clearing it. cudaGetErrorString() and cudaGetErrorName() both take
 *  a cudaError_t and return a C string for it.
 */
typedef cudaError_t (*FUNPTR_cudaGetLastError)(void);
typedef cudaError_t (*FUNPTR_cudaPeekAtLastError)(void);
typedef const char* (*FUNPTR_cudaGetErrorString)(cudaError_t error);
typedef const char* (*FUNPTR_cudaGetErrorName)(cudaError_t error);

/**
* \brief Finds and initializes a function pointer to
* one of the functions in CUDA.
Expand Down Expand Up @@ -543,6 +548,10 @@ typedef struct {
FUNPTR_cudaMemcpy cudaMemcpy;
FUNPTR_cudaMemset cudaMemset;
FUNPTR_cudaMemGetInfo cudaMemGetInfo;
FUNPTR_cudaGetLastError cudaGetLastError;
FUNPTR_cudaPeekAtLastError cudaPeekAtLastError;
FUNPTR_cudaGetErrorString cudaGetErrorString;
FUNPTR_cudaGetErrorName cudaGetErrorName;

NLdll DLL_cublas;
cublasHandle_t HNDL_cublas;
Expand Down Expand Up @@ -603,6 +612,10 @@ NLboolean nlExtensionIsInitialized_CUDA(void) {
CUDA()->cudaMemcpy == NULL ||
CUDA()->cudaMemset == NULL ||
CUDA()->cudaMemGetInfo == NULL ||
CUDA()->cudaGetLastError == NULL ||
CUDA()->cudaPeekAtLastError == NULL ||
CUDA()->cudaGetErrorString == NULL ||
CUDA()->cudaGetErrorName == NULL ||

CUDA()->DLL_cublas == NULL ||
CUDA()->HNDL_cublas == NULL ||
Expand Down Expand Up @@ -875,6 +888,10 @@ NLboolean nlInitExtension_CUDA(void) {
find_cuda_func(cudaMemcpy);
find_cuda_func(cudaMemset);
find_cuda_func(cudaMemGetInfo);
find_cuda_func(cudaGetLastError);
find_cuda_func(cudaPeekAtLastError);
find_cuda_func(cudaGetErrorString);
find_cuda_func(cudaGetErrorName);

CUDA()->devID = getBestDeviceID();

Expand Down Expand Up @@ -1074,8 +1091,14 @@ NLboolean nlInitExtension_CUDA(void) {
}

static void nlCUDACheckImpl(int status, int line) {
cudaError_t last_error = CUDA()->cudaGetLastError();
if(status != 0) {
nl_fprintf(stderr,"nl_cuda.c:%d fatal error %d\n",line, status);
nl_fprintf(
stderr,"%s (%s)\n",
CUDA()->cudaGetErrorName(last_error),
CUDA()->cudaGetErrorString(last_error)
);
CUDA()->cudaDeviceReset();
exit(-1);
}
Expand Down Expand Up @@ -1292,7 +1315,7 @@ void nlCUDAMatrixSpMV(
Mcuda = Mcuda->next_slice
) {
/*
* Note: y is computed slice-per-slice !
* Note: each slice computes a different part of y
*/
nlCRSMatrixCUDASliceSpMV(Mcuda, x, y, alpha, beta);
}
Expand Down Expand Up @@ -1402,8 +1425,7 @@ NLCUDASparseMatrix* CreateCUDASlicesFromCRSMatrixSlices(
);

/*
* If there are still rows in the CRS matrix,
* create new slices (recursively)
* If there are still rows in the CRS matrix, create new slices (recursively)
*/
if(row_offset + Mcuda->m < CRS->m) {
Mcuda->next_slice = CreateCUDASlicesFromCRSMatrixSlices(
Expand Down Expand Up @@ -1443,7 +1465,6 @@ NLMatrix nlCUDAMatrixNewFromCRSMatrix(NLMatrix M_in) {
Mcuda->next_slice = CreateCUDASlicesFromCRSMatrixSlices(
Mcuda, M, 0
);
nl_printf("Matrix has %d slices\n", Mcuda->nb_slices);
return (NLMatrix)Mcuda;
}

Expand Down

0 comments on commit d0a6f51

Please sign in to comment.