Skip to content

Commit

Permalink
Merge pull request #2113 from psychocrypt/fix-OpenCLNvidia
Browse files (browse the repository at this point in the history)
OpenCl: fix NVIDIA
  • Loading branch information
fireice-uk authored Dec 3, 2018
2 parents 35fb646 + ab19d37 commit b1d8b55
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 9 deletions.
4 changes: 2 additions & 2 deletions xmrstak/backend/amd/amd_gpu/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
options += " -DSTRIDED_INDEX=" + std::to_string(strided_index);
options += " -DMEM_CHUNK_EXPONENT=" + std::to_string(mem_chunk_exp) + "U";
options += " -DCOMP_MODE=" + std::to_string(needCompMode);
options += " -DMEMORY=" + std::to_string(hashMemSize) + "LLU";
options += " -DMEMORY=" + std::to_string(hashMemSize) + "LU";
options += " -DALGO=" + std::to_string(miner_algo[ii]);
options += " -DCN_UNROLL=" + std::to_string(ctx->unroll);
/* AMD driver output is something like: `1445.5 (VM)`
Expand Down Expand Up @@ -1276,7 +1276,7 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo)

if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->OutputBuffer, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(cl_uint), &zero, 0, NULL, NULL)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret));
printer::inst()->print_msg(L1,"Error %s when calling clEnqueueWriteBuffer to fetch results.", err_to_str(ret));
return ERR_OCL_API;
}

Expand Down
15 changes: 10 additions & 5 deletions xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
Original file line number Diff line number Diff line change
Expand Up @@ -426,8 +426,13 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad,
if (get_local_id(1) == 0)
{
__local ulong* State = State_buf + get_local_id(0) * 25;

// NVIDIA
#ifdef __NV_CL_C_VERSION
for(uint i = 0; i < 8; ++i)
State[i] = input[i];
#else
((__local ulong8 *)State)[0] = vload8(0, input);
#endif
State[8] = input[8];
State[9] = input[9];
State[10] = input[10];
Expand Down Expand Up @@ -477,7 +482,7 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad,

mem_fence(CLK_LOCAL_MEM_FENCE);

// cryptonight_heavy || cryptonight_haven || cryptonight_bittube2 || cryptonight_superfast
// cryptonight_heavy || cryptonight_haven || cryptonight_bittube2 || cryptonight_superfast
#if (ALGO == 4 || ALGO == 9 || ALGO == 10 || ALGO == 12)
__local uint4 xin[8][8];
{
Expand Down Expand Up @@ -567,7 +572,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states

// cryptonight_monero_v8
#if(ALGO==11)
# ifdef __clang__
# if defined(__clang__) && !defined(__NV_CL_C_VERSION)
__local uint RCP[256];
# endif

Expand All @@ -582,7 +587,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
AES0[i] = tmp;
AES1[i] = rotate(tmp, 8U);
// cryptonight_monero_v8
#if(ALGO==11 && defined(__clang__))
#if(ALGO==11 && (defined(__clang__) && !defined(__NV_CL_C_VERSION)))
RCP[i] = RCP_C[i];
#endif
}
Expand Down Expand Up @@ -718,7 +723,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
// Quotient may be as large as (2^64 - 1)/(2^31 + 1) = 8589934588 = 2^33 - 4
// We drop the highest bit to fit both quotient and remainder in 32 bits

# ifdef __clang__
# if defined(__clang__) && !defined(__NV_CL_C_VERSION)
division_result = fast_div_v2(RCP, c[1], d);
# else
division_result = fast_div_v2(c[1], d);
Expand Down
4 changes: 2 additions & 2 deletions xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ static const __constant uint RCP_C[256] =
};

// ROCm produces invalid results if get_reciprocal is used without the lookup table
#ifdef __clang__
#if defined(__clang__) && !defined(__NV_CL_C_VERSION)

inline uint get_reciprocal(const __local uchar *RCP, uint a)
{
Expand Down Expand Up @@ -83,7 +83,7 @@ inline uint get_reciprocal(uint a)

#endif

#ifdef __clang__
#if defined(__clang__) && !defined(__NV_CL_C_VERSION)

inline uint2 fast_div_v2(const __local uint *RCP, ulong a, uint b)
{
Expand Down

0 comments on commit b1d8b55

Please sign in to comment.