Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpenCl: fix NVIDIA #2113

Merged
merged 2 commits into from
Dec 3, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions xmrstak/backend/amd/amd_gpu/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
options += " -DSTRIDED_INDEX=" + std::to_string(strided_index);
options += " -DMEM_CHUNK_EXPONENT=" + std::to_string(mem_chunk_exp) + "U";
options += " -DCOMP_MODE=" + std::to_string(needCompMode);
options += " -DMEMORY=" + std::to_string(hashMemSize) + "LLU";
options += " -DMEMORY=" + std::to_string(hashMemSize) + "LU";
options += " -DALGO=" + std::to_string(miner_algo[ii]);
options += " -DCN_UNROLL=" + std::to_string(ctx->unroll);
/* AMD driver output is something like: `1445.5 (VM)`
Expand Down Expand Up @@ -1276,7 +1276,7 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo)

if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->OutputBuffer, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(cl_uint), &zero, 0, NULL, NULL)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret));
printer::inst()->print_msg(L1,"Error %s when calling clEnqueueWriteBuffer to fetch results.", err_to_str(ret));
return ERR_OCL_API;
}

Expand Down
15 changes: 10 additions & 5 deletions xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
Original file line number Diff line number Diff line change
Expand Up @@ -426,8 +426,13 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad,
if (get_local_id(1) == 0)
{
__local ulong* State = State_buf + get_local_id(0) * 25;

// NVIDIA
#ifdef __NV_CL_C_VERSION
for(uint i = 0; i < 8; ++i)
State[i] = input[i];
#else
((__local ulong8 *)State)[0] = vload8(0, input);
#endif
State[8] = input[8];
State[9] = input[9];
State[10] = input[10];
Expand Down Expand Up @@ -477,7 +482,7 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad,

mem_fence(CLK_LOCAL_MEM_FENCE);

// cryptonight_heavy || cryptonight_haven || cryptonight_bittube2 || cryptonight_superfast
// cryptonight_heavy || cryptonight_haven || cryptonight_bittube2 || cryptonight_superfast
#if (ALGO == 4 || ALGO == 9 || ALGO == 10 || ALGO == 12)
__local uint4 xin[8][8];
{
Expand Down Expand Up @@ -567,7 +572,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states

// cryptonight_monero_v8
#if(ALGO==11)
# ifdef __clang__
# if defined(__clang__) && !defined(__NV_CL_C_VERSION)
__local uint RCP[256];
# endif

Expand All @@ -582,7 +587,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
AES0[i] = tmp;
AES1[i] = rotate(tmp, 8U);
// cryptonight_monero_v8
#if(ALGO==11 && defined(__clang__))
#if(ALGO==11 && (defined(__clang__) && !defined(__NV_CL_C_VERSION)))
RCP[i] = RCP_C[i];
#endif
}
Expand Down Expand Up @@ -718,7 +723,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
// Quotient may be as large as (2^64 - 1)/(2^31 + 1) = 8589934588 = 2^33 - 4
// We drop the highest bit to fit both quotient and remainder in 32 bits

# ifdef __clang__
# if defined(__clang__) && !defined(__NV_CL_C_VERSION)
division_result = fast_div_v2(RCP, c[1], d);
# else
division_result = fast_div_v2(c[1], d);
Expand Down
4 changes: 2 additions & 2 deletions xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ static const __constant uint RCP_C[256] =
};

// ROCm produces invalid results if get_reciprocal is used without the lookup table
#ifdef __clang__
#if defined(__clang__) && !defined(__NV_CL_C_VERSION)

inline uint get_reciprocal(const __local uchar *RCP, uint a)
{
Expand Down Expand Up @@ -83,7 +83,7 @@ inline uint get_reciprocal(uint a)

#endif

#ifdef __clang__
#if defined(__clang__) && !defined(__NV_CL_C_VERSION)

inline uint2 fast_div_v2(const __local uint *RCP, ulong a, uint b)
{
Expand Down