Skip to content

Commit

Permalink
Improved loading of large list
Browse files Browse the repository at this point in the history
  • Loading branch information
JeanLucPons committed Mar 20, 2019
1 parent 06acb37 commit e1d8a9c
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 92 deletions.
2 changes: 1 addition & 1 deletion Base58.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ bool DecodeBase58(const char* psz, std::vector<uint8_t> &vch)
psz++;
}

int length = strlen(psz);
int length = (int)strlen(psz);

// Process the characters
int digitslen = 1;
Expand Down
54 changes: 26 additions & 28 deletions GPU/GPUCompute.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
//
// We use affine coordinates for elliptic curve points (i.e. Z=1)

__device__ __noinline__ void CheckPoint(uint32_t *_h, int32_t incr, int32_t endo, int32_t mode,prefix_t *prefix, uint32_t tid, uint32_t *lookup32, uint32_t *out) {
__device__ __noinline__ void CheckPoint(uint32_t *_h, int32_t incr, int32_t endo, int32_t mode,prefix_t *prefix,
uint32_t tid, uint32_t *lookup32, uint32_t maxFound, uint32_t *out) {

uint32_t off;
prefixl_t l32;
Expand All @@ -41,32 +42,29 @@ __device__ __noinline__ void CheckPoint(uint32_t *_h, int32_t incr, int32_t endo
if (hit) {

if (lookup32) {

off = lookup32[pr0];
l32 = _h[0];
st= off;
ed = off + hit - 1;
while(st<=ed) {
mi = (st+ed)/2;
lmi = lookup32[mi];
if(l32<lmi) {
ed = mi - 1;
} else if(l32==lmi) {
// found
goto addItem;
} else {
st = mi + 1;
}
}

return;

if(l32<lmi) {
ed = mi - 1;
} else if(l32==lmi) {
// found
goto addItem;
} else {
st = mi + 1;
}
}
return;
}

addItem:

pos = atomicAdd(out, 1);
if (pos < MAX_FOUND) {
if (pos < maxFound) {
out[pos*ITEM_SIZE32 + 1] = tid;
out[pos*ITEM_SIZE32 + 2] = (uint32_t)(incr << 16) | (uint32_t)(mode << 15) | (uint32_t)(endo);
out[pos*ITEM_SIZE32 + 3] = _h[0];
Expand All @@ -80,10 +78,10 @@ __device__ __noinline__ void CheckPoint(uint32_t *_h, int32_t incr, int32_t endo

}

#define CHECK_POINT(_h,incr,endo,mode) CheckPoint(_h,incr,endo,mode,prefix,tid,lookup32,out)
#define CHECK_POINT(_h,incr,endo,mode) CheckPoint(_h,incr,endo,mode,prefix,tid,lookup32,maxFound,out)

__device__ __noinline__ void CheckHashComp(prefix_t *prefix, uint64_t *px, uint64_t *py,
int32_t incr, uint32_t tid, uint32_t *lookup32, uint32_t *out) {
__device__ __noinline__ void CheckHashComp(prefix_t *prefix, uint64_t *px, uint64_t *py, int32_t incr,
uint32_t tid, uint32_t *lookup32, uint32_t maxFound, uint32_t *out) {

uint32_t h[20];
uint64_t pe1x[4];
Expand All @@ -110,8 +108,8 @@ __device__ __noinline__ void CheckHashComp(prefix_t *prefix, uint64_t *px, uint6

}

__device__ __noinline__ void CheckHashUncomp(prefix_t *prefix, uint64_t *px, uint64_t *py,
int32_t incr, uint32_t tid, uint32_t *lookup32, uint32_t *out) {
__device__ __noinline__ void CheckHashUncomp(prefix_t *prefix, uint64_t *px, uint64_t *py, int32_t incr,
uint32_t tid, uint32_t *lookup32, uint32_t maxFound, uint32_t *out) {

uint32_t h[5];
uint64_t pe1x[4];
Expand All @@ -138,28 +136,28 @@ __device__ __noinline__ void CheckHashUncomp(prefix_t *prefix, uint64_t *px, uin

}

__device__ __noinline__ void CheckHash(uint32_t mode, prefix_t *prefix, uint64_t *px, uint64_t *py,
int32_t incr, uint32_t tid, uint32_t *lookup32, uint32_t *out) {
__device__ __noinline__ void CheckHash(uint32_t mode, prefix_t *prefix, uint64_t *px, uint64_t *py, int32_t incr,
uint32_t tid, uint32_t *lookup32, uint32_t maxFound, uint32_t *out) {

switch (mode) {
case SEARCH_COMPRESSED:
CheckHashComp(prefix, px, py, incr, tid, lookup32, out);
CheckHashComp(prefix, px, py, incr, tid, lookup32, maxFound, out);
break;
case SEARCH_UNCOMPRESSED:
CheckHashUncomp(prefix, px, py, incr, tid, lookup32, out);
CheckHashUncomp(prefix, px, py, incr, tid, lookup32, maxFound, out);
break;
case SEARCH_BOTH:
CheckHashComp(prefix, px, py, incr, tid, lookup32, out);
CheckHashUncomp(prefix, px, py, incr, tid, lookup32, out);
CheckHashComp(prefix, px, py, incr, tid, lookup32, maxFound, out);
CheckHashUncomp(prefix, px, py, incr, tid, lookup32, maxFound, out);
break;
}

}

#define CHECK_PREFIX(incr) CheckHash(mode, sPrefix, px, py, j*GRP_SIZE + (incr), tid, lookup32, out)
#define CHECK_PREFIX(incr) CheckHash(mode, sPrefix, px, py, j*GRP_SIZE + (incr), tid, lookup32, maxFound, out)

__device__ void ComputeKeys(uint32_t mode, uint64_t *startx, uint64_t *starty,
prefix_t *sPrefix, uint32_t *lookup32, uint32_t *out) {
prefix_t *sPrefix, uint32_t *lookup32, uint32_t maxFound, uint32_t *out) {

uint64_t dx[GRP_SIZE/2+1][4];
uint64_t px[4];
Expand Down
31 changes: 14 additions & 17 deletions GPU/GPUEngine.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1152,11 +1152,11 @@ __device__ __noinline__ void _GetHash160(uint64_t *x, uint64_t *y, uint8_t *hash

// ---------------------------------------------------------------------------------------

__global__ void comp_keys(uint32_t mode,prefix_t *prefix, uint32_t *lookup32, uint64_t *keys, uint32_t *found) {
__global__ void comp_keys(uint32_t mode,prefix_t *prefix, uint32_t *lookup32, uint64_t *keys, uint32_t maxFound, uint32_t *found) {

int xPtr = (blockIdx.x*blockDim.x) * 8;
int yPtr = xPtr + 4 * NB_TRHEAD_PER_GROUP;
ComputeKeys(mode, keys + xPtr, keys + yPtr, prefix, lookup32, found);
ComputeKeys(mode, keys + xPtr, keys + yPtr, prefix, lookup32, maxFound, found);

}

Expand Down Expand Up @@ -1278,7 +1278,7 @@ int _ConvertSMVer2Cores(int major, int minor) {

}

GPUEngine::GPUEngine(int nbThreadGroup, int gpuId) {
GPUEngine::GPUEngine(int nbThreadGroup, int gpuId, uint32_t maxFound) {

// Initialise CUDA
initialised = false;
Expand Down Expand Up @@ -1310,8 +1310,10 @@ GPUEngine::GPUEngine(int nbThreadGroup, int gpuId) {
if (nbThreadGroup == -1)
nbThreadGroup = deviceProp.multiProcessorCount * 8;

nbThread = nbThreadGroup * NB_TRHEAD_PER_GROUP;

this->nbThread = nbThreadGroup * NB_TRHEAD_PER_GROUP;
this->maxFound = maxFound;
this->outputSize = (maxFound*ITEM_SIZE + 4);

char tmp[256];
sprintf(tmp,"GPU #%d %s (%dx%d cores) Grid(%dx%d)",
gpuId,deviceProp.name,deviceProp.multiProcessorCount,
Expand Down Expand Up @@ -1364,21 +1366,17 @@ GPUEngine::GPUEngine(int nbThreadGroup, int gpuId) {
printf("GPUEngine: Allocate input pinned memory: %s\n", cudaGetErrorString(err));
return;
}
err = cudaMalloc((void **)&outputPrefix, OUTPUT_SIZE);
err = cudaMalloc((void **)&outputPrefix, outputSize);
if (err != cudaSuccess) {
printf("GPUEngine: Allocate output memory: %s\n", cudaGetErrorString(err));
return;
}
err = cudaHostAlloc(&outputPrefixPinned, OUTPUT_SIZE, cudaHostAllocWriteCombined | cudaHostAllocMapped);
err = cudaHostAlloc(&outputPrefixPinned, outputSize, cudaHostAllocWriteCombined | cudaHostAllocMapped);
if (err != cudaSuccess) {
printf("GPUEngine: Allocate output pinned memory: %s\n", cudaGetErrorString(err));
return;
}

//double P = 1/65536.0;
//double Plost = Psk(STEP_SIZE,MAX_FOUND,P);
//printf("Plost=%g\n",Plost);

searchMode = SEARCH_COMPRESSED;
initialised = true;
inputPrefixLookUp = NULL;
Expand Down Expand Up @@ -1532,7 +1530,7 @@ bool GPUEngine::callKernel() {

// Call the kernel (Perform STEP_SIZE keys per thread)
comp_keys<<< nbThread / NB_TRHEAD_PER_GROUP, NB_TRHEAD_PER_GROUP >>>
(searchMode, inputPrefix, inputPrefixLookUp, inputKey, outputPrefix);
(searchMode, inputPrefix, inputPrefixLookUp, inputKey, maxFound, outputPrefix);

cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
Expand Down Expand Up @@ -1587,8 +1585,7 @@ bool GPUEngine::Launch(std::vector<ITEM> &prefixFound,bool spinWait) {

if(spinWait) {

cudaMemcpy(outputPrefixPinned, outputPrefix, OUTPUT_SIZE,
cudaMemcpyDeviceToHost);
cudaMemcpy(outputPrefixPinned, outputPrefix, outputSize, cudaMemcpyDeviceToHost);

} else {

Expand All @@ -1613,13 +1610,13 @@ bool GPUEngine::Launch(std::vector<ITEM> &prefixFound,bool spinWait) {

// Look for prefix found
uint32_t nbFound = outputPrefixPinned[0];
if (nbFound > MAX_FOUND) {
if (nbFound > maxFound) {
// prefix has been lost
if (!lostWarning) {
printf("\nWarning, %d items lost (try to search with less prefixes or less thread (use -g))\n", (nbFound - MAX_FOUND));
printf("\nWarning, %d items lost\nHint: Search with less prefixes, less threads (-g) or increase maxFound (-m)\n", (nbFound - maxFound));
lostWarning = true;
}
nbFound = MAX_FOUND;
nbFound = maxFound;
}

// We can perform a standard copy, the kernel has ended
Expand Down
11 changes: 5 additions & 6 deletions GPU/GPUEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,8 @@ static const char *searchModes[] = {"Compressed","Uncompressed","Compressed or U

// Number of thread per block
#define NB_TRHEAD_PER_GROUP 128

// Maximum number of 16bit prefix found per kernel
// Avg = (nbThread*STEP_SIZE*nbPrefix16)/65536
#define MAX_FOUND 131072
#define ITEM_SIZE 28
#define ITEM_SIZE32 (ITEM_SIZE/4)
#define OUTPUT_SIZE (MAX_FOUND*ITEM_SIZE+4)
#define _64K 65536

typedef uint16_t prefix_t;
Expand All @@ -62,12 +57,13 @@ class GPUEngine {

public:

GPUEngine(int nbThreadGroup,int gpuId);
GPUEngine(int nbThreadGroup,int gpuId,uint32_t maxFound);
~GPUEngine();
void SetPrefix(std::vector<prefix_t> prefixes);
void SetPrefix(std::vector<LPREFIX> prefixes,uint32_t totalPrefix);
bool SetKeys(Point *p);
void SetSearchMode(int serachMode);
void SetMaxFound(uint32_t max);
bool Launch(std::vector<ITEM> &prefixFound,bool spinWait=false);
int GetNbThread();
int GetGroupSize();
Expand Down Expand Up @@ -98,6 +94,9 @@ class GPUEngine {
uint32_t searchMode;
bool littleEndian;
bool lostWarning;
uint32_t maxFound;
uint32_t outputSize;

};

#endif // GPUENGINEH
15 changes: 8 additions & 7 deletions Vanity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Point _2Gn;
// ----------------------------------------------------------------------------

VanitySearch::VanitySearch(Secp256K1 &secp, vector<std::string> &inputPrefixes,string seed,int searchMode,
bool useGpu, bool stop, string outputFile, bool useSSE) {
bool useGpu, bool stop, string outputFile, bool useSSE, uint32_t maxFound) {

this->secp = secp;
this->searchMode = searchMode;
Expand All @@ -46,6 +46,7 @@ VanitySearch::VanitySearch(Secp256K1 &secp, vector<std::string> &inputPrefixes,s
this->outputFile = outputFile;
this->useSSE = useSSE;
this->nbGPUThread = 0;
this->maxFound = maxFound;
prefixes.clear();

// Create a 65536 items lookup table
Expand Down Expand Up @@ -105,8 +106,8 @@ VanitySearch::VanitySearch(Secp256K1 &secp, vector<std::string> &inputPrefixes,s
lit.lPrefixes.push_back((*items)[j].lPrefix);
sort(lit.lPrefixes.begin(), lit.lPrefixes.end());
usedPrefixL.push_back(lit);
if( lit.lPrefixes.size()>maxI ) maxI = lit.lPrefixes.size();
if( lit.lPrefixes.size()<minI ) minI = lit.lPrefixes.size();
if( (uint32_t)lit.lPrefixes.size()>maxI ) maxI = (uint32_t)lit.lPrefixes.size();
if( (uint32_t)lit.lPrefixes.size()<minI ) minI = (uint32_t)lit.lPrefixes.size();
unique_sPrefix++;
}
if (loadingProgress)
Expand Down Expand Up @@ -227,7 +228,7 @@ bool VanitySearch::initPrefix(std::string &prefix,PREFIX_ITEM *it) {
it->sPrefix = *(prefix_t *)(it->hash160);
it->lPrefix = *(prefixl_t *)(it->hash160);
it->prefix = (char *)prefix.c_str();
it->prefixLength = prefix.length();
it->prefixLength = (int)prefix.length();
it->found = false;
return true;

Expand All @@ -248,7 +249,7 @@ bool VanitySearch::initPrefix(std::string &prefix,PREFIX_ITEM *it) {
it->sPrefix = 0;
it->lPrefix = 0;
it->prefix = (char *)prefix.c_str();
it->prefixLength = prefix.length();
it->prefixLength = (int)prefix.length();
it->found = false;
return true;

Expand Down Expand Up @@ -291,7 +292,7 @@ bool VanitySearch::initPrefix(std::string &prefix,PREFIX_ITEM *it) {
it->isFull = false;
it->lPrefix = 0;
it->prefix = (char *)prefix.c_str();
it->prefixLength = prefix.length();
it->prefixLength = (int)prefix.length();
it->found = false;

return true;
Expand Down Expand Up @@ -1006,7 +1007,7 @@ void VanitySearch::FindKeyGPU(TH_PARAM *ph) {

// Global init
int thId = ph->threadId;
GPUEngine g(ph->gridSize, ph->gpuId);
GPUEngine g(ph->gridSize, ph->gpuId, maxFound);
int nbThread = g.GetNbThread();
Point *p = new Point[nbThread];
Int *keys = new Int[nbThread];
Expand Down
3 changes: 2 additions & 1 deletion Vanity.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class VanitySearch {
public:

VanitySearch(Secp256K1 &secp, std::vector<std::string> &prefix, std::string seed, int searchMode,
bool useGpu,bool stop,std::string outputFile, bool useSSE);
bool useGpu,bool stop,std::string outputFile, bool useSSE,uint32_t maxFound);
void Search(int nbThread,std::vector<int> gpuId,std::vector<int> gridSize);
void FindKeyCPU(TH_PARAM *p);
void FindKeyGPU(TH_PARAM *p);
Expand Down Expand Up @@ -104,6 +104,7 @@ class VanitySearch {
std::string outputFile;
bool useSSE;
bool onlyFull;
uint32_t maxFound;
double _difficulty;
std::vector<PREFIX_TABLE_ITEM> prefixes;
std::vector<prefix_t> usedPrefix;
Expand Down
Loading

0 comments on commit e1d8a9c

Please sign in to comment.