diff --git a/WifSolverCuda/main.cu b/WifSolverCuda/main.cu index 6e7a3f4..8719102 100644 --- a/WifSolverCuda/main.cu +++ b/WifSolverCuda/main.cu @@ -15,7 +15,6 @@ #include "lib/SECP256k1.h" - using namespace std; void processCandidate(Int& toTest); @@ -32,7 +31,6 @@ void restoreSettings(string fileStatusRestore); cudaError_t processCuda(); - int DEVICE_NR = 0; unsigned int BLOCK_THREADS = 0; unsigned int BLOCK_NUMBER = 0; @@ -74,7 +72,8 @@ Secp256K1* secp; int main(int argc, char** argv) { - printf("WifSolver 0.4.8\n\n"); + printf("WifSolver 0.4.9\n\n"); + printf("Use parameter '-h' for help and list of available parameters\n\n"); if (readArgs(argc, argv)) { showHelp(); @@ -83,6 +82,7 @@ int main(int argc, char** argv) } if (showDevices) { listDevices(); + printFooter(); return 0; } if (DECODE) { @@ -167,15 +167,18 @@ cudaError_t processCuda() { const uint32_t expectedChecksum = IS_CHECKSUM ? CHECKSUM.GetInt32() : 0; uint64_t counter = 0; - + bool anyResult = false; + + size_t RANGE_TRANSFER_SIZE = NB64BLOCK * sizeof(uint64_t); + size_t COLLECTOR_TRANSFER_SIZE = COLLECTOR_SIZE * sizeof(uint64_t); + std::chrono::steady_clock::time_point beginCountHashrate = std::chrono::steady_clock::now(); std::chrono::steady_clock::time_point beginCountStatus = std::chrono::steady_clock::now(); - - while (!RESULT && RANGE_START.IsLower(&RANGE_END)) { + while (!RESULT && RANGE_START.IsLower(&RANGE_END)) { //prepare launch __Load(buffRangeStart, RANGE_START.bits64); - cudaStatus = cudaMemcpy(dev_buffRangeStart, buffRangeStart, NB64BLOCK * sizeof(uint64_t), cudaMemcpyHostToDevice); + cudaStatus = cudaMemcpy(dev_buffRangeStart, buffRangeStart, RANGE_TRANSFER_SIZE, cudaMemcpyHostToDevice); //launch work if (COMPRESSED) { if (IS_CHECKSUM) { @@ -205,45 +208,46 @@ cudaError_t processCuda() { //if (useCollector) { //summarize results - cudaStatus = cudaMemcpy(buffCollectorWork, dev_buffCollectorWork, 1 * sizeof(bool), cudaMemcpyDeviceToHost); - bool anyResult = buffCollectorWork[0]; - buffCollectorWork[0] = false; - cudaStatus = cudaMemcpyAsync(dev_buffCollectorWork, buffCollectorWork, 1 * sizeof(bool), cudaMemcpyHostToDevice); - if (anyResult) { + cudaStatus = cudaMemcpy(buffCollectorWork, dev_buffCollectorWork, sizeof(bool), cudaMemcpyDeviceToHost); + if (buffCollectorWork[0]) { + anyResult = true; + buffCollectorWork[0] = false; + cudaStatus = cudaMemcpyAsync(dev_buffCollectorWork, buffCollectorWork, sizeof(bool), cudaMemcpyHostToDevice); for (int i = 0; i < COLLECTOR_SIZE; i++) { buffResult[i] = 0; } - cudaStatus = cudaMemcpy(dev_buffResult, buffResult, COLLECTOR_SIZE * sizeof(uint64_t), cudaMemcpyHostToDevice); - } - while (anyResult && !RESULT) { - resultCollector << > > (dev_buffDeviceResult, dev_buffResult, THREAD_STEPS * BLOCK_THREADS); - cudaStatus = cudaGetLastError(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(cudaStatus)); - goto Error; - } - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel!\n", cudaStatus); - goto Error; - } - cudaStatus = cudaMemcpy(buffResult, dev_buffResult, COLLECTOR_SIZE * sizeof(uint64_t), cudaMemcpyDeviceToHost); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaMemcpy failed!"); - goto Error; - } - anyResult = false; - for (int i = 0; i < COLLECTOR_SIZE; i++) { - if (buffResult[i] != 0xffffffffffff) { - Int toTest = new Int(&RANGE_START); - Int diff = new Int(&STRIDE); - diff.Mult(buffResult[i]); - toTest.Add(&diff); - processCandidate(toTest); - anyResult = true; + cudaStatus = cudaMemcpy(dev_buffResult, buffResult, COLLECTOR_TRANSFER_SIZE, cudaMemcpyHostToDevice); + while (anyResult && !RESULT) { + resultCollector << > > (dev_buffDeviceResult, dev_buffResult, THREAD_STEPS * BLOCK_THREADS); + cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "kernel 'resultCollector' launch failed: %s\n", cudaGetErrorString(cudaStatus)); + goto Error; } - } - } + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceSynchronize 'resultCollector' returned error code %d after launching kernel!\n", cudaStatus); + goto Error; + } + cudaStatus = cudaMemcpy(buffResult, dev_buffResult, COLLECTOR_TRANSFER_SIZE, cudaMemcpyDeviceToHost); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMemcpy failed!"); + goto Error; + } + anyResult = false; + + for (int i = 0; i < COLLECTOR_SIZE; i++) { + if (buffResult[i] != 0xffffffffffff) { + Int toTest = new Int(&RANGE_START); + Int diff = new Int(&STRIDE); + diff.Mult(buffResult[i]); + toTest.Add(&diff); + processCandidate(toTest); + anyResult = true; + } + } + }//while + }//anyResult to test //} /*else { //pure output, for debug @@ -265,14 +269,14 @@ cudaError_t processCuda() { RANGE_START.Add(&loopStride); counter += outputSize; int64_t tHash = std::chrono::duration_cast(std::chrono::steady_clock::now() - beginCountHashrate).count(); - int64_t tStatus = std::chrono::duration_cast(std::chrono::steady_clock::now() - beginCountStatus).count(); + //int64_t tStatus = std::chrono::duration_cast(std::chrono::steady_clock::now() - beginCountStatus).count(); if (tHash > 5) { double speed = (double)((double)counter / tHash) / 1000000.0; printSpeed(speed); counter = 0; beginCountHashrate = std::chrono::steady_clock::now(); } - if (tStatus > fileStatusInterval) { + if (std::chrono::duration_cast(std::chrono::steady_clock::now() - beginCountStatus).count() > fileStatusInterval) { saveStatus(); beginCountStatus = std::chrono::steady_clock::now(); } @@ -476,6 +480,9 @@ void printConfig() { printf("Range start: %s\n", RANGE_START.GetBase16().c_str()); printf("Range end : %s\n", RANGE_END.GetBase16().c_str()); printf("Stride : %s\n", STRIDE.GetBase16().c_str()); + if (IS_CHECKSUM) { + printf("Checksum : %s\n", CHECKSUM.GetBase16().c_str()); + } if (!TARGET_ADDRESS.empty()) { printf( "Target : %s\n", TARGET_ADDRESS.c_str()); } @@ -483,10 +490,7 @@ void printConfig() { printf("Target COMPRESSED\n"); } else { printf("Target UNCOMPRESSED\n"); - } - if (IS_CHECKSUM) { - printf("Checksum : %s\n", CHECKSUM.GetBase16().c_str()); - } + } printf( "\n"); printf( "number of blocks: %d\n", BLOCK_NUMBER); printf( "number of threads: %d\n", BLOCK_THREADS); @@ -508,11 +512,12 @@ bool checkDevice() { else { cudaDeviceProp props; cudaStatus = cudaGetDeviceProperties(&props, DEVICE_NR); - printf("Using:\n"); + printf("Using GPU nr %d:\n", DEVICE_NR); printf("%s (%2d procs)\n", props.name, props.multiProcessorCount); printf("maxThreadsPerBlock: %2d\n\n", props.maxThreadsPerBlock); if (BLOCK_NUMBER == 0) { BLOCK_NUMBER = props.multiProcessorCount * 8; + } if (BLOCK_THREADS == 0) { BLOCK_THREADS = (props.maxThreadsPerBlock / 8) * 5; @@ -588,6 +593,9 @@ bool readArgs(int argc, char** argv) { } else if (strcmp(argv[a], "-u") == 0) { COMPRESSED = false; + if (p2sh) { + COMPRESSED = true; + } } else if (strcmp(argv[a], "-t") == 0) { a++; @@ -637,6 +645,7 @@ bool readArgs(int argc, char** argv) { TARGET_ADDRESS = string(argv[a]); if (argv[a][0] == '3') { p2sh = true; + COMPRESSED = true; } } else if (strcmp(argv[a], "-checksum") == 0) { @@ -697,6 +706,7 @@ void listDevices() { printf("Device Number: %d\n", i); printf(" %s\n", prop.name); printf(" %2d procs\n", prop.multiProcessorCount); - printf(" maxThreadsPerBlock: %2d\n\n", prop.maxThreadsPerBlock); + printf(" maxThreadsPerBlock: %2d\n", prop.maxThreadsPerBlock); + printf(" version majorminor: %d%d\n\n", prop.major, prop.minor); } } \ No newline at end of file