forked from cryptonotefoundation/cryptonote
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
2,517 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#ifndef BLAKE3_H | ||
#define BLAKE3_H | ||
|
||
#include <stddef.h> | ||
#include <stdint.h> | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
#define BLAKE3_NO_AVX512 1 | ||
#define BLAKE3_NO_AVX2 1 | ||
#define BLAKE3_NO_SSE41 1 | ||
#define BLAKE3_NO_SSE2 1 | ||
|
||
#define BLAKE3_VERSION_STRING "1.3.1" | ||
#define BLAKE3_KEY_LEN 32 | ||
#define BLAKE3_OUT_LEN 32 | ||
#define BLAKE3_BLOCK_LEN 64 | ||
#define BLAKE3_CHUNK_LEN 1024 | ||
#define BLAKE3_MAX_DEPTH 54 | ||
|
||
// This struct is a private implementation detail. It has to be here because | ||
// it's part of blake3_hasher below. | ||
typedef struct { | ||
uint32_t cv[8]; | ||
uint64_t chunk_counter; | ||
uint8_t buf[BLAKE3_BLOCK_LEN]; | ||
uint8_t buf_len; | ||
uint8_t blocks_compressed; | ||
uint8_t flags; | ||
} blake3_chunk_state; | ||
|
||
typedef struct { | ||
uint32_t key[8]; | ||
blake3_chunk_state chunk; | ||
uint8_t cv_stack_len; | ||
// The stack size is MAX_DEPTH + 1 because we do lazy merging. For example, | ||
// with 7 chunks, we have 3 entries in the stack. Adding an 8th chunk | ||
// requires a 4th entry, rather than merging everything down to 1, because we | ||
// don't know whether more input is coming. This is different from how the | ||
// reference implementation does things. | ||
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN]; | ||
} blake3_hasher; | ||
|
||
const char *blake3_version(void); | ||
void blake3_hasher_init(blake3_hasher *self); | ||
void blake3_hasher_init_keyed(blake3_hasher *self, | ||
const uint8_t key[BLAKE3_KEY_LEN]); | ||
void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context); | ||
void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context, | ||
size_t context_len); | ||
void blake3_hasher_update(blake3_hasher *self, const void *input, | ||
size_t input_len); | ||
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out, | ||
size_t out_len); | ||
void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek, | ||
uint8_t *out, size_t out_len); | ||
void blake3_hasher_reset(blake3_hasher *self); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif | ||
|
||
#endif /* BLAKE3_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,277 @@ | ||
#include <stdbool.h> | ||
#include <stddef.h> | ||
#include <stdint.h> | ||
|
||
#include "blake3_impl.h" | ||
|
||
#if defined(IS_X86) | ||
#if defined(_MSC_VER) | ||
#include <intrin.h> | ||
#elif defined(__GNUC__) | ||
#include <immintrin.h> | ||
#else | ||
#error "Unimplemented!" | ||
#endif | ||
#endif | ||
|
||
#define MAYBE_UNUSED(x) (void)((x)) | ||
|
||
#if defined(IS_X86) | ||
static uint64_t xgetbv() { | ||
#if defined(_MSC_VER) | ||
return _xgetbv(0); | ||
#else | ||
uint32_t eax = 0, edx = 0; | ||
__asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0)); | ||
return ((uint64_t)edx << 32) | eax; | ||
#endif | ||
} | ||
|
||
static void cpuid(uint32_t out[4], uint32_t id) { | ||
#if defined(_MSC_VER) | ||
__cpuid((int *)out, id); | ||
#elif defined(__i386__) || defined(_M_IX86) | ||
__asm__ __volatile__("movl %%ebx, %1\n" | ||
"cpuid\n" | ||
"xchgl %1, %%ebx\n" | ||
: "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3]) | ||
: "a"(id)); | ||
#else | ||
__asm__ __volatile__("cpuid\n" | ||
: "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) | ||
: "a"(id)); | ||
#endif | ||
} | ||
|
||
static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) { | ||
#if defined(_MSC_VER) | ||
__cpuidex((int *)out, id, sid); | ||
#elif defined(__i386__) || defined(_M_IX86) | ||
__asm__ __volatile__("movl %%ebx, %1\n" | ||
"cpuid\n" | ||
"xchgl %1, %%ebx\n" | ||
: "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3]) | ||
: "a"(id), "c"(sid)); | ||
#else | ||
__asm__ __volatile__("cpuid\n" | ||
: "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) | ||
: "a"(id), "c"(sid)); | ||
#endif | ||
} | ||
|
||
#endif | ||
|
||
enum cpu_feature { | ||
ZERO = 0, | ||
SSE2 = 1 << 0, | ||
SSSE3 = 1 << 1, | ||
SSE41 = 1 << 2, | ||
AVX = 1 << 3, | ||
AVX2 = 1 << 4, | ||
AVX512F = 1 << 5, | ||
AVX512VL = 1 << 6, | ||
/* ... */ | ||
UNDEFINED = 1 << 30 | ||
}; | ||
|
||
#if !defined(BLAKE3_TESTING) | ||
static /* Allow the variable to be controlled manually for testing */ | ||
#endif | ||
enum cpu_feature g_cpu_features = UNDEFINED; | ||
|
||
#if !defined(BLAKE3_TESTING) | ||
static | ||
#endif | ||
enum cpu_feature | ||
get_cpu_features() { | ||
|
||
if (g_cpu_features != UNDEFINED) { | ||
return g_cpu_features; | ||
} else { | ||
#if defined(IS_X86) | ||
uint32_t regs[4] = {0}; | ||
uint32_t *eax = ®s[0], *ebx = ®s[1], *ecx = ®s[2], *edx = ®s[3]; | ||
(void)edx; | ||
enum cpu_feature features = 0; | ||
cpuid(regs, 0); | ||
const int max_id = *eax; | ||
cpuid(regs, 1); | ||
#if defined(__amd64__) || defined(_M_X64) | ||
features |= SSE2; | ||
#else | ||
if (*edx & (1UL << 26)) | ||
features |= SSE2; | ||
#endif | ||
if (*ecx & (1UL << 0)) | ||
features |= SSSE3; | ||
if (*ecx & (1UL << 19)) | ||
features |= SSE41; | ||
|
||
if (*ecx & (1UL << 27)) { // OSXSAVE | ||
const uint64_t mask = xgetbv(); | ||
if ((mask & 6) == 6) { // SSE and AVX states | ||
if (*ecx & (1UL << 28)) | ||
features |= AVX; | ||
if (max_id >= 7) { | ||
cpuidex(regs, 7, 0); | ||
if (*ebx & (1UL << 5)) | ||
features |= AVX2; | ||
if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm | ||
if (*ebx & (1UL << 31)) | ||
features |= AVX512VL; | ||
if (*ebx & (1UL << 16)) | ||
features |= AVX512F; | ||
} | ||
} | ||
} | ||
} | ||
g_cpu_features = features; | ||
return features; | ||
#else | ||
/* How to detect NEON? */ | ||
return ZERO; | ||
#endif | ||
} | ||
} | ||
|
||
void blake3_compress_in_place(uint32_t cv[8], | ||
const uint8_t block[BLAKE3_BLOCK_LEN], | ||
uint8_t block_len, uint64_t counter, | ||
uint8_t flags) { | ||
#if defined(IS_X86) | ||
const enum cpu_feature features = get_cpu_features(); | ||
MAYBE_UNUSED(features); | ||
#if !defined(BLAKE3_NO_AVX512) | ||
if (features & AVX512VL) { | ||
blake3_compress_in_place_avx512(cv, block, block_len, counter, flags); | ||
return; | ||
} | ||
#endif | ||
#if !defined(BLAKE3_NO_SSE41) | ||
if (features & SSE41) { | ||
blake3_compress_in_place_sse41(cv, block, block_len, counter, flags); | ||
return; | ||
} | ||
#endif | ||
#if !defined(BLAKE3_NO_SSE2) | ||
if (features & SSE2) { | ||
blake3_compress_in_place_sse2(cv, block, block_len, counter, flags); | ||
return; | ||
} | ||
#endif | ||
#endif | ||
blake3_compress_in_place_portable(cv, block, block_len, counter, flags); | ||
} | ||
|
||
void blake3_compress_xof(const uint32_t cv[8], | ||
const uint8_t block[BLAKE3_BLOCK_LEN], | ||
uint8_t block_len, uint64_t counter, uint8_t flags, | ||
uint8_t out[64]) { | ||
#if defined(IS_X86) | ||
const enum cpu_feature features = get_cpu_features(); | ||
MAYBE_UNUSED(features); | ||
#if !defined(BLAKE3_NO_AVX512) | ||
if (features & AVX512VL) { | ||
blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out); | ||
return; | ||
} | ||
#endif | ||
#if !defined(BLAKE3_NO_SSE41) | ||
if (features & SSE41) { | ||
blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out); | ||
return; | ||
} | ||
#endif | ||
#if !defined(BLAKE3_NO_SSE2) | ||
if (features & SSE2) { | ||
blake3_compress_xof_sse2(cv, block, block_len, counter, flags, out); | ||
return; | ||
} | ||
#endif | ||
#endif | ||
blake3_compress_xof_portable(cv, block, block_len, counter, flags, out); | ||
} | ||
|
||
void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, | ||
size_t blocks, const uint32_t key[8], uint64_t counter, | ||
bool increment_counter, uint8_t flags, | ||
uint8_t flags_start, uint8_t flags_end, uint8_t *out) { | ||
#if defined(IS_X86) | ||
const enum cpu_feature features = get_cpu_features(); | ||
MAYBE_UNUSED(features); | ||
#if !defined(BLAKE3_NO_AVX512) | ||
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) { | ||
blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter, | ||
increment_counter, flags, flags_start, flags_end, | ||
out); | ||
return; | ||
} | ||
#endif | ||
#if !defined(BLAKE3_NO_AVX2) | ||
if (features & AVX2) { | ||
blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter, | ||
increment_counter, flags, flags_start, flags_end, | ||
out); | ||
return; | ||
} | ||
#endif | ||
#if !defined(BLAKE3_NO_SSE41) | ||
if (features & SSE41) { | ||
blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter, | ||
increment_counter, flags, flags_start, flags_end, | ||
out); | ||
return; | ||
} | ||
#endif | ||
#if !defined(BLAKE3_NO_SSE2) | ||
if (features & SSE2) { | ||
blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter, | ||
increment_counter, flags, flags_start, flags_end, | ||
out); | ||
return; | ||
} | ||
#endif | ||
#endif | ||
|
||
#if BLAKE3_USE_NEON == 1 | ||
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter, | ||
increment_counter, flags, flags_start, flags_end, out); | ||
return; | ||
#endif | ||
|
||
blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter, | ||
increment_counter, flags, flags_start, flags_end, | ||
out); | ||
} | ||
|
||
// The dynamically detected SIMD degree of the current platform. | ||
size_t blake3_simd_degree(void) { | ||
#if defined(IS_X86) | ||
const enum cpu_feature features = get_cpu_features(); | ||
MAYBE_UNUSED(features); | ||
#if !defined(BLAKE3_NO_AVX512) | ||
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) { | ||
return 16; | ||
} | ||
#endif | ||
#if !defined(BLAKE3_NO_AVX2) | ||
if (features & AVX2) { | ||
return 8; | ||
} | ||
#endif | ||
#if !defined(BLAKE3_NO_SSE41) | ||
if (features & SSE41) { | ||
return 4; | ||
} | ||
#endif | ||
#if !defined(BLAKE3_NO_SSE2) | ||
if (features & SSE2) { | ||
return 4; | ||
} | ||
#endif | ||
#endif | ||
#if BLAKE3_USE_NEON == 1 | ||
return 4; | ||
#endif | ||
return 1; | ||
} |
Oops, something went wrong.