Skip to content

Commit

Permalink
FishHash files
Browse files Browse the repository at this point in the history
  • Loading branch information
HashHound committed Jul 9, 2024
1 parent d2c083d commit 333e1bc
Show file tree
Hide file tree
Showing 11 changed files with 2,517 additions and 0 deletions.
619 changes: 619 additions & 0 deletions src/Fishhash/3rdParty/blake3.c

Large diffs are not rendered by default.

65 changes: 65 additions & 0 deletions src/Fishhash/3rdParty/blake3.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#ifndef BLAKE3_H
#define BLAKE3_H

#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

#define BLAKE3_NO_AVX512 1
#define BLAKE3_NO_AVX2 1
#define BLAKE3_NO_SSE41 1
#define BLAKE3_NO_SSE2 1

#define BLAKE3_VERSION_STRING "1.3.1"
#define BLAKE3_KEY_LEN 32
#define BLAKE3_OUT_LEN 32
#define BLAKE3_BLOCK_LEN 64
#define BLAKE3_CHUNK_LEN 1024
#define BLAKE3_MAX_DEPTH 54

// This struct is a private implementation detail. It has to be here because
// it's part of blake3_hasher below.
typedef struct {
uint32_t cv[8];
uint64_t chunk_counter;
uint8_t buf[BLAKE3_BLOCK_LEN];
uint8_t buf_len;
uint8_t blocks_compressed;
uint8_t flags;
} blake3_chunk_state;

typedef struct {
uint32_t key[8];
blake3_chunk_state chunk;
uint8_t cv_stack_len;
// The stack size is MAX_DEPTH + 1 because we do lazy merging. For example,
// with 7 chunks, we have 3 entries in the stack. Adding an 8th chunk
// requires a 4th entry, rather than merging everything down to 1, because we
// don't know whether more input is coming. This is different from how the
// reference implementation does things.
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
} blake3_hasher;

const char *blake3_version(void);
void blake3_hasher_init(blake3_hasher *self);
void blake3_hasher_init_keyed(blake3_hasher *self,
const uint8_t key[BLAKE3_KEY_LEN]);
void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
size_t context_len);
void blake3_hasher_update(blake3_hasher *self, const void *input,
size_t input_len);
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
size_t out_len);
void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
uint8_t *out, size_t out_len);
void blake3_hasher_reset(blake3_hasher *self);

#ifdef __cplusplus
}
#endif

#endif /* BLAKE3_H */
277 changes: 277 additions & 0 deletions src/Fishhash/3rdParty/blake3_dispatch.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,277 @@
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include "blake3_impl.h"

#if defined(IS_X86)
#if defined(_MSC_VER)
#include <intrin.h>
#elif defined(__GNUC__)
#include <immintrin.h>
#else
#error "Unimplemented!"
#endif
#endif

#define MAYBE_UNUSED(x) (void)((x))

#if defined(IS_X86)
static uint64_t xgetbv() {
#if defined(_MSC_VER)
return _xgetbv(0);
#else
uint32_t eax = 0, edx = 0;
__asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));
return ((uint64_t)edx << 32) | eax;
#endif
}

static void cpuid(uint32_t out[4], uint32_t id) {
#if defined(_MSC_VER)
__cpuid((int *)out, id);
#elif defined(__i386__) || defined(_M_IX86)
__asm__ __volatile__("movl %%ebx, %1\n"
"cpuid\n"
"xchgl %1, %%ebx\n"
: "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
: "a"(id));
#else
__asm__ __volatile__("cpuid\n"
: "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
: "a"(id));
#endif
}

static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
#if defined(_MSC_VER)
__cpuidex((int *)out, id, sid);
#elif defined(__i386__) || defined(_M_IX86)
__asm__ __volatile__("movl %%ebx, %1\n"
"cpuid\n"
"xchgl %1, %%ebx\n"
: "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
: "a"(id), "c"(sid));
#else
__asm__ __volatile__("cpuid\n"
: "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
: "a"(id), "c"(sid));
#endif
}

#endif

enum cpu_feature {
ZERO = 0,
SSE2 = 1 << 0,
SSSE3 = 1 << 1,
SSE41 = 1 << 2,
AVX = 1 << 3,
AVX2 = 1 << 4,
AVX512F = 1 << 5,
AVX512VL = 1 << 6,
/* ... */
UNDEFINED = 1 << 30
};

#if !defined(BLAKE3_TESTING)
static /* Allow the variable to be controlled manually for testing */
#endif
enum cpu_feature g_cpu_features = UNDEFINED;

#if !defined(BLAKE3_TESTING)
static
#endif
enum cpu_feature
get_cpu_features() {

if (g_cpu_features != UNDEFINED) {
return g_cpu_features;
} else {
#if defined(IS_X86)
uint32_t regs[4] = {0};
uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
(void)edx;
enum cpu_feature features = 0;
cpuid(regs, 0);
const int max_id = *eax;
cpuid(regs, 1);
#if defined(__amd64__) || defined(_M_X64)
features |= SSE2;
#else
if (*edx & (1UL << 26))
features |= SSE2;
#endif
if (*ecx & (1UL << 0))
features |= SSSE3;
if (*ecx & (1UL << 19))
features |= SSE41;

if (*ecx & (1UL << 27)) { // OSXSAVE
const uint64_t mask = xgetbv();
if ((mask & 6) == 6) { // SSE and AVX states
if (*ecx & (1UL << 28))
features |= AVX;
if (max_id >= 7) {
cpuidex(regs, 7, 0);
if (*ebx & (1UL << 5))
features |= AVX2;
if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm
if (*ebx & (1UL << 31))
features |= AVX512VL;
if (*ebx & (1UL << 16))
features |= AVX512F;
}
}
}
}
g_cpu_features = features;
return features;
#else
/* How to detect NEON? */
return ZERO;
#endif
}
}

void blake3_compress_in_place(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN],
uint8_t block_len, uint64_t counter,
uint8_t flags) {
#if defined(IS_X86)
const enum cpu_feature features = get_cpu_features();
MAYBE_UNUSED(features);
#if !defined(BLAKE3_NO_AVX512)
if (features & AVX512VL) {
blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
return;
}
#endif
#if !defined(BLAKE3_NO_SSE41)
if (features & SSE41) {
blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);
return;
}
#endif
#if !defined(BLAKE3_NO_SSE2)
if (features & SSE2) {
blake3_compress_in_place_sse2(cv, block, block_len, counter, flags);
return;
}
#endif
#endif
blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
}

void blake3_compress_xof(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN],
uint8_t block_len, uint64_t counter, uint8_t flags,
uint8_t out[64]) {
#if defined(IS_X86)
const enum cpu_feature features = get_cpu_features();
MAYBE_UNUSED(features);
#if !defined(BLAKE3_NO_AVX512)
if (features & AVX512VL) {
blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
return;
}
#endif
#if !defined(BLAKE3_NO_SSE41)
if (features & SSE41) {
blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);
return;
}
#endif
#if !defined(BLAKE3_NO_SSE2)
if (features & SSE2) {
blake3_compress_xof_sse2(cv, block, block_len, counter, flags, out);
return;
}
#endif
#endif
blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
}

void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
size_t blocks, const uint32_t key[8], uint64_t counter,
bool increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
#if defined(IS_X86)
const enum cpu_feature features = get_cpu_features();
MAYBE_UNUSED(features);
#if !defined(BLAKE3_NO_AVX512)
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end,
out);
return;
}
#endif
#if !defined(BLAKE3_NO_AVX2)
if (features & AVX2) {
blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end,
out);
return;
}
#endif
#if !defined(BLAKE3_NO_SSE41)
if (features & SSE41) {
blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end,
out);
return;
}
#endif
#if !defined(BLAKE3_NO_SSE2)
if (features & SSE2) {
blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end,
out);
return;
}
#endif
#endif

#if BLAKE3_USE_NEON == 1
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
return;
#endif

blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end,
out);
}

// The dynamically detected SIMD degree of the current platform.
size_t blake3_simd_degree(void) {
#if defined(IS_X86)
const enum cpu_feature features = get_cpu_features();
MAYBE_UNUSED(features);
#if !defined(BLAKE3_NO_AVX512)
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
return 16;
}
#endif
#if !defined(BLAKE3_NO_AVX2)
if (features & AVX2) {
return 8;
}
#endif
#if !defined(BLAKE3_NO_SSE41)
if (features & SSE41) {
return 4;
}
#endif
#if !defined(BLAKE3_NO_SSE2)
if (features & SSE2) {
return 4;
}
#endif
#endif
#if BLAKE3_USE_NEON == 1
return 4;
#endif
return 1;
}
Loading

0 comments on commit 333e1bc

Please sign in to comment.