From 619b9827dec2c2a087f9037d311ea69a6ca131fc Mon Sep 17 00:00:00 2001 From: Marshall Lochbaum Date: Thu, 8 Jul 2021 16:51:39 -0400 Subject: [PATCH 1/2] Faster random deal, and sorted subset --- src/builtins/sysfn.c | 129 ++++++++++++++++++++++++++++++------------- 1 file changed, 92 insertions(+), 37 deletions(-) diff --git a/src/builtins/sysfn.c b/src/builtins/sysfn.c index b94f292d..f2cf7546 100644 --- a/src/builtins/sysfn.c +++ b/src/builtins/sysfn.c @@ -260,34 +260,56 @@ B rand_deal_c2(B t, B w, B x) { if (RARE(xi<0)) thrM("(rand).Deal: 𝕩 cannot be negative"); if (RARE(wi>xi)) thrM("(rand).Deal: 𝕨 cannot exceed 𝕩"); if (wi==0) return emptyIVec(); + B r; RAND_START; - TALLOC(i32,s,xi); - for (i64 i = 0; i < xi; i++) s[i] = i; - for (i64 i = 0; i < wi; i++) { - i32 j = wy2u0k(wyrand(&seed), xi-i) + i; - i32 c = s[j]; - s[j] = s[i]; - s[i] = c; + if (wi > xi/64) { + // Dense shuffle + TALLOC(i32,s,xi); + for (i64 i = 0; i < xi; i++) s[i] = i; + for (i64 i = 0; i < wi; i++) { + i32 j = wy2u0k(wyrand(&seed), xi-i) + i; + i32 c = s[j]; + s[j] = s[i]; + s[i] = c; + } + i32* rp; r = m_i32arrv(&rp, wi); + memcpy(rp, s, wi*4); + TFREE(s); + } else { + // Hash-based shuffle + i32* rp; r = m_i32arrv(&rp, wi); + i64 sz = 1; + while (sz < wi*2) sz*= 2; + TALLOC(i32, hash, 2*sz); i32* val = hash+1; + for (u64 i = 0; i < 2*sz; i++) hash[i] = 0; + for (i64 i = 0; i < wi; i++) rp[i] = i; + u64 mask = 2*(sz-1); + for (i64 i = 0; i < wi; i++) { + u64 j = wy2u0k(wyrand(&seed), xi-i) + i; + if (jxi)) thrM("(rand).Subset: 𝕨 cannot exceed 𝕩"); if (wi==0) return emptyIVec(); + B r; + if (wi==xi) { // Only one complete subset; will hang without this + i32* rp; r = m_i32arrv(&rp, wi); + for (u64 i = 0; i < wi; i++) rp[i] = i; + return r; + } + bool invert = wi > xi/2; + i32 wn = invert ? xi-wi : wi; RAND_START; - i32* rp; B r = m_i32arrv(&rp, wi); - i64 sz = 1; - while (sz < wi*2) sz*= 2; - sz*= 2; - H_i2i* map = m_i2i(sz); - for (i64 i = 0; i < wi; i++) rp[i] = i; - for (i64 i = 0; i < wi; i++) { - i32 j = wy2u0k(wyrand(&seed), xi-i) + i; - if (j xi/8) { + // Bit set (as bytes) + TALLOC(u8, set, xi); + for (u64 i = 0; i < xi; i++) set[i] = 0; + for (i32 i = xi-wn; i < xi; i++) { + i32 j = wy2u0k(wyrand(&seed), i+1); + if (set[j]) j=i; + set[j] = 1; + } + OUTPUT(set[i], xi, i); + TFREE(set); + } else { + // Sorted "hash" set + u64 sh = 0; + for (u64 xt=xi/4; xt>=wn; xt>>=1) sh++; + u64 sz = ((xi-1)>>sh)+1 + wn; + TALLOC(i32, hash, sz); + for (u64 i = 0; i < sz; i++) hash[i] = xi; + for (i32 i = xi-wn; i < xi; i++) { + i32 j = wy2u0k(wyrand(&seed), i+1); + u64 p = (u64)j >> sh; + while (true) { + i32 h = hash[p]; + if (LIKELY(j>sh; continue; } + p++; + } } + OUTPUT(hash[i]!=xi, sz, hash[i]); + TFREE(hash); } - free_i2i(map); + #undef FILT + #undef OUTPUT RAND_END; return r; } From 3546419816d4d311417b1d7efc2b3e69f031b637 Mon Sep 17 00:00:00 2001 From: Marshall Lochbaum Date: Thu, 8 Jul 2021 17:54:34 -0400 Subject: [PATCH 2/2] Use the correct complement algorithm for the hash set --- src/builtins/sysfn.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/builtins/sysfn.c b/src/builtins/sysfn.c index f2cf7546..59ba601a 100644 --- a/src/builtins/sysfn.c +++ b/src/builtins/sysfn.c @@ -326,11 +326,6 @@ B rand_subset_c2(B t, B w, B x) { bool invert = wi > xi/2; i32 wn = invert ? xi-wi : wi; RAND_START; - #define FILTER(COND, SIZE, ELT) \ - for (u64 i = 0; i < SIZE; i++) if (COND) *rp++=ELT; - #define OUTPUT(COND, S,E) \ - i32* rp; r = m_i32arrv(&rp, wi); \ - if (invert) { FILTER(!(COND), S,E); } else { FILTER(COND, S,E); } if (wn > xi/8) { // Bit set (as bytes) TALLOC(u8, set, xi); @@ -340,7 +335,9 @@ B rand_subset_c2(B t, B w, B x) { if (set[j]) j=i; set[j] = 1; } - OUTPUT(set[i], xi, i); + i32* rp; r = m_i32arrv(&rp, wi); + if (!invert) { for (u64 i = 0; i < xi; i++) if ( set[i]) *rp++=i; } + else { for (u64 i = 0; i < xi; i++) if (!set[i]) *rp++=i; } TFREE(set); } else { // Sorted "hash" set @@ -363,11 +360,19 @@ B rand_subset_c2(B t, B w, B x) { p++; } } - OUTPUT(hash[i]!=xi, sz, hash[i]); + i32* rp; r = m_i32arrv(&rp, wi); + if (!invert) { + for (u64 i = 0; i < sz; i++) if (hash[i]!=xi) *rp++=hash[i]; + } else { + i32 e = 0; + for (u64 i = 0; i < sz; i++) { + i32 f = hash[i]; + if (f!=xi) { while (e