From 155a6c884661f0de790a19c6648a16479cba7c95 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Mon, 24 Feb 2014 17:32:18 -0500 Subject: [PATCH 01/18] WIP: new approach to efficiently hashing 1, 1.0, big(1), the same. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The major change is that numeric values that are mathematically equal hash the same. This is tricky to do in a way that allows the hashing of Int64, Float64, Uint64 and such common numeric types to all be fast – nearly as fast as just applying the core hash to them as raw bits. Although tests pass, this is an inherently half-baked state since isequal and hash are now badly out of sync. Many decisions still need to be made about how to hash collections: does the collection type matter or just the element type of the collection? Or neither? This also does away with the bitmix function, instead favoring a Merkle-Damgård style of combining arbitrary amounts of data into a single, fixed-size hash value output. In a sense the hash function with two arguments replaces the bitmix function – you can give the result of hashing previous values as the second argument to hash and the result will depend on both in a difficult-to-predict way. 
--- base/base.jl | 3 +- base/bitarray.jl | 4 - base/complex.jl | 2 - base/dict.jl | 98 ---------------------- base/exports.jl | 1 - base/float16.jl | 2 - base/hashing.jl | 174 +++++++++++++++++++++++++++++++++++++++ base/hashing2.jl | 58 +++++++++++++ base/mpfr.jl | 22 +---- base/multi.jl | 2 +- base/multidimensional.jl | 4 +- base/pkg/types.jl | 2 +- base/precompile.jl | 3 - base/profile.jl | 7 +- base/random.jl | 6 +- base/range.jl | 4 - base/rational.jl | 2 - base/set.jl | 2 - base/string.jl | 16 ---- base/sysimg.jl | 4 + base/utf8proc.jl | 2 +- base/version.jl | 9 +- test/hashing.jl | 4 - 23 files changed, 261 insertions(+), 170 deletions(-) create mode 100644 base/hashing.jl create mode 100644 base/hashing2.jl diff --git a/base/base.jl b/base/base.jl index 1309006009ab1..004fcabd67e85 100644 --- a/base/base.jl +++ b/base/base.jl @@ -108,11 +108,12 @@ type Colon end const (:) = Colon() -hash(w::WeakRef) = hash(w.value) isequal(w::WeakRef, v::WeakRef) = isequal(w.value, v.value) isequal(w::WeakRef, v) = isequal(w.value, v) isequal(w, v::WeakRef) = isequal(w, v.value) +hash(w::WeakRef, h::Uint=zero(Uint)) = hash(w.value, h) + function finalizer(o::ANY, f::Union(Function,Ptr)) if isimmutable(o) error("objects of type ", typeof(o), " cannot be finalized") diff --git a/base/bitarray.jl b/base/bitarray.jl index 23fa71472a857..9f92e665ff195 100644 --- a/base/bitarray.jl +++ b/base/bitarray.jl @@ -1828,7 +1828,3 @@ end # hvcat -> use fallbacks in abstractarray.jl isequal(A::BitArray, B::BitArray) = (A == B) - -# Hashing - -hash(B::BitArray) = hash((size(B), B.chunks)) diff --git a/base/complex.jl b/base/complex.jl index 29c89042b653e..7203fc4788c43 100644 --- a/base/complex.jl +++ b/base/complex.jl @@ -99,8 +99,6 @@ end isequal(z::Complex, w::Complex) = isequal(real(z),real(w)) & isequal(imag(z),imag(w)) -hash(z::Complex) = bitmix(hash(real(z)),hash(imag(z))) - conj(z::Complex) = Complex(real(z),-imag(z)) abs(z::Complex) = hypot(real(z), imag(z)) abs2(z::Complex) = 
real(z)*real(z) + imag(z)*imag(z) diff --git a/base/dict.jl b/base/dict.jl index 945a4faa48df2..8527690430037 100644 --- a/base/dict.jl +++ b/base/dict.jl @@ -94,14 +94,6 @@ filter(f::Function, d::Associative) = filter!(f,copy(d)) eltype{K,V}(a::Associative{K,V}) = (K,V) -function hash(d::Associative) - h::Uint = 0 - for (k,v) in d - h $= bitmix(hash(k),~hash(v)) - end - h -end - function isequal(l::Associative, r::Associative) if isa(l,ObjectIdDict) != isa(r,ObjectIdDict) return false @@ -201,96 +193,6 @@ function length(d::ObjectIdDict) n end -# hashing - -function int32hash(n::Uint32) - local a::Uint32 = n - a = (a + 0x7ed55d16) + a << 12 - a = (a $ 0xc761c23c) $ a >> 19 - a = (a + 0x165667b1) + a << 5 - a = (a + 0xd3a2646c) $ a << 9 - a = (a + 0xfd7046c5) + a << 3 - a = (a $ 0xb55a4f09) $ a >> 16 - return a -end - -function int64hash(n::Uint64) - local a::Uint64 = n - a = ~a + (a << 21) - a = a $ (a >> 24) - a = (a + (a << 3)) + (a << 8) - a = a $ (a >> 14) - a = (a + (a << 2)) + (a << 4) - a = a $ (a >> 28) - a = a + (a << 31) - return a -end - -function int64to32hash(n::Uint64) - local key::Uint64 = n - key = ~key + (key << 18) - key = key $ (key >> 31) - key = key * 21 - key = key $ (key >> 11) - key = key + (key << 6 ) - key = key $ (key >> 22) - return uint32(key) -end - -bitmix(a::Union(Int32,Uint32), b::Union(Int32,Uint32)) = int64to32hash((uint64(a)<<32)|uint64(b)) -bitmix(a::Union(Int64,Uint64), b::Union(Int64, Uint64)) = int64hash(uint64(a$((b<<32)|(b>>>32)))) - -if WORD_SIZE == 64 - hash64(x::Float64) = int64hash(reinterpret(Uint64,x)) - hash64(x::Union(Int64,Uint64)) = int64hash(reinterpret(Uint64,x)) -else - hash64(x::Float64) = int64to32hash(reinterpret(Uint64,x)) - hash64(x::Union(Int64,Uint64)) = int64to32hash(reinterpret(Uint64,x)) -end - -hash(x::Union(Bool,Char,Int8,Uint8,Int16,Uint16,Int32,Uint32,Int64,Uint64)) = - hash64(uint64(x)) - -function hash(x::Integer) - h::Uint = hash(uint64(x&0xffffffffffffffff)) - if typemin(Int64) <= x <= 
typemax(Uint64) - return h - end - x >>>= 64 - while x != 0 && x != -1 - h = bitmix(h, hash(uint64(x&0xffffffffffffffff))) - x >>>= 64 - end - return h -end - -hash(x::Float32) = hash(reinterpret(Uint32, ifelse(isnan(x), NaN32, x))) -hash(x::Float64) = hash(reinterpret(Uint64, ifelse(isnan(x), NaN, x))) - -function hash(t::Tuple) - h::Uint = 0 - for i=1:length(t) - h = bitmix(h,int(hash(t[i]))+42) - end - return h -end - -function hash(a::AbstractArray) - h::Uint = hash(size(a))+1 - for i=1:length(a) - h = bitmix(h,int(hash(a[i]))) - end - return h -end - -# make sure Array{Bool} and BitArray can be equivalent -hash(a::AbstractArray{Bool}) = hash(bitpack(a)) - -hash(x::ANY) = object_id(x) - -hash(x::Expr) = bitmix(hash(x.head),hash(x.args)+43) - - # dict type Dict{K,V} <: Associative{K,V} diff --git a/base/exports.jl b/base/exports.jl index 8eef21bbbf561..e07b86ec8f352 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -279,7 +279,6 @@ export atanh, big, binomial, - bitmix, bool, bswap, cbrt, diff --git a/base/float16.jl b/base/float16.jl index 303529d2afe7d..158ed6dbf3e4c 100644 --- a/base/float16.jl +++ b/base/float16.jl @@ -139,5 +139,3 @@ hypot(a::Float16, b::Float16) = float16(hypot(float32(a), float32(b))) ldexp(a::Float16, b::Integer) = float16(ldexp(float32(a), b)) exponent(x::Float16) = exponent(float32(x)) ^(x::Float16, y::Integer) = x^float16(y) - -hash(x::Float16) = hash(reinterpret(Uint16, isnan(x) ? 
NaN16 : x)) diff --git a/base/hashing.jl b/base/hashing.jl new file mode 100644 index 0000000000000..09e0d48b65002 --- /dev/null +++ b/base/hashing.jl @@ -0,0 +1,174 @@ +## core data hashing functions ## + +function hash_uint(n::Uint64) + local a::Uint64 = n + a = ~a + a << 21 + a = a $ a >> 24 + a = a + a << 3 + a << 8 + a = a $ a >> 14 + a = a + a << 2 + a << 4 + a = a $ a >> 28 + a = a + a << 31 + return a +end + +function hash_uint(n::Uint32) + local a::Uint32 = n + a = a + 0x7ed55d16 + a << 12 + a = a $ 0xc761c23c $ a >> 19 + a = a + 0x165667b1 + a << 5 + a = a + 0xd3a2646c $ a << 9 + a = a + 0xfd7046c5 + a << 3 + a = a $ 0xb55a4f09 $ a >> 16 + return a +end + +## efficient value-based hashing of integers ## + +function hash_integer(n::Integer, h::Uint=zero(Uint)) + h = hash_uint(uint(n & typemax(Uint)) $ h) $ h + n = ifelse(n < 0, oftype(n,-n), n) + n >>>= sizeof(Uint) << 3 + while n != 0 + h = hash_uint(uint(n & typemax(Uint)) $ h) $ h + n >>>= sizeof(Uint) << 3 + end + return h +end + +## hashing rational values ## + +#= +`decompose(x)`: non-canonical decomposition of rational values as `den*2^pow/num`. + +The decompose function is the point where rational-valued numeric types that support +hashing hook into the hashing protocol. `decompose(x)` should return three integer +values `num, pow, den`, such that the value of `x` is mathematically equal to + + num*2^pow/den + +The decomposition need not be canonical in the sense that it just needs to be *some* +way to express `x` in this form, not any particular way – with the restriction that +`num` and `den` may not share any odd common factors. They may, however, have powers +of two in common – the generic hashing code will normalize those as necessary. 
+ +Special values: + + - `x` is zero: `num` should be zero and `den` should have the same sign as `x` + - `x` is infinite: `den` should be zero and `num` should have the same sign as `x` + - `x` is not a number: `num` and `den` should both be zero +=# + +decompose(x::Integer) = x, 0, 1 +decompose(x::Rational) = num(x), 0, den(x) + +function decompose(x::Float32) + isnan(x) && return 0, 0, 0 + isinf(x) && return ifelse(x < 0, -1, 1), 0, 0 + n = reinterpret(Int32, x) + s = int32(n & 0x007fffff) + e = int32(n & 0x7f800000 >> 23) + s |= int32(e != 0) << 23 + d = ifelse(signbit(n) == 1, -1, 1) + int(s), int(e - 150 + (e == 0)), d +end + +function decompose(x::Float64) + isnan(x) && return 0, 0, 0 + isinf(x) && return ifelse(x < 0, -1, 1), 0, 0 + n = reinterpret(Int64, x) + s = int64(n & 0x000fffffffffffff) + e = int64(n & 0x7ff0000000000000 >> 52) + s |= int64(e != 0) << 52 + d = ifelse(signbit(n) == 1, -1, 1) + int(s), int(e - 1075 + (e == 0)), d +end + +# hashing methods for rational-valued types + +hx(a::Uint64, b::Float64, h::Uint) = hash_uint((3a + reinterpret(Uint64,b)) - h) + +hash(x::Uint64, h::Uint=zero(Uint)) = hx(x, float64(x), h) +hash(x::Int64, h::Uint=zero(Uint)) = hx(reinterpret(Uint64,x), float64(x), h) +hash(x::Float64, h::Uint=zero(Uint)) = hx(box(Uint64,fptosi(unbox(Float64,x))), ifelse(x==x,x,NaN), h) + +hash(x::Union(Int8,Uint8,Int16,Uint16,Int32,Uint32)) = hash(int64(x)) +hash(x::Union(Float16,Float32)) = hash(float64(x)) + +const hash_NaN = hash(NaN) +const hash_pos_Inf = hash(+Inf) +const hash_neg_Inf = hash(-Inf) +const hash_pos_zero = hash(+0.) +const hash_neg_zero = hash(-0.) 
+ +function hash(x::Real, h::Uint=zero(Uint)) + # decompose x as num*2^pow/den + num, pow, den = decompose(x)::(Integer,Integer,Integer) + + # handle special values + num == 0 && den == 0 && return hash(NaN, h) + if num == 0 + den > 0 && return hash(+0.0, h) + den < 0 && return hash(-0.0, h) + end + if den == 0 + num > 0 && return hash(+Inf, h) + num < 0 && return hash(-Inf, h) + end + + # normalize decomposition + if den < 0 + num = -num + den = -den + end + z = trailing_zeros(num) + if z != 0 + num >>= z + pow += z + end + z = trailing_zeros(den) + if z != 0 + den >>= z + pow -= z + end + + # handle values representable as Int64, Uint64, Float64 + if den == 1 + left = ndigits0z(num,2) + pow + right = trailing_zeros(num) + pow + if -1074 <= right + if 0 <= right && left <= 64 + left <= 63 && return hash(int64(num) << int(pow), h) + signbit(num) == signbit(den) && return hash(uint64(num) << int(pow), h) + end + left <= 1024 && left - right <= 53 && return hash(float64(num) * 2.0^pow, h) + end + end + + # handle "generic" real values + h = hash_integer(den, h) + h = hash_integer(pow, h) + h = hash_integer(num, h) + return h +end + +## hashing complex values ## + +const h_imag = 0x32a7a07f3e7cd1f9 +const hash_0_imag = hash(0, h_imag) + +function hash(z::Complex, h::Uint=zero(Uint)) + # TODO: with default argument specialization, this would be better: + # hash(real(z), h $ hash(imag(z), h $ h_imag) $ hash(0, h $ h_imag)) + hash(real(z), h $ hash(imag(z), h_imag) $ hash_0_imag) +end + +## special hashing for booleans and characters ## + +hash(x::Bool, h::Uint=zero(Uint)) = hash(int(x), h + 0x4cd135a1755139a5) +hash(x::Char, h::Uint=zero(Uint)) = hash(int(x), h + 0x10f989ff0f886f11) + +## expression hashing ## + +hash(x::Symbol, h::Uint=zero(Uint)) = hash(object_id(x), h) +hash(x::Expr, h::Uint=zero(Uint)) = hash(x.args, hash(x.head, h + 0x83c7900696d26dc6)) diff --git a/base/hashing2.jl b/base/hashing2.jl new file mode 100644 index 0000000000000..ebb459465655a --- 
/dev/null +++ b/base/hashing2.jl @@ -0,0 +1,58 @@ +## hashing BigInts, BigFloats, and Float16s ## + +function hash_integer(n::BigInt, h::Uint=zero(Uint)) + s = n.size + s == 0 && return hash_integer(0, h) + p = convert(Ptr{Uint}, n.d) + b = unsafe_load(p) + h = hash_uint(ifelse(s < 0, -b, b) $ h) $ h + for k = 2:abs(s) + h = hash_uint(unsafe_load(p, k) $ h) $ h + end + return h +end + +function decompose(x::BigFloat) + isnan(x) && return big(0), 0, 0 + isinf(x) && return big(x.sign), 0, 0 + x == 0 && return big(0), 0, int(x.sign) + s = BigInt() + ccall((:__gmpz_realloc2, :libgmp), Void, (Ptr{BigInt}, Culong), &s, x.prec) + s.size = -fld(-x.prec,(sizeof(Culong)<<3)) + ccall(:memcpy, Ptr{Void}, (Ptr{Void}, Ptr{Void}, Csize_t), s.d, x.d, s.size*sizeof(Culong)) + s, int(x.exp - x.prec), int(x.sign) +end + +hash(x::Float16, h::Uint=zero(Uint)) = hash(float64(x), h) + +## hashing strings ## + +function hash{T<:ByteString}(s::Union(T,SubString{T}), h::Uint=zero(Uint)) + h += 0x71e729fd56419c81 + ccall(:memhash_seed, Uint64, (Ptr{Void}, Int, Uint32), pointer(s), sizeof(s), h) + h +end +hash(s::String, h::Uint=zero(Uint)) = hash(bytestring(s), h) + +## hashing collections ## + +function hash(v::Union(Tuple,AbstractArray,Associative), h::Uint=zero(Uint)) + h += object_id(eltype(v)) + for x = v + h = hash(x, h) + end + return h +end + +hash(s::Set, h::Uint=zero(Uint)) = hash(sort(s.dict.keys[s.dict.slots .!= 0]), h) + +hash(r::Range{Bool}, h::Uint=zero(Uint)) = invoke(hash, (Range, Uint), r, h) +hash(B::BitArray, h::Uint=zero(Uint)) = hash((size(B),B.chunks), h) +hash(a::AbstractArray{Bool}, h::Uint=zero(Uint)) = hash(bitpack(a), h) + +# hashing ranges by component at worst leads to collisions for very similar ranges +hash{T<:Range}(r::T, h::Uint=zero(Uint)) = + hash(first(r), hash(step(r), hash(last(r), h + object_id(eltype(T))))) + +## hashing general objects and expressions ## + +hash(x::ANY, h::Uint=zero(Uint)) = hash(object_id(x), h) diff --git a/base/mpfr.jl 
b/base/mpfr.jl index 744c39e0d124e..d4720edd98ffd 100644 --- a/base/mpfr.jl +++ b/base/mpfr.jl @@ -16,7 +16,7 @@ import gamma, lgamma, digamma, erf, erfc, zeta, log1p, airyai, iceil, ifloor, itrunc, eps, signbit, sin, cos, tan, sec, csc, cot, acos, asin, atan, cosh, sinh, tanh, sech, csch, coth, acosh, asinh, atanh, atan2, - serialize, deserialize, inf, nan, hash, cbrt, typemax, typemin, + serialize, deserialize, inf, nan, cbrt, typemax, typemin, realmin, realmax, get_rounding, set_rounding, maxintfloat, widen import Base.GMP: ClongMax, CulongMax @@ -714,24 +714,4 @@ print(io::IO, b::BigFloat) = print(io, string(b)) show(io::IO, b::BigFloat) = print(io, string(b), " with $(precision(b)) bits of precision") showcompact(io::IO, b::BigFloat) = print(io, string(b)) -function hash(x::BigFloat) - if isnan(x) - return hash(NaN) - end - if isinf(x) - return hash(float64(x)) - end - n = ceil(precision(x)/53) - e = exponent(x) - h::Uint = signbit(x) - h = h<<30 + e - x = ldexp(x, -e) - for i=1:n - f64 = float64(x) - h = bitmix(h, hash(f64)$11111) - x -= f64 - end - h -end - end #module diff --git a/base/multi.jl b/base/multi.jl index 847cefa9d164e..243170317396f 100644 --- a/base/multi.jl +++ b/base/multi.jl @@ -448,7 +448,7 @@ type RemoteRef next_id() = (id=(myid(),REQ_ID); REQ_ID+=1; id) end -hash(r::RemoteRef) = hash(r.whence)+3*hash(r.id) +hash(r::RemoteRef, h::Uint=zero(Uint)) = hash(r.whence, hash(r.id, h)) isequal(r::RemoteRef, s::RemoteRef) = (r.whence==s.whence && r.id==s.id) rr2id(r::RemoteRef) = (r.whence, r.id) diff --git a/base/multidimensional.jl b/base/multidimensional.jl index 0b78db5b085d0..cde16603f0e9b 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -480,6 +480,8 @@ end ## unique across dim +# TODO: this doesn't fit into the new hashing scheme in any obvious way + immutable Prehashed hash::Uint end @@ -492,7 +494,7 @@ hash(x::Prehashed) = x.hash # Compute hash for each row k = 0 @nloops N i A d->(if d == dim; k = i_d; end) begin - 
@inbounds hashes[k] = bitmix(hashes[k], hash((@nref N A i))) + @inbounds hashes[k] = hash(hashes[k], hash((@nref N A i))) end # Collect index of first row for each hash diff --git a/base/pkg/types.jl b/base/pkg/types.jl index c2d75ffe13a47..2f75639ed079b 100644 --- a/base/pkg/types.jl +++ b/base/pkg/types.jl @@ -44,7 +44,7 @@ function intersect(A::VersionSet, B::VersionSet) VersionSet(ivals) end isequal(A::VersionSet, B::VersionSet) = (A.intervals == B.intervals) -hash(s::VersionSet) = hash(s.intervals) +hash(s::VersionSet, h::Uint=zero(Uint)) = hash(s.intervals, h + 0x2fd2ca6efa023f44) deepcopy_internal(vs::VersionSet, ::ObjectIdDict) = VersionSet(copy(vs.intervals)) typealias Requires Dict{ByteString,VersionSet} diff --git a/base/precompile.jl b/base/precompile.jl index fcdfad42cea5f..44200d159789b 100644 --- a/base/precompile.jl +++ b/base/precompile.jl @@ -52,9 +52,6 @@ precompile(bool, (RemoteRef,)) precompile(wait, (RemoteRef,)) precompile(hash, (RemoteRef,)) precompile(take, (RemoteRef,)) -precompile(bitmix, (Int, Int)) -precompile(bitmix, (Uint, Int)) -precompile(bitmix, (Uint64, Int64)) precompile(hash, (Int,)) precompile(isequal, (Symbol, Symbol)) precompile(isequal, (Bool, Bool)) diff --git a/base/profile.jl b/base/profile.jl index 65277f42b1f7a..4fb6f2a9d8b92 100644 --- a/base/profile.jl +++ b/base/profile.jl @@ -76,7 +76,12 @@ const UNKNOWN = LineInfo("?", "?", -1) isequal(a::LineInfo, b::LineInfo) = a.line == b.line && a.func == b.func && a.file == b.file -hash(li::LineInfo) = bitmix(hash(li.func), bitmix(hash(li.file), hash(li.line))) +function hash(li::LineInfo, h::Uint=zero(Uint)) + h += 0xf4fbda67fe20ce88 + h = hash(li.line, h) + h = hash(li.file, h) + h = hash(li.func, h) +end # C wrappers start_timer() = ccall(:jl_profile_start_timer, Cint, ()) diff --git a/base/random.jl b/base/random.jl index 172322493d0a8..abc9757990b34 100644 --- a/base/random.jl +++ b/base/random.jl @@ -39,11 +39,9 @@ function __init__() catch println(STDERR, "Entropy pool 
not available to seed RNG; using ad-hoc entropy sources.") seed = reinterpret(Uint64, time()) - seed = bitmix(seed, uint64(getpid())) + seed = hash(seed, uint64(getpid())) try - seed = bitmix(seed, parseint(Uint64, readall(`ifconfig` |> `sha1sum`)[1:40], 16)) - catch - # ignore + seed = hash(seed, parseint(Uint64, readall(`ifconfig` |> `sha1sum`)[1:40], 16)) end srand(seed) end diff --git a/base/range.jl b/base/range.jl index c86611f9f093d..910637c880505 100644 --- a/base/range.jl +++ b/base/range.jl @@ -299,10 +299,6 @@ function ==(r::Range, s::Range) return true end -# hashing ranges by component at worst leads to collisions for very similar ranges -hash(r::Range) = - bitmix(hash(first(r)), bitmix(hash(step(r)), bitmix(hash(last(r)), uint(0xaaeeaaee)))) - # TODO: isless? intersect{T1<:Integer, T2<:Integer}(r::UnitRange{T1}, s::UnitRange{T2}) = max(r.start,s.start):min(last(r),last(s)) diff --git a/base/rational.jl b/base/rational.jl index 2c22105a99465..e626c4487c69c 100644 --- a/base/rational.jl +++ b/base/rational.jl @@ -110,8 +110,6 @@ typemax{T<:Integer}(::Type{Rational{T}}) = one(T)//zero(T) isinteger(x::Rational) = x.den == 1 -hash(x::Rational) = bitmix(hash(x.num), ~hash(x.den)) - -(x::Rational) = (-x.num) // x.den for op in (:+, :-, :rem, :mod) @eval begin diff --git a/base/set.jl b/base/set.jl index ee3421c78924c..a83cf3a1e98a6 100644 --- a/base/set.jl +++ b/base/set.jl @@ -107,5 +107,3 @@ function filter!(f::Function, s::Set) return s end filter(f::Function, s::Set) = filter!(f, copy(s)) - -hash(s::Set) = hash(sort(s.dict.keys[s.dict.slots .!= 0])) diff --git a/base/string.jl b/base/string.jl index 3ee7f3fa337ee..c7be7e7dea612 100644 --- a/base/string.jl +++ b/base/string.jl @@ -1678,19 +1678,3 @@ pointer{T<:ByteString}(x::SubString{T}, i::Integer) = pointer(x.string.data) + x pointer(x::Union(UTF16String,UTF32String), i::Integer) = pointer(x)+(i-1)*sizeof(eltype(x.data)) pointer{T<:Union(UTF16String,UTF32String)}(x::SubString{T}) = 
pointer(x.string.data) + x.offset*sizeof(eltype(x.data)) pointer{T<:Union(UTF16String,UTF32String)}(x::SubString{T}, i::Integer) = pointer(x.string.data) + (x.offset + (i-1))*sizeof(eltype(x.data)) - -# string hashing: -if WORD_SIZE == 64 - hash{T<:ByteString}(s::Union(T,SubString{T})) = - ccall(:memhash, Uint64, (Ptr{Void}, Int), pointer(s), sizeof(s)) - hash{T<:ByteString}(s::Union(T,SubString{T}), seed::Union(Int,Uint)) = - ccall(:memhash_seed, Uint64, (Ptr{Void}, Int, Uint32), - pointer(s), sizeof(s), uint32(seed)) -else - hash{T<:ByteString}(s::Union(T,SubString{T})) = - ccall(:memhash32, Uint32, (Ptr{Void}, Int), pointer(s), sizeof(s)) - hash{T<:ByteString}(s::Union(T,SubString{T}), seed::Union(Int,Uint)) = - ccall(:memhash32_seed, Uint32, (Ptr{Void}, Int, Uint32), - pointer(s), sizeof(s), uint32(seed)) -end -hash(s::String) = hash(bytestring(s)) diff --git a/base/sysimg.jl b/base/sysimg.jl index 0cc475085094e..add8cffd3ea0d 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -58,6 +58,7 @@ include("bitarray.jl") include("intset.jl") include("dict.jl") include("set.jl") +include("hashing.jl") include("iterator.jl") # compiler @@ -174,6 +175,9 @@ big(q::Rational) = big(num(q))//big(den(q)) big(z::Complex) = complex(big(real(z)),big(imag(z))) @vectorize_1arg Number big +# moer hashing definitions +include("hashing2.jl") + # random number generation and statistics include("statistics.jl") include("librandom.jl") diff --git a/base/utf8proc.jl b/base/utf8proc.jl index 32e5d21011b72..d1d7427289d3c 100644 --- a/base/utf8proc.jl +++ b/base/utf8proc.jl @@ -1,7 +1,7 @@ # Various Unicode functionality from the utf8proc library module UTF8proc -import Base: show, showcompact, ==, string, symbol, isless, hash +import Base: show, showcompact, ==, string, symbol, isless # also exported by Base: export normalize_string, is_valid_char, is_assigned_char diff --git a/base/version.jl b/base/version.jl index 9fd46b1821c04..86b55b2141c3b 100644 --- a/base/version.jl +++ 
b/base/version.jl @@ -145,7 +145,14 @@ function isless(a::VersionNumber, b::VersionNumber) return false end -hash(v::VersionNumber) = hash([v.(n) for n in VersionNumber.names]) +function hash(v::VersionNumber, h::Uint=zero(Uint)) + h += 0x8ff4ffdb75f9fede + h = hash(v.major, h) + h = hash(v.minor, h) + h = hash(v.patch, h) + h = hash(v.prerelease, ~h) + h = hash(v.build, ~h) +end lowerbound(v::VersionNumber) = VersionNumber(v.major, v.minor, v.patch, ("",), ()) upperbound(v::VersionNumber) = VersionNumber(v.major, v.minor, v.patch, (), ("",)) diff --git a/test/hashing.jl b/test/hashing.jl index f713ccd929131..6b2b767e54229 100644 --- a/test/hashing.jl +++ b/test/hashing.jl @@ -35,7 +35,3 @@ end @test hash(:(X.x)) != hash(:(X.y)) @test hash([1,2]) == hash(sub([1,2,3,4],1:2)) - -# make sure >>> is used -@test bitmix(2, -3) != bitmix(2, -4) -@test bitmix(-3, 2) != bitmix(-4, 2) From b3a7c84d8aada3d6524d606bfe8fd03228e829c8 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Thu, 24 Apr 2014 16:27:07 -0400 Subject: [PATCH 02/18] decompose: fix documentation of decompose for rational hashing. --- base/hashing.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/hashing.jl b/base/hashing.jl index 09e0d48b65002..8b8fbbe73c9ae 100644 --- a/base/hashing.jl +++ b/base/hashing.jl @@ -39,7 +39,7 @@ end ## hashing rational values ## #= -`decompose(x)`: non-canonical decomposition of rational values as `den*2^pow/num`. +`decompose(x)`: non-canonical decomposition of rational values as `num*2^pow/den`. The decompose function is the point where rational-valued numeric types that support hashing hook into the hashing protocol. `decompose(x)` should return three integer From dadc7cf62c815a904cfd4b22a7d1a91276b9fbec Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Thu, 24 Apr 2014 17:02:36 -0400 Subject: [PATCH 03/18] define hash(x) = hash(x, zero(Uint)) in a single place; fix bugs. 
--- base/base.jl | 2 +- base/hashing.jl | 34 ++++++++++++++++------------------ base/hashing2.jl | 22 +++++++++++----------- base/multi.jl | 2 +- base/pkg/types.jl | 2 +- base/profile.jl | 2 +- base/version.jl | 2 +- test/arrayops.jl | 2 +- test/collections.jl | 2 +- 9 files changed, 34 insertions(+), 36 deletions(-) diff --git a/base/base.jl b/base/base.jl index 004fcabd67e85..44bb21bcaa504 100644 --- a/base/base.jl +++ b/base/base.jl @@ -112,7 +112,7 @@ isequal(w::WeakRef, v::WeakRef) = isequal(w.value, v.value) isequal(w::WeakRef, v) = isequal(w.value, v) isequal(w, v::WeakRef) = isequal(w, v.value) -hash(w::WeakRef, h::Uint=zero(Uint)) = hash(w.value, h) +hash(w::WeakRef, h::Uint) = hash(w.value, h) function finalizer(o::ANY, f::Union(Function,Ptr)) if isimmutable(o) diff --git a/base/hashing.jl b/base/hashing.jl index 8b8fbbe73c9ae..bfd7b9b9e77f8 100644 --- a/base/hashing.jl +++ b/base/hashing.jl @@ -1,3 +1,7 @@ +## hashing a single value ## + +hash(x::Any) = hash(x, zero(Uint)) + ## core data hashing functions ## function hash_uint(n::Uint64) @@ -25,7 +29,7 @@ end ## efficient value-based hashing of integers ## -function hash_integer(n::Integer, h::Uint=zero(Uint)) +function hash_integer(n::Integer, h::Uint) h = hash_uint(uint(n & typemax(Uint)) $ h) $ h n = ifelse(n < 0, oftype(n,-n), n) n >>>= sizeof(Uint) << 3 @@ -88,20 +92,14 @@ end hx(a::Uint64, b::Float64, h::Uint) = hash_uint((3a + reinterpret(Uint64,b)) - h) -hash(x::Uint64, h::Uint=zero(Uint)) = hx(x, float64(x), h) -hash(x::Int64, h::Uint=zero(Uint)) = hx(reinterpret(Uint64,x), float64(x), h) -hash(x::Float64, h::Uint=zero(Uint)) = hx(box(Uint64,fptosi(unbox(Float64,x))), ifelse(x==x,x,NaN), h) - -hash(x::Union(Int8,Uint8,Int16,Uint16,Int32,Uint32)) = hash(int64(x)) -hash(x::Union(Float16,Float32)) = hash(float64(x)) +hash(x::Uint64, h::Uint) = hx(x, float64(x), h) +hash(x::Int64, h::Uint) = hx(reinterpret(Uint64,x), float64(x), h) +hash(x::Float64, h::Uint) = 
hx(box(Uint64,fptosi(unbox(Float64,x))), ifelse(x==x,x,NaN), h) -const hash_NaN = hash(NaN) -const hash_pos_Inf = hash(+Inf) -const hash_neg_Inf = hash(-Inf) -const hash_pos_zero = hash(+0.) -const hash_neg_zero = hash(-0.) +hash(x::Union(Int8,Uint8,Int16,Uint16,Int32,Uint32), h::Uint) = hash(int64(x), h) +hash(x::Float32, h::Uint) = hash(float64(x), h) -function hash(x::Real, h::Uint=zero(Uint)) +function hash(x::Real, h::Uint) # decompose x as num*2^pow/den num, pow, den = decompose(x)::(Integer,Integer,Integer) @@ -157,7 +155,7 @@ end const h_imag = 0x32a7a07f3e7cd1f9 const hash_0_imag = hash(0, h_imag) -function hash(z::Complex, h::Uint=zero(Uint)) +function hash(z::Complex, h::Uint) # TODO: with default argument specialization, this would be better: # hash(real(z), h $ hash(imag(z), h $ h_imag) $ hash(0, h $ h_imag)) hash(real(z), h $ hash(imag(z), h_imag) $ hash_0_imag) @@ -165,10 +163,10 @@ end ## special hashing for booleans and characters ## -hash(x::Bool, h::Uint=zero(Uint)) = hash(int(x), h + 0x4cd135a1755139a5) -hash(x::Char, h::Uint=zero(Uint)) = hash(int(x), h + 0x10f989ff0f886f11) +hash(x::Bool, h::Uint) = hash(int(x), h + 0x4cd135a1755139a5) +hash(x::Char, h::Uint) = hash(int(x), h + 0x10f989ff0f886f11) ## expression hashing ## -hash(x::Symbol, h::Uint=zero(Uint)) = hash(object_id(x), h) -hash(x::Expr, h::Uint=zero(Uint)) = hash(x.args, hash(x.head, h + 0x83c7900696d26dc6)) +hash(x::Symbol, h::Uint) = hash(object_id(x), h) +hash(x::Expr, h::Uint) = hash(x.args, hash(x.head, h + 0x83c7900696d26dc6)) diff --git a/base/hashing2.jl b/base/hashing2.jl index ebb459465655a..f962d57183ba1 100644 --- a/base/hashing2.jl +++ b/base/hashing2.jl @@ -1,6 +1,6 @@ ## hashing BigInts, BigFloats, and Float16s ## -function hash_integer(n::BigInt, h::Uint=zero(Uint)) +function hash_integer(n::BigInt, h::Uint) s = n.size s == 0 && return hash_integer(0, h) p = convert(Ptr{Uint}, n.d) @@ -23,19 +23,19 @@ function decompose(x::BigFloat) s, int(x.exp - x.prec), int(x.sign) 
end -hash(x::Float16, h::Uint=zero(Uint)) = hash(float64(x), h) +hash(x::Float16, h::Uint) = hash(float64(x), h) ## hashing strings ## -function hash{T<:ByteString}(s::Union(T,SubString{T}), h::Uint=zero(Uint)) +function hash{T<:ByteString}(s::Union(T,SubString{T}), h::Uint) h += 0x71e729fd56419c81 ccall(:memhash_seed, Uint64, (Ptr{Void}, Int, Uint32), pointer(s), sizeof(s), h) + h end -hash(s::String, h::Uint=zero(Uint)) = hash(bytestring(s), h) +hash(s::String, h::Uint) = hash(bytestring(s), h) ## hashing collections ## -function hash(v::Union(Tuple,AbstractArray,Associative), h::Uint=zero(Uint)) +function hash(v::Union(Tuple,AbstractArray,Associative), h::Uint) h += object_id(eltype(v)) for x = v h = hash(x, h) @@ -43,16 +43,16 @@ function hash(v::Union(Tuple,AbstractArray,Associative), h::Uint=zero(Uint)) return h end -hash(s::Set, h::Uint=zero(Uint)) = hash(sort(s.dict.keys[s.dict.slots .!= 0]), h) +hash(s::Set, h::Uint) = hash(sort(s.dict.keys[s.dict.slots .!= 0]), h) -hash(r::Range{Bool}, h::Uint=zero(Uint)) = invoke(hash, (Range, Uint), r, h) -hash(B::BitArray, h::Uint=zero(Uint)) = hash((size(B),B.chunks), h) -hash(a::AbstractArray{Bool}, h::Uint=zero(Uint)) = hash(bitpack(a), h) +hash(r::Range{Bool}, h::Uint) = invoke(hash, (Range, Uint), r, h) +hash(B::BitArray, h::Uint) = hash((size(B),B.chunks), h) +hash(a::AbstractArray{Bool}, h::Uint) = hash(bitpack(a), h) # hashing ranges by component at worst leads to collisions for very similar ranges -hash{T<:Range}(r::T, h::Uint=zero(Uint)) = +hash{T<:Range}(r::T, h::Uint) = hash(first(r), hash(step(r), hash(last(r), h + object_id(eltype(T))))) ## hashing general objects and expressions ## -hash(x::ANY, h::Uint=zero(Uint)) = hash(object_id(x), h) +hash(x::ANY, h::Uint) = hash(object_id(x), h) diff --git a/base/multi.jl b/base/multi.jl index 243170317396f..4027ab69d2b3a 100644 --- a/base/multi.jl +++ b/base/multi.jl @@ -448,7 +448,7 @@ type RemoteRef next_id() = (id=(myid(),REQ_ID); REQ_ID+=1; id) end 
-hash(r::RemoteRef, h::Uint=zero(Uint)) = hash(r.whence, hash(r.id, h)) +hash(r::RemoteRef, h::Uint) = hash(r.whence, hash(r.id, h)) isequal(r::RemoteRef, s::RemoteRef) = (r.whence==s.whence && r.id==s.id) rr2id(r::RemoteRef) = (r.whence, r.id) diff --git a/base/pkg/types.jl b/base/pkg/types.jl index 2f75639ed079b..eaf172c76f915 100644 --- a/base/pkg/types.jl +++ b/base/pkg/types.jl @@ -44,7 +44,7 @@ function intersect(A::VersionSet, B::VersionSet) VersionSet(ivals) end isequal(A::VersionSet, B::VersionSet) = (A.intervals == B.intervals) -hash(s::VersionSet, h::Uint=zero(Uint)) = hash(s.intervals, h + 0x2fd2ca6efa023f44) +hash(s::VersionSet, h::Uint) = hash(s.intervals, h + 0x2fd2ca6efa023f44) deepcopy_internal(vs::VersionSet, ::ObjectIdDict) = VersionSet(copy(vs.intervals)) typealias Requires Dict{ByteString,VersionSet} diff --git a/base/profile.jl b/base/profile.jl index 4fb6f2a9d8b92..7770a19bf7fd7 100644 --- a/base/profile.jl +++ b/base/profile.jl @@ -76,7 +76,7 @@ const UNKNOWN = LineInfo("?", "?", -1) isequal(a::LineInfo, b::LineInfo) = a.line == b.line && a.func == b.func && a.file == b.file -function hash(li::LineInfo, h::Uint=zero(Uint)) +function hash(li::LineInfo, h::Uint) h += 0xf4fbda67fe20ce88 h = hash(li.line, h) h = hash(li.file, h) diff --git a/base/version.jl b/base/version.jl index 86b55b2141c3b..490fb8c32b1fb 100644 --- a/base/version.jl +++ b/base/version.jl @@ -145,7 +145,7 @@ function isless(a::VersionNumber, b::VersionNumber) return false end -function hash(v::VersionNumber, h::Uint=zero(Uint)) +function hash(v::VersionNumber, h::Uint) h += 0x8ff4ffdb75f9fede h = hash(v.major, h) h = hash(v.minor, h) diff --git a/test/arrayops.jl b/test/arrayops.jl index 210b35f11d57d..8deb7e42cdcb2 100644 --- a/test/arrayops.jl +++ b/test/arrayops.jl @@ -363,7 +363,7 @@ D = cat(3, B, B) immutable HashCollision x::Float64 end -Base.hash(::HashCollision) = uint(0) +Base.hash(::HashCollision, h::Uint) = h @test map(x->x.x, unique(map(HashCollision, B), 1)) == 
C ## reduce ## diff --git a/test/collections.jl b/test/collections.jl index 8e30731e1ee93..3a4bd61275e07 100644 --- a/test/collections.jl +++ b/test/collections.jl @@ -80,7 +80,7 @@ type I1438T id end import Base.hash -hash(x::I1438T) = x.id +hash(x::I1438T, h::Uint) = hash(x.id, h) begin local seq, xs, s From 316eeda726ce31d8d8f27f8fbe781437610b9f8d Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Thu, 24 Apr 2014 17:55:17 -0400 Subject: [PATCH 04/18] MathConst: == and hash --- base/constants.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/base/constants.jl b/base/constants.jl index a0f1f522267d8..77310ba5c4e0d 100644 --- a/base/constants.jl +++ b/base/constants.jl @@ -13,6 +13,11 @@ convert(::Type{Float16}, x::MathConst) = float16(float32(x)) convert{T<:Real}(::Type{Complex{T}}, x::MathConst) = convert(Complex{T}, float64(x)) convert{T<:Integer}(::Type{Rational{T}}, x::MathConst) = convert(Rational{T}, float64(x)) +=={s}(::MathConst{s}, ::MathConst{s}) = true +==(::MathConst, ::MathConst) = false + +hash(x::MathConst, h::Uint) = hash(object_id(x), h) + -(x::MathConst) = -float64(x) for op in {:+, :-, :*, :/, :^} @eval $op(x::MathConst, y::MathConst) = $op(float64(x),float64(y)) From f7910291097d192dc8e0618424fe10653d8545c0 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Thu, 24 Apr 2014 18:37:49 -0400 Subject: [PATCH 05/18] move generic rational hashing definitions into hashing2.jl --- base/hashing.jl | 120 ++--------------------------------------------- base/hashing2.jl | 110 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 112 insertions(+), 118 deletions(-) diff --git a/base/hashing.jl b/base/hashing.jl index bfd7b9b9e77f8..0da7bbbce9ab9 100644 --- a/base/hashing.jl +++ b/base/hashing.jl @@ -27,130 +27,18 @@ function hash_uint(n::Uint32) return a end -## efficient value-based hashing of integers ## - -function hash_integer(n::Integer, h::Uint) - h = hash_uint(uint(n & typemax(Uint)) $ h) $ h - n = ifelse(n < 0, oftype(n,-n), n) 
- n >>>= sizeof(Uint) << 3 - while n != 0 - h = hash_uint(uint(n & typemax(Uint)) $ h) $ h - n >>>= sizeof(Uint) << 3 - end - return h -end - -## hashing rational values ## - -#= -`decompose(x)`: non-canonical decomposition of rational values as `num*2^pow/den`. - -The decompose function is the point where rational-valued numeric types that support -hashing hook into the hashing protocol. `decompose(x)` should return three integer -values `num, pow, den`, such that the value of `x` is mathematically equal to - - num*2^pow/den - -The decomposition need not be canonical in the sense that it just needs to be *some* -way to express `x` in this form, not any particular way – with the restriction that -`num` and `den` may not share any odd common factors. They may, however, have powers -of two in common – the generic hashing code will normalize those as necessary. - -Special values: - - - `x` is zero: `num` should be zero and `den` should have the same sign as `x` - - `x` is infinite: `den` should be zero and `num` should have the same sign as `x` - - `x` is not a number: `num` and `den` should both be zero -=# - -decompose(x::Integer) = x, 0, 1 -decompose(x::Rational) = num(x), 0, den(x) - -function decompose(x::Float32) - isnan(x) && return 0, 0, 0 - isinf(x) && return ifelse(x < 0, -1, 1), 0, 0 - n = reinterpret(Int32, x) - s = int32(n & 0x007fffff) - e = int32(n & 0x7f800000 >> 23) - s |= int32(e != 0) << 23 - d = ifelse(signbit(n) == 1, -1, 1) - int(s), int(e - 150 + (e == 0)), d -end - -function decompose(x::Float64) - isnan(x) && return 0, 0, 0 - isinf(x) && return ifelse(x < 0, -1, 1), 0, 0 - n = reinterpret(Int64, x) - s = int64(n & 0x000fffffffffffff) - e = int64(n & 0x7ff0000000000000 >> 52) - s |= int64(e != 0) << 52 - d = ifelse(signbit(n) == 1, -1, 1) - int(s), int(e - 1075 + (e == 0)), d -end - -# hashing methods for rational-valued types +## hashing small, built-in numeric types ## hx(a::Uint64, b::Float64, h::Uint) = hash_uint((3a + 
reinterpret(Uint64,b)) - h) hash(x::Uint64, h::Uint) = hx(x, float64(x), h) hash(x::Int64, h::Uint) = hx(reinterpret(Uint64,x), float64(x), h) -hash(x::Float64, h::Uint) = hx(box(Uint64,fptosi(unbox(Float64,x))), ifelse(x==x,x,NaN), h) +hash(x::Float64, h::Uint) = hx(box(Uint64,fptosi(unbox(Float64,x))), ifelse(isnan(x), NaN, x), h) hash(x::Union(Int8,Uint8,Int16,Uint16,Int32,Uint32), h::Uint) = hash(int64(x), h) hash(x::Float32, h::Uint) = hash(float64(x), h) -function hash(x::Real, h::Uint) - # decompose x as num*2^pow/den - num, pow, den = decompose(x)::(Integer,Integer,Integer) - - # handle special values - num == 0 && den == 0 && return hash(NaN, h) - if num == 0 - den > 0 && return hash(+0.0, h) - den < 0 && return hash(-0.0, h) - end - if den == 0 - num > 0 && return hash(+Inf, h) - num < 0 && return hash(-Inf, h) - end - - # normalize decomposition - if den < 0 - num = -num - den = -den - end - z = trailing_zeros(num) - if z != 0 - num >>= z - pow += z - end - z = trailing_zeros(den) - if z != 0 - den >>= z - pow -= z - end - - # handle values representable as Int64, Uint64, Float64 - if den == 1 - left = ndigits0z(num,2) + pow - right = trailing_zeros(num) + pow - if -1074 <= right - if 0 <= right && left <= 64 - left <= 63 && return hash(int64(num) << int(pow), h) - signbit(num) == signbit(den) && return hash(uint64(num) << int(pow), h) - end - left <= 1024 && left - right <= 53 && return hash(float64(num) * 2.0^pow, h) - end - end - - # handle "generic" real values - h = hash_integer(den, h) - h = hash_integer(pow, h) - h = hash_integer(num, h) - return h -end - -## hashing complex values ## +## hashing complex numbers ## const h_imag = 0x32a7a07f3e7cd1f9 const hash_0_imag = hash(0, h_imag) @@ -166,7 +54,7 @@ end hash(x::Bool, h::Uint) = hash(int(x), h + 0x4cd135a1755139a5) hash(x::Char, h::Uint) = hash(int(x), h + 0x10f989ff0f886f11) -## expression hashing ## +## symbol & expression hashing ## hash(x::Symbol, h::Uint) = hash(object_id(x), h) 
hash(x::Expr, h::Uint) = hash(x.args, hash(x.head, h + 0x83c7900696d26dc6)) diff --git a/base/hashing2.jl b/base/hashing2.jl index f962d57183ba1..eef5943451642 100644 --- a/base/hashing2.jl +++ b/base/hashing2.jl @@ -1,4 +1,15 @@ -## hashing BigInts, BigFloats, and Float16s ## +## efficient value-based hashing of integers ## + +function hash_integer(n::Integer, h::Uint) + h = hash_uint(uint(n & typemax(Uint)) $ h) $ h + n = ifelse(n < 0, oftype(n,-n), n) + n >>>= sizeof(Uint) << 3 + while n != 0 + h = hash_uint(uint(n & typemax(Uint)) $ h) $ h + n >>>= sizeof(Uint) << 3 + end + return h +end function hash_integer(n::BigInt, h::Uint) s = n.size @@ -12,6 +23,99 @@ function hash_integer(n::BigInt, h::Uint) return h end +## generic hashing for rational values ## + +function hash(x::Real, h::Uint) + # decompose x as num*2^pow/den + num, pow, den = decompose(x)::(Integer,Integer,Integer) + + # handle special values + num == 0 && den == 0 && return hash(NaN, h) + num == 0 && return hash(ifelse(den > 0, 0.0, -0.0), h) + den == 0 && return hash(ifelse(num > 0, Inf, -Inf), h) + + # normalize decomposition + if den < 0 + num = -num + den = -den + end + z = trailing_zeros(num) + if z != 0 + num >>= z + pow += z + end + z = trailing_zeros(den) + if z != 0 + den >>= z + pow -= z + end + + # handle values representable as Int64, Uint64, Float64 + if den == 1 + left = ndigits0z(num,2) + pow + right = trailing_zeros(num) + pow + if -1074 <= right + if 0 <= right && left <= 64 + left <= 63 && return hash(int64(num) << int(pow), h) + signbit(num) == signbit(den) && return hash(uint64(num) << int(pow), h) + end # typemin(Int64) handled by Float64 case + left <= 1024 && left - right <= 53 && return hash(float64(num) * 2.0^pow, h) + end + end + + # handle generic rational values + h = hash_integer(den, h) + h = hash_integer(pow, h) + h = hash_integer(num, h) + return h +end + +#= +`decompose(x)`: non-canonical decomposition of rational values as `num*2^pow/den`. 
+ +The decompose function is the point where rational-valued numeric types that support +hashing hook into the hashing protocol. `decompose(x)` should return three integer +values `num, pow, den`, such that the value of `x` is mathematically equal to + + num*2^pow/den + +The decomposition need not be canonical in the sense that it just needs to be *some* +way to express `x` in this form, not any particular way – with the restriction that +`num` and `den` may not share any odd common factors. They may, however, have powers +of two in common – the generic hashing code will normalize those as necessary. + +Special values: + + - `x` is zero: `num` should be zero and `den` should have the same sign as `x` + - `x` is infinite: `den` should be zero and `num` should have the same sign as `x` + - `x` is not a number: `num` and `den` should both be zero +=# + +decompose(x::Integer) = x, 0, 1 +decompose(x::Rational) = num(x), 0, den(x) + +function decompose(x::Float32) + isnan(x) && return 0, 0, 0 + isinf(x) && return ifelse(x < 0, -1, 1), 0, 0 + n = reinterpret(Int32, x) + s = int32(n & 0x007fffff) + e = int32(n & 0x7f800000 >> 23) + s |= int32(e != 0) << 23 + d = ifelse(signbit(n) == 1, -1, 1) + int(s), int(e - 150 + (e == 0)), d +end + +function decompose(x::Float64) + isnan(x) && return 0, 0, 0 + isinf(x) && return ifelse(x < 0, -1, 1), 0, 0 + n = reinterpret(Int64, x) + s = int64(n & 0x000fffffffffffff) + e = int64(n & 0x7ff0000000000000 >> 52) + s |= int64(e != 0) << 52 + d = ifelse(signbit(n) == 1, -1, 1) + int(s), int(e - 1075 + (e == 0)), d +end + function decompose(x::BigFloat) isnan(x) && return big(0), 0, 0 isinf(x) && return big(x.sign), 0, 0 @@ -23,6 +127,8 @@ function decompose(x::BigFloat) s, int(x.exp - x.prec), int(x.sign) end +## hashing Float16s ## + hash(x::Float16, h::Uint) = hash(float64(x), h) ## hashing strings ## @@ -53,6 +159,6 @@ hash(a::AbstractArray{Bool}, h::Uint) = hash(bitpack(a), h) hash{T<:Range}(r::T, h::Uint) = hash(first(r), hash(step(r), 
hash(last(r), h + object_id(eltype(T))))) -## hashing general objects and expressions ## +## hashing general objects ## hash(x::ANY, h::Uint) = hash(object_id(x), h) From 36969687fb9bbd936b3adc0baa6cf0a408d2ccb7 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Fri, 25 Apr 2014 10:33:06 -0400 Subject: [PATCH 06/18] Work around LLVM's dickish undefined constant folding behavior. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LLVM's fptosi intrinsic is undefined for NaN, so LLVM obnoxiously and pointlessly does different things when it gets NaN as a run-time value than as a compile-time value. To avoid this shitty, pointless trap, we have to avoid calling fptosi on NaN by introducing a branch into the hashing function – even though for hashing, we don't care *what* value is produced, just as long as it's consistent. Unfortunately, this affects the performance of Float64 hashing pretty badly. I was not able to figure out any way to recover this lost performance. LLVM really needs to stop doing this. --- base/hashing.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/base/hashing.jl b/base/hashing.jl index 0da7bbbce9ab9..53fd24305c45f 100644 --- a/base/hashing.jl +++ b/base/hashing.jl @@ -30,10 +30,11 @@ end ## hashing small, built-in numeric types ## hx(a::Uint64, b::Float64, h::Uint) = hash_uint((3a + reinterpret(Uint64,b)) - h) +const hx_NaN = hx(uint(0), NaN, uint(0)) hash(x::Uint64, h::Uint) = hx(x, float64(x), h) hash(x::Int64, h::Uint) = hx(reinterpret(Uint64,x), float64(x), h) -hash(x::Float64, h::Uint) = hx(box(Uint64,fptosi(unbox(Float64,x))), ifelse(isnan(x), NaN, x), h) +hash(x::Float64, h::Uint) = isnan(x) ? 
(hx_NaN $ h) : hx(box(Uint64,fptosi(unbox(Float64,x))), x, h) hash(x::Union(Int8,Uint8,Int16,Uint16,Int32,Uint32), h::Uint) = hash(int64(x), h) hash(x::Float32, h::Uint) = hash(float64(x), h) From ef79b1363eaae9729ffae59b157711f33b3802f2 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Fri, 25 Apr 2014 12:14:18 -0400 Subject: [PATCH 07/18] generic hash(Real) optimization: remove type assert on `decompose`. Apparently the type assertion after the decompose call was sabotaging the ability to inline the call to decompose for Rational{Int}. Removing the type assert gives a 10x boost, bringing the speed of generic hash(Rational) to within 10x of hash(Int). --- base/hashing2.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/hashing2.jl b/base/hashing2.jl index eef5943451642..119098040e30a 100644 --- a/base/hashing2.jl +++ b/base/hashing2.jl @@ -27,7 +27,7 @@ end function hash(x::Real, h::Uint) # decompose x as num*2^pow/den - num, pow, den = decompose(x)::(Integer,Integer,Integer) + num, pow, den = decompose(x) # handle special values num == 0 && den == 0 && return hash(NaN, h) From 85afdbbb10f6e8c1db1be56014af778fda078ec2 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Fri, 25 Apr 2014 15:13:18 -0400 Subject: [PATCH 08/18] =?UTF-8?q?More=20streamlined=20hashing=20for=20smal?= =?UTF-8?q?lish=20rational=20types=20(=E2=89=A4=2064-bits).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This actually provides less gain over the generic real hashing function than you would think, but it is slightly faster. 
--- base/hashing2.jl | 23 +++++++++++++++++++++++ base/int.jl | 2 ++ 2 files changed, 25 insertions(+) diff --git a/base/hashing2.jl b/base/hashing2.jl index 119098040e30a..8a52a4795f31f 100644 --- a/base/hashing2.jl +++ b/base/hashing2.jl @@ -127,6 +127,29 @@ function decompose(x::BigFloat) s, int(x.exp - x.prec), int(x.sign) end +## streamlined hashing for smallish rational types ## + +function hash{T<:Integer64}(x::Rational{T}, h::Uint) + num, den = Base.num(x), Base.den(x) + den == 1 && return hash(num, h) + den == 0 && return hash(ifelse(num > 0, Inf, -Inf), h) + if isodd(den) + pow = trailing_zeros(num) + num >>= pow + else + pow = trailing_zeros(den) + den >>= pow + pow = -pow + if den == 1 && abs(num) < 9007199254740992 + return hash(float64(num) * 2.0^pow) + end + end + h = hash_integer(den, h) + h = hash_integer(pow, h) + h = hash_integer(num, h) + return h +end + ## hashing Float16s ## hash(x::Float16, h::Uint) = hash(float64(x), h) diff --git a/base/int.jl b/base/int.jl index 3ddecb3fb8aeb..914af8d3070d6 100644 --- a/base/int.jl +++ b/base/int.jl @@ -96,6 +96,8 @@ mod(x::Unsigned, y::Signed) = rem(y+signed(rem(x,y)),y) # while there is a substantial performance penalty to 64-bit promotion. typealias Signed64 Union(Int8,Int16,Int32,Int64) typealias Unsigned64 Union(Uint8,Uint16,Uint32,Uint64) +typealias Integer64 Union(Signed64,Unsigned64) + div{T<:Signed64} (x::T, y::T) = box(T,sdiv_int(unbox(T,x),unbox(T,y))) div{T<:Unsigned64}(x::T, y::T) = box(T,udiv_int(unbox(T,x),unbox(T,y))) rem{T<:Signed64} (x::T, y::T) = box(T,srem_int(unbox(T,x),unbox(T,y))) From f5fd830af43099a62af02ec9098fe61e23213f83 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Sun, 27 Apr 2014 12:44:38 -0400 Subject: [PATCH 09/18] signbit: return a boolean value, instead of an Int. 
While messing around with generic equality checking based on the new decompose function introduced in the hashing work, I discovered that LLVM seems to be much better able to analyze expressions that use signbit when it's boolean and explicitly defined as `x < 0` for integer values. Since `true == 1` and `false == 0` this is a pretty benign change, although technically it is breaking. I've wanted to do this for a while and this seems like as good a time as any. --- base/bool.jl | 2 +- base/int.jl | 8 ++------ base/mpfr.jl | 3 +-- base/number.jl | 2 +- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/base/bool.jl b/base/bool.jl index 1e84f74e5c05f..85638bf72e82f 100644 --- a/base/bool.jl +++ b/base/bool.jl @@ -23,7 +23,7 @@ typemax(::Type{Bool}) = true (|)(x::Bool, y::Bool) = box(Bool,or_int(unbox(Bool,x),unbox(Bool,y))) ($)(x::Bool, y::Bool) = (x!=y) -signbit(x::Bool) = 0 +signbit(x::Bool) = false sign(x::Bool) = x abs(x::Bool) = x abs2(x::Bool) = x diff --git a/base/int.jl b/base/int.jl index 914af8d3070d6..7d510d4a6d618 100644 --- a/base/int.jl +++ b/base/int.jl @@ -53,12 +53,8 @@ inv(x::Integer) = float(one(x))/float(x) isodd(n::Integer) = bool(rem(n,2)) iseven(n::Integer) = !isodd(n) -signbit(x::Unsigned) = 0 -signbit(x::Int8) = int(x>>>7) -signbit(x::Int16) = int(x>>>15) -signbit(x::Int32) = int(x>>>31) -signbit(x::Int64) = int(x>>>63) -signbit(x::Int128) = int(x>>>127) +signbit(x::Integer) = x < 0 +signbit(x::Unsigned) = false flipsign(x::Int, y::Int) = box(Int,flipsign_int(unbox(Int,x),unbox(Int,y))) flipsign(x::Int64, y::Int64) = box(Int64,flipsign_int(unbox(Int64,x),unbox(Int64,y))) diff --git a/base/mpfr.jl b/base/mpfr.jl index d4720edd98ffd..bd1f39532ba3f 100644 --- a/base/mpfr.jl +++ b/base/mpfr.jl @@ -580,8 +580,7 @@ end <(x::BigFloat, y::BigFloat) = ccall((:mpfr_less_p, :libmpfr), Int32, (Ptr{BigFloat}, Ptr{BigFloat}), &x, &y) != 0 >(x::BigFloat, y::BigFloat) = ccall((:mpfr_greater_p, :libmpfr), Int32, (Ptr{BigFloat}, Ptr{BigFloat}), &x, 
&y) != 0 -signbit(x::BigFloat) = - int(ccall((:mpfr_signbit, :libmpfr), Int32, (Ptr{BigFloat},), &x)!=0) +signbit(x::BigFloat) = ccall((:mpfr_signbit, :libmpfr), Int32, (Ptr{BigFloat},), &x) != 0 function precision(x::BigFloat) return ccall((:mpfr_get_prec, :libmpfr), Clong, (Ptr{BigFloat},), &x) diff --git a/base/number.jl b/base/number.jl index f54230d1d272e..6dcfc24c7fc04 100644 --- a/base/number.jl +++ b/base/number.jl @@ -18,7 +18,7 @@ first(x::Number) = x last(x::Number) = x divrem(x,y) = (div(x,y),rem(x,y)) -signbit(x::Real) = int(x < 0) +signbit(x::Real) = x < 0 sign(x::Real) = ifelse(x < 0, oftype(x,-1), ifelse(x > 0, one(x), x)) abs(x::Real) = ifelse(x < 0, -x, x) abs2(x::Real) = x*x From 660c018f842a1f81169b0c8cdebc9e3c56ea2818 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Sun, 27 Apr 2014 13:23:03 -0400 Subject: [PATCH 10/18] isnan, isinf, isfinite: improved generic definitions. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defining isfinite in terms of decompose is simple. It seems good to insist that only floating-point reals can be NaN – NaN in hardware is simply an unavoidable reality. For any user-defined type that is not implemented in hardware, NaN should not exist since operations that would produce NaNs should raise immediate errors instead. 
--- base/float.jl | 15 ++++++--------- base/rational.jl | 4 ---- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/base/float.jl b/base/float.jl index d805574a39f3d..0c7577131d9f7 100644 --- a/base/float.jl +++ b/base/float.jl @@ -220,18 +220,15 @@ end abs(x::Float64) = box(Float64,abs_float(unbox(Float64,x))) abs(x::Float32) = box(Float32,abs_float(unbox(Float32,x))) -isnan(x::FloatingPoint) = (x != x) -isnan(x::Real) = isnan(float(x)) -isnan(x::Integer) = false +isnan(x::FloatingPoint) = x != x +isnan(x::Real) = false -isinf(x::FloatingPoint) = (abs(x) == Inf) -isinf(x::Real) = isinf(float(x)) -isinf(x::Integer) = false - -isfinite(x::FloatingPoint) = (x-x == 0) -isfinite(x::Real) = isfinite(float(x)) +isfinite(x::FloatingPoint) = x - x == 0 +isfinite(x::Real) = decompose(x)[3] != 0 isfinite(x::Integer) = true +isinf(x::Real) = !isnan(x) & !isfinite(x) + ## floating point traits ## const Inf16 = box(Float16,unbox(Uint16,0x7c00)) diff --git a/base/rational.jl b/base/rational.jl index e626c4487c69c..82a96aa9cbcc7 100644 --- a/base/rational.jl +++ b/base/rational.jl @@ -101,10 +101,6 @@ signbit(x::Rational) = signbit(x.num) copysign(x::Rational, y::Real) = copysign(x.num,y) // x.den copysign(x::Rational, y::Rational) = copysign(x.num,y.num) // x.den -isnan(x::Rational) = false -isinf(x::Rational) = x.den == 0 -isfinite(x::Rational) = x.den != 0 - typemin{T<:Integer}(::Type{Rational{T}}) = -one(T)//zero(T) typemax{T<:Integer}(::Type{Rational{T}}) = one(T)//zero(T) From 04908d454e1f9cbfde413f26ddc8facf1260eac0 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Mon, 28 Apr 2014 16:16:37 -0400 Subject: [PATCH 11/18] move hash(WeakRef) into base/hashing.jl also (not needed so early). 
--- base/base.jl | 2 -- base/hashing.jl | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/base/base.jl b/base/base.jl index 44bb21bcaa504..62e5577404ad3 100644 --- a/base/base.jl +++ b/base/base.jl @@ -112,8 +112,6 @@ isequal(w::WeakRef, v::WeakRef) = isequal(w.value, v.value) isequal(w::WeakRef, v) = isequal(w.value, v) isequal(w, v::WeakRef) = isequal(w, v.value) -hash(w::WeakRef, h::Uint) = hash(w.value, h) - function finalizer(o::ANY, f::Union(Function,Ptr)) if isimmutable(o) error("objects of type ", typeof(o), " cannot be finalized") diff --git a/base/hashing.jl b/base/hashing.jl index 53fd24305c45f..c1443016eb232 100644 --- a/base/hashing.jl +++ b/base/hashing.jl @@ -1,6 +1,7 @@ ## hashing a single value ## hash(x::Any) = hash(x, zero(Uint)) +hash(w::WeakRef, h::Uint) = hash(w.value, h) ## core data hashing functions ## From 1c8b7d12ab27f968c36d743bfc5a5abea5eeab1c Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Tue, 29 Apr 2014 20:12:10 -0400 Subject: [PATCH 12/18] hashing: make new hashing work on 32-bit systems. 
--- base/hashing.jl | 35 +++++++++++++++++++++++++++-------- base/hashing2.jl | 14 ++++++++++---- base/pkg/types.jl | 2 +- base/profile.jl | 2 +- base/version.jl | 2 +- 5 files changed, 40 insertions(+), 15 deletions(-) diff --git a/base/hashing.jl b/base/hashing.jl index c1443016eb232..44e7971b533bb 100644 --- a/base/hashing.jl +++ b/base/hashing.jl @@ -5,7 +5,7 @@ hash(w::WeakRef, h::Uint) = hash(w.value, h) ## core data hashing functions ## -function hash_uint(n::Uint64) +function hash_64_64(n::Uint64) local a::Uint64 = n a = ~a + a << 21 a = a $ a >> 24 @@ -17,7 +17,18 @@ function hash_uint(n::Uint64) return a end -function hash_uint(n::Uint32) +function hash_64_32(n::Uint64) + local a::Uint64 = n + a = ~a + a << 18 + a = a $ a >> 31 + a = a * 21 + a = a $ a >> 11 + a = a + a << 6 + a = a $ a >> 22 + return uint32(a) +end + +function hash_32_32(n::Uint32) local a::Uint32 = n a = a + 0x7ed55d16 + a << 12 a = a $ 0xc761c23c $ a >> 19 @@ -28,10 +39,18 @@ function hash_uint(n::Uint32) return a end +if Uint == Uint64 + hash_uint64(x::Uint64) = hash_64_64(x) + hash_uint(x::Uint) = hash_64_64(x) +else + hash_uint64(x::Uint64) = hash_64_32(x) + hash_uint(x::Uint) = hash_32_32(x) +end + ## hashing small, built-in numeric types ## -hx(a::Uint64, b::Float64, h::Uint) = hash_uint((3a + reinterpret(Uint64,b)) - h) -const hx_NaN = hx(uint(0), NaN, uint(0)) +hx(a::Uint64, b::Float64, h::Uint) = hash_uint64((3a + reinterpret(Uint64,b)) - h) +const hx_NaN = hx(uint64(0), NaN, uint(0 )) hash(x::Uint64, h::Uint) = hx(x, float64(x), h) hash(x::Int64, h::Uint) = hx(reinterpret(Uint64,x), float64(x), h) @@ -42,7 +61,7 @@ hash(x::Float32, h::Uint) = hash(float64(x), h) ## hashing complex numbers ## -const h_imag = 0x32a7a07f3e7cd1f9 +const h_imag = uint(0x32a7a07f3e7cd1f9) const hash_0_imag = hash(0, h_imag) function hash(z::Complex, h::Uint) @@ -53,10 +72,10 @@ end ## special hashing for booleans and characters ## -hash(x::Bool, h::Uint) = hash(int(x), h + 0x4cd135a1755139a5) 
-hash(x::Char, h::Uint) = hash(int(x), h + 0x10f989ff0f886f11) +hash(x::Bool, h::Uint) = hash(int(x), h + uint(0x4cd135a1755139a5)) +hash(x::Char, h::Uint) = hash(int(x), h + uint(0x10f989ff0f886f11)) ## symbol & expression hashing ## hash(x::Symbol, h::Uint) = hash(object_id(x), h) -hash(x::Expr, h::Uint) = hash(x.args, hash(x.head, h + 0x83c7900696d26dc6)) +hash(x::Expr, h::Uint) = hash(x.args, hash(x.head, h + uint(0x83c7900696d26dc6))) diff --git a/base/hashing2.jl b/base/hashing2.jl index 8a52a4795f31f..fececd3784446 100644 --- a/base/hashing2.jl +++ b/base/hashing2.jl @@ -156,9 +156,11 @@ hash(x::Float16, h::Uint) = hash(float64(x), h) ## hashing strings ## +const memhash = Uint == Uint64 ? :memhash_seed : :memhash32_seed + function hash{T<:ByteString}(s::Union(T,SubString{T}), h::Uint) - h += 0x71e729fd56419c81 - ccall(:memhash_seed, Uint64, (Ptr{Void}, Int, Uint32), pointer(s), sizeof(s), h) + h + h += uint(0x71e729fd56419c81) + ccall(memhash, Uint, (Ptr{Uint8}, Csize_t, Uint32), pointer(s), sizeof(s), h) + h end hash(s::String, h::Uint) = hash(bytestring(s), h) @@ -179,8 +181,12 @@ hash(B::BitArray, h::Uint) = hash((size(B),B.chunks), h) hash(a::AbstractArray{Bool}, h::Uint) = hash(bitpack(a), h) # hashing ranges by component at worst leads to collisions for very similar ranges -hash{T<:Range}(r::T, h::Uint) = - hash(first(r), hash(step(r), hash(last(r), h + object_id(eltype(T))))) +function hash{T<:Range}(r::T, h::Uint) + h += uint(0x80707b6821b70087) + h = hash(first(r), h) + h = hash(step(r), h) + h = hash(last(r), h) +end ## hashing general objects ## diff --git a/base/pkg/types.jl b/base/pkg/types.jl index eaf172c76f915..78a6bf8efdeb6 100644 --- a/base/pkg/types.jl +++ b/base/pkg/types.jl @@ -44,7 +44,7 @@ function intersect(A::VersionSet, B::VersionSet) VersionSet(ivals) end isequal(A::VersionSet, B::VersionSet) = (A.intervals == B.intervals) -hash(s::VersionSet, h::Uint) = hash(s.intervals, h + 0x2fd2ca6efa023f44) +hash(s::VersionSet, h::Uint) = 
hash(s.intervals, h + uint(0x2fd2ca6efa023f44)) deepcopy_internal(vs::VersionSet, ::ObjectIdDict) = VersionSet(copy(vs.intervals)) typealias Requires Dict{ByteString,VersionSet} diff --git a/base/profile.jl b/base/profile.jl index 7770a19bf7fd7..e47c09b5490f1 100644 --- a/base/profile.jl +++ b/base/profile.jl @@ -77,7 +77,7 @@ const UNKNOWN = LineInfo("?", "?", -1) isequal(a::LineInfo, b::LineInfo) = a.line == b.line && a.func == b.func && a.file == b.file function hash(li::LineInfo, h::Uint) - h += 0xf4fbda67fe20ce88 + h += uint(0xf4fbda67fe20ce88) h = hash(li.line, h) h = hash(li.file, h) h = hash(li.func, h) diff --git a/base/version.jl b/base/version.jl index 490fb8c32b1fb..cb34c9cbafa5c 100644 --- a/base/version.jl +++ b/base/version.jl @@ -146,7 +146,7 @@ function isless(a::VersionNumber, b::VersionNumber) end function hash(v::VersionNumber, h::Uint) - h += 0x8ff4ffdb75f9fede + h += uint(0x8ff4ffdb75f9fede) h = hash(v.major, h) h = hash(v.minor, h) h = hash(v.patch, h) From 737ad6eba2f94a8542e9013fa84b6f4e3f5d827a Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Mon, 28 Apr 2014 16:22:54 -0400 Subject: [PATCH 13/18] isequal, isless: bring comparison and sorting in line with new hashing. 
--- base/base.jl | 6 ++--- base/bitarray.jl | 2 -- base/comparison.jl | 0 base/expr.jl | 4 +-- base/float.jl | 8 ------ base/gmp.jl | 1 - base/intset.jl | 2 +- base/operators.jl | 44 +++++++++++++++++++------------ base/pkg/reqs.jl | 2 +- base/pkg/resolve/fieldvalue.jl | 4 +-- base/pkg/resolve/versionweight.jl | 16 +++++------ base/pkg/types.jl | 10 +++---- base/range.jl | 12 +-------- base/set.jl | 4 +-- base/string.jl | 19 +++++++------ base/sysimg.jl | 2 +- base/version.jl | 4 +-- test/numbers.jl | 8 +++--- test/ranges.jl | 6 ++--- 19 files changed, 70 insertions(+), 84 deletions(-) create mode 100644 base/comparison.jl diff --git a/base/base.jl b/base/base.jl index 62e5577404ad3..7b6d58fbb94c4 100644 --- a/base/base.jl +++ b/base/base.jl @@ -108,9 +108,9 @@ type Colon end const (:) = Colon() -isequal(w::WeakRef, v::WeakRef) = isequal(w.value, v.value) -isequal(w::WeakRef, v) = isequal(w.value, v) -isequal(w, v::WeakRef) = isequal(w, v.value) +==(w::WeakRef, v::WeakRef) = isequal(w.value, v.value) +==(w::WeakRef, v) = isequal(w.value, v) +==(w, v::WeakRef) = isequal(w, v.value) function finalizer(o::ANY, f::Union(Function,Ptr)) if isimmutable(o) diff --git a/base/bitarray.jl b/base/bitarray.jl index 9f92e665ff195..d53b7e9d15938 100644 --- a/base/bitarray.jl +++ b/base/bitarray.jl @@ -1826,5 +1826,3 @@ function cat(catdim::Integer, X::Union(BitArray, Integer)...) 
end # hvcat -> use fallbacks in abstractarray.jl - -isequal(A::BitArray, B::BitArray) = (A == B) diff --git a/base/comparison.jl b/base/comparison.jl new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/base/expr.jl b/base/expr.jl index b9f09abd54425..b4d759fa9d3fc 100644 --- a/base/expr.jl +++ b/base/expr.jl @@ -39,8 +39,8 @@ astcopy(x::Union(SymbolNode,GetfieldNode,Expr)) = copy(x) astcopy(x::Array{Any,1}) = map(astcopy, x) astcopy(x) = x -isequal(x::Expr, y::Expr) = (is(x.head,y.head) && isequal(x.args,y.args)) -isequal(x::QuoteNode, y::QuoteNode) = isequal(x.value, y.value) +==(x::Expr, y::Expr) = x.head === y.head && x.args == y.args +==(x::QuoteNode, y::QuoteNode) = x.value == y.value function show(io::IO, tv::TypeVar) if !is(tv.lb, None) diff --git a/base/float.jl b/base/float.jl index 0c7577131d9f7..44b66df09f3cf 100644 --- a/base/float.jl +++ b/base/float.jl @@ -150,19 +150,11 @@ mod{T<:FloatingPoint}(x::T, y::T) = rem(y+rem(x,y),y) <=(x::Float32, y::Float32) = le_float(unbox(Float32,x),unbox(Float32,y)) <=(x::Float64, y::Float64) = le_float(unbox(Float64,x),unbox(Float64,y)) -isequal{T<:FloatingPoint}(x::T, y::T) = - ((x==y) & (signbit(x)==signbit(y))) | (isnan(x)&isnan(y)) - isequal(x::Float32, y::Float32) = fpiseq(unbox(Float32,x),unbox(Float32,y)) isequal(x::Float64, y::Float64) = fpiseq(unbox(Float64,x),unbox(Float64,y)) isless (x::Float32, y::Float32) = fpislt(unbox(Float32,x),unbox(Float32,y)) isless (x::Float64, y::Float64) = fpislt(unbox(Float64,x),unbox(Float64,y)) -isless(a::FloatingPoint, b::FloatingPoint) = - (asignbit(b)))) -isless(a::Real, b::FloatingPoint) = (ay, 1, 0)) diff --git a/base/gmp.jl b/base/gmp.jl index 5f8733c63abd0..af2ea296a06c7 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -407,7 +407,6 @@ end binomial(n::BigInt, k::Integer) = k < 0 ? 
throw(DomainError()) : binomial(n, uint(k)) ==(x::BigInt, y::BigInt) = cmp(x,y) == 0 -isequal(x::BigInt, y::BigInt) = cmp(x,y) == 0 <=(x::BigInt, y::BigInt) = cmp(x,y) <= 0 >=(x::BigInt, y::BigInt) = cmp(x,y) >= 0 <(x::BigInt, y::BigInt) = cmp(x,y) < 0 diff --git a/base/intset.jl b/base/intset.jl index 383be0f6ec88e..d383925026785 100644 --- a/base/intset.jl +++ b/base/intset.jl @@ -258,7 +258,7 @@ function symdiff!(s::IntSet, s2::IntSet) s end -function isequal(s1::IntSet, s2::IntSet) +function ==(s1::IntSet, s2::IntSet) if s1.fill1s != s2.fill1s return false end diff --git a/base/operators.jl b/base/operators.jl index dfe1243b5158b..5fdcbd03bb833 100644 --- a/base/operators.jl +++ b/base/operators.jl @@ -4,27 +4,37 @@ const (<:) = issubtype super(T::DataType) = T.super +## generic comparison ## + +==(x,y) = x === y + +isequal(x, y) = x == y +isequal(x::FloatingPoint, y::FloatingPoint) = (isnan(x) & isnan(y)) | (signbit(x) == signbit(y)) & (x == y) +isequal(x::Real, y::FloatingPoint) = (isnan(x) & isnan(y)) | (signbit(x) == signbit(y)) & (x == y) +isequal(x::FloatingPoint, y::Real ) = (isnan(x) & isnan(y)) | (signbit(x) == signbit(y)) & (x == y) + +isless(x::Any, y::Any) = x < y +isless(x::FloatingPoint, y::FloatingPoint) = (!isnan(x) & isnan(y)) | (signbit(x) & !signbit(y)) | (x < y) +isless(x::Real, y::FloatingPoint) = (!isnan(x) & isnan(y)) | (signbit(x) & !signbit(y)) | (x < y) +isless(x::FloatingPoint, y::Real ) = (!isnan(x) & isnan(y)) | (signbit(x) & !signbit(y)) | (x < y) + # avoid ambiguity with isequal(::Tuple, ::Tuple) -isequal(T::(Type...), S::(Type...)) = typeseq(T, S) -isequal(T::Type, S::Type) = typeseq(T, S) +==(T::(Type...), S::(Type...)) = typeseq(T, S) +==(T::Type, S::Type) = typeseq(T, S) -## comparison ## +## comparison fallbacks ## -isequal(x,y) = is(x,y) -==(x,y) = isequal(x,y) !=(x,y) = !(x==y) !==(x,y) = !is(x,y) -< (x,y) = isless(x,y) -> (x,y) = y < x +>(x,y) = y < x <=(x,y) = !(y < x) >=(x,y) = (y <= x) -.> (x,y) = y.=(x,y) = y.<=x 
+.>(x,y) = y .< x +.>=(x,y) = y .<= x # this definition allows Number types to implement < instead of isless, # which is more idiomatic: -isless(x::Real, y::Real) = x> and >>> takes Int32 as second arg -<<(x,y::Integer) = x << convert(Int32,y) <<(x,y::Int32) = no_op_err("<<", typeof(x)) ->>(x,y::Integer) = x >> convert(Int32,y) >>(x,y::Int32) = no_op_err(">>", typeof(x)) ->>>(x,y::Integer) = x >>> convert(Int32,y) >>>(x,y::Int32) = no_op_err(">>>", typeof(x)) +<<(x,y::Integer) = x << convert(Int32,y) +>>(x,y::Integer) = x >> convert(Int32,y) +>>>(x,y::Integer) = x >>> convert(Int32,y) # fallback div and fld implementations # NOTE: C89 fmod() and x87 FPREM implicitly provide truncating float division, diff --git a/base/pkg/reqs.jl b/base/pkg/reqs.jl index bf7fe90202e15..b8b58bfecded9 100644 --- a/base/pkg/reqs.jl +++ b/base/pkg/reqs.jl @@ -46,7 +46,7 @@ immutable Requirement <: Line end # TODO: shouldn't be neccessary #4648 -Base.isequal(a::Line, b::Line) = (a.content == b.content) +==(a::Line, b::Line) = a.content == b.content # general machinery for parsing REQUIRE files diff --git a/base/pkg/resolve/fieldvalue.jl b/base/pkg/resolve/fieldvalue.jl index d450121bfee99..233d7750ccf89 100644 --- a/base/pkg/resolve/fieldvalue.jl +++ b/base/pkg/resolve/fieldvalue.jl @@ -40,7 +40,7 @@ Base.typemin(::Type{FieldValue}) = (x=typemin(Int); y=typemin(VersionWeight); Fi Base.(:-)(a::FieldValue, b::FieldValue) = FieldValue(a.l0-b.l0, a.l1-b.l1, a.l2-b.l2, a.l3-b.l3, a.l4-b.l4) Base.(:+)(a::FieldValue, b::FieldValue) = FieldValue(a.l0+b.l0, a.l1+b.l1, a.l2+b.l2, a.l3+b.l3, a.l4+b.l4) -function Base.isless(a::FieldValue, b::FieldValue) +function <(a::FieldValue, b::FieldValue) a.l0 < b.l0 && return true a.l0 > b.l0 && return false c = cmp(a.l1, b.l1) @@ -55,7 +55,7 @@ function Base.isless(a::FieldValue, b::FieldValue) return false end -Base.isequal(a::FieldValue, b::FieldValue) = +==(a::FieldValue, b::FieldValue) = a.l0 == b.l0 && a.l1 == b.l1 && a.l2 == b.l2 && a.l3 == b.l3 && 
a.l4 == b.l4 Base.abs(a::FieldValue) = FieldValue(abs(a.l0), abs(a.l1), abs(a.l2), abs(a.l3), abs(a.l4)) diff --git a/base/pkg/resolve/versionweight.jl b/base/pkg/resolve/versionweight.jl index 05c9803fbb473..e3c189c65a570 100644 --- a/base/pkg/resolve/versionweight.jl +++ b/base/pkg/resolve/versionweight.jl @@ -60,8 +60,8 @@ function Base.cmp{T}(a::HierarchicalValue{T}, b::HierarchicalValue{T}) end return cmp(a.rest, b.rest) end -Base.isless{T}(a::HierarchicalValue{T}, b::HierarchicalValue{T}) = (cmp(a, b) == -1) -Base.isequal{T}(a::HierarchicalValue{T}, b::HierarchicalValue{T}) = (a.v == b.v) && (a.rest == b.rest) +<{T}(a::HierarchicalValue{T}, b::HierarchicalValue{T}) = cmp(a,b) < 0 +=={T}(a::HierarchicalValue{T}, b::HierarchicalValue{T}) = a.v == b.v && a.rest == b.rest Base.abs{T}(a::HierarchicalValue{T}) = HierarchicalValue(T[abs(x) for x in a.v], abs(a.rest)) @@ -88,8 +88,8 @@ function Base.cmp(a::VWPreBuildItem, b::VWPreBuildItem) c = cmp(a.s, b.s); c != 0 && return c return cmp(a.i, b.i) end -Base.isless(a::VWPreBuildItem, b::VWPreBuildItem) = (cmp(a,b) == -1) -Base.isequal(a::VWPreBuildItem, b::VWPreBuildItem) = (cmp(a,b) == 0) +<(a::VWPreBuildItem, b::VWPreBuildItem) = cmp(a,b) < 0 +==(a::VWPreBuildItem, b::VWPreBuildItem) = cmp(a,b) == 0 Base.abs(a::VWPreBuildItem) = VWPreBuildItem(abs(a.nonempty), abs(a.s), abs(a.i)) @@ -125,8 +125,8 @@ function Base.cmp(a::VWPreBuild, b::VWPreBuild) c = cmp(a.nonempty, b.nonempty); c != 0 && return c return cmp(a.w, b.w) end -Base.isless(a::VWPreBuild, b::VWPreBuild) = (cmp(a,b) == -1) -Base.isequal(a::VWPreBuild, b::VWPreBuild) = (cmp(a,b) == 0) +<(a::VWPreBuild, b::VWPreBuild) = cmp(a,b) < 0 +==(a::VWPreBuild, b::VWPreBuild) = cmp(a,b) == 0 Base.abs(a::VWPreBuild) = VWPreBuild(abs(a.nonempty), abs(a.w)) @@ -179,8 +179,8 @@ function Base.cmp(a::VersionWeight, b::VersionWeight) c = cmp(a.build, b.build); c != 0 && return c return cmp(a.uninstall, b.uninstall) end -Base.isless(a::VersionWeight, b::VersionWeight) = 
(cmp(a, b) == -1) -Base.isequal(a::VersionWeight, b::VersionWeight) = (cmp(a, b) == 0) +<(a::VersionWeight, b::VersionWeight) = cmp(a,b) < 0 +==(a::VersionWeight, b::VersionWeight) = cmp(a,b) == 0 Base.abs(a::VersionWeight) = VersionWeight(abs(a.major), abs(a.minor), abs(a.patch), diff --git a/base/pkg/types.jl b/base/pkg/types.jl index 78a6bf8efdeb6..fb8b63513527e 100644 --- a/base/pkg/types.jl +++ b/base/pkg/types.jl @@ -1,7 +1,7 @@ module Types export VersionInterval, VersionSet, Requires, Available, Fixed, merge_requires!, satisfies -import Base: show, isempty, in, intersect, isequal, hash, deepcopy_internal +import Base: show, isempty, in, intersect, hash, deepcopy_internal immutable VersionInterval lower::VersionNumber @@ -14,7 +14,7 @@ show(io::IO, i::VersionInterval) = print(io, "[$(i.lower),$(i.upper))") isempty(i::VersionInterval) = i.upper <= i.lower in(v::VersionNumber, i::VersionInterval) = i.lower <= v < i.upper intersect(a::VersionInterval, b::VersionInterval) = VersionInterval(max(a.lower,b.lower), min(a.upper,b.upper)) -isequal(a::VersionInterval, b::VersionInterval) = (a.lower == b.lower) & (a.upper == b.upper) +==(a::VersionInterval, b::VersionInterval) = a.lower == b.lower && a.upper == b.upper immutable VersionSet intervals::Vector{VersionInterval} @@ -43,7 +43,7 @@ function intersect(A::VersionSet, B::VersionSet) sort!(ivals, by=i->i.lower) VersionSet(ivals) end -isequal(A::VersionSet, B::VersionSet) = (A.intervals == B.intervals) +==(A::VersionSet, B::VersionSet) = A.intervals == B.intervals hash(s::VersionSet, h::Uint) = hash(s.intervals, h + uint(0x2fd2ca6efa023f44)) deepcopy_internal(vs::VersionSet, ::ObjectIdDict) = VersionSet(copy(vs.intervals)) @@ -64,7 +64,7 @@ immutable Available requires::Requires end -isequal(a::Available, b::Available) = (a.sha1 == b.sha1 && a.requires == b.requires) +==(a::Available, b::Available) = a.sha1 == b.sha1 && a.requires == b.requires show(io::IO, a::Available) = isempty(a.requires) ? 
print(io, "Available(", repr(a.sha1), ")") : @@ -76,7 +76,7 @@ immutable Fixed end Fixed(v::VersionNumber) = Fixed(v,Requires()) -isequal(a::Fixed, b::Fixed) = (a.version == b.version && a.requires == b.requires) +==(a::Fixed, b::Fixed) = a.version == b.version && a.requires == b.requires show(io::IO, f::Fixed) = isempty(f.requires) ? print(io, "Fixed(", repr(f.version), ")") : diff --git a/base/range.jl b/base/range.jl index 910637c880505..f414db7270c51 100644 --- a/base/range.jl +++ b/base/range.jl @@ -273,15 +273,7 @@ function show(io::IO, r::Range) end show(io::IO, r::UnitRange) = print(io, repr(first(r)), ':', repr(last(r))) -isequal{T<:Range}(r::T, s::T) = - (first(r)==first(s)) & (step(r)==step(s)) & (last(r)==last(s)) - -isequal(r::Range, s::Range) = false - -=={T<:Range}(r::T, s::T) = isequal(r, s) - -=={T<:Integer, S<:Integer}(r::Range{T}, s::Range{S}) = - (first(r)==first(s)) & (step(r)==step(s)) & (last(r)==last(s)) +=={T<:Range}(r::T, s::T) = (first(r) == first(s)) & (step(r) == step(s)) & (last(r) == last(s)) function ==(r::Range, s::Range) lr = length(r) @@ -299,8 +291,6 @@ function ==(r::Range, s::Range) return true end -# TODO: isless? 
- intersect{T1<:Integer, T2<:Integer}(r::UnitRange{T1}, s::UnitRange{T2}) = max(r.start,s.start):min(last(r),last(s)) intersect{T<:Integer}(i::Integer, r::UnitRange{T}) = diff --git a/base/set.jl b/base/set.jl index a83cf3a1e98a6..9ea5c0545a734 100644 --- a/base/set.jl +++ b/base/set.jl @@ -73,8 +73,8 @@ function setdiff(a::Set, b::Set) d end -isequal(l::Set, r::Set) = (length(l) == length(r)) && (l <= r) -<(l::Set, r::Set) = (length(l) < length(r)) && (l <= r) +==(l::Set, r::Set) = (length(l) == length(r)) && (l <= r) +< (l::Set, r::Set) = (length(l) < length(r)) && (l <= r) <=(l::Set, r::Set) = issubset(l, r) function issubset(l, r) diff --git a/base/string.jl b/base/string.jl index c7be7e7dea612..847b82de0caf2 100644 --- a/base/string.jl +++ b/base/string.jl @@ -482,8 +482,8 @@ function cmp(a::String, b::String) !done(a,i) && done(b,j) ? +1 : 0 end -isequal(a::String, b::String) = cmp(a,b) == 0 -isless(a::String, b::String) = cmp(a,b) < 0 +==(a::String, b::String) = cmp(a,b) == 0 +< (a::String, b::String) = cmp(a,b) < 0 # begins with and ends with predicates @@ -515,19 +515,18 @@ function endswith(a::String, b::String) end endswith(a::String, c::Char) = !isempty(a) && a[end] == c -# faster comparisons for byte strings +# faster comparisons for byte strings and symbols -cmp(a::ByteString, b::ByteString) = lexcmp(a.data, b.data) -isequal(a::ByteString, b::ByteString) = endof(a)==endof(b) && cmp(a,b)==0 -beginswith(a::ByteString, b::ByteString) = beginswith(a.data, b.data) +cmp(a::ByteString, b::ByteString) = lexcmp(a.data, b.data) +cmp(a::Symbol, b::Symbol) = int(sign(ccall(:strcmp, Int32, (Ptr{Uint8}, Ptr{Uint8}), a, b))) + +==(a::ByteString, b::ByteString) = endof(a) == endof(b) && cmp(a,b) == 0 +<(a::Symbol, b::Symbol) = cmp(a,b) < 0 +beginswith(a::ByteString, b::ByteString) = beginswith(a.data, b.data) beginswith(a::Array{Uint8,1}, b::Array{Uint8,1}) = (length(a) >= length(b) && ccall(:strncmp, Int32, (Ptr{Uint8}, Ptr{Uint8}, Uint), a, b, length(b)) == 0) 
-cmp(a::Symbol, b::Symbol) = - int(sign(ccall(:strcmp, Int32, (Ptr{Uint8}, Ptr{Uint8}), a, b))) -isless(a::Symbol, b::Symbol) = cmp(a,b)<0 - # TODO: fast endswith ## character column width function ## diff --git a/base/sysimg.jl b/base/sysimg.jl index add8cffd3ea0d..c89ff144fdb59 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -175,7 +175,7 @@ big(q::Rational) = big(num(q))//big(den(q)) big(z::Complex) = complex(big(real(z)),big(imag(z))) @vectorize_1arg Number big -# moer hashing definitions +# more hashing definitions include("hashing2.jl") # random number generation and statistics diff --git a/base/version.jl b/base/version.jl index cb34c9cbafa5c..05966dbd7af9b 100644 --- a/base/version.jl +++ b/base/version.jl @@ -115,7 +115,7 @@ function ident_cmp(A::(Union(Int,ASCIIString)...), !done(A,i) && done(B,j) ? +1 : 0 end -function isequal(a::VersionNumber, b::VersionNumber) +function ==(a::VersionNumber, b::VersionNumber) (a.major != b.major) && return false (a.minor != b.minor) && return false (a.patch != b.patch) && return false @@ -126,7 +126,7 @@ end issupbuild(v::VersionNumber) = length(v.build)==1 && isempty(v.build[1]) -function isless(a::VersionNumber, b::VersionNumber) +function <(a::VersionNumber, b::VersionNumber) (a.major < b.major) && return true (a.major > b.major) && return false (a.minor < b.minor) && return true diff --git a/test/numbers.jl b/test/numbers.jl index d2fd2fc479b23..2afa54cb0fa37 100644 --- a/test/numbers.jl +++ b/test/numbers.jl @@ -382,8 +382,8 @@ end @test !isequal(+1.0,-1.0) @test !isequal(+Inf,-Inf) -@test !isequal(-0.0f0,-0.0) -@test !isequal( 0.0f0, 0.0) +@test isequal(-0.0f0,-0.0) +@test isequal( 0.0f0, 0.0) @test !isequal(-0.0f0, 0.0) @test !isequal(0.0f0 ,-0.0) @@ -459,8 +459,8 @@ end @test !isless(+NaN,-NaN) @test !isless(+NaN,+NaN) -@test !isequal( 0, 0.0) -@test !isequal( 0.0, 0) +@test isequal( 0, 0.0) +@test isequal( 0.0, 0) @test !isequal( 0,-0.0) @test !isequal(-0.0, 0) @test isless(-0.0, 0) diff --git 
a/test/ranges.jl b/test/ranges.jl index a544cbba8ece5..469c67a2d687e 100644 --- a/test/ranges.jl +++ b/test/ranges.jl @@ -245,12 +245,10 @@ let for r in Rs ar = collect(r) @test r != ar - @test !isequal(r, ar) + @test !isequal(r,ar) for s in Rs as = collect(s) - - @test !isequal(r, s) || hash(r)==hash(s) - + @test !isequal(r,s) || hash(r)==hash(s) @test (r==s) == (ar==as) end end From f11b2e8bb70ceffffb0a9b7d88da6723b32eed87 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Wed, 30 Apr 2014 19:40:31 -0400 Subject: [PATCH 14/18] `d[k] = v`: replace key when new values are assigned in dicts. Setup: julia> d = Dict() Dict{Any,Any}() julia> d[1] = "foo" "foo" julia> d {1=>"foo"} julia> d[1.0] = "bar" "bar" Before: julia> d {1=>"bar"} After: julia> d {1.0=>"bar"} The new behavior seems far less surprising to me. --- base/dict.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/base/dict.jl b/base/dict.jl index 8527690430037..bf0f5f32acd24 100644 --- a/base/dict.jl +++ b/base/dict.jl @@ -425,6 +425,7 @@ function setindex!{K,V}(h::Dict{K,V}, v0, key0) index = ht_keyindex2(h, key) if index > 0 + h.keys[index] = key h.vals[index] = v else _setindex!(h, v, key, -index) From f9cb1e3d001dbed0fc2d4b66a86b21a9e7e2acbc Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Thu, 1 May 2014 23:10:04 -0400 Subject: [PATCH 15/18] workaround for the Type{()} error --- src/gf.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/gf.c b/src/gf.c index 6d3a168122c37..0ac5ab8c2f133 100644 --- a/src/gf.c +++ b/src/gf.c @@ -467,6 +467,14 @@ static jl_function_t *cache_method(jl_methtable_t *mt, jl_tuple_t *type, jl_value_t *elt = jl_tupleref(type,i); jl_value_t *decl_i = nth_slot_type(decl,i); if (jl_is_type_type(elt) && jl_is_tuple(jl_tparam0(elt)) && + /* + NOTE: without this, () is sometimes specialized as () and + sometimes as Type{()}. In #6624, this caused a + TypeError(func=:tuplelen, context="", expected=(Any...,), got=Type{()}()) + inside ==, inside isstructtype. 
Not quite clear why, however. + */ + jl_tparam0(elt) != (jl_value_t*)jl_null && + !jl_is_type_type(decl_i)) { jl_methlist_t *curr = mt->defs; int ok=1; From b5e0b73b6e0e1c81bb0895ffca3bd516394836b2 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Fri, 2 May 2014 13:29:02 -0400 Subject: [PATCH 16/18] update docs for == and isequal --- doc/stdlib/base.rst | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/doc/stdlib/base.rst b/doc/stdlib/base.rst index 7a1aec99e3d81..c8e32dd51472d 100644 --- a/doc/stdlib/base.rst +++ b/doc/stdlib/base.rst @@ -139,34 +139,19 @@ All Objects .. function:: isequal(x, y) - True if and only if ``x`` and ``y`` would cause a typical function to behave the - same. A "typical" function is one that uses only intended interfaces, and does not - unreasonably exploit implementation details of its arguments. For example, - floating-point ``NaN`` values are ``isequal`` regardless of their sign bits, since - the sign of a ``NaN`` has no meaning in the vast majority of cases (but can be - discovered if you really want to). - - One implication of this definition is that implementing ``isequal`` for a new type - encapsulates, to a large extent, what the true abstraction presented by that type - is. For example, a ``String`` is a sequence of characters, so two strings are - ``isequal`` if they generate the same characters. Other concerns, such as encoding, - are not considered. - - When calling ``isequal``, be aware that it cannot be all things to all people. - For example, if your use of strings *does* care about encoding, you will have to - perform a check like ``typeof(x)==typeof(y) && isequal(x,y)``. - - ``isequal`` is the default comparison function used by hash tables (``Dict``). - ``isequal(x,y)`` must imply ``hash(x)==hash(y)``. + Similar to ``==``, except treats all floating-point ``NaN`` values as equal, and + treats ``-0.0`` as unequal to ``0.0``. Falls back to ``==``. 
- New types with a notion of equality should implement this function, except for - numbers, which should implement ``==`` instead. However, numeric types with special - values like ``NaN`` might need to implement ``isequal`` as well. Numbers of different - types are considered unequal. + ``isequal`` is the comparison function used by hash tables (``Dict``). + ``isequal(x,y)`` must imply ``hash(x)==hash(y)``. Mutable containers should generally implement ``isequal`` by calling ``isequal`` recursively on all contents. + Other new types generally do not need to implement this function, unless they + represent floating-point numbers amenable to a more efficient implementation + than that provided by a generic fallback (based on ``isnan``, ``signbit``, and ``==``). + .. function:: isless(x, y) Test whether ``x`` is less than ``y``, according to a canonical total order. @@ -2288,8 +2273,18 @@ Mathematical Operators .. _==: .. function:: ==(x, y) - Numeric equality operator. Compares numbers and number-like values (e.g. arrays) by numeric value. True for numbers of different types that represent the same value (e.g. ``2`` and ``2.0``). Follows IEEE semantics for floating-point numbers. - New numeric types should implement this function for two arguments of the new type. + Generic equality operator, giving a single ``Bool`` result. Falls back to ``===``. + Should be implemented for all types with a notion of equality, based + on the abstract value that an instance represents. For example, all numeric types are compared + by numeric value, ignoring type. Strings are compared as sequences of characters, ignoring + encoding. + + Follows IEEE semantics for floating-point numbers. + + Mutable containers should generally implement ``==`` by calling ``==`` recursively on all contents. + + New numeric types should implement this function for two arguments of the new type, and handle + comparison to other types via promotion rules where possible. .. _!=: .. 
function:: !=(x, y) From 9a48950a0729bed3cf2de72602ab991bb1cb4d63 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Fri, 2 May 2014 17:03:36 -0400 Subject: [PATCH 17/18] doc: some wording adjustments for isequal. --- doc/stdlib/base.rst | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/doc/stdlib/base.rst b/doc/stdlib/base.rst index c8e32dd51472d..988e0a1cd1776 100644 --- a/doc/stdlib/base.rst +++ b/doc/stdlib/base.rst @@ -139,18 +139,19 @@ All Objects .. function:: isequal(x, y) - Similar to ``==``, except treats all floating-point ``NaN`` values as equal, and - treats ``-0.0`` as unequal to ``0.0``. Falls back to ``==``. + Similar to ``==``, except treats all floating-point ``NaN`` values as equal to each other + and greater than all other real values, and treats ``-0.0`` as unequal to ``0.0``. + For values that are not floating-point, ``isequal`` is the same as ``==``. ``isequal`` is the comparison function used by hash tables (``Dict``). - ``isequal(x,y)`` must imply ``hash(x)==hash(y)``. + ``isequal(x,y)`` must imply that ``hash(x) == hash(y)``. - Mutable containers should generally implement ``isequal`` by calling ``isequal`` - recursively on all contents. + Mutable containers typically implement ``isequal`` by calling ``isequal`` recursively on + all contents. - Other new types generally do not need to implement this function, unless they + Scalar types generally do not need to implement ``isequal``, unless they represent floating-point numbers amenable to a more efficient implementation - than that provided by a generic fallback (based on ``isnan``, ``signbit``, and ``==``). + than that provided as a generic fallback (based on ``isnan``, ``signbit``, and ``==``). .. 
function:: isless(x, y) From 9738dd3516ce65955e37ee910618a497cab6dd97 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Wed, 7 May 2014 14:49:01 -0400 Subject: [PATCH 18/18] restore former behavior of isless as a total order --- base/operators.jl | 3 ++- base/pkg/resolve/fieldvalue.jl | 2 +- base/pkg/resolve/versionweight.jl | 8 ++++---- base/string.jl | 4 ++-- base/version.jl | 2 +- doc/stdlib/base.rst | 8 ++++---- 6 files changed, 14 insertions(+), 13 deletions(-) diff --git a/base/operators.jl b/base/operators.jl index 5fdcbd03bb833..46f5e64ef956a 100644 --- a/base/operators.jl +++ b/base/operators.jl @@ -13,7 +13,6 @@ isequal(x::FloatingPoint, y::FloatingPoint) = (isnan(x) & isnan(y)) | (signbit(x isequal(x::Real, y::FloatingPoint) = (isnan(x) & isnan(y)) | (signbit(x) == signbit(y)) & (x == y) isequal(x::FloatingPoint, y::Real ) = (isnan(x) & isnan(y)) | (signbit(x) == signbit(y)) & (x == y) -isless(x::Any, y::Any) = x < y isless(x::FloatingPoint, y::FloatingPoint) = (!isnan(x) & isnan(y)) | (signbit(x) & !signbit(y)) | (x < y) isless(x::Real, y::FloatingPoint) = (!isnan(x) & isnan(y)) | (signbit(x) & !signbit(y)) | (x < y) isless(x::FloatingPoint, y::Real ) = (!isnan(x) & isnan(y)) | (signbit(x) & !signbit(y)) | (x < y) @@ -27,6 +26,7 @@ isless(x::FloatingPoint, y::Real ) = (!isnan(x) & isnan(y)) | (signbit(x !=(x,y) = !(x==y) !==(x,y) = !is(x,y) +<(x,y) = isless(x,y) >(x,y) = y < x <=(x,y) = !(y < x) >=(x,y) = (y <= x) @@ -35,6 +35,7 @@ isless(x::FloatingPoint, y::Real ) = (!isnan(x) & isnan(y)) | (signbit(x # this definition allows Number types to implement < instead of isless, # which is more idiomatic: +isless(x::Real, y::Real) = x b.l0 && return false c = cmp(a.l1, b.l1) diff --git a/base/pkg/resolve/versionweight.jl b/base/pkg/resolve/versionweight.jl index e3c189c65a570..f89360e68918b 100644 --- a/base/pkg/resolve/versionweight.jl +++ b/base/pkg/resolve/versionweight.jl @@ -60,7 +60,7 @@ function Base.cmp{T}(a::HierarchicalValue{T}, 
b::HierarchicalValue{T}) end return cmp(a.rest, b.rest) end -<{T}(a::HierarchicalValue{T}, b::HierarchicalValue{T}) = cmp(a,b) < 0 +Base.isless{T}(a::HierarchicalValue{T}, b::HierarchicalValue{T}) = cmp(a,b) < 0 =={T}(a::HierarchicalValue{T}, b::HierarchicalValue{T}) = a.v == b.v && a.rest == b.rest Base.abs{T}(a::HierarchicalValue{T}) = HierarchicalValue(T[abs(x) for x in a.v], abs(a.rest)) @@ -88,7 +88,7 @@ function Base.cmp(a::VWPreBuildItem, b::VWPreBuildItem) c = cmp(a.s, b.s); c != 0 && return c return cmp(a.i, b.i) end -<(a::VWPreBuildItem, b::VWPreBuildItem) = cmp(a,b) < 0 +Base.isless(a::VWPreBuildItem, b::VWPreBuildItem) = cmp(a,b) < 0 ==(a::VWPreBuildItem, b::VWPreBuildItem) = cmp(a,b) == 0 Base.abs(a::VWPreBuildItem) = VWPreBuildItem(abs(a.nonempty), abs(a.s), abs(a.i)) @@ -125,7 +125,7 @@ function Base.cmp(a::VWPreBuild, b::VWPreBuild) c = cmp(a.nonempty, b.nonempty); c != 0 && return c return cmp(a.w, b.w) end -<(a::VWPreBuild, b::VWPreBuild) = cmp(a,b) < 0 +Base.isless(a::VWPreBuild, b::VWPreBuild) = cmp(a,b) < 0 ==(a::VWPreBuild, b::VWPreBuild) = cmp(a,b) == 0 Base.abs(a::VWPreBuild) = VWPreBuild(abs(a.nonempty), abs(a.w)) @@ -179,7 +179,7 @@ function Base.cmp(a::VersionWeight, b::VersionWeight) c = cmp(a.build, b.build); c != 0 && return c return cmp(a.uninstall, b.uninstall) end -<(a::VersionWeight, b::VersionWeight) = cmp(a,b) < 0 +Base.isless(a::VersionWeight, b::VersionWeight) = cmp(a,b) < 0 ==(a::VersionWeight, b::VersionWeight) = cmp(a,b) == 0 Base.abs(a::VersionWeight) = diff --git a/base/string.jl b/base/string.jl index 847b82de0caf2..1af1db70bfa8f 100644 --- a/base/string.jl +++ b/base/string.jl @@ -483,7 +483,7 @@ function cmp(a::String, b::String) end ==(a::String, b::String) = cmp(a,b) == 0 -< (a::String, b::String) = cmp(a,b) < 0 +isless(a::String, b::String) = cmp(a,b) < 0 # begins with and ends with predicates @@ -521,7 +521,7 @@ cmp(a::ByteString, b::ByteString) = lexcmp(a.data, b.data) cmp(a::Symbol, b::Symbol) = 
int(sign(ccall(:strcmp, Int32, (Ptr{Uint8}, Ptr{Uint8}), a, b))) ==(a::ByteString, b::ByteString) = endof(a) == endof(b) && cmp(a,b) == 0 -<(a::Symbol, b::Symbol) = cmp(a,b) < 0 +isless(a::Symbol, b::Symbol) = cmp(a,b) < 0 beginswith(a::ByteString, b::ByteString) = beginswith(a.data, b.data) beginswith(a::Array{Uint8,1}, b::Array{Uint8,1}) = diff --git a/base/version.jl b/base/version.jl index 05966dbd7af9b..b1b0fa99e6419 100644 --- a/base/version.jl +++ b/base/version.jl @@ -126,7 +126,7 @@ end issupbuild(v::VersionNumber) = length(v.build)==1 && isempty(v.build[1]) -function <(a::VersionNumber, b::VersionNumber) +function isless(a::VersionNumber, b::VersionNumber) (a.major < b.major) && return true (a.major > b.major) && return false (a.minor < b.minor) && return true diff --git a/doc/stdlib/base.rst b/doc/stdlib/base.rst index 988e0a1cd1776..c5db6ab905b32 100644 --- a/doc/stdlib/base.rst +++ b/doc/stdlib/base.rst @@ -139,14 +139,14 @@ All Objects .. function:: isequal(x, y) - Similar to ``==``, except treats all floating-point ``NaN`` values as equal to each other - and greater than all other real values, and treats ``-0.0`` as unequal to ``0.0``. + Similar to ``==``, except treats all floating-point ``NaN`` values as equal to each other, + and treats ``-0.0`` as unequal to ``0.0``. For values that are not floating-point, ``isequal`` is the same as ``==``. ``isequal`` is the comparison function used by hash tables (``Dict``). ``isequal(x,y)`` must imply that ``hash(x) == hash(y)``. - Mutable containers typically implement ``isequal`` by calling ``isequal`` recursively on + Collections typically implement ``isequal`` by calling ``isequal`` recursively on all contents. Scalar types generally do not need to implement ``isequal``, unless they @@ -2282,7 +2282,7 @@ Mathematical Operators Follows IEEE semantics for floating-point numbers. - Mutable containers should generally implement ``==`` by calling ``==`` recursively on all contents. 
+ Collections should generally implement ``==`` by calling ``==`` recursively on all contents. New numeric types should implement this function for two arguments of the new type, and handle comparison to other types via promotion rules where possible.