From 155a6c884661f0de790a19c6648a16479cba7c95 Mon Sep 17 00:00:00 2001
From: Stefan Karpinski <stefan@karpinski.org>
Date: Mon, 24 Feb 2014 17:32:18 -0500
Subject: [PATCH 01/18] WIP: new approach to efficiently hashing 1, 1.0,
 big(1), the same.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The major change is that numeric values that are mathematically
equal hash the same. This is tricky to do in a way that allows the
hashing of Int64, Float64, Uint64 and other common numeric types to
all be fast – nearly as fast as just applying the core hash to them
as raw bits.

Although tests pass, this is an inherently half-baked state since
isequal and hash are now badly out of sync. Many decisions still
need to be made about how to hash collections: does the collection
type matter or just the element type of the collection? Or neither?

This also does away with the bitmix function, instead favoring a
Merkle-Damgård style of combining arbitrary amounts of data into a
single, fixed-size hash value output. In a sense the hash function
with two arguments replaces the bitmix function – you can give the
result of hashing previous values as the second argument to hash
and the result will depend on both in a difficult-to-predict way.
---
 base/base.jl             |   3 +-
 base/bitarray.jl         |   4 -
 base/complex.jl          |   2 -
 base/dict.jl             |  98 ----------------------
 base/exports.jl          |   1 -
 base/float16.jl          |   2 -
 base/hashing.jl          | 174 +++++++++++++++++++++++++++++++++++++++
 base/hashing2.jl         |  58 +++++++++++++
 base/mpfr.jl             |  22 +----
 base/multi.jl            |   2 +-
 base/multidimensional.jl |   4 +-
 base/pkg/types.jl        |   2 +-
 base/precompile.jl       |   3 -
 base/profile.jl          |   7 +-
 base/random.jl           |   6 +-
 base/range.jl            |   4 -
 base/rational.jl         |   2 -
 base/set.jl              |   2 -
 base/string.jl           |  16 ----
 base/sysimg.jl           |   4 +
 base/utf8proc.jl         |   2 +-
 base/version.jl          |   9 +-
 test/hashing.jl          |   4 -
 23 files changed, 261 insertions(+), 170 deletions(-)
 create mode 100644 base/hashing.jl
 create mode 100644 base/hashing2.jl

diff --git a/base/base.jl b/base/base.jl
index 1309006009ab1..004fcabd67e85 100644
--- a/base/base.jl
+++ b/base/base.jl
@@ -108,11 +108,12 @@ type Colon
 end
 const (:) = Colon()
 
-hash(w::WeakRef) = hash(w.value)
 isequal(w::WeakRef, v::WeakRef) = isequal(w.value, v.value)
 isequal(w::WeakRef, v) = isequal(w.value, v)
 isequal(w, v::WeakRef) = isequal(w, v.value)
 
+hash(w::WeakRef, h::Uint=zero(Uint)) = hash(w.value, h)
+
 function finalizer(o::ANY, f::Union(Function,Ptr))
     if isimmutable(o)
         error("objects of type ", typeof(o), " cannot be finalized")
diff --git a/base/bitarray.jl b/base/bitarray.jl
index 23fa71472a857..9f92e665ff195 100644
--- a/base/bitarray.jl
+++ b/base/bitarray.jl
@@ -1828,7 +1828,3 @@ end
 # hvcat -> use fallbacks in abstractarray.jl
 
 isequal(A::BitArray, B::BitArray) = (A == B)
-
-# Hashing
-
-hash(B::BitArray) = hash((size(B), B.chunks))
diff --git a/base/complex.jl b/base/complex.jl
index 29c89042b653e..7203fc4788c43 100644
--- a/base/complex.jl
+++ b/base/complex.jl
@@ -99,8 +99,6 @@ end
 
 isequal(z::Complex, w::Complex) = isequal(real(z),real(w)) & isequal(imag(z),imag(w))
 
-hash(z::Complex) = bitmix(hash(real(z)),hash(imag(z)))
-
 conj(z::Complex) = Complex(real(z),-imag(z))
 abs(z::Complex)  = hypot(real(z), imag(z))
 abs2(z::Complex) = real(z)*real(z) + imag(z)*imag(z)
diff --git a/base/dict.jl b/base/dict.jl
index 945a4faa48df2..8527690430037 100644
--- a/base/dict.jl
+++ b/base/dict.jl
@@ -94,14 +94,6 @@ filter(f::Function, d::Associative) = filter!(f,copy(d))
 
 eltype{K,V}(a::Associative{K,V}) = (K,V)
 
-function hash(d::Associative)
-    h::Uint = 0
-    for (k,v) in d
-        h $= bitmix(hash(k),~hash(v))
-    end
-    h
-end
-
 function isequal(l::Associative, r::Associative)
     if isa(l,ObjectIdDict) != isa(r,ObjectIdDict)
         return false
@@ -201,96 +193,6 @@ function length(d::ObjectIdDict)
     n
 end
 
-# hashing
-
-function int32hash(n::Uint32)
-    local a::Uint32 = n
-    a = (a + 0x7ed55d16) + a << 12
-    a = (a $ 0xc761c23c) $ a >> 19
-    a = (a + 0x165667b1) + a << 5
-    a = (a + 0xd3a2646c) $ a << 9
-    a = (a + 0xfd7046c5) + a << 3
-    a = (a $ 0xb55a4f09) $ a >> 16
-    return a
-end
-
-function int64hash(n::Uint64)
-    local a::Uint64 = n
-    a = ~a + (a << 21)
-    a =  a $ (a >> 24)
-    a = (a + (a << 3)) + (a << 8)
-    a =  a $ (a >> 14)
-    a = (a + (a << 2)) + (a << 4)
-    a =  a $ (a >> 28)
-    a =  a + (a << 31)
-    return a
-end
-
-function int64to32hash(n::Uint64)
-    local key::Uint64 = n
-    key = ~key + (key << 18)
-    key =  key $ (key >> 31)
-    key =  key * 21
-    key =  key $ (key >> 11)
-    key =  key + (key << 6 )
-    key =  key $ (key >> 22)
-    return uint32(key)
-end
-
-bitmix(a::Union(Int32,Uint32), b::Union(Int32,Uint32)) = int64to32hash((uint64(a)<<32)|uint64(b))
-bitmix(a::Union(Int64,Uint64), b::Union(Int64, Uint64)) = int64hash(uint64(a$((b<<32)|(b>>>32))))
-
-if WORD_SIZE == 64
-    hash64(x::Float64) = int64hash(reinterpret(Uint64,x))
-    hash64(x::Union(Int64,Uint64)) = int64hash(reinterpret(Uint64,x))
-else
-    hash64(x::Float64) = int64to32hash(reinterpret(Uint64,x))
-    hash64(x::Union(Int64,Uint64)) = int64to32hash(reinterpret(Uint64,x))
-end
-
-hash(x::Union(Bool,Char,Int8,Uint8,Int16,Uint16,Int32,Uint32,Int64,Uint64)) =
-    hash64(uint64(x))
-
-function hash(x::Integer)
-    h::Uint = hash(uint64(x&0xffffffffffffffff))
-    if typemin(Int64) <= x <= typemax(Uint64)
-        return h
-    end
-    x >>>= 64
-    while x != 0 && x != -1
-        h = bitmix(h, hash(uint64(x&0xffffffffffffffff)))
-        x >>>= 64
-    end
-    return h
-end
-
-hash(x::Float32) = hash(reinterpret(Uint32, ifelse(isnan(x), NaN32, x)))
-hash(x::Float64) = hash(reinterpret(Uint64, ifelse(isnan(x), NaN, x)))
-
-function hash(t::Tuple)
-    h::Uint = 0
-    for i=1:length(t)
-        h = bitmix(h,int(hash(t[i]))+42)
-    end
-    return h
-end
-
-function hash(a::AbstractArray)
-    h::Uint = hash(size(a))+1
-    for i=1:length(a)
-        h = bitmix(h,int(hash(a[i])))
-    end
-    return h
-end
-
-# make sure Array{Bool} and BitArray can be equivalent
-hash(a::AbstractArray{Bool}) = hash(bitpack(a))
-
-hash(x::ANY) = object_id(x)
-
-hash(x::Expr) = bitmix(hash(x.head),hash(x.args)+43)
-
-
 # dict
 
 type Dict{K,V} <: Associative{K,V}
diff --git a/base/exports.jl b/base/exports.jl
index 8eef21bbbf561..e07b86ec8f352 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -279,7 +279,6 @@ export
     atanh,
     big,
     binomial,
-    bitmix,
     bool,
     bswap,
     cbrt,
diff --git a/base/float16.jl b/base/float16.jl
index 303529d2afe7d..158ed6dbf3e4c 100644
--- a/base/float16.jl
+++ b/base/float16.jl
@@ -139,5 +139,3 @@ hypot(a::Float16, b::Float16) = float16(hypot(float32(a), float32(b)))
 ldexp(a::Float16, b::Integer) = float16(ldexp(float32(a), b))
 exponent(x::Float16) = exponent(float32(x))
 ^(x::Float16, y::Integer) = x^float16(y)
-
-hash(x::Float16) = hash(reinterpret(Uint16, isnan(x) ? NaN16 : x))
diff --git a/base/hashing.jl b/base/hashing.jl
new file mode 100644
index 0000000000000..09e0d48b65002
--- /dev/null
+++ b/base/hashing.jl
@@ -0,0 +1,174 @@
+## core data hashing functions ##
+
+function hash_uint(n::Uint64)
+    local a::Uint64 = n
+    a = ~a + a << 21
+    a =  a $ a >> 24
+    a =  a + a << 3 + a << 8
+    a =  a $ a >> 14
+    a =  a + a << 2 + a << 4
+    a =  a $ a >> 28
+    a =  a + a << 31
+    return a
+end
+
+function hash_uint(n::Uint32)
+    local a::Uint32 = n
+    a = a + 0x7ed55d16 + a << 12
+    a = a $ 0xc761c23c $ a >> 19
+    a = a + 0x165667b1 + a << 5
+    a = a + 0xd3a2646c $ a << 9
+    a = a + 0xfd7046c5 + a << 3
+    a = a $ 0xb55a4f09 $ a >> 16
+    return a
+end
+
+## efficient value-based hashing of integers ##
+
+function hash_integer(n::Integer, h::Uint=zero(Uint))
+    h = hash_uint(uint(n & typemax(Uint)) $ h) $ h
+    n = ifelse(n < 0, oftype(n,-n), n)
+    n >>>= sizeof(Uint) << 3
+    while n != 0
+        h = hash_uint(uint(n & typemax(Uint)) $ h) $ h
+        n >>>= sizeof(Uint) << 3
+    end
+    return h
+end
+
+## hashing rational values ##
+
+#=
+`decompose(x)`: non-canonical decomposition of rational values as `den*2^pow/num`.
+
+The decompose function is the point where rational-valued numeric types that support
+hashing hook into the hashing protocol. `decompose(x)` should return three integer
+values `num, pow, den`, such that the value of `x` is mathematically equal to
+
+    num*2^pow/den
+
+The decomposition need not be canonical in the sense that it just needs to be *some*
+way to express `x` in this form, not any particular way – with the restriction that
+`num` and `den` may not share any odd common factors. They may, however, have powers
+of two in common – the generic hashing code will normalize those as necessary.
+
+Special values:
+
+ - `x` is zero: `num` should be zero and `den` should have the same sign as `x`
+ - `x` is infinite: `den` should be zero and `num` should have the same sign as `x`
+ - `x` is not a number: `num` and `den` should both be zero
+=#
+
+decompose(x::Integer) = x, 0, 1
+decompose(x::Rational) = num(x), 0, den(x)
+
+function decompose(x::Float32)
+    isnan(x) && return 0, 0, 0
+    isinf(x) && return ifelse(x < 0, -1, 1), 0, 0
+    n = reinterpret(Int32, x)
+    s = int32(n & 0x007fffff)
+    e = int32(n & 0x7f800000 >> 23)
+    s |= int32(e != 0) << 23
+    d = ifelse(signbit(n) == 1, -1, 1)
+    int(s), int(e - 150 + (e == 0)), d
+end
+
+function decompose(x::Float64)
+    isnan(x) && return 0, 0, 0
+    isinf(x) && return ifelse(x < 0, -1, 1), 0, 0
+    n = reinterpret(Int64, x)
+    s = int64(n & 0x000fffffffffffff)
+    e = int64(n & 0x7ff0000000000000 >> 52)
+    s |= int64(e != 0) << 52
+    d = ifelse(signbit(n) == 1, -1, 1)
+    int(s), int(e - 1075 + (e == 0)), d
+end
+
+# hashing methods for rational-valued types
+
+hx(a::Uint64, b::Float64, h::Uint) = hash_uint((3a + reinterpret(Uint64,b)) - h)
+
+hash(x::Uint64,  h::Uint=zero(Uint)) = hx(x, float64(x), h)
+hash(x::Int64,   h::Uint=zero(Uint)) = hx(reinterpret(Uint64,x), float64(x), h)
+hash(x::Float64, h::Uint=zero(Uint)) = hx(box(Uint64,fptosi(unbox(Float64,x))), ifelse(x==x,x,NaN), h)
+
+hash(x::Union(Int8,Uint8,Int16,Uint16,Int32,Uint32)) = hash(int64(x))
+hash(x::Union(Float16,Float32)) = hash(float64(x))
+
+const hash_NaN = hash(NaN)
+const hash_pos_Inf = hash(+Inf)
+const hash_neg_Inf = hash(-Inf)
+const hash_pos_zero = hash(+0.)
+const hash_neg_zero = hash(-0.)
+
+function hash(x::Real, h::Uint=zero(Uint))
+    # decompose x as num*2^pow/den
+    num, pow, den = decompose(x)::(Integer,Integer,Integer)
+
+    # handle special values
+    num == 0 && den == 0 && return hash(NaN, h)
+    if num == 0
+        den > 0 && return hash(+0.0, h)
+        den < 0 && return hash(-0.0, h)
+    end
+    if den == 0
+        num > 0 && return hash(+Inf, h)
+        num < 0 && return hash(-Inf, h)
+    end
+
+    # normalize decomposition
+    if den < 0
+        num = -num
+        den = -den
+    end
+    z = trailing_zeros(num)
+    if z != 0
+        num >>= z
+        pow += z
+    end
+    z = trailing_zeros(den)
+    if z != 0
+        den >>= z
+        pow -= z
+    end
+
+    # handle values representable as Int64, Uint64, Float64
+    if den == 1
+        left = ndigits0z(num,2) + pow
+        right = trailing_zeros(num) + pow
+        if -1074 <= right
+            if 0 <= right && left <= 64
+                left <= 63                     && return hash(int64(num) << int(pow), h)
+                signbit(num) == signbit(den)   && return hash(uint64(num) << int(pow), h)
+            end
+            left <= 1024 && left - right <= 53 && return hash(float64(num) * 2.0^pow, h)
+        end
+    end
+
+    # handle "generic" real values
+    h = hash_integer(den, h)
+    h = hash_integer(pow, h)
+    h = hash_integer(num, h)
+    return h
+end
+
+## hashing complex values ##
+
+const h_imag = 0x32a7a07f3e7cd1f9
+const hash_0_imag = hash(0, h_imag)
+
+function hash(z::Complex, h::Uint=zero(Uint))
+    # TODO: with default argument specialization, this would be better:
+    # hash(real(z), h $ hash(imag(z), h $ h_imag) $ hash(0, h $ h_imag))
+    hash(real(z), h $ hash(imag(z), h_imag) $ hash_0_imag)
+end
+
+## special hashing for booleans and characters ##
+
+hash(x::Bool, h::Uint=zero(Uint)) = hash(int(x), h + 0x4cd135a1755139a5)
+hash(x::Char, h::Uint=zero(Uint)) = hash(int(x), h + 0x10f989ff0f886f11)
+
+## expression hashing ##
+
+hash(x::Symbol, h::Uint=zero(Uint)) = hash(object_id(x), h)
+hash(x::Expr, h::Uint=zero(Uint)) = hash(x.args, hash(x.head, h + 0x83c7900696d26dc6))
diff --git a/base/hashing2.jl b/base/hashing2.jl
new file mode 100644
index 0000000000000..ebb459465655a
--- /dev/null
+++ b/base/hashing2.jl
@@ -0,0 +1,58 @@
+## hashing BigInts, BigFloats, and Float16s ##
+
+function hash_integer(n::BigInt, h::Uint=zero(Uint))
+    s = n.size
+    s == 0 && return hash_integer(0, h)
+    p = convert(Ptr{Uint}, n.d)
+    b = unsafe_load(p)
+    h = hash_uint(ifelse(s < 0, -b, b) $ h) $ h
+    for k = 2:abs(s)
+        h = hash_uint(unsafe_load(p, k) $ h) $ h
+    end
+    return h
+end
+
+function decompose(x::BigFloat)
+    isnan(x) && return big(0), 0, 0
+    isinf(x) && return big(x.sign), 0, 0
+    x == 0 && return big(0), 0, int(x.sign)
+    s = BigInt()
+    ccall((:__gmpz_realloc2, :libgmp), Void, (Ptr{BigInt}, Culong), &s, x.prec)
+    s.size = -fld(-x.prec,(sizeof(Culong)<<3))
+    ccall(:memcpy, Ptr{Void}, (Ptr{Void}, Ptr{Void}, Csize_t), s.d, x.d, s.size*sizeof(Culong))
+    s, int(x.exp - x.prec), int(x.sign)
+end
+
+hash(x::Float16, h::Uint=zero(Uint)) = hash(float64(x), h)
+
+## hashing strings ##
+
+function hash{T<:ByteString}(s::Union(T,SubString{T}), h::Uint=zero(Uint))
+    h += 0x71e729fd56419c81
+    ccall(:memhash_seed, Uint64, (Ptr{Void}, Int, Uint32), pointer(s), sizeof(s), h) + h
+end
+hash(s::String, h::Uint=zero(Uint)) = hash(bytestring(s), h)
+
+## hashing collections ##
+
+function hash(v::Union(Tuple,AbstractArray,Associative), h::Uint=zero(Uint))
+    h += object_id(eltype(v))
+    for x = v
+        h = hash(x, h)
+    end
+    return h
+end
+
+hash(s::Set, h::Uint=zero(Uint)) = hash(sort(s.dict.keys[s.dict.slots .!= 0]), h)
+
+hash(r::Range{Bool}, h::Uint=zero(Uint)) = invoke(hash, (Range, Uint), r, h)
+hash(B::BitArray, h::Uint=zero(Uint)) = hash((size(B),B.chunks), h)
+hash(a::AbstractArray{Bool}, h::Uint=zero(Uint)) = hash(bitpack(a), h)
+
+# hashing ranges by component at worst leads to collisions for very similar ranges
+hash{T<:Range}(r::T, h::Uint=zero(Uint)) =
+    hash(first(r), hash(step(r), hash(last(r), h + object_id(eltype(T)))))
+
+## hashing general objects and expressions ##
+
+hash(x::ANY,  h::Uint=zero(Uint)) = hash(object_id(x), h)
diff --git a/base/mpfr.jl b/base/mpfr.jl
index 744c39e0d124e..d4720edd98ffd 100644
--- a/base/mpfr.jl
+++ b/base/mpfr.jl
@@ -16,7 +16,7 @@ import
         gamma, lgamma, digamma, erf, erfc, zeta, log1p, airyai, iceil, ifloor,
         itrunc, eps, signbit, sin, cos, tan, sec, csc, cot, acos, asin, atan,
         cosh, sinh, tanh, sech, csch, coth, acosh, asinh, atanh, atan2,
-        serialize, deserialize, inf, nan, hash, cbrt, typemax, typemin,
+        serialize, deserialize, inf, nan, cbrt, typemax, typemin,
         realmin, realmax, get_rounding, set_rounding, maxintfloat, widen
 
 import Base.GMP: ClongMax, CulongMax
@@ -714,24 +714,4 @@ print(io::IO, b::BigFloat) = print(io, string(b))
 show(io::IO, b::BigFloat) = print(io, string(b), " with $(precision(b)) bits of precision")
 showcompact(io::IO, b::BigFloat) = print(io, string(b))
 
-function hash(x::BigFloat)
-    if isnan(x)
-        return hash(NaN)
-    end
-    if isinf(x)
-        return hash(float64(x))
-    end
-    n = ceil(precision(x)/53)
-    e = exponent(x)
-    h::Uint = signbit(x)
-    h = h<<30 + e
-    x = ldexp(x, -e)
-    for i=1:n
-        f64 = float64(x)
-        h = bitmix(h, hash(f64)$11111)
-        x -= f64
-    end
-    h
-end
-
 end #module
diff --git a/base/multi.jl b/base/multi.jl
index 847cefa9d164e..243170317396f 100644
--- a/base/multi.jl
+++ b/base/multi.jl
@@ -448,7 +448,7 @@ type RemoteRef
     next_id() = (id=(myid(),REQ_ID); REQ_ID+=1; id)
 end
 
-hash(r::RemoteRef) = hash(r.whence)+3*hash(r.id)
+hash(r::RemoteRef, h::Uint=zero(Uint)) = hash(r.whence, hash(r.id, h))
 isequal(r::RemoteRef, s::RemoteRef) = (r.whence==s.whence && r.id==s.id)
 
 rr2id(r::RemoteRef) = (r.whence, r.id)
diff --git a/base/multidimensional.jl b/base/multidimensional.jl
index 0b78db5b085d0..cde16603f0e9b 100644
--- a/base/multidimensional.jl
+++ b/base/multidimensional.jl
@@ -480,6 +480,8 @@ end
 
 ## unique across dim
 
+# TODO: this doesn't fit into the new hashing scheme in any obvious way
+
 immutable Prehashed
     hash::Uint
 end
@@ -492,7 +494,7 @@ hash(x::Prehashed) = x.hash
     # Compute hash for each row
     k = 0
     @nloops N i A d->(if d == dim; k = i_d; end) begin
-       @inbounds hashes[k] = bitmix(hashes[k], hash((@nref N A i)))
+       @inbounds hashes[k] = hash(hashes[k], hash((@nref N A i)))
     end
 
     # Collect index of first row for each hash
diff --git a/base/pkg/types.jl b/base/pkg/types.jl
index c2d75ffe13a47..2f75639ed079b 100644
--- a/base/pkg/types.jl
+++ b/base/pkg/types.jl
@@ -44,7 +44,7 @@ function intersect(A::VersionSet, B::VersionSet)
     VersionSet(ivals)
 end
 isequal(A::VersionSet, B::VersionSet) = (A.intervals == B.intervals)
-hash(s::VersionSet) = hash(s.intervals)
+hash(s::VersionSet, h::Uint=zero(Uint)) = hash(s.intervals, h + 0x2fd2ca6efa023f44)
 deepcopy_internal(vs::VersionSet, ::ObjectIdDict) = VersionSet(copy(vs.intervals))
 
 typealias Requires Dict{ByteString,VersionSet}
diff --git a/base/precompile.jl b/base/precompile.jl
index fcdfad42cea5f..44200d159789b 100644
--- a/base/precompile.jl
+++ b/base/precompile.jl
@@ -52,9 +52,6 @@ precompile(bool, (RemoteRef,))
 precompile(wait, (RemoteRef,))
 precompile(hash, (RemoteRef,))
 precompile(take, (RemoteRef,))
-precompile(bitmix, (Int, Int))
-precompile(bitmix, (Uint, Int))
-precompile(bitmix, (Uint64, Int64))
 precompile(hash, (Int,))
 precompile(isequal, (Symbol, Symbol))
 precompile(isequal, (Bool, Bool))
diff --git a/base/profile.jl b/base/profile.jl
index 65277f42b1f7a..4fb6f2a9d8b92 100644
--- a/base/profile.jl
+++ b/base/profile.jl
@@ -76,7 +76,12 @@ const UNKNOWN = LineInfo("?", "?", -1)
 
 isequal(a::LineInfo, b::LineInfo) = a.line == b.line && a.func == b.func && a.file == b.file
 
-hash(li::LineInfo) = bitmix(hash(li.func), bitmix(hash(li.file), hash(li.line)))
+function hash(li::LineInfo, h::Uint=zero(Uint))
+    h += 0xf4fbda67fe20ce88
+    h = hash(li.line, h)
+    h = hash(li.file, h)
+    h = hash(li.func, h)
+end
 
 # C wrappers
 start_timer() = ccall(:jl_profile_start_timer, Cint, ())
diff --git a/base/random.jl b/base/random.jl
index 172322493d0a8..abc9757990b34 100644
--- a/base/random.jl
+++ b/base/random.jl
@@ -39,11 +39,9 @@ function __init__()
     catch
         println(STDERR, "Entropy pool not available to seed RNG; using ad-hoc entropy sources.")
         seed = reinterpret(Uint64, time())
-        seed = bitmix(seed, uint64(getpid()))
+        seed = hash(seed, uint64(getpid()))
         try
-            seed = bitmix(seed, parseint(Uint64, readall(`ifconfig` |> `sha1sum`)[1:40], 16))
-        catch
-            # ignore
+        seed = hash(seed, parseint(Uint64, readall(`ifconfig` |> `sha1sum`)[1:40], 16))
         end
         srand(seed)
     end
diff --git a/base/range.jl b/base/range.jl
index c86611f9f093d..910637c880505 100644
--- a/base/range.jl
+++ b/base/range.jl
@@ -299,10 +299,6 @@ function ==(r::Range, s::Range)
     return true
 end
 
-# hashing ranges by component at worst leads to collisions for very similar ranges
-hash(r::Range) =
-    bitmix(hash(first(r)), bitmix(hash(step(r)), bitmix(hash(last(r)), uint(0xaaeeaaee))))
-
 # TODO: isless?
 
 intersect{T1<:Integer, T2<:Integer}(r::UnitRange{T1}, s::UnitRange{T2}) = max(r.start,s.start):min(last(r),last(s))
diff --git a/base/rational.jl b/base/rational.jl
index 2c22105a99465..e626c4487c69c 100644
--- a/base/rational.jl
+++ b/base/rational.jl
@@ -110,8 +110,6 @@ typemax{T<:Integer}(::Type{Rational{T}}) = one(T)//zero(T)
 
 isinteger(x::Rational) = x.den == 1
 
-hash(x::Rational) = bitmix(hash(x.num), ~hash(x.den))
-
 -(x::Rational) = (-x.num) // x.den
 for op in (:+, :-, :rem, :mod)
     @eval begin
diff --git a/base/set.jl b/base/set.jl
index ee3421c78924c..a83cf3a1e98a6 100644
--- a/base/set.jl
+++ b/base/set.jl
@@ -107,5 +107,3 @@ function filter!(f::Function, s::Set)
     return s
 end
 filter(f::Function, s::Set) = filter!(f, copy(s))
-
-hash(s::Set) = hash(sort(s.dict.keys[s.dict.slots .!= 0]))
diff --git a/base/string.jl b/base/string.jl
index 3ee7f3fa337ee..c7be7e7dea612 100644
--- a/base/string.jl
+++ b/base/string.jl
@@ -1678,19 +1678,3 @@ pointer{T<:ByteString}(x::SubString{T}, i::Integer) = pointer(x.string.data) + x
 pointer(x::Union(UTF16String,UTF32String), i::Integer) = pointer(x)+(i-1)*sizeof(eltype(x.data))
 pointer{T<:Union(UTF16String,UTF32String)}(x::SubString{T}) = pointer(x.string.data) + x.offset*sizeof(eltype(x.data))
 pointer{T<:Union(UTF16String,UTF32String)}(x::SubString{T}, i::Integer) = pointer(x.string.data) + (x.offset + (i-1))*sizeof(eltype(x.data))
-
-# string hashing:
-if WORD_SIZE == 64
-    hash{T<:ByteString}(s::Union(T,SubString{T})) =
-        ccall(:memhash, Uint64, (Ptr{Void}, Int), pointer(s), sizeof(s))
-    hash{T<:ByteString}(s::Union(T,SubString{T}), seed::Union(Int,Uint)) =
-        ccall(:memhash_seed, Uint64, (Ptr{Void}, Int, Uint32),
-              pointer(s), sizeof(s), uint32(seed))
-else
-    hash{T<:ByteString}(s::Union(T,SubString{T})) =
-        ccall(:memhash32, Uint32, (Ptr{Void}, Int), pointer(s), sizeof(s))
-    hash{T<:ByteString}(s::Union(T,SubString{T}), seed::Union(Int,Uint)) =
-        ccall(:memhash32_seed, Uint32, (Ptr{Void}, Int, Uint32),
-              pointer(s), sizeof(s), uint32(seed))
-end
-hash(s::String) = hash(bytestring(s))
diff --git a/base/sysimg.jl b/base/sysimg.jl
index 0cc475085094e..add8cffd3ea0d 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -58,6 +58,7 @@ include("bitarray.jl")
 include("intset.jl")
 include("dict.jl")
 include("set.jl")
+include("hashing.jl")
 include("iterator.jl")
 
 # compiler
@@ -174,6 +175,9 @@ big(q::Rational) = big(num(q))//big(den(q))
 big(z::Complex) = complex(big(real(z)),big(imag(z)))
 @vectorize_1arg Number big
 
+# moer hashing definitions
+include("hashing2.jl")
+
 # random number generation and statistics
 include("statistics.jl")
 include("librandom.jl")
diff --git a/base/utf8proc.jl b/base/utf8proc.jl
index 32e5d21011b72..d1d7427289d3c 100644
--- a/base/utf8proc.jl
+++ b/base/utf8proc.jl
@@ -1,7 +1,7 @@
 # Various Unicode functionality from the utf8proc library
 module UTF8proc
 
-import Base: show, showcompact, ==, string, symbol, isless, hash
+import Base: show, showcompact, ==, string, symbol, isless
 
 # also exported by Base:
 export normalize_string, is_valid_char, is_assigned_char
diff --git a/base/version.jl b/base/version.jl
index 9fd46b1821c04..86b55b2141c3b 100644
--- a/base/version.jl
+++ b/base/version.jl
@@ -145,7 +145,14 @@ function isless(a::VersionNumber, b::VersionNumber)
     return false
 end
 
-hash(v::VersionNumber) = hash([v.(n) for n in VersionNumber.names])
+function hash(v::VersionNumber, h::Uint=zero(Uint))
+    h += 0x8ff4ffdb75f9fede
+    h = hash(v.major, h)
+    h = hash(v.minor, h)
+    h = hash(v.patch, h)
+    h = hash(v.prerelease, ~h)
+    h = hash(v.build, ~h)
+end
 
 lowerbound(v::VersionNumber) = VersionNumber(v.major, v.minor, v.patch, ("",), ())
 upperbound(v::VersionNumber) = VersionNumber(v.major, v.minor, v.patch, (), ("",))
diff --git a/test/hashing.jl b/test/hashing.jl
index f713ccd929131..6b2b767e54229 100644
--- a/test/hashing.jl
+++ b/test/hashing.jl
@@ -35,7 +35,3 @@ end
 @test hash(:(X.x)) != hash(:(X.y))
 
 @test hash([1,2]) == hash(sub([1,2,3,4],1:2))
-
-# make sure >>> is used
-@test bitmix(2, -3) != bitmix(2, -4)
-@test bitmix(-3, 2) != bitmix(-4, 2)

From b3a7c84d8aada3d6524d606bfe8fd03228e829c8 Mon Sep 17 00:00:00 2001
From: Stefan Karpinski <stefan@karpinski.org>
Date: Thu, 24 Apr 2014 16:27:07 -0400
Subject: [PATCH 02/18] decompose: fix documentation of decompose for rational
 hashing.

---
 base/hashing.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/hashing.jl b/base/hashing.jl
index 09e0d48b65002..8b8fbbe73c9ae 100644
--- a/base/hashing.jl
+++ b/base/hashing.jl
@@ -39,7 +39,7 @@ end
 ## hashing rational values ##
 
 #=
-`decompose(x)`: non-canonical decomposition of rational values as `den*2^pow/num`.
+`decompose(x)`: non-canonical decomposition of rational values as `num*2^pow/den`.
 
 The decompose function is the point where rational-valued numeric types that support
 hashing hook into the hashing protocol. `decompose(x)` should return three integer

From dadc7cf62c815a904cfd4b22a7d1a91276b9fbec Mon Sep 17 00:00:00 2001
From: Stefan Karpinski <stefan@karpinski.org>
Date: Thu, 24 Apr 2014 17:02:36 -0400
Subject: [PATCH 03/18] define hash(x) = hash(x, zero(Uint)) in a single place;
 fix bugs.

---
 base/base.jl        |  2 +-
 base/hashing.jl     | 34 ++++++++++++++++------------------
 base/hashing2.jl    | 22 +++++++++++-----------
 base/multi.jl       |  2 +-
 base/pkg/types.jl   |  2 +-
 base/profile.jl     |  2 +-
 base/version.jl     |  2 +-
 test/arrayops.jl    |  2 +-
 test/collections.jl |  2 +-
 9 files changed, 34 insertions(+), 36 deletions(-)

diff --git a/base/base.jl b/base/base.jl
index 004fcabd67e85..44bb21bcaa504 100644
--- a/base/base.jl
+++ b/base/base.jl
@@ -112,7 +112,7 @@ isequal(w::WeakRef, v::WeakRef) = isequal(w.value, v.value)
 isequal(w::WeakRef, v) = isequal(w.value, v)
 isequal(w, v::WeakRef) = isequal(w, v.value)
 
-hash(w::WeakRef, h::Uint=zero(Uint)) = hash(w.value, h)
+hash(w::WeakRef, h::Uint) = hash(w.value, h)
 
 function finalizer(o::ANY, f::Union(Function,Ptr))
     if isimmutable(o)
diff --git a/base/hashing.jl b/base/hashing.jl
index 8b8fbbe73c9ae..bfd7b9b9e77f8 100644
--- a/base/hashing.jl
+++ b/base/hashing.jl
@@ -1,3 +1,7 @@
+## hashing a single value ##
+
+hash(x::Any) = hash(x, zero(Uint))
+
 ## core data hashing functions ##
 
 function hash_uint(n::Uint64)
@@ -25,7 +29,7 @@ end
 
 ## efficient value-based hashing of integers ##
 
-function hash_integer(n::Integer, h::Uint=zero(Uint))
+function hash_integer(n::Integer, h::Uint)
     h = hash_uint(uint(n & typemax(Uint)) $ h) $ h
     n = ifelse(n < 0, oftype(n,-n), n)
     n >>>= sizeof(Uint) << 3
@@ -88,20 +92,14 @@ end
 
 hx(a::Uint64, b::Float64, h::Uint) = hash_uint((3a + reinterpret(Uint64,b)) - h)
 
-hash(x::Uint64,  h::Uint=zero(Uint)) = hx(x, float64(x), h)
-hash(x::Int64,   h::Uint=zero(Uint)) = hx(reinterpret(Uint64,x), float64(x), h)
-hash(x::Float64, h::Uint=zero(Uint)) = hx(box(Uint64,fptosi(unbox(Float64,x))), ifelse(x==x,x,NaN), h)
-
-hash(x::Union(Int8,Uint8,Int16,Uint16,Int32,Uint32)) = hash(int64(x))
-hash(x::Union(Float16,Float32)) = hash(float64(x))
+hash(x::Uint64,  h::Uint) = hx(x, float64(x), h)
+hash(x::Int64,   h::Uint) = hx(reinterpret(Uint64,x), float64(x), h)
+hash(x::Float64, h::Uint) = hx(box(Uint64,fptosi(unbox(Float64,x))), ifelse(x==x,x,NaN), h)
 
-const hash_NaN = hash(NaN)
-const hash_pos_Inf = hash(+Inf)
-const hash_neg_Inf = hash(-Inf)
-const hash_pos_zero = hash(+0.)
-const hash_neg_zero = hash(-0.)
+hash(x::Union(Int8,Uint8,Int16,Uint16,Int32,Uint32), h::Uint) = hash(int64(x), h)
+hash(x::Float32, h::Uint) = hash(float64(x), h)
 
-function hash(x::Real, h::Uint=zero(Uint))
+function hash(x::Real, h::Uint)
     # decompose x as num*2^pow/den
     num, pow, den = decompose(x)::(Integer,Integer,Integer)
 
@@ -157,7 +155,7 @@ end
 const h_imag = 0x32a7a07f3e7cd1f9
 const hash_0_imag = hash(0, h_imag)
 
-function hash(z::Complex, h::Uint=zero(Uint))
+function hash(z::Complex, h::Uint)
     # TODO: with default argument specialization, this would be better:
     # hash(real(z), h $ hash(imag(z), h $ h_imag) $ hash(0, h $ h_imag))
     hash(real(z), h $ hash(imag(z), h_imag) $ hash_0_imag)
@@ -165,10 +163,10 @@ end
 
 ## special hashing for booleans and characters ##
 
-hash(x::Bool, h::Uint=zero(Uint)) = hash(int(x), h + 0x4cd135a1755139a5)
-hash(x::Char, h::Uint=zero(Uint)) = hash(int(x), h + 0x10f989ff0f886f11)
+hash(x::Bool, h::Uint) = hash(int(x), h + 0x4cd135a1755139a5)
+hash(x::Char, h::Uint) = hash(int(x), h + 0x10f989ff0f886f11)
 
 ## expression hashing ##
 
-hash(x::Symbol, h::Uint=zero(Uint)) = hash(object_id(x), h)
-hash(x::Expr, h::Uint=zero(Uint)) = hash(x.args, hash(x.head, h + 0x83c7900696d26dc6))
+hash(x::Symbol, h::Uint) = hash(object_id(x), h)
+hash(x::Expr, h::Uint) = hash(x.args, hash(x.head, h + 0x83c7900696d26dc6))
diff --git a/base/hashing2.jl b/base/hashing2.jl
index ebb459465655a..f962d57183ba1 100644
--- a/base/hashing2.jl
+++ b/base/hashing2.jl
@@ -1,6 +1,6 @@
 ## hashing BigInts, BigFloats, and Float16s ##
 
-function hash_integer(n::BigInt, h::Uint=zero(Uint))
+function hash_integer(n::BigInt, h::Uint)
     s = n.size
     s == 0 && return hash_integer(0, h)
     p = convert(Ptr{Uint}, n.d)
@@ -23,19 +23,19 @@ function decompose(x::BigFloat)
     s, int(x.exp - x.prec), int(x.sign)
 end
 
-hash(x::Float16, h::Uint=zero(Uint)) = hash(float64(x), h)
+hash(x::Float16, h::Uint) = hash(float64(x), h)
 
 ## hashing strings ##
 
-function hash{T<:ByteString}(s::Union(T,SubString{T}), h::Uint=zero(Uint))
+function hash{T<:ByteString}(s::Union(T,SubString{T}), h::Uint)
     h += 0x71e729fd56419c81
     ccall(:memhash_seed, Uint64, (Ptr{Void}, Int, Uint32), pointer(s), sizeof(s), h) + h
 end
-hash(s::String, h::Uint=zero(Uint)) = hash(bytestring(s), h)
+hash(s::String, h::Uint) = hash(bytestring(s), h)
 
 ## hashing collections ##
 
-function hash(v::Union(Tuple,AbstractArray,Associative), h::Uint=zero(Uint))
+function hash(v::Union(Tuple,AbstractArray,Associative), h::Uint)
     h += object_id(eltype(v))
     for x = v
         h = hash(x, h)
@@ -43,16 +43,16 @@ function hash(v::Union(Tuple,AbstractArray,Associative), h::Uint=zero(Uint))
     return h
 end
 
-hash(s::Set, h::Uint=zero(Uint)) = hash(sort(s.dict.keys[s.dict.slots .!= 0]), h)
+hash(s::Set, h::Uint) = hash(sort(s.dict.keys[s.dict.slots .!= 0]), h)
 
-hash(r::Range{Bool}, h::Uint=zero(Uint)) = invoke(hash, (Range, Uint), r, h)
-hash(B::BitArray, h::Uint=zero(Uint)) = hash((size(B),B.chunks), h)
-hash(a::AbstractArray{Bool}, h::Uint=zero(Uint)) = hash(bitpack(a), h)
+hash(r::Range{Bool}, h::Uint) = invoke(hash, (Range, Uint), r, h)
+hash(B::BitArray, h::Uint) = hash((size(B),B.chunks), h)
+hash(a::AbstractArray{Bool}, h::Uint) = hash(bitpack(a), h)
 
 # hashing ranges by component at worst leads to collisions for very similar ranges
-hash{T<:Range}(r::T, h::Uint=zero(Uint)) =
+hash{T<:Range}(r::T, h::Uint) =
     hash(first(r), hash(step(r), hash(last(r), h + object_id(eltype(T)))))
 
 ## hashing general objects and expressions ##
 
-hash(x::ANY,  h::Uint=zero(Uint)) = hash(object_id(x), h)
+hash(x::ANY,  h::Uint) = hash(object_id(x), h)
diff --git a/base/multi.jl b/base/multi.jl
index 243170317396f..4027ab69d2b3a 100644
--- a/base/multi.jl
+++ b/base/multi.jl
@@ -448,7 +448,7 @@ type RemoteRef
     next_id() = (id=(myid(),REQ_ID); REQ_ID+=1; id)
 end
 
-hash(r::RemoteRef, h::Uint=zero(Uint)) = hash(r.whence, hash(r.id, h))
+hash(r::RemoteRef, h::Uint) = hash(r.whence, hash(r.id, h))
 isequal(r::RemoteRef, s::RemoteRef) = (r.whence==s.whence && r.id==s.id)
 
 rr2id(r::RemoteRef) = (r.whence, r.id)
diff --git a/base/pkg/types.jl b/base/pkg/types.jl
index 2f75639ed079b..eaf172c76f915 100644
--- a/base/pkg/types.jl
+++ b/base/pkg/types.jl
@@ -44,7 +44,7 @@ function intersect(A::VersionSet, B::VersionSet)
     VersionSet(ivals)
 end
 isequal(A::VersionSet, B::VersionSet) = (A.intervals == B.intervals)
-hash(s::VersionSet, h::Uint=zero(Uint)) = hash(s.intervals, h + 0x2fd2ca6efa023f44)
+hash(s::VersionSet, h::Uint) = hash(s.intervals, h + 0x2fd2ca6efa023f44)
 deepcopy_internal(vs::VersionSet, ::ObjectIdDict) = VersionSet(copy(vs.intervals))
 
 typealias Requires Dict{ByteString,VersionSet}
diff --git a/base/profile.jl b/base/profile.jl
index 4fb6f2a9d8b92..7770a19bf7fd7 100644
--- a/base/profile.jl
+++ b/base/profile.jl
@@ -76,7 +76,7 @@ const UNKNOWN = LineInfo("?", "?", -1)
 
 isequal(a::LineInfo, b::LineInfo) = a.line == b.line && a.func == b.func && a.file == b.file
 
-function hash(li::LineInfo, h::Uint=zero(Uint))
+function hash(li::LineInfo, h::Uint)
     h += 0xf4fbda67fe20ce88
     h = hash(li.line, h)
     h = hash(li.file, h)
diff --git a/base/version.jl b/base/version.jl
index 86b55b2141c3b..490fb8c32b1fb 100644
--- a/base/version.jl
+++ b/base/version.jl
@@ -145,7 +145,7 @@ function isless(a::VersionNumber, b::VersionNumber)
     return false
 end
 
-function hash(v::VersionNumber, h::Uint=zero(Uint))
+function hash(v::VersionNumber, h::Uint)
     h += 0x8ff4ffdb75f9fede
     h = hash(v.major, h)
     h = hash(v.minor, h)
diff --git a/test/arrayops.jl b/test/arrayops.jl
index 210b35f11d57d..8deb7e42cdcb2 100644
--- a/test/arrayops.jl
+++ b/test/arrayops.jl
@@ -363,7 +363,7 @@ D = cat(3, B, B)
 immutable HashCollision
     x::Float64
 end
-Base.hash(::HashCollision) = uint(0)
+Base.hash(::HashCollision, h::Uint) = h
 @test map(x->x.x, unique(map(HashCollision, B), 1)) == C
 
 ## reduce ##
diff --git a/test/collections.jl b/test/collections.jl
index 8e30731e1ee93..3a4bd61275e07 100644
--- a/test/collections.jl
+++ b/test/collections.jl
@@ -80,7 +80,7 @@ type I1438T
     id
 end
 import Base.hash
-hash(x::I1438T) = x.id
+hash(x::I1438T, h::Uint) = hash(x.id, h)
 
 begin
     local seq, xs, s

From 316eeda726ce31d8d8f27f8fbe781437610b9f8d Mon Sep 17 00:00:00 2001
From: Stefan Karpinski <stefan@karpinski.org>
Date: Thu, 24 Apr 2014 17:55:17 -0400
Subject: [PATCH 04/18] MathConst: == and hash

---
 base/constants.jl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/base/constants.jl b/base/constants.jl
index a0f1f522267d8..77310ba5c4e0d 100644
--- a/base/constants.jl
+++ b/base/constants.jl
@@ -13,6 +13,11 @@ convert(::Type{Float16}, x::MathConst) = float16(float32(x))
 convert{T<:Real}(::Type{Complex{T}}, x::MathConst) = convert(Complex{T}, float64(x))
 convert{T<:Integer}(::Type{Rational{T}}, x::MathConst) = convert(Rational{T}, float64(x))
 
+=={s}(::MathConst{s}, ::MathConst{s}) = true
+==(::MathConst, ::MathConst) = false
+
+hash(x::MathConst, h::Uint) = hash(object_id(x), h)
+
 -(x::MathConst) = -float64(x)
 for op in {:+, :-, :*, :/, :^}
     @eval $op(x::MathConst, y::MathConst) = $op(float64(x),float64(y))

From f7910291097d192dc8e0618424fe10653d8545c0 Mon Sep 17 00:00:00 2001
From: Stefan Karpinski <stefan@karpinski.org>
Date: Thu, 24 Apr 2014 18:37:49 -0400
Subject: [PATCH 05/18] move generic rational hashing definitions into
 hashing2.jl

---
 base/hashing.jl  | 120 ++---------------------------------------------
 base/hashing2.jl | 110 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 112 insertions(+), 118 deletions(-)

diff --git a/base/hashing.jl b/base/hashing.jl
index bfd7b9b9e77f8..0da7bbbce9ab9 100644
--- a/base/hashing.jl
+++ b/base/hashing.jl
@@ -27,130 +27,18 @@ function hash_uint(n::Uint32)
     return a
 end
 
-## efficient value-based hashing of integers ##
-
-function hash_integer(n::Integer, h::Uint)
-    h = hash_uint(uint(n & typemax(Uint)) $ h) $ h
-    n = ifelse(n < 0, oftype(n,-n), n)
-    n >>>= sizeof(Uint) << 3
-    while n != 0
-        h = hash_uint(uint(n & typemax(Uint)) $ h) $ h
-        n >>>= sizeof(Uint) << 3
-    end
-    return h
-end
-
-## hashing rational values ##
-
-#=
-`decompose(x)`: non-canonical decomposition of rational values as `num*2^pow/den`.
-
-The decompose function is the point where rational-valued numeric types that support
-hashing hook into the hashing protocol. `decompose(x)` should return three integer
-values `num, pow, den`, such that the value of `x` is mathematically equal to
-
-    num*2^pow/den
-
-The decomposition need not be canonical in the sense that it just needs to be *some*
-way to express `x` in this form, not any particular way – with the restriction that
-`num` and `den` may not share any odd common factors. They may, however, have powers
-of two in common – the generic hashing code will normalize those as necessary.
-
-Special values:
-
- - `x` is zero: `num` should be zero and `den` should have the same sign as `x`
- - `x` is infinite: `den` should be zero and `num` should have the same sign as `x`
- - `x` is not a number: `num` and `den` should both be zero
-=#
-
-decompose(x::Integer) = x, 0, 1
-decompose(x::Rational) = num(x), 0, den(x)
-
-function decompose(x::Float32)
-    isnan(x) && return 0, 0, 0
-    isinf(x) && return ifelse(x < 0, -1, 1), 0, 0
-    n = reinterpret(Int32, x)
-    s = int32(n & 0x007fffff)
-    e = int32(n & 0x7f800000 >> 23)
-    s |= int32(e != 0) << 23
-    d = ifelse(signbit(n) == 1, -1, 1)
-    int(s), int(e - 150 + (e == 0)), d
-end
-
-function decompose(x::Float64)
-    isnan(x) && return 0, 0, 0
-    isinf(x) && return ifelse(x < 0, -1, 1), 0, 0
-    n = reinterpret(Int64, x)
-    s = int64(n & 0x000fffffffffffff)
-    e = int64(n & 0x7ff0000000000000 >> 52)
-    s |= int64(e != 0) << 52
-    d = ifelse(signbit(n) == 1, -1, 1)
-    int(s), int(e - 1075 + (e == 0)), d
-end
-
-# hashing methods for rational-valued types
+## hashing small, built-in numeric types ##
 
 hx(a::Uint64, b::Float64, h::Uint) = hash_uint((3a + reinterpret(Uint64,b)) - h)
 
 hash(x::Uint64,  h::Uint) = hx(x, float64(x), h)
 hash(x::Int64,   h::Uint) = hx(reinterpret(Uint64,x), float64(x), h)
-hash(x::Float64, h::Uint) = hx(box(Uint64,fptosi(unbox(Float64,x))), ifelse(x==x,x,NaN), h)
+hash(x::Float64, h::Uint) = hx(box(Uint64,fptosi(unbox(Float64,x))), ifelse(isnan(x), NaN, x), h)
 
 hash(x::Union(Int8,Uint8,Int16,Uint16,Int32,Uint32), h::Uint) = hash(int64(x), h)
 hash(x::Float32, h::Uint) = hash(float64(x), h)
 
-function hash(x::Real, h::Uint)
-    # decompose x as num*2^pow/den
-    num, pow, den = decompose(x)::(Integer,Integer,Integer)
-
-    # handle special values
-    num == 0 && den == 0 && return hash(NaN, h)
-    if num == 0
-        den > 0 && return hash(+0.0, h)
-        den < 0 && return hash(-0.0, h)
-    end
-    if den == 0
-        num > 0 && return hash(+Inf, h)
-        num < 0 && return hash(-Inf, h)
-    end
-
-    # normalize decomposition
-    if den < 0
-        num = -num
-        den = -den
-    end
-    z = trailing_zeros(num)
-    if z != 0
-        num >>= z
-        pow += z
-    end
-    z = trailing_zeros(den)
-    if z != 0
-        den >>= z
-        pow -= z
-    end
-
-    # handle values representable as Int64, Uint64, Float64
-    if den == 1
-        left = ndigits0z(num,2) + pow
-        right = trailing_zeros(num) + pow
-        if -1074 <= right
-            if 0 <= right && left <= 64
-                left <= 63                     && return hash(int64(num) << int(pow), h)
-                signbit(num) == signbit(den)   && return hash(uint64(num) << int(pow), h)
-            end
-            left <= 1024 && left - right <= 53 && return hash(float64(num) * 2.0^pow, h)
-        end
-    end
-
-    # handle "generic" real values
-    h = hash_integer(den, h)
-    h = hash_integer(pow, h)
-    h = hash_integer(num, h)
-    return h
-end
-
-## hashing complex values ##
+## hashing complex numbers ##
 
 const h_imag = 0x32a7a07f3e7cd1f9
 const hash_0_imag = hash(0, h_imag)
@@ -166,7 +54,7 @@ end
 hash(x::Bool, h::Uint) = hash(int(x), h + 0x4cd135a1755139a5)
 hash(x::Char, h::Uint) = hash(int(x), h + 0x10f989ff0f886f11)
 
-## expression hashing ##
+## symbol & expression hashing ##
 
 hash(x::Symbol, h::Uint) = hash(object_id(x), h)
 hash(x::Expr, h::Uint) = hash(x.args, hash(x.head, h + 0x83c7900696d26dc6))
diff --git a/base/hashing2.jl b/base/hashing2.jl
index f962d57183ba1..eef5943451642 100644
--- a/base/hashing2.jl
+++ b/base/hashing2.jl
@@ -1,4 +1,15 @@
-## hashing BigInts, BigFloats, and Float16s ##
+## efficient value-based hashing of integers ##
+
+function hash_integer(n::Integer, h::Uint)
+    h = hash_uint(uint(n & typemax(Uint)) $ h) $ h
+    n = ifelse(n < 0, oftype(n,-n), n)
+    n >>>= sizeof(Uint) << 3
+    while n != 0
+        h = hash_uint(uint(n & typemax(Uint)) $ h) $ h
+        n >>>= sizeof(Uint) << 3
+    end
+    return h
+end
 
 function hash_integer(n::BigInt, h::Uint)
     s = n.size
@@ -12,6 +23,99 @@ function hash_integer(n::BigInt, h::Uint)
     return h
 end
 
+## generic hashing for rational values ##
+
+function hash(x::Real, h::Uint)
+    # decompose x as num*2^pow/den
+    num, pow, den = decompose(x)::(Integer,Integer,Integer)
+
+    # handle special values
+    num == 0 && den == 0 && return hash(NaN, h)
+    num == 0 && return hash(ifelse(den > 0, 0.0, -0.0), h)
+    den == 0 && return hash(ifelse(num > 0, Inf, -Inf), h)
+
+    # normalize decomposition
+    if den < 0
+        num = -num
+        den = -den
+    end
+    z = trailing_zeros(num)
+    if z != 0
+        num >>= z
+        pow += z
+    end
+    z = trailing_zeros(den)
+    if z != 0
+        den >>= z
+        pow -= z
+    end
+
+    # handle values representable as Int64, Uint64, Float64
+    if den == 1
+        left = ndigits0z(num,2) + pow
+        right = trailing_zeros(num) + pow
+        if -1074 <= right
+            if 0 <= right && left <= 64
+                left <= 63                     && return hash(int64(num) << int(pow), h)
+                signbit(num) == signbit(den)   && return hash(uint64(num) << int(pow), h)
+            end # typemin(Int64) handled by Float64 case
+            left <= 1024 && left - right <= 53 && return hash(float64(num) * 2.0^pow, h)
+        end
+    end
+
+    # handle generic rational values
+    h = hash_integer(den, h)
+    h = hash_integer(pow, h)
+    h = hash_integer(num, h)
+    return h
+end
+
+#=
+`decompose(x)`: non-canonical decomposition of rational values as `num*2^pow/den`.
+
+The decompose function is the point where rational-valued numeric types that support
+hashing hook into the hashing protocol. `decompose(x)` should return three integer
+values `num, pow, den`, such that the value of `x` is mathematically equal to
+
+    num*2^pow/den
+
+The decomposition need not be canonical in the sense that it just needs to be *some*
+way to express `x` in this form, not any particular way – with the restriction that
+`num` and `den` may not share any odd common factors. They may, however, have powers
+of two in common – the generic hashing code will normalize those as necessary.
+
+Special values:
+
+ - `x` is zero: `num` should be zero and `den` should have the same sign as `x`
+ - `x` is infinite: `den` should be zero and `num` should have the same sign as `x`
+ - `x` is not a number: `num` and `den` should both be zero
+=#
+
+decompose(x::Integer) = x, 0, 1
+decompose(x::Rational) = num(x), 0, den(x)
+
+function decompose(x::Float32)
+    isnan(x) && return 0, 0, 0
+    isinf(x) && return ifelse(x < 0, -1, 1), 0, 0
+    n = reinterpret(Int32, x)
+    s = int32(n & 0x007fffff)
+    e = int32(n & 0x7f800000 >> 23)
+    s |= int32(e != 0) << 23
+    d = ifelse(signbit(n) == 1, -1, 1)
+    int(s), int(e - 150 + (e == 0)), d
+end
+
+function decompose(x::Float64)
+    isnan(x) && return 0, 0, 0
+    isinf(x) && return ifelse(x < 0, -1, 1), 0, 0
+    n = reinterpret(Int64, x)
+    s = int64(n & 0x000fffffffffffff)
+    e = int64(n & 0x7ff0000000000000 >> 52)
+    s |= int64(e != 0) << 52
+    d = ifelse(signbit(n) == 1, -1, 1)
+    int(s), int(e - 1075 + (e == 0)), d
+end
+
 function decompose(x::BigFloat)
     isnan(x) && return big(0), 0, 0
     isinf(x) && return big(x.sign), 0, 0
@@ -23,6 +127,8 @@ function decompose(x::BigFloat)
     s, int(x.exp - x.prec), int(x.sign)
 end
 
+## hashing Float16s ##
+
 hash(x::Float16, h::Uint) = hash(float64(x), h)
 
 ## hashing strings ##
@@ -53,6 +159,6 @@ hash(a::AbstractArray{Bool}, h::Uint) = hash(bitpack(a), h)
 hash{T<:Range}(r::T, h::Uint) =
     hash(first(r), hash(step(r), hash(last(r), h + object_id(eltype(T)))))
 
-## hashing general objects and expressions ##
+## hashing general objects ##
 
 hash(x::ANY,  h::Uint) = hash(object_id(x), h)

From 36969687fb9bbd936b3adc0baa6cf0a408d2ccb7 Mon Sep 17 00:00:00 2001
From: Stefan Karpinski <stefan@karpinski.org>
Date: Fri, 25 Apr 2014 10:33:06 -0400
Subject: [PATCH 06/18] Work around LLVM's dickish undefined constant folding
 behavior.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

LLVM's fptosi intrinsic is undefined for NaN, so LLVM obnoxiously and
pointlessly does different things when it gets NaN as a run-time value
than as a compile-time value. To avoid this shitty, pointless trap, we
have to avoid calling fptosi on NaN by introducing a branch into the
hashing function – even though for hashing, we don't care *what* value
is produced, just as long as it's consistent. Unfortunately, this
affects the performance of Float64 hashing pretty badly. I was not
able to figure out any way to recover this lost performance. LLVM
really needs to stop doing this.
---
 base/hashing.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/base/hashing.jl b/base/hashing.jl
index 0da7bbbce9ab9..53fd24305c45f 100644
--- a/base/hashing.jl
+++ b/base/hashing.jl
@@ -30,10 +30,11 @@ end
 ## hashing small, built-in numeric types ##
 
 hx(a::Uint64, b::Float64, h::Uint) = hash_uint((3a + reinterpret(Uint64,b)) - h)
+const hx_NaN = hx(uint(0), NaN, uint(0))
 
 hash(x::Uint64,  h::Uint) = hx(x, float64(x), h)
 hash(x::Int64,   h::Uint) = hx(reinterpret(Uint64,x), float64(x), h)
-hash(x::Float64, h::Uint) = hx(box(Uint64,fptosi(unbox(Float64,x))), ifelse(isnan(x), NaN, x), h)
+hash(x::Float64, h::Uint) = isnan(x) ? (hx_NaN $ h) : hx(box(Uint64,fptosi(unbox(Float64,x))), x, h)
 
 hash(x::Union(Int8,Uint8,Int16,Uint16,Int32,Uint32), h::Uint) = hash(int64(x), h)
 hash(x::Float32, h::Uint) = hash(float64(x), h)

From ef79b1363eaae9729ffae59b157711f33b3802f2 Mon Sep 17 00:00:00 2001
From: Stefan Karpinski <stefan@karpinski.org>
Date: Fri, 25 Apr 2014 12:14:18 -0400
Subject: [PATCH 07/18] generic hash(Real) optimization: remove type assert on
 `decompose`.

Apparently the type assertion after the decompose call was sabotaging
the ability to inline the call to decompose for Rational{Int}.
Removing the type assert gives a 10x boost, bringing the speed of
generic hash(Rational) to within 10x of hash(Int).
---
 base/hashing2.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/hashing2.jl b/base/hashing2.jl
index eef5943451642..119098040e30a 100644
--- a/base/hashing2.jl
+++ b/base/hashing2.jl
@@ -27,7 +27,7 @@ end
 
 function hash(x::Real, h::Uint)
     # decompose x as num*2^pow/den
-    num, pow, den = decompose(x)::(Integer,Integer,Integer)
+    num, pow, den = decompose(x)
 
     # handle special values
     num == 0 && den == 0 && return hash(NaN, h)

From 85afdbbb10f6e8c1db1be56014af778fda078ec2 Mon Sep 17 00:00:00 2001
From: Stefan Karpinski <stefan@karpinski.org>
Date: Fri, 25 Apr 2014 15:13:18 -0400
Subject: [PATCH 08/18] =?UTF-8?q?More=20streamlined=20hashing=20for=20smal?=
 =?UTF-8?q?lish=20rational=20types=20(=E2=89=A4=2064-bits).?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This actually provides less gain over the generic real hashing
function than you would think, but it is slightly faster.
---
 base/hashing2.jl | 23 +++++++++++++++++++++++
 base/int.jl      |  2 ++
 2 files changed, 25 insertions(+)

diff --git a/base/hashing2.jl b/base/hashing2.jl
index 119098040e30a..8a52a4795f31f 100644
--- a/base/hashing2.jl
+++ b/base/hashing2.jl
@@ -127,6 +127,29 @@ function decompose(x::BigFloat)
     s, int(x.exp - x.prec), int(x.sign)
 end
 
+## streamlined hashing for smallish rational types ##
+
+function hash{T<:Integer64}(x::Rational{T}, h::Uint)  # h: hash state accumulated so far
+    num, den = Base.num(x), Base.den(x)
+    den == 1 && return hash(num, h)  # integer-valued: hash exactly like the numerator
+    den == 0 && return hash(ifelse(num > 0, Inf, -Inf), h)  # zero denominator: hash like +/-Inf
+    if isodd(den)
+        pow = trailing_zeros(num)  # odd den: pull num's factors of two out into pow
+        num >>= pow
+    else
+        pow = trailing_zeros(den)  # even den: pull den's factors of two out ...
+        den >>= pow
+        pow = -pow                 # ... and record them as a negative power of two
+        if den == 1 && abs(num) < 9007199254740992  # 9007199254740992 == 2^53
+            return hash(float64(num) * 2.0^pow, h)  # must pass h on, like every other return path
+        end
+    end
+    h = hash_integer(den, h)
+    h = hash_integer(pow, h)
+    h = hash_integer(num, h)
+    return h
+end
+
 ## hashing Float16s ##
 
 hash(x::Float16, h::Uint) = hash(float64(x), h)
diff --git a/base/int.jl b/base/int.jl
index 3ddecb3fb8aeb..914af8d3070d6 100644
--- a/base/int.jl
+++ b/base/int.jl
@@ -96,6 +96,8 @@ mod(x::Unsigned, y::Signed) = rem(y+signed(rem(x,y)),y)
 # while there is a substantial performance penalty to 64-bit promotion.
 typealias Signed64 Union(Int8,Int16,Int32,Int64)
 typealias Unsigned64 Union(Uint8,Uint16,Uint32,Uint64)
+typealias Integer64 Union(Signed64,Unsigned64)
+
 div{T<:Signed64}  (x::T, y::T) = box(T,sdiv_int(unbox(T,x),unbox(T,y)))
 div{T<:Unsigned64}(x::T, y::T) = box(T,udiv_int(unbox(T,x),unbox(T,y)))
 rem{T<:Signed64}  (x::T, y::T) = box(T,srem_int(unbox(T,x),unbox(T,y)))

From f5fd830af43099a62af02ec9098fe61e23213f83 Mon Sep 17 00:00:00 2001
From: Stefan Karpinski <stefan@karpinski.org>
Date: Sun, 27 Apr 2014 12:44:38 -0400
Subject: [PATCH 09/18] signbit: return a boolean value, instead of an Int.

While messing around with generic equality checking based on the
new decompose function introduced in the hashing work, I discovered
that LLVM seems to be much better able to analyze expressions that
use signbit when it's boolean and explicitly defined as `x < 0` for
integer values. Since `true == 1` and `false == 0` this is a pretty
benign change, although technically it is breaking. I've wanted to
do this for a while and this seems like as good a time as any.
---
 base/bool.jl   | 2 +-
 base/int.jl    | 8 ++------
 base/mpfr.jl   | 3 +--
 base/number.jl | 2 +-
 4 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/base/bool.jl b/base/bool.jl
index 1e84f74e5c05f..85638bf72e82f 100644
--- a/base/bool.jl
+++ b/base/bool.jl
@@ -23,7 +23,7 @@ typemax(::Type{Bool}) = true
 (|)(x::Bool, y::Bool) = box(Bool,or_int(unbox(Bool,x),unbox(Bool,y)))
 ($)(x::Bool, y::Bool) = (x!=y)
 
-signbit(x::Bool) = 0
+signbit(x::Bool) = false
 sign(x::Bool) = x
 abs(x::Bool) = x
 abs2(x::Bool) = x
diff --git a/base/int.jl b/base/int.jl
index 914af8d3070d6..7d510d4a6d618 100644
--- a/base/int.jl
+++ b/base/int.jl
@@ -53,12 +53,8 @@ inv(x::Integer) = float(one(x))/float(x)
 isodd(n::Integer) = bool(rem(n,2))
 iseven(n::Integer) = !isodd(n)
 
-signbit(x::Unsigned) = 0
-signbit(x::Int8) = int(x>>>7)
-signbit(x::Int16) = int(x>>>15)
-signbit(x::Int32) = int(x>>>31)
-signbit(x::Int64) = int(x>>>63)
-signbit(x::Int128) = int(x>>>127)
+signbit(x::Integer) = x < 0
+signbit(x::Unsigned) = false
 
 flipsign(x::Int,    y::Int)    = box(Int,flipsign_int(unbox(Int,x),unbox(Int,y)))
 flipsign(x::Int64,  y::Int64)  = box(Int64,flipsign_int(unbox(Int64,x),unbox(Int64,y)))
diff --git a/base/mpfr.jl b/base/mpfr.jl
index d4720edd98ffd..bd1f39532ba3f 100644
--- a/base/mpfr.jl
+++ b/base/mpfr.jl
@@ -580,8 +580,7 @@ end
 <(x::BigFloat, y::BigFloat) = ccall((:mpfr_less_p, :libmpfr), Int32, (Ptr{BigFloat}, Ptr{BigFloat}), &x, &y) != 0
 >(x::BigFloat, y::BigFloat) = ccall((:mpfr_greater_p, :libmpfr), Int32, (Ptr{BigFloat}, Ptr{BigFloat}), &x, &y) != 0
 
-signbit(x::BigFloat) =
-    int(ccall((:mpfr_signbit, :libmpfr), Int32, (Ptr{BigFloat},), &x)!=0)
+signbit(x::BigFloat) = ccall((:mpfr_signbit, :libmpfr), Int32, (Ptr{BigFloat},), &x) != 0
 
 function precision(x::BigFloat)
     return ccall((:mpfr_get_prec, :libmpfr), Clong, (Ptr{BigFloat},), &x)
diff --git a/base/number.jl b/base/number.jl
index f54230d1d272e..6dcfc24c7fc04 100644
--- a/base/number.jl
+++ b/base/number.jl
@@ -18,7 +18,7 @@ first(x::Number) = x
 last(x::Number) = x
 
 divrem(x,y) = (div(x,y),rem(x,y))
-signbit(x::Real) = int(x < 0)
+signbit(x::Real) = x < 0
 sign(x::Real) = ifelse(x < 0, oftype(x,-1), ifelse(x > 0, one(x), x))
 abs(x::Real) = ifelse(x < 0, -x, x)
 abs2(x::Real) = x*x

From 660c018f842a1f81169b0c8cdebc9e3c56ea2818 Mon Sep 17 00:00:00 2001
From: Stefan Karpinski <stefan@karpinski.org>
Date: Sun, 27 Apr 2014 13:23:03 -0400
Subject: [PATCH 10/18] isnan, isinf, isfinite: improved generic definitions.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Defining isfinite in terms of decompose is simple. It seems good to
insist that only floating-point reals can be NaN – NaN in hardware
is simply an unavoidable reality. For any user-defined type that is
not implemented in hardware, NaN should not exist since operations
that would produce NaNs should raise immediate errors instead.
---
 base/float.jl    | 15 ++++++---------
 base/rational.jl |  4 ----
 2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/base/float.jl b/base/float.jl
index d805574a39f3d..0c7577131d9f7 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -220,18 +220,15 @@ end
 abs(x::Float64) = box(Float64,abs_float(unbox(Float64,x)))
 abs(x::Float32) = box(Float32,abs_float(unbox(Float32,x)))
 
-isnan(x::FloatingPoint) = (x != x)
-isnan(x::Real) = isnan(float(x))
-isnan(x::Integer) = false
+isnan(x::FloatingPoint) = x != x
+isnan(x::Real) = false
 
-isinf(x::FloatingPoint) = (abs(x) == Inf)
-isinf(x::Real) = isinf(float(x))
-isinf(x::Integer) = false
-
-isfinite(x::FloatingPoint) = (x-x == 0)
-isfinite(x::Real) = isfinite(float(x))
+isfinite(x::FloatingPoint) = x - x == 0
+isfinite(x::Real) = decompose(x)[3] != 0
 isfinite(x::Integer) = true
 
+isinf(x::Real) = !isnan(x) & !isfinite(x)
+
 ## floating point traits ##
 
 const Inf16 = box(Float16,unbox(Uint16,0x7c00))
diff --git a/base/rational.jl b/base/rational.jl
index e626c4487c69c..82a96aa9cbcc7 100644
--- a/base/rational.jl
+++ b/base/rational.jl
@@ -101,10 +101,6 @@ signbit(x::Rational) = signbit(x.num)
 copysign(x::Rational, y::Real) = copysign(x.num,y) // x.den
 copysign(x::Rational, y::Rational) = copysign(x.num,y.num) // x.den
 
-isnan(x::Rational) = false
-isinf(x::Rational) = x.den == 0
-isfinite(x::Rational) = x.den != 0
-
 typemin{T<:Integer}(::Type{Rational{T}}) = -one(T)//zero(T)
 typemax{T<:Integer}(::Type{Rational{T}}) = one(T)//zero(T)
 

From 04908d454e1f9cbfde413f26ddc8facf1260eac0 Mon Sep 17 00:00:00 2001
From: Stefan Karpinski <stefan@karpinski.org>
Date: Mon, 28 Apr 2014 16:16:37 -0400
Subject: [PATCH 11/18] move hash(WeakRef) into base/hashing.jl also (not
 needed so early).

---
 base/base.jl    | 2 --
 base/hashing.jl | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/base/base.jl b/base/base.jl
index 44bb21bcaa504..62e5577404ad3 100644
--- a/base/base.jl
+++ b/base/base.jl
@@ -112,8 +112,6 @@ isequal(w::WeakRef, v::WeakRef) = isequal(w.value, v.value)
 isequal(w::WeakRef, v) = isequal(w.value, v)
 isequal(w, v::WeakRef) = isequal(w, v.value)
 
-hash(w::WeakRef, h::Uint) = hash(w.value, h)
-
 function finalizer(o::ANY, f::Union(Function,Ptr))
     if isimmutable(o)
         error("objects of type ", typeof(o), " cannot be finalized")
diff --git a/base/hashing.jl b/base/hashing.jl
index 53fd24305c45f..c1443016eb232 100644
--- a/base/hashing.jl
+++ b/base/hashing.jl
@@ -1,6 +1,7 @@
 ## hashing a single value ##
 
 hash(x::Any) = hash(x, zero(Uint))
+hash(w::WeakRef, h::Uint) = hash(w.value, h)
 
 ## core data hashing functions ##
 

From 1c8b7d12ab27f968c36d743bfc5a5abea5eeab1c Mon Sep 17 00:00:00 2001
From: Stefan Karpinski <stefan@karpinski.org>
Date: Tue, 29 Apr 2014 20:12:10 -0400
Subject: [PATCH 12/18] hashing: make new hashing work on 32-bit systems.

---
 base/hashing.jl   | 35 +++++++++++++++++++++++++++--------
 base/hashing2.jl  | 14 ++++++++++----
 base/pkg/types.jl |  2 +-
 base/profile.jl   |  2 +-
 base/version.jl   |  2 +-
 5 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/base/hashing.jl b/base/hashing.jl
index c1443016eb232..44e7971b533bb 100644
--- a/base/hashing.jl
+++ b/base/hashing.jl
@@ -5,7 +5,7 @@ hash(w::WeakRef, h::Uint) = hash(w.value, h)
 
 ## core data hashing functions ##
 
-function hash_uint(n::Uint64)
+function hash_64_64(n::Uint64)
     local a::Uint64 = n
     a = ~a + a << 21
     a =  a $ a >> 24
@@ -17,7 +17,18 @@ function hash_uint(n::Uint64)
     return a
 end
 
-function hash_uint(n::Uint32)
+function hash_64_32(n::Uint64)
+    local a::Uint64 = n
+    a = ~a + a << 18
+    a =  a $ a >> 31
+    a =  a * 21
+    a =  a $ a >> 11
+    a =  a + a << 6
+    a =  a $ a >> 22
+    return uint32(a)
+end
+
+function hash_32_32(n::Uint32)
     local a::Uint32 = n
     a = a + 0x7ed55d16 + a << 12
     a = a $ 0xc761c23c $ a >> 19
@@ -28,10 +39,18 @@ function hash_uint(n::Uint32)
     return a
 end
 
+if Uint == Uint64
+    hash_uint64(x::Uint64) = hash_64_64(x)
+    hash_uint(x::Uint)     = hash_64_64(x)
+else
+    hash_uint64(x::Uint64) = hash_64_32(x)
+    hash_uint(x::Uint)     = hash_32_32(x)
+end
+
 ## hashing small, built-in numeric types ##
 
-hx(a::Uint64, b::Float64, h::Uint) = hash_uint((3a + reinterpret(Uint64,b)) - h)
-const hx_NaN = hx(uint(0), NaN, uint(0))
+hx(a::Uint64, b::Float64, h::Uint) = hash_uint64((3a + reinterpret(Uint64,b)) - h)
+const hx_NaN = hx(uint64(0), NaN, uint(0  ))
 
 hash(x::Uint64,  h::Uint) = hx(x, float64(x), h)
 hash(x::Int64,   h::Uint) = hx(reinterpret(Uint64,x), float64(x), h)
@@ -42,7 +61,7 @@ hash(x::Float32, h::Uint) = hash(float64(x), h)
 
 ## hashing complex numbers ##
 
-const h_imag = 0x32a7a07f3e7cd1f9
+const h_imag = uint(0x32a7a07f3e7cd1f9)
 const hash_0_imag = hash(0, h_imag)
 
 function hash(z::Complex, h::Uint)
@@ -53,10 +72,10 @@ end
 
 ## special hashing for booleans and characters ##
 
-hash(x::Bool, h::Uint) = hash(int(x), h + 0x4cd135a1755139a5)
-hash(x::Char, h::Uint) = hash(int(x), h + 0x10f989ff0f886f11)
+hash(x::Bool, h::Uint) = hash(int(x), h + uint(0x4cd135a1755139a5))
+hash(x::Char, h::Uint) = hash(int(x), h + uint(0x10f989ff0f886f11))
 
 ## symbol & expression hashing ##
 
 hash(x::Symbol, h::Uint) = hash(object_id(x), h)
-hash(x::Expr, h::Uint) = hash(x.args, hash(x.head, h + 0x83c7900696d26dc6))
+hash(x::Expr, h::Uint) = hash(x.args, hash(x.head, h + uint(0x83c7900696d26dc6)))
diff --git a/base/hashing2.jl b/base/hashing2.jl
index 8a52a4795f31f..fececd3784446 100644
--- a/base/hashing2.jl
+++ b/base/hashing2.jl
@@ -156,9 +156,11 @@ hash(x::Float16, h::Uint) = hash(float64(x), h)
 
 ## hashing strings ##
 
+const memhash = Uint == Uint64 ? :memhash_seed : :memhash32_seed
+
 function hash{T<:ByteString}(s::Union(T,SubString{T}), h::Uint)
-    h += 0x71e729fd56419c81
-    ccall(:memhash_seed, Uint64, (Ptr{Void}, Int, Uint32), pointer(s), sizeof(s), h) + h
+    h += uint(0x71e729fd56419c81)
+    ccall(memhash, Uint, (Ptr{Uint8}, Csize_t, Uint32), pointer(s), sizeof(s), h) + h
 end
 hash(s::String, h::Uint) = hash(bytestring(s), h)
 
@@ -179,8 +181,12 @@ hash(B::BitArray, h::Uint) = hash((size(B),B.chunks), h)
 hash(a::AbstractArray{Bool}, h::Uint) = hash(bitpack(a), h)
 
 # hashing ranges by component at worst leads to collisions for very similar ranges
-hash{T<:Range}(r::T, h::Uint) =
-    hash(first(r), hash(step(r), hash(last(r), h + object_id(eltype(T)))))
+function hash{T<:Range}(r::T, h::Uint)
+    h += uint(0x80707b6821b70087)
+    h = hash(first(r), h)
+    h = hash(step(r), h)
+    h = hash(last(r), h)
+end
 
 ## hashing general objects ##
 
diff --git a/base/pkg/types.jl b/base/pkg/types.jl
index eaf172c76f915..78a6bf8efdeb6 100644
--- a/base/pkg/types.jl
+++ b/base/pkg/types.jl
@@ -44,7 +44,7 @@ function intersect(A::VersionSet, B::VersionSet)
     VersionSet(ivals)
 end
 isequal(A::VersionSet, B::VersionSet) = (A.intervals == B.intervals)
-hash(s::VersionSet, h::Uint) = hash(s.intervals, h + 0x2fd2ca6efa023f44)
+hash(s::VersionSet, h::Uint) = hash(s.intervals, h + uint(0x2fd2ca6efa023f44))
 deepcopy_internal(vs::VersionSet, ::ObjectIdDict) = VersionSet(copy(vs.intervals))
 
 typealias Requires Dict{ByteString,VersionSet}
diff --git a/base/profile.jl b/base/profile.jl
index 7770a19bf7fd7..e47c09b5490f1 100644
--- a/base/profile.jl
+++ b/base/profile.jl
@@ -77,7 +77,7 @@ const UNKNOWN = LineInfo("?", "?", -1)
 isequal(a::LineInfo, b::LineInfo) = a.line == b.line && a.func == b.func && a.file == b.file
 
 function hash(li::LineInfo, h::Uint)
-    h += 0xf4fbda67fe20ce88
+    h += uint(0xf4fbda67fe20ce88)
     h = hash(li.line, h)
     h = hash(li.file, h)
     h = hash(li.func, h)
diff --git a/base/version.jl b/base/version.jl
index 490fb8c32b1fb..cb34c9cbafa5c 100644
--- a/base/version.jl
+++ b/base/version.jl
@@ -146,7 +146,7 @@ function isless(a::VersionNumber, b::VersionNumber)
 end
 
 function hash(v::VersionNumber, h::Uint)
-    h += 0x8ff4ffdb75f9fede
+    h += uint(0x8ff4ffdb75f9fede)
     h = hash(v.major, h)
     h = hash(v.minor, h)
     h = hash(v.patch, h)

From 737ad6eba2f94a8542e9013fa84b6f4e3f5d827a Mon Sep 17 00:00:00 2001
From: Stefan Karpinski <stefan@karpinski.org>
Date: Mon, 28 Apr 2014 16:22:54 -0400
Subject: [PATCH 13/18] isequal, isless: bring comparison and sorting in line
 with new hashing.

---
 base/base.jl                      |  6 ++---
 base/bitarray.jl                  |  2 --
 base/comparison.jl                |  0
 base/expr.jl                      |  4 +--
 base/float.jl                     |  8 ------
 base/gmp.jl                       |  1 -
 base/intset.jl                    |  2 +-
 base/operators.jl                 | 44 +++++++++++++++++++------------
 base/pkg/reqs.jl                  |  2 +-
 base/pkg/resolve/fieldvalue.jl    |  4 +--
 base/pkg/resolve/versionweight.jl | 16 +++++------
 base/pkg/types.jl                 | 10 +++----
 base/range.jl                     | 12 +--------
 base/set.jl                       |  4 +--
 base/string.jl                    | 19 +++++++------
 base/sysimg.jl                    |  2 +-
 base/version.jl                   |  4 +--
 test/numbers.jl                   |  8 +++---
 test/ranges.jl                    |  6 ++---
 19 files changed, 70 insertions(+), 84 deletions(-)
 create mode 100644 base/comparison.jl

diff --git a/base/base.jl b/base/base.jl
index 62e5577404ad3..7b6d58fbb94c4 100644
--- a/base/base.jl
+++ b/base/base.jl
@@ -108,9 +108,9 @@ type Colon
 end
 const (:) = Colon()
 
-isequal(w::WeakRef, v::WeakRef) = isequal(w.value, v.value)
-isequal(w::WeakRef, v) = isequal(w.value, v)
-isequal(w, v::WeakRef) = isequal(w, v.value)
+==(w::WeakRef, v::WeakRef) = isequal(w.value, v.value)
+==(w::WeakRef, v) = isequal(w.value, v)
+==(w, v::WeakRef) = isequal(w, v.value)
 
 function finalizer(o::ANY, f::Union(Function,Ptr))
     if isimmutable(o)
diff --git a/base/bitarray.jl b/base/bitarray.jl
index 9f92e665ff195..d53b7e9d15938 100644
--- a/base/bitarray.jl
+++ b/base/bitarray.jl
@@ -1826,5 +1826,3 @@ function cat(catdim::Integer, X::Union(BitArray, Integer)...)
 end
 
 # hvcat -> use fallbacks in abstractarray.jl
-
-isequal(A::BitArray, B::BitArray) = (A == B)
diff --git a/base/comparison.jl b/base/comparison.jl
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/base/expr.jl b/base/expr.jl
index b9f09abd54425..b4d759fa9d3fc 100644
--- a/base/expr.jl
+++ b/base/expr.jl
@@ -39,8 +39,8 @@ astcopy(x::Union(SymbolNode,GetfieldNode,Expr)) = copy(x)
 astcopy(x::Array{Any,1}) = map(astcopy, x)
 astcopy(x) = x
 
-isequal(x::Expr, y::Expr) = (is(x.head,y.head) && isequal(x.args,y.args))
-isequal(x::QuoteNode, y::QuoteNode) = isequal(x.value, y.value)
+==(x::Expr, y::Expr) = x.head === y.head && x.args == y.args
+==(x::QuoteNode, y::QuoteNode) = x.value == y.value
 
 function show(io::IO, tv::TypeVar)
     if !is(tv.lb, None)
diff --git a/base/float.jl b/base/float.jl
index 0c7577131d9f7..44b66df09f3cf 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -150,19 +150,11 @@ mod{T<:FloatingPoint}(x::T, y::T) = rem(y+rem(x,y),y)
 <=(x::Float32, y::Float32) = le_float(unbox(Float32,x),unbox(Float32,y))
 <=(x::Float64, y::Float64) = le_float(unbox(Float64,x),unbox(Float64,y))
 
-isequal{T<:FloatingPoint}(x::T, y::T) =
-    ((x==y) & (signbit(x)==signbit(y))) | (isnan(x)&isnan(y))
-
 isequal(x::Float32, y::Float32) = fpiseq(unbox(Float32,x),unbox(Float32,y))
 isequal(x::Float64, y::Float64) = fpiseq(unbox(Float64,x),unbox(Float64,y))
 isless (x::Float32, y::Float32) = fpislt(unbox(Float32,x),unbox(Float32,y))
 isless (x::Float64, y::Float64) = fpislt(unbox(Float64,x),unbox(Float64,y))
 
-isless(a::FloatingPoint, b::FloatingPoint) =
-    (a<b) | (!isnan(a) & (isnan(b) | (signbit(a)>signbit(b))))
-isless(a::Real, b::FloatingPoint) = (a<b) | isless(float(a),b)
-isless(a::FloatingPoint, b::Real) = (a<b) | isless(a,float(b))
-
 function cmp(x::FloatingPoint, y::FloatingPoint)
     (isnan(x) || isnan(y)) && throw(DomainError())
     ifelse(x<y, -1, ifelse(x>y, 1, 0))
diff --git a/base/gmp.jl b/base/gmp.jl
index 5f8733c63abd0..af2ea296a06c7 100644
--- a/base/gmp.jl
+++ b/base/gmp.jl
@@ -407,7 +407,6 @@ end
 binomial(n::BigInt, k::Integer) = k < 0 ? throw(DomainError()) : binomial(n, uint(k))
 
 ==(x::BigInt, y::BigInt) = cmp(x,y) == 0
-isequal(x::BigInt, y::BigInt) = cmp(x,y) == 0
 <=(x::BigInt, y::BigInt) = cmp(x,y) <= 0
 >=(x::BigInt, y::BigInt) = cmp(x,y) >= 0
 <(x::BigInt, y::BigInt) = cmp(x,y) < 0
diff --git a/base/intset.jl b/base/intset.jl
index 383be0f6ec88e..d383925026785 100644
--- a/base/intset.jl
+++ b/base/intset.jl
@@ -258,7 +258,7 @@ function symdiff!(s::IntSet, s2::IntSet)
     s
 end
 
-function isequal(s1::IntSet, s2::IntSet)
+function ==(s1::IntSet, s2::IntSet)
     if s1.fill1s != s2.fill1s
         return false
     end
diff --git a/base/operators.jl b/base/operators.jl
index dfe1243b5158b..5fdcbd03bb833 100644
--- a/base/operators.jl
+++ b/base/operators.jl
@@ -4,27 +4,37 @@ const (<:) = issubtype
 
 super(T::DataType) = T.super
 
+## generic comparison ##
+
+==(x,y) = x === y
+
+isequal(x, y) = x == y
+isequal(x::FloatingPoint, y::FloatingPoint) = (isnan(x) & isnan(y)) | (signbit(x) == signbit(y)) & (x == y)
+isequal(x::Real,          y::FloatingPoint) = (isnan(x) & isnan(y)) | (signbit(x) == signbit(y)) & (x == y)
+isequal(x::FloatingPoint, y::Real         ) = (isnan(x) & isnan(y)) | (signbit(x) == signbit(y)) & (x == y)
+
+isless(x::Any, y::Any) = x < y
+isless(x::FloatingPoint, y::FloatingPoint) = (!isnan(x) & isnan(y)) | (signbit(x) & !signbit(y)) | (x < y)
+isless(x::Real,          y::FloatingPoint) = (!isnan(x) & isnan(y)) | (signbit(x) & !signbit(y)) | (x < y)
+isless(x::FloatingPoint, y::Real         ) = (!isnan(x) & isnan(y)) | (signbit(x) & !signbit(y)) | (x < y)
+
 # avoid ambiguity with isequal(::Tuple, ::Tuple)
-isequal(T::(Type...), S::(Type...)) = typeseq(T, S)
-isequal(T::Type, S::Type) = typeseq(T, S)
+==(T::(Type...), S::(Type...)) = typeseq(T, S)
+==(T::Type, S::Type) = typeseq(T, S)
 
-## comparison ##
+## comparison fallbacks ##
 
-isequal(x,y) = is(x,y)
-==(x,y) = isequal(x,y)
 !=(x,y) = !(x==y)
 !==(x,y) = !is(x,y)
 
-< (x,y) = isless(x,y)
-> (x,y) = y < x
+>(x,y) = y < x
 <=(x,y) = !(y < x)
 >=(x,y) = (y <= x)
-.> (x,y) = y.<x
-.>=(x,y) = y.<=x
+.>(x,y) = y .< x
+.>=(x,y) = y .<= x
 
 # this definition allows Number types to implement < instead of isless,
 # which is more idiomatic:
-isless(x::Real, y::Real) = x<y
 lexcmp(x::Real, y::Real) = isless(x,y) ? -1 : ifelse(isless(y,x), 1, 0)
 
 ifelse(c::Bool, x, y) = Intrinsics.select_value(c, x, y)
@@ -80,18 +90,18 @@ end
 .+(x,y) = x+y
 .-(x,y) = x-y
 
-.==(x::Number,y::Number) = x==y
-.!=(x::Number,y::Number) = x!=y
-.< (x::Real,y::Real) = x<y
-.<=(x::Real,y::Real) = x<=y
+.==(x::Number,y::Number) = x == y
+.!=(x::Number,y::Number) = x != y
+.< (x::Real,y::Real) = x < y
+.<=(x::Real,y::Real) = x <= y
 
 # core << >> and >>> takes Int32 as second arg
-<<(x,y::Integer)  = x << convert(Int32,y)
 <<(x,y::Int32)    = no_op_err("<<", typeof(x))
->>(x,y::Integer)  = x >> convert(Int32,y)
 >>(x,y::Int32)    = no_op_err(">>", typeof(x))
->>>(x,y::Integer) = x >>> convert(Int32,y)
 >>>(x,y::Int32)   = no_op_err(">>>", typeof(x))
+<<(x,y::Integer)  = x << convert(Int32,y)
+>>(x,y::Integer)  = x >> convert(Int32,y)
+>>>(x,y::Integer) = x >>> convert(Int32,y)
 
 # fallback div and fld implementations
 # NOTE: C89 fmod() and x87 FPREM implicitly provide truncating float division,
diff --git a/base/pkg/reqs.jl b/base/pkg/reqs.jl
index bf7fe90202e15..b8b58bfecded9 100644
--- a/base/pkg/reqs.jl
+++ b/base/pkg/reqs.jl
@@ -46,7 +46,7 @@ immutable Requirement <: Line
 end
 
 # TODO: shouldn't be neccessary #4648
-Base.isequal(a::Line, b::Line) = (a.content == b.content)
+==(a::Line, b::Line) = a.content == b.content
 
 # general machinery for parsing REQUIRE files
 
diff --git a/base/pkg/resolve/fieldvalue.jl b/base/pkg/resolve/fieldvalue.jl
index d450121bfee99..233d7750ccf89 100644
--- a/base/pkg/resolve/fieldvalue.jl
+++ b/base/pkg/resolve/fieldvalue.jl
@@ -40,7 +40,7 @@ Base.typemin(::Type{FieldValue}) = (x=typemin(Int); y=typemin(VersionWeight); Fi
 Base.(:-)(a::FieldValue, b::FieldValue) = FieldValue(a.l0-b.l0, a.l1-b.l1, a.l2-b.l2, a.l3-b.l3, a.l4-b.l4)
 Base.(:+)(a::FieldValue, b::FieldValue) = FieldValue(a.l0+b.l0, a.l1+b.l1, a.l2+b.l2, a.l3+b.l3, a.l4+b.l4)
 
-function Base.isless(a::FieldValue, b::FieldValue)
+function <(a::FieldValue, b::FieldValue)
     a.l0 < b.l0 && return true
     a.l0 > b.l0 && return false
     c = cmp(a.l1, b.l1)
@@ -55,7 +55,7 @@ function Base.isless(a::FieldValue, b::FieldValue)
     return false
 end
 
-Base.isequal(a::FieldValue, b::FieldValue) =
+==(a::FieldValue, b::FieldValue) =
     a.l0 == b.l0 && a.l1 == b.l1 && a.l2 == b.l2 && a.l3 == b.l3 && a.l4 == b.l4
 
 Base.abs(a::FieldValue) = FieldValue(abs(a.l0), abs(a.l1), abs(a.l2), abs(a.l3), abs(a.l4))
diff --git a/base/pkg/resolve/versionweight.jl b/base/pkg/resolve/versionweight.jl
index 05c9803fbb473..e3c189c65a570 100644
--- a/base/pkg/resolve/versionweight.jl
+++ b/base/pkg/resolve/versionweight.jl
@@ -60,8 +60,8 @@ function Base.cmp{T}(a::HierarchicalValue{T}, b::HierarchicalValue{T})
     end
     return cmp(a.rest, b.rest)
 end
-Base.isless{T}(a::HierarchicalValue{T}, b::HierarchicalValue{T}) = (cmp(a, b) == -1)
-Base.isequal{T}(a::HierarchicalValue{T}, b::HierarchicalValue{T}) = (a.v == b.v) && (a.rest == b.rest)
+<{T}(a::HierarchicalValue{T}, b::HierarchicalValue{T}) = cmp(a,b) < 0
+=={T}(a::HierarchicalValue{T}, b::HierarchicalValue{T}) = a.v == b.v && a.rest == b.rest
 
 Base.abs{T}(a::HierarchicalValue{T}) = HierarchicalValue(T[abs(x) for x in a.v], abs(a.rest))
 
@@ -88,8 +88,8 @@ function Base.cmp(a::VWPreBuildItem, b::VWPreBuildItem)
     c = cmp(a.s, b.s); c != 0 && return c
     return cmp(a.i, b.i)
 end
-Base.isless(a::VWPreBuildItem, b::VWPreBuildItem) = (cmp(a,b) == -1)
-Base.isequal(a::VWPreBuildItem, b::VWPreBuildItem) = (cmp(a,b) == 0)
+<(a::VWPreBuildItem, b::VWPreBuildItem) = cmp(a,b) < 0
+==(a::VWPreBuildItem, b::VWPreBuildItem) = cmp(a,b) == 0
 
 Base.abs(a::VWPreBuildItem) = VWPreBuildItem(abs(a.nonempty), abs(a.s), abs(a.i))
 
@@ -125,8 +125,8 @@ function Base.cmp(a::VWPreBuild, b::VWPreBuild)
     c = cmp(a.nonempty, b.nonempty); c != 0 && return c
     return cmp(a.w, b.w)
 end
-Base.isless(a::VWPreBuild, b::VWPreBuild) = (cmp(a,b) == -1)
-Base.isequal(a::VWPreBuild, b::VWPreBuild) = (cmp(a,b) == 0)
+<(a::VWPreBuild, b::VWPreBuild) = cmp(a,b) < 0
+==(a::VWPreBuild, b::VWPreBuild) = cmp(a,b) == 0
 
 Base.abs(a::VWPreBuild) = VWPreBuild(abs(a.nonempty), abs(a.w))
 
@@ -179,8 +179,8 @@ function Base.cmp(a::VersionWeight, b::VersionWeight)
     c = cmp(a.build, b.build); c != 0 && return c
     return cmp(a.uninstall, b.uninstall)
 end
-Base.isless(a::VersionWeight, b::VersionWeight) = (cmp(a, b) == -1)
-Base.isequal(a::VersionWeight, b::VersionWeight) = (cmp(a, b) == 0)
+<(a::VersionWeight, b::VersionWeight) = cmp(a,b) < 0
+==(a::VersionWeight, b::VersionWeight) = cmp(a,b) == 0
 
 Base.abs(a::VersionWeight) =
     VersionWeight(abs(a.major), abs(a.minor), abs(a.patch),
diff --git a/base/pkg/types.jl b/base/pkg/types.jl
index 78a6bf8efdeb6..fb8b63513527e 100644
--- a/base/pkg/types.jl
+++ b/base/pkg/types.jl
@@ -1,7 +1,7 @@
 module Types
 
 export VersionInterval, VersionSet, Requires, Available, Fixed, merge_requires!, satisfies
-import Base: show, isempty, in, intersect, isequal, hash, deepcopy_internal
+import Base: show, isempty, in, intersect, hash, deepcopy_internal
 
 immutable VersionInterval
     lower::VersionNumber
@@ -14,7 +14,7 @@ show(io::IO, i::VersionInterval) = print(io, "[$(i.lower),$(i.upper))")
 isempty(i::VersionInterval) = i.upper <= i.lower
 in(v::VersionNumber, i::VersionInterval) = i.lower <= v < i.upper
 intersect(a::VersionInterval, b::VersionInterval) = VersionInterval(max(a.lower,b.lower), min(a.upper,b.upper))
-isequal(a::VersionInterval, b::VersionInterval) = (a.lower == b.lower) & (a.upper == b.upper)
+==(a::VersionInterval, b::VersionInterval) = a.lower == b.lower && a.upper == b.upper
 
 immutable VersionSet
     intervals::Vector{VersionInterval}
@@ -43,7 +43,7 @@ function intersect(A::VersionSet, B::VersionSet)
     sort!(ivals, by=i->i.lower)
     VersionSet(ivals)
 end
-isequal(A::VersionSet, B::VersionSet) = (A.intervals == B.intervals)
+==(A::VersionSet, B::VersionSet) = A.intervals == B.intervals
 hash(s::VersionSet, h::Uint) = hash(s.intervals, h + uint(0x2fd2ca6efa023f44))
 deepcopy_internal(vs::VersionSet, ::ObjectIdDict) = VersionSet(copy(vs.intervals))
 
@@ -64,7 +64,7 @@ immutable Available
     requires::Requires
 end
 
-isequal(a::Available, b::Available) = (a.sha1 == b.sha1 && a.requires == b.requires)
+==(a::Available, b::Available) = a.sha1 == b.sha1 && a.requires == b.requires
 
 show(io::IO, a::Available) = isempty(a.requires) ?
     print(io, "Available(", repr(a.sha1), ")") :
@@ -76,7 +76,7 @@ immutable Fixed
 end
 Fixed(v::VersionNumber) = Fixed(v,Requires())
 
-isequal(a::Fixed, b::Fixed) = (a.version == b.version && a.requires == b.requires)
+==(a::Fixed, b::Fixed) = a.version == b.version && a.requires == b.requires
 
 show(io::IO, f::Fixed) = isempty(f.requires) ?
     print(io, "Fixed(", repr(f.version), ")") :
diff --git a/base/range.jl b/base/range.jl
index 910637c880505..f414db7270c51 100644
--- a/base/range.jl
+++ b/base/range.jl
@@ -273,15 +273,7 @@ function show(io::IO, r::Range)
 end
 show(io::IO, r::UnitRange) = print(io, repr(first(r)), ':', repr(last(r)))
 
-isequal{T<:Range}(r::T, s::T) =
-    (first(r)==first(s)) & (step(r)==step(s)) & (last(r)==last(s))
-
-isequal(r::Range, s::Range) = false
-
-=={T<:Range}(r::T, s::T) = isequal(r, s)
-
-=={T<:Integer, S<:Integer}(r::Range{T}, s::Range{S}) =
-    (first(r)==first(s)) & (step(r)==step(s)) & (last(r)==last(s))
+=={T<:Range}(r::T, s::T) = (first(r) == first(s)) & (step(r) == step(s)) & (last(r) == last(s))
 
 function ==(r::Range, s::Range)
     lr = length(r)
@@ -299,8 +291,6 @@ function ==(r::Range, s::Range)
     return true
 end
 
-# TODO: isless?
-
 intersect{T1<:Integer, T2<:Integer}(r::UnitRange{T1}, s::UnitRange{T2}) = max(r.start,s.start):min(last(r),last(s))
 
 intersect{T<:Integer}(i::Integer, r::UnitRange{T}) =
diff --git a/base/set.jl b/base/set.jl
index a83cf3a1e98a6..9ea5c0545a734 100644
--- a/base/set.jl
+++ b/base/set.jl
@@ -73,8 +73,8 @@ function setdiff(a::Set, b::Set)
     d
 end
 
-isequal(l::Set, r::Set) = (length(l) == length(r)) && (l <= r)
-<(l::Set, r::Set) = (length(l) < length(r)) && (l <= r)
+==(l::Set, r::Set) = (length(l) == length(r)) && (l <= r)
+< (l::Set, r::Set) = (length(l) < length(r)) && (l <= r)
 <=(l::Set, r::Set) = issubset(l, r)
 
 function issubset(l, r)
diff --git a/base/string.jl b/base/string.jl
index c7be7e7dea612..847b82de0caf2 100644
--- a/base/string.jl
+++ b/base/string.jl
@@ -482,8 +482,8 @@ function cmp(a::String, b::String)
     !done(a,i) && done(b,j) ? +1 : 0
 end
 
-isequal(a::String, b::String) = cmp(a,b) == 0
-isless(a::String, b::String)  = cmp(a,b) <  0
+==(a::String, b::String) = cmp(a,b) == 0
+< (a::String, b::String) = cmp(a,b) <  0
 
 # begins with and ends with predicates
 
@@ -515,19 +515,18 @@ function endswith(a::String, b::String)
 end
 endswith(a::String, c::Char) = !isempty(a) && a[end] == c
 
-# faster comparisons for byte strings
+# faster comparisons for byte strings and symbols
 
-cmp(a::ByteString, b::ByteString)     = lexcmp(a.data, b.data)
-isequal(a::ByteString, b::ByteString) = endof(a)==endof(b) && cmp(a,b)==0
-beginswith(a::ByteString, b::ByteString) = beginswith(a.data, b.data)
+cmp(a::ByteString, b::ByteString) = lexcmp(a.data, b.data)
+cmp(a::Symbol, b::Symbol) = int(sign(ccall(:strcmp, Int32, (Ptr{Uint8}, Ptr{Uint8}), a, b)))
+
+==(a::ByteString, b::ByteString) = endof(a) == endof(b) && cmp(a,b) == 0
+<(a::Symbol, b::Symbol) = cmp(a,b) < 0
 
+beginswith(a::ByteString, b::ByteString) = beginswith(a.data, b.data)
 beginswith(a::Array{Uint8,1}, b::Array{Uint8,1}) =
     (length(a) >= length(b) && ccall(:strncmp, Int32, (Ptr{Uint8}, Ptr{Uint8}, Uint), a, b, length(b)) == 0)
 
-cmp(a::Symbol, b::Symbol) =
-    int(sign(ccall(:strcmp, Int32, (Ptr{Uint8}, Ptr{Uint8}), a, b)))
-isless(a::Symbol, b::Symbol) = cmp(a,b)<0
-
 # TODO: fast endswith
 
 ## character column width function ##
diff --git a/base/sysimg.jl b/base/sysimg.jl
index add8cffd3ea0d..c89ff144fdb59 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -175,7 +175,7 @@ big(q::Rational) = big(num(q))//big(den(q))
 big(z::Complex) = complex(big(real(z)),big(imag(z)))
 @vectorize_1arg Number big
 
-# moer hashing definitions
+# more hashing definitions
 include("hashing2.jl")
 
 # random number generation and statistics
diff --git a/base/version.jl b/base/version.jl
index cb34c9cbafa5c..05966dbd7af9b 100644
--- a/base/version.jl
+++ b/base/version.jl
@@ -115,7 +115,7 @@ function ident_cmp(A::(Union(Int,ASCIIString)...),
     !done(A,i) && done(B,j) ? +1 : 0
 end
 
-function isequal(a::VersionNumber, b::VersionNumber)
+function ==(a::VersionNumber, b::VersionNumber)
     (a.major != b.major) && return false
     (a.minor != b.minor) && return false
     (a.patch != b.patch) && return false
@@ -126,7 +126,7 @@ end
 
 issupbuild(v::VersionNumber) = length(v.build)==1 && isempty(v.build[1])
 
-function isless(a::VersionNumber, b::VersionNumber)
+function <(a::VersionNumber, b::VersionNumber)
     (a.major < b.major) && return true
     (a.major > b.major) && return false
     (a.minor < b.minor) && return true
diff --git a/test/numbers.jl b/test/numbers.jl
index d2fd2fc479b23..2afa54cb0fa37 100644
--- a/test/numbers.jl
+++ b/test/numbers.jl
@@ -382,8 +382,8 @@ end
 @test !isequal(+1.0,-1.0)
 @test !isequal(+Inf,-Inf)
 
-@test !isequal(-0.0f0,-0.0)
-@test !isequal( 0.0f0, 0.0)
+@test  isequal(-0.0f0,-0.0)
+@test  isequal( 0.0f0, 0.0)
 @test !isequal(-0.0f0, 0.0)
 @test !isequal(0.0f0 ,-0.0)
 
@@ -459,8 +459,8 @@ end
 @test !isless(+NaN,-NaN)
 @test !isless(+NaN,+NaN)
 
-@test !isequal(   0, 0.0)
-@test !isequal( 0.0,   0)
+@test  isequal(   0, 0.0)
+@test  isequal( 0.0,   0)
 @test !isequal(   0,-0.0)
 @test !isequal(-0.0,   0)
 @test   isless(-0.0,   0)
diff --git a/test/ranges.jl b/test/ranges.jl
index a544cbba8ece5..469c67a2d687e 100644
--- a/test/ranges.jl
+++ b/test/ranges.jl
@@ -245,12 +245,10 @@ let
     for r in Rs
         ar = collect(r)
         @test r != ar
-        @test !isequal(r, ar)
+        @test !isequal(r,ar)
         for s in Rs
             as = collect(s)
-
-            @test !isequal(r, s) || hash(r)==hash(s)
-
+            @test !isequal(r,s) || hash(r)==hash(s)
             @test (r==s) == (ar==as)
         end
     end

From f11b2e8bb70ceffffb0a9b7d88da6723b32eed87 Mon Sep 17 00:00:00 2001
From: Stefan Karpinski <stefan@karpinski.org>
Date: Wed, 30 Apr 2014 19:40:31 -0400
Subject: [PATCH 14/18] `d[k] = v`: replace the stored key when assigning to an
 existing key in a dict.

Setup:

	julia> d = Dict()
	Dict{Any,Any}()

	julia> d[1] = "foo"
	"foo"

	julia> d
	{1=>"foo"}

	julia> d[1.0] = "bar"
	"bar"

Before:

	julia> d
	{1=>"bar"}

After:

	julia> d
	{1.0=>"bar"}

The new behavior seems far less surprising to me.
---
 base/dict.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/base/dict.jl b/base/dict.jl
index 8527690430037..bf0f5f32acd24 100644
--- a/base/dict.jl
+++ b/base/dict.jl
@@ -425,6 +425,7 @@ function setindex!{K,V}(h::Dict{K,V}, v0, key0)
     index = ht_keyindex2(h, key)
 
     if index > 0
+        h.keys[index] = key
         h.vals[index] = v
     else
         _setindex!(h, v, key, -index)

From f9cb1e3d001dbed0fc2d4b66a86b21a9e7e2acbc Mon Sep 17 00:00:00 2001
From: Jeff Bezanson <jeff.bezanson@gmail.com>
Date: Thu, 1 May 2014 23:10:04 -0400
Subject: [PATCH 15/18] workaround for the Type{()} error

---
 src/gf.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/gf.c b/src/gf.c
index 6d3a168122c37..0ac5ab8c2f133 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -467,6 +467,14 @@ static jl_function_t *cache_method(jl_methtable_t *mt, jl_tuple_t *type,
         jl_value_t *elt = jl_tupleref(type,i);
         jl_value_t *decl_i = nth_slot_type(decl,i);
         if (jl_is_type_type(elt) && jl_is_tuple(jl_tparam0(elt)) &&
+            /*
+              NOTE: without this, () is sometimes specialized as () and
+              sometimes as Type{()}. In #6624, this caused a
+                TypeError(func=:tuplelen, context="", expected=(Any...,), got=Type{()}())
+              inside ==, inside isstructtype. Not quite clear why, however.
+            */
+            jl_tparam0(elt) != (jl_value_t*)jl_null &&
+
             !jl_is_type_type(decl_i)) {
             jl_methlist_t *curr = mt->defs;
             int ok=1;

From b5e0b73b6e0e1c81bb0895ffca3bd516394836b2 Mon Sep 17 00:00:00 2001
From: Jeff Bezanson <jeff.bezanson@gmail.com>
Date: Fri, 2 May 2014 13:29:02 -0400
Subject: [PATCH 16/18] update docs for == and isequal

---
 doc/stdlib/base.rst | 45 ++++++++++++++++++++-------------------------
 1 file changed, 20 insertions(+), 25 deletions(-)

diff --git a/doc/stdlib/base.rst b/doc/stdlib/base.rst
index 7a1aec99e3d81..c8e32dd51472d 100644
--- a/doc/stdlib/base.rst
+++ b/doc/stdlib/base.rst
@@ -139,34 +139,19 @@ All Objects
 
 .. function:: isequal(x, y)
 
-   True if and only if ``x`` and ``y`` would cause a typical function to behave the
-   same. A "typical" function is one that uses only intended interfaces, and does not
-   unreasonably exploit implementation details of its arguments. For example,
-   floating-point ``NaN`` values are ``isequal`` regardless of their sign bits, since
-   the sign of a ``NaN`` has no meaning in the vast majority of cases (but can be
-   discovered if you really want to).
-
-   One implication of this definition is that implementing ``isequal`` for a new type
-   encapsulates, to a large extent, what the true abstraction presented by that type
-   is. For example, a ``String`` is a sequence of characters, so two strings are
-   ``isequal`` if they generate the same characters. Other concerns, such as encoding,
-   are not considered.
-
-   When calling ``isequal``, be aware that it cannot be all things to all people.
-   For example, if your use of strings *does* care about encoding, you will have to
-   perform a check like ``typeof(x)==typeof(y) && isequal(x,y)``.
-
-   ``isequal`` is the default comparison function used by hash tables (``Dict``).
-   ``isequal(x,y)`` must imply ``hash(x)==hash(y)``.
+   Similar to ``==``, except treats all floating-point ``NaN`` values as equal, and
+   treats ``-0.0`` as unequal to ``0.0``. Falls back to ``==``.
 
-   New types with a notion of equality should implement this function, except for
-   numbers, which should implement ``==`` instead. However, numeric types with special
-   values like ``NaN`` might need to implement ``isequal`` as well. Numbers of different
-   types are considered unequal.
+   ``isequal`` is the comparison function used by hash tables (``Dict``).
+   ``isequal(x,y)`` must imply ``hash(x)==hash(y)``.
 
    Mutable containers should generally implement ``isequal`` by calling ``isequal``
    recursively on all contents.
 
+   Other new types generally do not need to implement this function, unless they
+   represent floating-point numbers amenable to a more efficient implementation
+   than that provided by a generic fallback (based on ``isnan``, ``signbit``, and ``==``).
+
 .. function:: isless(x, y)
 
    Test whether ``x`` is less than ``y``, according to a canonical total order.
@@ -2288,8 +2273,18 @@ Mathematical Operators
 .. _==:
 .. function:: ==(x, y)
 
-   Numeric equality operator. Compares numbers and number-like values (e.g. arrays) by numeric value. True for numbers of different types that represent the same value (e.g. ``2`` and ``2.0``). Follows IEEE semantics for floating-point numbers.
-   New numeric types should implement this function for two arguments of the new type.
+   Generic equality operator, giving a single ``Bool`` result. Falls back to ``===``.
+   Should be implemented for all types with a notion of equality, based
+   on the abstract value that an instance represents. For example, all numeric types are compared
+   by numeric value, ignoring type. Strings are compared as sequences of characters, ignoring
+   encoding.
+
+   Follows IEEE semantics for floating-point numbers.
+
+   Mutable containers should generally implement ``==`` by calling ``==`` recursively on all contents.
+
+   New numeric types should implement this function for two arguments of the new type, and handle
+   comparison to other types via promotion rules where possible.
 
 .. _!=:
 .. function:: !=(x, y)

From 9a48950a0729bed3cf2de72602ab991bb1cb4d63 Mon Sep 17 00:00:00 2001
From: Stefan Karpinski <stefan@karpinski.org>
Date: Fri, 2 May 2014 17:03:36 -0400
Subject: [PATCH 17/18] doc: some wording adjustments for isequal.

---
 doc/stdlib/base.rst | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/doc/stdlib/base.rst b/doc/stdlib/base.rst
index c8e32dd51472d..988e0a1cd1776 100644
--- a/doc/stdlib/base.rst
+++ b/doc/stdlib/base.rst
@@ -139,18 +139,19 @@ All Objects
 
 .. function:: isequal(x, y)
 
-   Similar to ``==``, except treats all floating-point ``NaN`` values as equal, and
-   treats ``-0.0`` as unequal to ``0.0``. Falls back to ``==``.
+   Similar to ``==``, except treats all floating-point ``NaN`` values as equal to each other
+   and greater than all other real values, and treats ``-0.0`` as unequal to ``0.0``.
+   For values that are not floating-point, ``isequal`` is the same as ``==``.
 
    ``isequal`` is the comparison function used by hash tables (``Dict``).
-   ``isequal(x,y)`` must imply ``hash(x)==hash(y)``.
+   ``isequal(x,y)`` must imply that ``hash(x) == hash(y)``.
 
-   Mutable containers should generally implement ``isequal`` by calling ``isequal``
-   recursively on all contents.
+   Mutable containers typically implement ``isequal`` by calling ``isequal`` recursively on
+   all contents.
 
-   Other new types generally do not need to implement this function, unless they
+   Scalar types generally do not need to implement ``isequal``, unless they
    represent floating-point numbers amenable to a more efficient implementation
-   than that provided by a generic fallback (based on ``isnan``, ``signbit``, and ``==``).
+   than that provided as a generic fallback (based on ``isnan``, ``signbit``, and ``==``).
 
 .. function:: isless(x, y)
 

From 9738dd3516ce65955e37ee910618a497cab6dd97 Mon Sep 17 00:00:00 2001
From: Jeff Bezanson <jeff.bezanson@gmail.com>
Date: Wed, 7 May 2014 14:49:01 -0400
Subject: [PATCH 18/18] restore former behavior of isless as a total order

---
 base/operators.jl                 | 3 ++-
 base/pkg/resolve/fieldvalue.jl    | 2 +-
 base/pkg/resolve/versionweight.jl | 8 ++++----
 base/string.jl                    | 4 ++--
 base/version.jl                   | 2 +-
 doc/stdlib/base.rst               | 8 ++++----
 6 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/base/operators.jl b/base/operators.jl
index 5fdcbd03bb833..46f5e64ef956a 100644
--- a/base/operators.jl
+++ b/base/operators.jl
@@ -13,7 +13,6 @@ isequal(x::FloatingPoint, y::FloatingPoint) = (isnan(x) & isnan(y)) | (signbit(x
 isequal(x::Real,          y::FloatingPoint) = (isnan(x) & isnan(y)) | (signbit(x) == signbit(y)) & (x == y)
 isequal(x::FloatingPoint, y::Real         ) = (isnan(x) & isnan(y)) | (signbit(x) == signbit(y)) & (x == y)
 
-isless(x::Any, y::Any) = x < y
 isless(x::FloatingPoint, y::FloatingPoint) = (!isnan(x) & isnan(y)) | (signbit(x) & !signbit(y)) | (x < y)
 isless(x::Real,          y::FloatingPoint) = (!isnan(x) & isnan(y)) | (signbit(x) & !signbit(y)) | (x < y)
 isless(x::FloatingPoint, y::Real         ) = (!isnan(x) & isnan(y)) | (signbit(x) & !signbit(y)) | (x < y)
@@ -27,6 +26,7 @@ isless(x::FloatingPoint, y::Real         ) = (!isnan(x) & isnan(y)) | (signbit(x
 !=(x,y) = !(x==y)
 !==(x,y) = !is(x,y)
 
+<(x,y) = isless(x,y)
 >(x,y) = y < x
 <=(x,y) = !(y < x)
 >=(x,y) = (y <= x)
@@ -35,6 +35,7 @@ isless(x::FloatingPoint, y::Real         ) = (!isnan(x) & isnan(y)) | (signbit(x
 
 # this definition allows Number types to implement < instead of isless,
 # which is more idiomatic:
+isless(x::Real, y::Real) = x<y
 lexcmp(x::Real, y::Real) = isless(x,y) ? -1 : ifelse(isless(y,x), 1, 0)
 
 ifelse(c::Bool, x, y) = Intrinsics.select_value(c, x, y)
diff --git a/base/pkg/resolve/fieldvalue.jl b/base/pkg/resolve/fieldvalue.jl
index 233d7750ccf89..5884b666fc371 100644
--- a/base/pkg/resolve/fieldvalue.jl
+++ b/base/pkg/resolve/fieldvalue.jl
@@ -40,7 +40,7 @@ Base.typemin(::Type{FieldValue}) = (x=typemin(Int); y=typemin(VersionWeight); Fi
 Base.(:-)(a::FieldValue, b::FieldValue) = FieldValue(a.l0-b.l0, a.l1-b.l1, a.l2-b.l2, a.l3-b.l3, a.l4-b.l4)
 Base.(:+)(a::FieldValue, b::FieldValue) = FieldValue(a.l0+b.l0, a.l1+b.l1, a.l2+b.l2, a.l3+b.l3, a.l4+b.l4)
 
-function <(a::FieldValue, b::FieldValue)
+function Base.isless(a::FieldValue, b::FieldValue)
     a.l0 < b.l0 && return true
     a.l0 > b.l0 && return false
     c = cmp(a.l1, b.l1)
diff --git a/base/pkg/resolve/versionweight.jl b/base/pkg/resolve/versionweight.jl
index e3c189c65a570..f89360e68918b 100644
--- a/base/pkg/resolve/versionweight.jl
+++ b/base/pkg/resolve/versionweight.jl
@@ -60,7 +60,7 @@ function Base.cmp{T}(a::HierarchicalValue{T}, b::HierarchicalValue{T})
     end
     return cmp(a.rest, b.rest)
 end
-<{T}(a::HierarchicalValue{T}, b::HierarchicalValue{T}) = cmp(a,b) < 0
+Base.isless{T}(a::HierarchicalValue{T}, b::HierarchicalValue{T}) = cmp(a,b) < 0
 =={T}(a::HierarchicalValue{T}, b::HierarchicalValue{T}) = a.v == b.v && a.rest == b.rest
 
 Base.abs{T}(a::HierarchicalValue{T}) = HierarchicalValue(T[abs(x) for x in a.v], abs(a.rest))
@@ -88,7 +88,7 @@ function Base.cmp(a::VWPreBuildItem, b::VWPreBuildItem)
     c = cmp(a.s, b.s); c != 0 && return c
     return cmp(a.i, b.i)
 end
-<(a::VWPreBuildItem, b::VWPreBuildItem) = cmp(a,b) < 0
+Base.isless(a::VWPreBuildItem, b::VWPreBuildItem) = cmp(a,b) < 0
 ==(a::VWPreBuildItem, b::VWPreBuildItem) = cmp(a,b) == 0
 
 Base.abs(a::VWPreBuildItem) = VWPreBuildItem(abs(a.nonempty), abs(a.s), abs(a.i))
@@ -125,7 +125,7 @@ function Base.cmp(a::VWPreBuild, b::VWPreBuild)
     c = cmp(a.nonempty, b.nonempty); c != 0 && return c
     return cmp(a.w, b.w)
 end
-<(a::VWPreBuild, b::VWPreBuild) = cmp(a,b) < 0
+Base.isless(a::VWPreBuild, b::VWPreBuild) = cmp(a,b) < 0
 ==(a::VWPreBuild, b::VWPreBuild) = cmp(a,b) == 0
 
 Base.abs(a::VWPreBuild) = VWPreBuild(abs(a.nonempty), abs(a.w))
@@ -179,7 +179,7 @@ function Base.cmp(a::VersionWeight, b::VersionWeight)
     c = cmp(a.build, b.build); c != 0 && return c
     return cmp(a.uninstall, b.uninstall)
 end
-<(a::VersionWeight, b::VersionWeight) = cmp(a,b) < 0
+Base.isless(a::VersionWeight, b::VersionWeight) = cmp(a,b) < 0
 ==(a::VersionWeight, b::VersionWeight) = cmp(a,b) == 0
 
 Base.abs(a::VersionWeight) =
diff --git a/base/string.jl b/base/string.jl
index 847b82de0caf2..1af1db70bfa8f 100644
--- a/base/string.jl
+++ b/base/string.jl
@@ -483,7 +483,7 @@ function cmp(a::String, b::String)
 end
 
 ==(a::String, b::String) = cmp(a,b) == 0
-< (a::String, b::String) = cmp(a,b) <  0
+isless(a::String, b::String) = cmp(a,b) < 0
 
 # begins with and ends with predicates
 
@@ -521,7 +521,7 @@ cmp(a::ByteString, b::ByteString) = lexcmp(a.data, b.data)
 cmp(a::Symbol, b::Symbol) = int(sign(ccall(:strcmp, Int32, (Ptr{Uint8}, Ptr{Uint8}), a, b)))
 
 ==(a::ByteString, b::ByteString) = endof(a) == endof(b) && cmp(a,b) == 0
-<(a::Symbol, b::Symbol) = cmp(a,b) < 0
+isless(a::Symbol, b::Symbol) = cmp(a,b) < 0
 
 beginswith(a::ByteString, b::ByteString) = beginswith(a.data, b.data)
 beginswith(a::Array{Uint8,1}, b::Array{Uint8,1}) =
diff --git a/base/version.jl b/base/version.jl
index 05966dbd7af9b..b1b0fa99e6419 100644
--- a/base/version.jl
+++ b/base/version.jl
@@ -126,7 +126,7 @@ end
 
 issupbuild(v::VersionNumber) = length(v.build)==1 && isempty(v.build[1])
 
-function <(a::VersionNumber, b::VersionNumber)
+function isless(a::VersionNumber, b::VersionNumber)
     (a.major < b.major) && return true
     (a.major > b.major) && return false
     (a.minor < b.minor) && return true
diff --git a/doc/stdlib/base.rst b/doc/stdlib/base.rst
index 988e0a1cd1776..c5db6ab905b32 100644
--- a/doc/stdlib/base.rst
+++ b/doc/stdlib/base.rst
@@ -139,14 +139,14 @@ All Objects
 
 .. function:: isequal(x, y)
 
-   Similar to ``==``, except treats all floating-point ``NaN`` values as equal to each other
-   and greater than all other real values, and treats ``-0.0`` as unequal to ``0.0``.
+   Similar to ``==``, except treats all floating-point ``NaN`` values as equal to each other,
+   and treats ``-0.0`` as unequal to ``0.0``.
    For values that are not floating-point, ``isequal`` is the same as ``==``.
 
    ``isequal`` is the comparison function used by hash tables (``Dict``).
    ``isequal(x,y)`` must imply that ``hash(x) == hash(y)``.
 
-   Mutable containers typically implement ``isequal`` by calling ``isequal`` recursively on
+   Collections typically implement ``isequal`` by calling ``isequal`` recursively on
    all contents.
 
    Scalar types generally do not need to implement ``isequal``, unless they
@@ -2282,7 +2282,7 @@ Mathematical Operators
 
    Follows IEEE semantics for floating-point numbers.
 
-   Mutable containers should generally implement ``==`` by calling ``==`` recursively on all contents.
+   Collections should generally implement ``==`` by calling ``==`` recursively on all contents.
 
    New numeric types should implement this function for two arguments of the new type, and handle
    comparison to other types via promotion rules where possible.