Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make Char no longer a subtype of Integer #8816

Merged
merged 1 commit into from
Oct 27, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ Language changes
dicts are synchronized. As part of this change, `=>` is parsed as a normal
operator, and `Base` defines it to construct `Pair` objects ([#6739]).

* `Char` is no longer a subtype of `Integer`. ([#8816])
`Char` now supports a more limited set of operations with `Integer` types:

* comparison / equality
* `Char` + `Int` = `Char`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wasn't this supposed to be Char + Int = Char ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like that's how it is implemented

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 816ee25

* `Char` - `Char` = `Int`

Compiler improvements
---------------------

Expand Down
4 changes: 2 additions & 2 deletions base/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,11 @@ end
getindex(T::(Type...)) = Array(T,0)

# T[a:b] and T[a:s:b] also construct typed ranges
function getindex{T<:Number}(::Type{T}, r::Range)
function getindex{T<:Union(Char,Number)}(::Type{T}, r::Range)
copy!(Array(T,length(r)), r)
end

function getindex{T<:Number}(::Type{T}, r1::Range, rs::Range...)
function getindex{T<:Union(Char,Number)}(::Type{T}, r1::Range, rs::Range...)
a = Array(T,length(r1)+sum(length,rs))
o = 1
copy!(a, o, r1)
Expand Down
12 changes: 6 additions & 6 deletions base/ascii.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ sizeof(s::ASCIIString) = sizeof(s.data)
getindex(s::ASCIIString, r::Vector) = ASCIIString(getindex(s.data,r))
getindex(s::ASCIIString, r::UnitRange{Int}) = ASCIIString(getindex(s.data,r))
getindex(s::ASCIIString, indx::AbstractVector{Int}) = ASCIIString(s.data[indx])
search(s::ASCIIString, c::Char, i::Integer) = c < 0x80 ? search(s.data,uint8(c),i) : 0
rsearch(s::ASCIIString, c::Char, i::Integer) = c < 0x80 ? rsearch(s.data,uint8(c),i) : 0
search(s::ASCIIString, c::Char, i::Integer) = c < char(0x80) ? search(s.data,uint8(c),i) : 0
rsearch(s::ASCIIString, c::Char, i::Integer) = c < char(0x80) ? rsearch(s.data,uint8(c),i) : 0

function string(c::ASCIIString...)
if length(c) == 1
Expand Down Expand Up @@ -58,10 +58,10 @@ end
function uppercase(s::ASCIIString)
d = s.data
for i = 1:length(d)
if 'a' <= d[i] <= 'z'
if 'a' <= char(d[i]) <= 'z'
td = copy(d)
for j = i:length(td)
if 'a' <= td[j] <= 'z'
if 'a' <= char(td[j]) <= 'z'
td[j] -= 32
end
end
Expand All @@ -73,10 +73,10 @@ end
function lowercase(s::ASCIIString)
d = s.data
for i = 1:length(d)
if 'A' <= d[i] <= 'Z'
if 'A' <= char(d[i]) <= 'Z'
td = copy(d)
for j = i:length(td)
if 'A' <= td[j] <= 'Z'
if 'A' <= char(td[j]) <= 'Z'
td[j] += 32
end
end
Expand Down
2 changes: 2 additions & 0 deletions base/base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ cconvert{T}(::Type{Ptr{Ptr{T}}}, a::Array) = a
# convert strings to ByteString to pass as pointers
cconvert{P<:Union(Int8,Uint8)}(::Type{Ptr{P}}, s::String) = bytestring(s)

reinterpret{T,S}(::Type{T}, x::S) = box(T,unbox(S,x))

abstract IO

type ErrorException <: Exception
Expand Down
2 changes: 1 addition & 1 deletion base/boot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ bitstype 32 Float32 <: FloatingPoint
bitstype 64 Float64 <: FloatingPoint

bitstype 8 Bool <: Integer
bitstype 32 Char <: Integer
bitstype 32 Char

bitstype 8 Int8 <: Signed
bitstype 8 Uint8 <: Unsigned
Expand Down
68 changes: 40 additions & 28 deletions base/char.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,46 @@ convert(::Type{Char}, x::Float16) = char(convert(Uint32, x))
convert(::Type{Char}, x::Float32) = char(convert(Uint32, x))
convert(::Type{Char}, x::Float64) = char(convert(Uint32, x))

## char promotions ##

promote_rule(::Type{Char}, ::Type{Int8}) = Int32
promote_rule(::Type{Char}, ::Type{Uint8}) = Uint32
promote_rule(::Type{Char}, ::Type{Int16}) = Int32
promote_rule(::Type{Char}, ::Type{Uint16}) = Uint32
promote_rule(::Type{Char}, ::Type{Int32}) = Int32
promote_rule(::Type{Char}, ::Type{Uint32}) = Uint32
promote_rule(::Type{Char}, ::Type{Int64}) = Int64
promote_rule(::Type{Char}, ::Type{Uint64}) = Uint64
promote_rule(::Type{Char}, ::Type{Int128}) = Int128
promote_rule(::Type{Char}, ::Type{Uint128}) = Uint128
typemax(::Type{Char}) = char(typemax(Uint32))
typemin(::Type{Char}) = char(typemin(Uint32))

## character operations & comparisons ##
# A `Char` behaves like a zero-dimensional, single-element collection.
# The iteration/membership methods follow the same pattern as the `Number`
# methods in base/number.jl (`done`, `isempty`, `in`).

# Shape queries: no dimensions; any dimension index >= 1 has extent 1,
# and a dimension index < 1 raises BoundsError.
size(c::Char) = ()
size(c::Char,d) = convert(Int, d) < 1 ? throw(BoundsError()) : 1
ndims(c::Char) = 0
ndims(::Type{Char}) = 0
length(c::Char) = 1
endof(c::Char) = 1
# Indexing: only index 1 (in every position) is valid and returns `c` itself;
# any other integer index raises BoundsError.
getindex(c::Char) = c
getindex(c::Char, i::Integer) = i == 1 ? c : throw(BoundsError())
getindex(c::Char, I::Integer...) = all([i == 1 for i in I]) ? c : throw(BoundsError())
# Real indices are passed through `to_index` (presumably converting them to
# integer indices -- confirm against its definition) and then re-dispatched.
getindex(c::Char, I::Real...) = getindex(c, to_index(I)...)
first(c::Char) = c
last(c::Char) = c

# Iteration protocol: the state is a Bool meaning "already consumed".
# `start` yields false, `next` returns `c` with state true, and `done` returns
# the state, so iterating a Char produces `c` exactly once.
start(c::Char) = false
next(c::Char, state) = (c, true)
done(c::Char, state) = state
isempty(c::Char) = false
# Membership in a single Char is plain equality.
in(x::Char, y::Char) = x == y

==(x::Char, y::Char) = uint32(x) == uint32(y)
==(x::Char, y::Integer) = uint32(x) == y
==(x::Integer, y::Char) = x == uint32(y)

< (x::Char, y::Char) = uint32(x) < uint32(y)
<=(x::Char, y::Char) = uint32(x) <= uint32(y)

isless(x::Char, y::Integer) = isless(uint32(x), y)
isless(x::Integer, y::Char) = isless(x, uint32(y))

# numeric operations
# TODO: this should be removed, but needs to be here as long as Char <: Integer
+(x::Char , y::Char ) = int(x)+int(y)

# ordinal operations
+(x::Char , y::Integer) = reinterpret(Char, int32(x)+int32(y))
+(x::Integer, y::Char ) = y+x
-(x::Char , y::Char ) = int(x)-int(y)
-(x::Char , y::Integer) = reinterpret(Char, int32(x)-int32(y))
+(x::Char , y::Integer) = reinterpret(Char, int32(x) + int32(y))
+(x::Integer, y::Char) = y + x
-(x::Char , y::Char) = int(x) - int(y)
-(x::Char , y::Integer) = reinterpret(Char, int32(x) - int32(y))

# bitwise operations
(~)(x::Char) = char(~uint32(x))
Expand All @@ -40,14 +56,10 @@ promote_rule(::Type{Char}, ::Type{Uint128}) = Uint128

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to keep these bitwise operations for Char?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be useful for masking.

bswap(x::Char) = char(bswap(uint32(x)))

<<(x::Char, y::Int32) = uint32(x) << y
>>(x::Char, y::Int32) = uint32(x) >>> y
>>>(x::Char, y::Int32) = uint32(x) >>> y

< (x::Char, y::Char) = uint32(x) < uint32(y)
<=(x::Char, y::Char) = uint32(x) <= uint32(y)

## printing & showing characters ##

print(io::IO, c::Char) = (write(io,c); nothing)
show(io::IO, c::Char) = (print(io,'\''); print_escaped(io,utf32(c),"'"); print(io,'\''))
print(io::IO, c::Char) = (write(io, c); nothing)
# Write `c` to `io` as a character literal: an opening single quote, the
# escaped form of the character, and a closing single quote.
# The "'" argument to `print_escaped` is presumably the set of extra
# characters to escape (so a quote character is shown escaped) -- confirm
# against `print_escaped`'s definition.
function show(io::IO, c::Char)
    delimiter = '\''
    print(io, delimiter)
    print_escaped(io, utf32(c), "'")
    print(io, delimiter)
end
29 changes: 13 additions & 16 deletions base/datafmt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ function countlines(io::IO, eol::Char)
while !eof(io)
nb = readbytes!(io, a)
for i=1:nb
if a[i] == eol
if char(a[i]) == eol
preceded_by_eol = true
elseif preceded_by_eol
preceded_by_eol = false
Expand All @@ -38,8 +38,6 @@ function countlines(io::IO, eol::Char)
nl
end



readdlm(input, T::Type; opts...) = readdlm(input, invalid_dlm, T, '\n'; opts...)
readdlm(input, dlm::Char, T::Type; opts...) = readdlm(input, dlm, T, '\n'; opts...)

Expand Down Expand Up @@ -67,11 +65,10 @@ end

function ascii_if_possible(sbuff::String)
isa(sbuff, ASCIIString) && return sbuff

asci = true
d = sbuff.data
for idx in 1:length(d)
(d[idx] < 0x80) ? continue : (asci = false; break)
(char(d[idx]) < char(0x80)) ? continue : (asci = false; break)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wouldn't have thought to do this, but now I'm wondering if I should -- what's the benefit?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this doesn't make sense. This code is really comparing bytes.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is because for UTF32 strings this is a Char / Integer comparison, and I removed those definitions at some intermediate point.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see -- good to know that's what UTF32String looks like under the hood. Thanks.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I got confused because this code is kind of wrong. If it's supposed to work for arbitrary Strings it shouldn't be accessing .data.

end
asci ? ASCIIString(sbuff.data) : sbuff
end
Expand Down Expand Up @@ -168,7 +165,7 @@ function store_cell{T,S<:String}(dlmstore::DLMStore{T,S}, row::Int, col::Int, qu
tmp64 = dlmstore.tmp64

endpos = prevind(sbuff, nextind(sbuff,endpos))
(endpos > 0) && ('\n' == dlmstore.eol) && ('\r' == sbuff[endpos]) && (endpos = prevind(sbuff, endpos))
(endpos > 0) && ('\n' == dlmstore.eol) && ('\r' == char(sbuff[endpos])) && (endpos = prevind(sbuff, endpos))
sval = quoted ? SubString(sbuff, startpos+1, endpos-1) : SubString(sbuff, startpos, endpos)

if drow > 0
Expand Down Expand Up @@ -200,7 +197,6 @@ function store_cell{T,S<:String}(dlmstore::DLMStore{T,S}, row::Int, col::Int, qu
((T <: Number) && dlmstore.auto) ? throw(TypeError(:store_cell, "", Any, T)) : error("file entry \"$(sval)\" cannot be converted to $T")
end


dlmstore.lastrow = drow
dlmstore.lastcol = col
else
Expand Down Expand Up @@ -330,15 +326,16 @@ colval{S<:String}(sval::S, cells::Array{Any,2}, row::Int, col::Int, tmp64::Array
colval{T<:Char, S<:String}(sval::S, cells::Array{T,2}, row::Int, col::Int, tmp64::Array{Float64,1}) = ((length(sval) == 1) ? ((cells[row,col] = next(sval,1)[1]); false) : true)
colval{S<:String}(sval::S, cells::Array, row::Int, col::Int, tmp64::Array{Float64,1}) = true


dlm_parse(s::ASCIIString, eol::Char, dlm::Char, qchar::Char, cchar::Char, ign_adj_dlm::Bool, allow_quote::Bool, allow_comments::Bool, skipstart::Int, skipblanks::Bool, dh::DLMHandler) =
dlm_parse(s.data, uint8(eol), uint8(dlm), uint8(qchar), uint8(cchar), ign_adj_dlm, allow_quote, allow_comments, skipstart, skipblanks, dh)
# ASCIIString entry point for delimited-text parsing.
# Each control character (end-of-line, delimiter, quote, comment) is reduced
# to a byte via `uint8(uint32(...))`, and parsing is forwarded to the
# `dlm_parse` method that operates on the string's underlying `data` with
# byte-typed control values. All other options are passed through unchanged.
function dlm_parse(s::ASCIIString, eol::Char, dlm::Char, qchar::Char, cchar::Char, ign_adj_dlm::Bool, allow_quote::Bool, allow_comments::Bool, skipstart::Int, skipblanks::Bool, dh::DLMHandler)
    eol_byte = uint8(uint32(eol))
    dlm_byte = uint8(uint32(dlm))
    quote_byte = uint8(uint32(qchar))
    comment_byte = uint8(uint32(cchar))
    dlm_parse(s.data, eol_byte, dlm_byte, quote_byte, comment_byte,
              ign_adj_dlm, allow_quote, allow_comments, skipstart, skipblanks, dh)
end

function dlm_parse{T,D}(dbuff::T, eol::D, dlm::D, qchar::D, cchar::D, ign_adj_dlm::Bool, allow_quote::Bool, allow_comments::Bool, skipstart::Int, skipblanks::Bool, dh::DLMHandler)
all_ascii = (D <: Uint8) || (isascii(eol) && isascii(dlm) && (!allow_quote || isascii(qchar)) && (!allow_comments || isascii(cchar)))
(T <: UTF8String) && all_ascii && (return dlm_parse(dbuff.data, uint8(eol), uint8(dlm), uint8(qchar), uint8(cchar), ign_adj_dlm, allow_quote, allow_comments, skipstart, skipblanks, dh))
ncols = nrows = col = 0
is_default_dlm = (dlm == invalid_dlm % D)
is_default_dlm = (dlm == uint32(invalid_dlm) % D)
error_str = ""
# 0: begin field, 1: quoted field, 2: unquoted field, 3: second quote (could either be end of field or escape character), 4: comment, 5: skipstart
state = (skipstart > 0) ? 5 : 0
Expand All @@ -350,16 +347,16 @@ function dlm_parse{T,D}(dbuff::T, eol::D, dlm::D, qchar::D, cchar::D, ign_adj_dl
was_cr = false
while idx <= slen
val,idx = next(dbuff, idx)
if (is_eol = (val == eol))
if (is_eol = (char(val) == char(eol)))
is_dlm = is_comment = is_cr = is_quote = false
elseif (is_dlm = (is_default_dlm ? in(val, _default_delims) : (val == dlm)))
elseif (is_dlm = (is_default_dlm ? in(char(val), _default_delims) : (char(val) == char(dlm))))
is_comment = is_cr = is_quote = false
elseif (is_quote = (val == qchar))
elseif (is_quote = (char(val) == char(qchar)))
is_comment = is_cr = false
elseif (is_comment = (val == cchar))
elseif (is_comment = (char(val) == char(cchar)))
is_cr = false
else
is_cr = (eol == '\n') && (val == '\r')
is_cr = (char(eol) == '\n') && (char(val) == '\r')
end

if 2 == state # unquoted field
Expand Down
5 changes: 3 additions & 2 deletions base/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ function write(s::IO, a::AbstractArray)
nb
end

function write(s::IO, c::Char)
function write(s::IO, ch::Char)
c = reinterpret(Uint32, ch)
if c < 0x80
write(s, uint8(c))
return 1
Expand Down Expand Up @@ -149,7 +150,7 @@ function read(s::IO, ::Type{Char})
end

function readuntil(s::IO, delim::Char)
if delim < 0x80
if delim < char(0x80)
data = readuntil(s, uint8(delim))
enc = byte_string_classify(data)
return (enc==1) ? ASCIIString(data) : UTF8String(data)
Expand Down
1 change: 0 additions & 1 deletion base/iobuffer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -317,4 +317,3 @@ function readuntil(io::IOBuffer, delim::Uint8)
end
A
end

2 changes: 0 additions & 2 deletions base/number.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ done(x::Number, state) = state
isempty(x::Number) = false
in(x::Number, y::Number) = x == y

reinterpret{T,S}(::Type{T}, x::S) = box(T,unbox(S,x))

map(f::Callable, x::Number) = f(x)

zero(x::Number) = oftype(x,0)
Expand Down
2 changes: 1 addition & 1 deletion base/random.jl
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ RandIntGen{T<:Unsigned}(r::UnitRange{T}) = isempty(r) ? error("range must be non
# specialized versions
for (T, U) in [(Uint8, Uint32), (Uint16, Uint32),
(Int8, Uint32), (Int16, Uint32), (Int32, Uint32), (Int64, Uint64), (Int128, Uint128),
(Bool, Uint32), (Char, Uint32)]
(Bool, Uint32)]

@eval RandIntGen(r::UnitRange{$T}) = isempty(r) ? error("range must be non-empty") : RandIntGen(first(r), convert($U, unsigned(last(r) - first(r)) + one($U))) # overflow ok
end
Expand Down
3 changes: 2 additions & 1 deletion base/range.jl
Original file line number Diff line number Diff line change
Expand Up @@ -561,4 +561,5 @@ function in(x, r::Range)
n >= 1 && n <= length(r) && r[n] == x
end

in{T<:Integer}(x, r::Range{T}) = isinteger(x) && !isempty(r) && x>=minimum(r) && x<=maximum(r) && (mod(int(x)-first(r),step(r))==0)
in{T<:Integer}(x, r::Range{T}) = isinteger(x) && !isempty(r) && x>=minimum(r) && x<=maximum(r) && (mod(int(x)-first(r),step(r)) == 0)
in(x::Char, r::Range{Char}) = !isempty(r) && x >= minimum(r) && x <= maximum(r) && (mod(int(x) - int(first(r)), step(r)) == 0)
2 changes: 1 addition & 1 deletion base/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ strwidth(s::String) = (w=0; for c in s; w += charwidth(c); end; w)
strwidth(s::ByteString) = int(ccall(:u8_strwidth, Csize_t, (Ptr{Uint8},), s.data))
# TODO: implement and use u8_strnwidth that takes a length argument

isascii(c::Char) = c < 0x80
isascii(c::Char) = c < char(0x80)
isascii(s::String) = all(isascii, s)
isascii(s::ASCIIString) = true

Expand Down
3 changes: 2 additions & 1 deletion base/utf16.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ end
# TODO: optimize this
function encode16(s::String)
buf = Uint16[]
for c in s
for ch in s
c = reinterpret(Uint32, ch)
if c < 0x10000
push!(buf, uint16(c))
else
Expand Down
Loading