Skip to content

Commit

Permalink
Merge pull request #11947 from JuliaLang/jq/countlines
Browse files Browse the repository at this point in the history
Improve countlines() performance
  • Loading branch information
quinnj committed Jun 30, 2015
2 parents a726ea7 + a5c9967 commit e2b78df
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 19 deletions.
23 changes: 5 additions & 18 deletions base/datafmt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,28 +13,15 @@ export countlines, readdlm, readcsv, writedlm, writecsv
const invalid_dlm = Char(0xfffffffe)
const offs_chunk_size = 5000

# Single-argument convenience method: forwards to the two-argument form with
# '\n' as the line terminator. `nameorfile` is passed through unchanged, so it
# may be anything the two-argument methods accept.
function countlines(nameorfile)
    return countlines(nameorfile, '\n')
end
# Count the lines of the file at path `filename`, using `eol` as the line
# terminator. The file is opened through the function form of `open` and the
# counting itself is delegated to the stream method of `countlines`.
function countlines(filename::AbstractString, eol::Char)
    return open(stream -> countlines(stream, eol), filename)
end
function countlines(io::IO, eol::Char)
if !isascii(eol)
throw(ArgumentError("only ASCII line terminators are supported"))
end
# Count the lines of the file at path `f`, using `eol` (default '\n') as the
# line terminator. The file is opened, handed to the stream method of
# `countlines`, and the result is asserted to be an `Int` before returning.
function countlines(f::AbstractString, eol::Char='\n')
    total = open(f) do io
        countlines(io, eol)
    end
    return total::Int
end
function countlines(io::IO, eol::Char='\n')
isascii(eol) || throw(ArgumentError("only ASCII line terminators are supported"))
a = Array(UInt8, 8192)
nl = 0
preceded_by_eol = true
while !eof(io)
nb = readbytes!(io, a)
for i=1:nb
if Char(a[i]) == eol
preceded_by_eol = true
elseif preceded_by_eol
preceded_by_eol = false
nl+=1
end
@simd for i=1:nb
@inbounds nl += a[i] == eol
end
end
nl
Expand Down
2 changes: 1 addition & 1 deletion doc/stdlib/io-network.rst
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ General I/O

.. function:: countlines(io,[eol::Char])

Read io until the end of the stream/file and count the number of non-empty lines. To specify a file pass the filename as the first
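Read ``io`` until the end of the stream/file and count the number of lines, i.e. the number of EOL markers read (empty lines are counted; trailing text without a final EOL marker is not). To specify a file, pass the filename as the first
argument. ASCII EOL markers other than '\\n' are supported by passing them as the second argument.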

.. function:: PipeBuffer()
Expand Down
15 changes: 15 additions & 0 deletions test/readdlm.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
# This file is a part of Julia. License is MIT: http://julialang.org/license

# countlines
# In-memory streams: each occurrence of the requested terminator counts as one
# line, so empty lines are included and a terminator that never appears gives 0.
@test countlines(IOBuffer("\n")) == 1
@test countlines(IOBuffer("\n"),'\r') == 0
@test countlines(IOBuffer("\n\n\n\n\n\n\n\n\n\n")) == 10
@test countlines(IOBuffer("\n \n \n \n \n \n \n \n \n \n")) == 10
@test countlines(IOBuffer("\r\n \r\n \r\n \r\n \r\n")) == 5

# Filename methods: write a small four-line file and count it with the default
# and with explicit terminators.
file = tempname()
try
    open(file,"w") do f
        write(f,"Spiffy header\nspectacular first row\neven better 2nd row\nalmost done\n")
    end
    @test countlines(file) == 4
    @test countlines(file,'\r') == 0
    @test countlines(file,'\n') == 4
finally
    # Clean up even if the write or one of the checks above throws, so a
    # failing run does not leave the temporary file behind.
    isfile(file) && rm(file)
end

isequaldlm(m1, m2, t) = isequal(m1, m2) && (eltype(m1) == eltype(m2) == t)

@test isequaldlm(readdlm(IOBuffer("1\t2\n3\t4\n5\t6\n")), [1. 2; 3 4; 5 6], Float64)
Expand Down

0 comments on commit e2b78df

Please sign in to comment.