From dcf274c9f59e189b40fb2f30b9aab1342fb49c47 Mon Sep 17 00:00:00 2001
From: Jacob Quinn
Date: Mon, 29 Jun 2015 21:25:29 -0600
Subject: [PATCH 1/2] Improve countlines() performance

---
 base/datafmt.jl | 23 +++++------------------
 test/readdlm.jl | 15 +++++++++++++++
 2 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/base/datafmt.jl b/base/datafmt.jl
index 0b123ec705808..aeef0f07f29eb 100644
--- a/base/datafmt.jl
+++ b/base/datafmt.jl
@@ -13,28 +13,15 @@ export countlines, readdlm, readcsv, writedlm, writecsv
 const invalid_dlm = Char(0xfffffffe)
 const offs_chunk_size = 5000
 
-countlines(nameorfile) = countlines(nameorfile, '\n')
-function countlines(filename::AbstractString, eol::Char)
-    open(filename) do io
-        countlines(io, eol)
-    end
-end
-function countlines(io::IO, eol::Char)
-    if !isascii(eol)
-        throw(ArgumentError("only ASCII line terminators are supported"))
-    end
+countlines(f::AbstractString,eol::Char='\n') = open(io->countlines(io,eol),f)::Int
+function countlines(io::IO, eol::Char='\n')
+    isascii(eol) || throw(ArgumentError("only ASCII line terminators are supported"))
     a = Array(UInt8, 8192)
     nl = 0
-    preceded_by_eol = true
     while !eof(io)
         nb = readbytes!(io, a)
-        for i=1:nb
-            if Char(a[i]) == eol
-                preceded_by_eol = true
-            elseif preceded_by_eol
-                preceded_by_eol = false
-                nl+=1
-            end
+        @simd for i=1:nb
+            @inbounds nl += a[i] == eol
         end
     end
     nl
diff --git a/test/readdlm.jl b/test/readdlm.jl
index b2c1462b7dd0e..c0032acc27049 100644
--- a/test/readdlm.jl
+++ b/test/readdlm.jl
@@ -1,5 +1,20 @@
 # This file is a part of Julia. License is MIT: http://julialang.org/license
 
+# countlines
+@test countlines(IOBuffer("\n")) == 1
+@test countlines(IOBuffer("\n"),'\r') == 0
+@test countlines(IOBuffer("\n\n\n\n\n\n\n\n\n\n")) == 10
+@test countlines(IOBuffer("\n \n \n \n \n \n \n \n \n \n")) == 10
+@test countlines(IOBuffer("\r\n \r\n \r\n \r\n \r\n")) == 5
+file = tempname()
+open(file,"w") do f
+    write(f,"Spiffy header\nspectacular first row\neven better 2nd row\nalmost done\n")
+end
+@test countlines(file) == 4
+@test countlines(file,'\r') == 0
+@test countlines(file,'\n') == 4
+rm(file)
+
 isequaldlm(m1, m2, t) = isequal(m1, m2) && (eltype(m1) == eltype(m2) == t)
 
 @test isequaldlm(readdlm(IOBuffer("1\t2\n3\t4\n5\t6\n")), [1. 2; 3 4; 5 6], Float64)

From a5c9967d6e91a0706d6d5f1c9036ce0036ca06d6 Mon Sep 17 00:00:00 2001
From: Jacob Quinn
Date: Tue, 30 Jun 2015 10:58:38 -0600
Subject: [PATCH 2/2] Update docs on countlines [ci skip]

---
 doc/stdlib/io-network.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/stdlib/io-network.rst b/doc/stdlib/io-network.rst
index e7c32ec862adf..6832e4910b88a 100644
--- a/doc/stdlib/io-network.rst
+++ b/doc/stdlib/io-network.rst
@@ -243,7 +243,7 @@ General I/O
 
 .. function:: countlines(io,[eol::Char])
 
-   Read io until the end of the stream/file and count the number of non-empty lines. To specify a file pass the filename as the first
+   Read ``io`` until the end of the stream/file and count the number of lines. To specify a file pass the filename as the first
    argument. EOL markers other than '\\n' are supported by passing them as the second argument.
 
 .. function:: PipeBuffer()