Skip to content

Commit

Permalink
Selectively reduce multithreaded parsing @error (#1099)
Browse files Browse the repository at this point in the history
  • Loading branch information
Liozou authored Jun 20, 2023
1 parent 07fb6c2 commit 4e6a332
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 2 deletions.
5 changes: 3 additions & 2 deletions src/context.jl
Original file line number Diff line number Diff line change
Expand Up @@ -649,9 +649,10 @@ end
debug && println("single-threaded estimated rows = $origrowsguess, multi-threaded estimated rows = $rowsguess")
debug && println("multi-threaded column types sampled as: $columns")
else
@error "Multi-threaded parsing failed (are there newlines inside quoted fields?), falling back to single-threaded parsing"
# The following debug statement is echoed by a loud @warn or @error in parsefilechunk!
debug && println("multi-threaded parsing failed! Falling back to single thread, reinitializing column types.")
reinitialize_column_type!(columns, types, names, stringtype, streaming)
threaded = false
threaded = false # the failing is signaled by having !ctx.threaded && ctx.ntasks > 1
end
end
if !threaded
Expand Down
9 changes: 9 additions & 0 deletions src/file.jl
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,15 @@ function parsefilechunk!(ctx::Context, pos, len, rowsguess, rowoffset, columns,
rowsguess = newrowsguess
end
end
if !ctx.threaded && ctx.ntasks > 1 && !ctx.silencewarnings
# `!ctx.threaded && ctx.ntasks > 1` indicates that multithreaded parsing failed.
# These messages echo the corresponding debug statement in the definition of ctx.
if numwarnings[] > 0
@warn "Multithreaded parsing failed and fell back to single-threaded parsing, check previous warnings for possible reasons."
else
@error "Multithreaded parsing failed and fell back to single-threaded parsing. This can happen if the input contains multi-line fields; otherwise, please report this issue."
end
end
end
# done parsing (at least this chunk), so resize columns to final row count
for col in columns
Expand Down
13 changes: 13 additions & 0 deletions test/testfiles.jl
Original file line number Diff line number Diff line change
Expand Up @@ -709,3 +709,16 @@ testfile("test_basic.csv", (types=Dict(2=>Float64),),
(col1 = [1, 4, 7], col2 = [2.0, 5.0, 8.0], col3 = [3, 6, 9]);
dir=Path(dir)
)

# https://github.com/JuliaData/CSV.jl/pull/1099
# Regression test for the multithreaded-parsing fallback message: the fixture contains one
# quoted field that spans many lines, and the file is read with `ntasks=3`. As announced by
# the @info below, an @error starting with "Multithreaded parsing failed..." is expected in
# the log output; the parsed data itself is still compared against the expected columns.
@info "The following test is expected to @error with \"Multithreaded parsing failed...\""
testfile("test_multiline_field_errorwarning.csv", (ntasks=3,),
# Expected size — presumably (rows, columns): 19 regular rows + 1 multi-line row, 3 columns.
(20, 3),
# Expected schema — presumably column names and element types; confirm against `testfile`.
NamedTuple{(:col1, :col2, :col3), Tuple{String3, String, Int}},
# Expected columns: start from the 19 regular rows ("A$i", ".$i", i) ...
let col1 = [String3("A$i") for i in 1:19], col2 = [".$i" for i in 1:19], col3 = collect(1:19)
# ... then splice the special row in at position 14 (it sits between A13 and A14 in the fixture).
insert!(col1, 14, String3("foo"))
# The multi-line quoted field; its lines consist of commas and spaces only.
insert!(col2, 14, "a field to thwart all heuristics\n ,,,\n, ,\n , ,,\n, ,,\n , ,,\n,,\n ,,\n , ,\n , ,\n ,, , ,\n , ,,,\n, ,,\n\n, , , ,\n , ,\n\n ,,,\n,,,\n,,,\n ,,,\n\n,\n,\n")
# The fixture writes this cell as `-0`; the expected Int value is 0.
insert!(col3, 14, 0)
(; col1, col2, col3)
end
)
45 changes: 45 additions & 0 deletions test/testfiles/test_multiline_field_errorwarning.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
col1,col2,col3
A1,.1,1
A2,.2,2
A3,.3,3
A4,.4,4
A5,.5,5
A6,.6,6
A7,.7,7
A8,.8,8
A9,.9,9
A10,.10,10
A11,.11,11
A12,.12,12
A13,.13,13
foo,"a field to thwart all heuristics
,,,
, ,
, ,,
, ,,
, ,,
,,
,,
, ,
, ,
,, , ,
, ,,,
, ,,

, , , ,
, ,

,,,
,,,
,,,
,,,

,
,
",-0
A14,.14,14
A15,.15,15
A16,.16,16
A17,.17,17
A18,.18,18
A19,.19,19

0 comments on commit 4e6a332

Please sign in to comment.