forked from JuliaLang/julia
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtoml_parser.jl
1210 lines (1076 loc) · 37.2 KB
/
toml_parser.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# This file is a part of Julia. License is MIT: https://julialang.org/license
module TOML
using Base: IdSet
# In case we do not have the Dates stdlib available
# we parse DateTime into these internal structs,
# note that these do not do any argument checking
# Plain calendar-date record used when the Dates stdlib is not available.
# The fields are stored exactly as given; no range checking is performed.
struct Date
year::Int
month::Int
day::Int
end
# Plain time-of-day record used when the Dates stdlib is not available.
# The fields are stored exactly as given; no range checking is performed.
# NOTE(review): `ms` is presumably the millisecond component — confirm against
# the local-time parser, which is not visible here.
struct Time
hour::Int
minute::Int
second::Int
ms::Int
end
# Combination of the internal `Date` and `Time` records.
struct DateTime
date::Date
time::Time
end
# Convenience constructor taking the seven components positionally and
# forwarding them to `Date` and `Time` without any validation.
DateTime(y, m, d, h, mi, s, ms) =
DateTime(Date(y,m,d), Time(h, mi, s, ms))
# Sentinel returned by `next_char` once the input string is exhausted.
const EOF_CHAR = typemax(Char)
# Concrete dictionary type used for every TOML table built by the parser.
const TOMLDict = Dict{String, Any}
##########
# Parser #
##########
# Mutable parser state: the input text, a one-character look-ahead with its
# position bookkeeping, and the tables built so far.
mutable struct Parser
str::String
# 1 character look ahead
current_char::Char
pos::Int
# prevpos equals the start byte of the look-ahead character;
# prevpos-1 is therefore the end byte of the character we last ate
prevpos::Int
# File info
column::Int
line::Int
# The function `take_substring` takes the substring from `marker` up
# to `prevpos-1`.
marker::Int
# The current table that `key = value` entries are inserted into
active_table::TOMLDict
# As we parse dotted keys we store each part of the key in this cache.
# A future improvement would be to also store the spans of the keys
# so that in error messages we could also show the previous key
# definition in case of duplicated keys
dotted_keys::Vector{String}
# Strings in TOML can have line continuations ('\' as the last character
# on a line). We store the byte ranges for each of these "chunks" in here
chunks::Vector{UnitRange{Int}}
# We need to keep track of those tables / arrays that are defined
# inline since we are not allowed to add keys to those
inline_tables::IdSet{TOMLDict}
static_arrays::IdSet{Any}
# [a.b.c.d] doesn't "define" the table [a]
# so keys can later be added to [a], therefore
# we need to keep track of what tables are
# actually defined
defined_tables::IdSet{TOMLDict}
# The table we will finally return to the user
root::TOMLDict
# Filled in when we are parsing a file, to improve error messages
filepath::Union{String, Nothing}
# Gets populated with the Dates stdlib if it exists
Dates::Union{Module, Nothing}
end
# Package identity (UUID + name) used to look up the Dates stdlib when a
# `Parser` is constructed.
const DATES_PKGID = Base.PkgId(Base.UUID("ade2ca70-3891-5945-98fb-dc099432e06a"), "Dates")
# Build a parser over `str`. `filepath` is only stored so that error messages
# can mention it. The one-character look-ahead is primed through `startup`
# before the parser is returned.
function Parser(str::String; filepath=nothing)
    root = TOMLDict()
    # The Dates module is looked up only when this Julia build offers
    # `Base.maybe_root_module`; otherwise the field stays `nothing`.
    dates = if isdefined(Base, :maybe_root_module)
        Base.maybe_root_module(DATES_PKGID)
    else
        nothing
    end
    parser = Parser(
        str,                 # str
        EOF_CHAR,            # current_char
        firstindex(str),     # pos
        0,                   # prevpos
        0,                   # column
        1,                   # line
        0,                   # marker
        root,                # active_table
        String[],            # dotted_keys
        UnitRange{Int}[],    # chunks
        IdSet{TOMLDict}(),   # inline_tables
        IdSet{Any}(),        # static_arrays
        IdSet{TOMLDict}(),   # defined_tables
        root,                # root
        filepath,            # filepath
        dates,               # Dates
    )
    startup(parser)
    return parser
end
# Prime the parser: load the first character of the input into the
# one-character look-ahead and skip a leading byte-order mark, if any.
function startup(l::Parser)
    # `eat_char` returns the *previous* look-ahead (the initial `EOF_CHAR`
    # placeholder), so its return value says nothing about the input. The
    # first real character is whatever the look-ahead holds afterwards.
    eat_char(l)
    # Skip BOM
    if peek(l) === '\ufeff'
        # The BOM must not count as a column; undo the increment that the
        # upcoming `eat_char` performs.
        l.column -= 1
        eat_char(l)
    end
    return nothing
end
# Parser over an empty document.
function Parser()
    return Parser("")
end

# Parser over the full remaining contents of `io`, read eagerly as a String.
function Parser(io::IO)
    return Parser(read(io, String))
end
# Reset `p` so it can parse `str` from scratch, reusing its containers.
# The `Dates` field is left as it was. Returns `p`.
function reinit!(p::Parser, str::String; filepath::Union{Nothing, String}=nothing)
    # Input and source information
    p.str = str
    p.filepath = filepath
    # Cursor state, identical to what the constructor sets up
    p.current_char = EOF_CHAR
    p.pos = firstindex(str)
    p.prevpos = 0
    p.marker = 0
    p.line = 1
    p.column = 0
    # Fresh result table; it is also the active table
    fresh_root = TOMLDict()
    p.root = fresh_root
    p.active_table = fresh_root
    # Drop all per-document bookkeeping
    for container in (p.dotted_keys, p.chunks, p.inline_tables,
                      p.static_arrays, p.defined_tables)
        empty!(container)
    end
    startup(p)
    return p
end
##########
# Errors #
##########
# Raise an `ErrorException` flagged as a bug in the parser itself.
function throw_internal_error(msg)
    return error(string("internal TOML parser error: ", msg))
end
# Many functions return a ParserError. We want this to bubble up
# all the way and have this error be returned to the user
# if the parse is called with `raise=false`. This macro
# makes that easier: `@try expr` evaluates `expr` once, returns from the
# enclosing function if the result is a `ParserError`, and otherwise
# yields the result as the value of the macro call.
# The macro is named `try`, which is a reserved word, hence the `var"try"`
# spelling spliced in through `@eval`.
@eval macro $(:var"try")(expr)
return quote
v = $(esc(expr))
v isa ParserError && return v
v
end
end
# Closed set of error kinds the parser can report. Every member must have an
# entry in `err_message`; this is asserted right after that table is defined.
# TODO: Check all of these are used
@enum ErrorType begin
# Toplevel #
############
ErrRedefineTableArray
ErrExpectedNewLineKeyValue
ErrAddKeyToInlineTable
ErrAddArrayToStaticArray
ErrArrayTreatedAsDictionary
ErrExpectedEndOfTable
ErrExpectedEndArrayOfTable
# Keys #
########
ErrExpectedEqualAfterKey
# Check, are these the same?
ErrDuplicatedKey
ErrKeyAlreadyHasValue
ErrInvalidBareKeyCharacter
ErrEmptyBareKey
# Values #
##########
ErrUnexpectedEofExpectedValue
ErrUnexpectedStartOfValue
ErrGenericValueError
# Arrays
ErrExpectedCommaBetweenItemsArray
# Inline tables
ErrExpectedCommaBetweenItemsInlineTable
ErrTrailingCommaInlineTable
ErrInlineTableRedefine
# Numbers
ErrUnderscoreNotSurroundedByDigits
ErrLeadingZeroNotAllowedInteger
ErrOverflowError
ErrLeadingDot
ErrNoTrailingDigitAfterDot
ErrTrailingUnderscoreNumber
ErrSignInNonBase10Number
# DateTime
ErrParsingDateTime
ErrOffsetDateNotSupported
# Strings
ErrNewLineInString
ErrUnexpectedEndString
ErrInvalidEscapeCharacter
ErrInvalidUnicodeScalar
end
# Human-readable message for every `ErrorType`, grouped in the same order as
# the enum declaration so that a missing entry is easy to spot.
const err_message = Dict(
    # Toplevel
    ErrRedefineTableArray => "tried to redefine an existing table as an array",
    ErrExpectedNewLineKeyValue => "expected newline after key value pair",
    ErrAddKeyToInlineTable => "tried to add a new key to an inline table",
    ErrAddArrayToStaticArray => "tried to append to a statically defined array",
    ErrArrayTreatedAsDictionary => "tried to add a key to an array",
    ErrExpectedEndOfTable => "expected end of table ']'",
    ErrExpectedEndArrayOfTable => "expected array of table to end with ']]'",
    # Keys
    ErrExpectedEqualAfterKey => "expected equal sign after key",
    ErrDuplicatedKey => "key already defined",
    ErrKeyAlreadyHasValue => "key already has a value",
    ErrInvalidBareKeyCharacter => "invalid bare key character",
    ErrEmptyBareKey => "bare key cannot be empty",
    # Values
    ErrUnexpectedEofExpectedValue => "unexpected end of file, expected a value",
    ErrUnexpectedStartOfValue => "unexpected start of value",
    ErrGenericValueError => "failed to parse value",
    # Arrays
    ErrExpectedCommaBetweenItemsArray => "expected comma between items in array",
    # Inline tables
    ErrExpectedCommaBetweenItemsInlineTable => "expected comma between items in inline table",
    ErrTrailingCommaInlineTable => "trailing comma not allowed in inline table",
    ErrInlineTableRedefine => "inline table overwrote key from other table",
    # Numbers
    ErrUnderscoreNotSurroundedByDigits => "underscore is not surrounded by digits",
    ErrLeadingZeroNotAllowedInteger => "leading zero in integer not allowed",
    ErrOverflowError => "overflowed when parsing integer",
    ErrLeadingDot => "floats require a leading zero",
    ErrNoTrailingDigitAfterDot => "expected digit after dot",
    ErrTrailingUnderscoreNumber => "trailing underscore in number",
    ErrSignInNonBase10Number => "number not in base 10 is not allowed to have a sign",
    # DateTime
    ErrParsingDateTime => "parsing date/time value failed",
    ErrOffsetDateNotSupported => "offset date-time is not supported",
    # Strings
    ErrNewLineInString => "newline character in single quoted string",
    ErrUnexpectedEndString => "string literal ended unexpectedly",
    ErrInvalidEscapeCharacter => "invalid escape character",
    ErrInvalidUnicodeScalar => "invalid unicode scalar",
)
# Load-time sanity check: every error kind must have a message.
for errtype in instances(ErrorType)
    @assert haskey(err_message, errtype) "$errtype does not have an error message"
end
# Error value produced by the parser. Parsing functions *return* it instead of
# throwing; `parse` throws it, `tryparse` hands it back to the caller.
mutable struct ParserError <: Exception
type::ErrorType
# Arbitrary data to store at the
# call site to be used when formatting
# the error
data
# These are filled in before returning from parse function
str ::Union{String, Nothing}
filepath ::Union{String, Nothing}
line ::Union{Int, Nothing}
column ::Union{Int, Nothing}
pos ::Union{Int, Nothing} # byte position of the parser when the error surfaced
table ::Union{TOMLDict, Nothing} # result parsed until error
end
# Convenience constructors used at error sites. The six location/context
# fields start out as `nothing` and are filled in later.
function ParserError(type, data)
    return ParserError(type, data, nothing, nothing, nothing, nothing, nothing, nothing)
end

function ParserError(type)
    return ParserError(type, nothing)
end
# Defining these below can be useful when debugging code that erroneously returns a
# ParserError because you get a stacktrace to where the ParserError was created
#ParserError(type) = error(type)
#ParserError(type, data) = error(type,data)
# Many functions return either a T or a ParserError;
# `Err{T}` is shorthand for that union.
const Err{T} = Union{T, ParserError}
# Look up the message for `error.type`. For an invalid bare-key character the
# offending character (stored in `error.data`) is appended in escaped form.
function format_error_message_for_err_type(error::ParserError)
    base_msg = err_message[error.type]
    error.type == ErrInvalidBareKeyCharacter || return base_msg
    c_escaped = escape_string(string(error.data)::String)
    return base_msg * ": '$c_escaped'"
end
# Used in error formatting to show where in a line an error happened.
# Given byte indices `a <= b` into `str`, return two strings: the text of the
# line containing `a`, and a marker line holding '^' under every character
# whose start index lies in `a:b` and ' ' elsewhere. For example,
# point_to_line("aa\nfoobar\nbb", 4, 6, io) returns
#   str1 = "foobar"
#   str2 = "^^^    "
# `context` is forwarded to `IOContext`, so the markers are coloured when the
# context asks for colour. Right now, it is only called with a == b.
function point_to_line(str::AbstractString, a::Int, b::Int, context)
    @assert b >= a
    # Snap both indices to valid character boundaries
    a = thisind(str, a)
    b = thisind(str, b)
    # Start right after the newline preceding `a`, or at index 1 if there is none
    prev_newline = findprev('\n', str, prevind(str, a))
    pos = (prev_newline === nothing ? 0 : prev_newline) + 1
    line_io = IOContext(IOBuffer(), context)
    marker_io = IOContext(IOBuffer(), context)
    while true
        # The marker for the current position is written before the character is
        # read, so the marker line ends up one cell longer than the text line.
        if a <= pos <= b
            printstyled(marker_io, "^"; color=:light_green)
        else
            print(marker_io, " ")
        end
        step = iterate(str, pos)
        step === nothing && break
        c, pos = step
        c == '\n' && break
        print(line_io, c)
    end
    return String(take!(line_io.io)), String(take!(marker_io.io))
end
# Pretty-print a `ParserError`: a header, `file:line:column`, the message for
# the error type, then the offending source line with a marker line below it.
# Expects `err.pos` and `err.str` to have been filled in (see the type
# assertions below).
function Base.showerror(io::IO, err::ParserError)
printstyled(io, "TOML Parser error:\n"; color=Base.error_color())
# Errors from in-memory strings have no path; show "none" instead
f = something(err.filepath, "none")
printstyled(io, f, ':', err.line, ':', err.column; bold=true)
printstyled(io, " error: "; color=Base.error_color())
println(io, format_error_message_for_err_type(err))
# For an unexpected EOF we want the arrow to point one position
# further along
pos = err.pos::Int
err.type == ErrUnexpectedEofExpectedValue && (pos += 1)
str1, err1 = point_to_line(err.str::String, pos, pos, io)
@static if VERSION <= v"1.6.0-DEV.121"
# See https://github.com/JuliaLang/julia/issues/36015
format_fixer = get(io, :color, false)::Bool == true ? "\e[0m" : ""
println(io, "$format_fixer ", str1)
print(io, "$format_fixer ", err1)
else
println(io, " ", str1)
print(io, " ", err1)
end
end
################
# Parser utils #
################
# Advance the cursor by one character and return that character, or
# `EOF_CHAR` when the input is exhausted.
# `prevpos` and `column` are updated even at end of input; `pos` only moves
# when a character was actually read. A newline bumps `line` and resets
# `column` to 0.
@inline function next_char(l::Parser)::Char
state = iterate(l.str, l.pos)
l.prevpos = l.pos
l.column += 1
state === nothing && return EOF_CHAR
c, pos = state
l.pos = pos
if c == '\n'
l.line += 1
l.column = 0
end
return c
end
# Consume the look-ahead character: return it and refill the look-ahead
# with the next character of the input.
@inline function eat_char(l::Parser)::Char
    eaten = l.current_char
    l.current_char = next_char(l)
    return eaten
end

# Return the look-ahead character without consuming it.
@inline function peek(l::Parser)
    return l.current_char
end
# Return true if the look-ahead character was accepted. `f` is either a
# predicate on the character or a specific `Char` to match. When a character
# is accepted it gets eaten and we move to the next character. At end of
# input nothing is ever accepted.
@inline function accept(l::Parser, f::Union{Function, Char})::Bool
    c = peek(l)
    c == EOF_CHAR && return false
    matched = f isa Char ? c === f : f(c)
    matched && eat_char(l)
    return matched
end
# Eat characters for as long as `f` accepts them.
# Return true if any character was accepted.
function accept_batch(l::Parser, f::F)::Bool where {F}
    accept(l, f) || return false
    while accept(l, f)
    end
    return true
end
# Return true if `f` was accepted `n` times in a row. On failure the
# characters accepted so far stay consumed.
@inline function accept_n(l::Parser, n, f::F)::Bool where {F}
    for _ in 1:n
        accept(l, f) || return false
    end
    return true
end
# Whitespace within a line: space or tab.
@inline function iswhitespace(c::Char)
    return c == '\t' || c == ' '
end

# Line-break characters: line feed or carriage return.
@inline function isnewline(c::Char)
    return c == '\r' || c == '\n'
end
# Skip spaces and tabs; returns true if anything was skipped.
function skip_ws(l::Parser)
    return accept_batch(l, iswhitespace)
end

# Skip spaces, tabs and line breaks (but not comments); returns true if
# anything was skipped.
function skip_ws_nl_no_comment(l::Parser)::Bool
    return accept_batch(l, c -> iswhitespace(c) || isnewline(c))
end
# Skip any mix of whitespace, line breaks and comments.
# Returns true if at least one of them was skipped.
function skip_ws_nl(l::Parser)::Bool
    skipped_any = false
    while true
        ate_blank = skip_ws_nl_no_comment(l)
        ate_comment = skip_comment(l)
        (ate_blank || ate_comment) || break
        skipped_any = true
    end
    return skipped_any
end
# If the look-ahead is '#', skip it and everything up to (not including) the
# next line break. Returns true if a comment was skipped.
function skip_comment(l::Parser)::Bool
    accept(l, '#') || return false
    accept_batch(l, !isnewline)
    return true
end
# Skip whitespace and then, only if some whitespace was skipped, a comment.
# The result is true only when both were skipped.
function skip_ws_comment(l::Parser)
    return skip_ws(l) && skip_comment(l)
end

# Remember the start byte of the look-ahead character as the beginning of
# the substring that `take_substring` will extract.
@inline function set_marker!(l::Parser)
    l.marker = l.prevpos
    return l.marker
end

# Text from the marker up to the end of the last eaten character.
function take_substring(l::Parser)
    return SubString(l.str, l.marker:(l.prevpos-1))
end
############
# Toplevel #
############
# Throwing driver: parse the whole input and return the resulting table,
# throwing the `ParserError` if `tryparse` reports one.
function parse(l::Parser)::TOMLDict
    result = tryparse(l)
    if result isa ParserError
        throw(result)
    end
    return result
end
# Non-throwing driver: keep parsing toplevel statements until end of input or
# the first error. On error, the `ParserError` is completed with the source
# text, file path, position and the partially built table, then returned.
function tryparse(l::Parser)::Err{TOMLDict}
    while true
        skip_ws_nl(l)
        peek(l) == EOF_CHAR && break
        result = parse_toplevel(l)
        result isa ParserError || continue
        # Attach context for error reporting
        result.str = l.str
        result.filepath = l.filepath
        result.table = l.root
        result.pos = l.prevpos - 1
        result.line = l.line
        result.column = l.column - 1
        return result
    end
    return l.root
end
# Top level can be either a table header, an array-of-tables header
# or a key/value entry. After any of them only a line break, a comment or
# end of input may follow.
function parse_toplevel(l::Parser)::Err{Nothing}
    if accept(l, '[')
        # Headers are always resolved from the root table
        l.active_table = l.root
        @try parse_table(l)
    else
        @try parse_entry(l, l.active_table)
    end
    skip_ws_comment(l)
    # SPEC: "There must be a newline (or EOF) after a key/value pair."
    next = peek(l)
    if next == '\n' || next == '\r' || next == '#' || next == EOF_CHAR
        return nothing
    end
    # Consume the offending character so the error position points at it
    eat_char(l)
    return ParserError(ErrExpectedNewLineKeyValue)
end
# Walk from table `d` along `dotted_keys`, creating missing intermediate
# tables, and return the table reached at the end of the path.
# When a key refers to an array, the walk continues into the array's last
# element. With `check` set, every visited table is validated through
# `check_allowed_add_key` (the "already defined" test only applies to the
# final one). Returns a `ParserError` when the path runs into something that
# is not a table: a scalar value, or an array with no elements to descend
# into.
function recurse_dict!(l::Parser, d::Dict, dotted_keys::AbstractVector{String}, check=true)::Err{TOMLDict}
    nkeys = length(dotted_keys)
    for (i, key) in enumerate(dotted_keys)
        d = get!(TOMLDict, d::TOMLDict, key)
        if d isa Vector
            # An empty array (e.g. `a = []`) has no last element; report it
            # instead of letting `d[end]` throw a BoundsError.
            isempty(d) && return ParserError(ErrKeyAlreadyHasValue)
            d = d[end]
        end
        if check
            @try check_allowed_add_key(l, d, i == nkeys)
        elseif !(d isa TOMLDict)
            # Without the check above a non-table would only be caught by
            # the type assertions, surfacing as a TypeError.
            return ParserError(ErrKeyAlreadyHasValue)
        end
    end
    return d::TOMLDict
end
# Decide whether keys may be added to `d`. Errors if `d` is not a table, if
# it is an inline table, or (only when `check_defined` is set) if it is a
# table that has already been explicitly defined. Returns `nothing` when
# adding is allowed.
function check_allowed_add_key(l::Parser, d, check_defined=true)::Err{Nothing}
    d isa Dict || return ParserError(ErrKeyAlreadyHasValue)
    d in l.inline_tables && return ParserError(ErrAddKeyToInlineTable)
    if check_defined && d in l.defined_tables
        return ParserError(ErrDuplicatedKey)
    end
    return nothing
end
# Can only enter here from toplevel, after the opening '[' has been eaten.
# Parses a `[table]` header (or hands over to `parse_array_table` on a second
# '['), makes that table the active one and records it as defined.
function parse_table(l)
    accept(l, '[') && return parse_array_table(l)
    table_key = @try parse_key(l)
    skip_ws(l)
    accept(l, ']') || return ParserError(ErrExpectedEndOfTable)
    table = @try recurse_dict!(l, l.root, table_key)
    l.active_table = table
    push!(l.defined_tables, table)
    return
end
# Parse the rest of a `[[array.of.tables]]` header (both '[' already eaten),
# append a fresh table to the named array and make it the active table.
# The array is created on first use; appending to an array that was written
# inline, or to a key that holds something other than an array, is an error.
function parse_array_table(l)::Union{Nothing, ParserError}
    table_key = @try parse_key(l)
    skip_ws(l)
    closed = accept(l, ']') && accept(l, ']')
    closed || return ParserError(ErrExpectedEndArrayOfTable)
    # Resolve the parent table without the add-key checks
    parent = @try recurse_dict!(l, l.root, @view(table_key[1:end-1]), false)
    name = table_key[end]
    tables = get!(() -> [], parent, name)
    tables isa Vector || return ParserError(ErrArrayTreatedAsDictionary)
    tables in l.static_arrays && return ParserError(ErrAddArrayToStaticArray)
    new_table = TOMLDict()
    push!(tables, new_table)
    push!(l.defined_tables, new_table)
    l.active_table = new_table
    return
end
# Parse one `key = value` entry and store it in table `d` (for dotted keys,
# in the sub-table reached through the leading key parts).
function parse_entry(l::Parser, d)::Union{Nothing, ParserError}
    key = @try parse_key(l)
    skip_ws(l)
    accept(l, '=') || return ParserError(ErrExpectedEqualAfterKey)
    if length(key) > 1
        d = @try recurse_dict!(l, d, @view(key[1:end-1]))
    end
    # Grab the final key part now: parsing the value may parse further keys
    # (inline tables), which reuses `l.dotted_keys`.
    last_key_part = l.dotted_keys[end]
    existing = get(d, last_key_part, nothing)
    if existing !== nothing
        @try check_allowed_add_key(l, existing)
    end
    skip_ws(l)
    value = @try parse_value(l)
    # Not allowed to overwrite a value with an inline dict
    if value isa Dict && haskey(d, last_key_part)
        return ParserError(ErrInlineTableRedefine)
    end
    # TODO: Performance, hashing `last_key_part` again here
    d[last_key_part] = value
    return
end
########
# Keys #
########
# SPEC: "Bare keys may only contain ASCII letters, ASCII digits, underscores,
# and dashes (A-Za-z0-9_-).
# Note that bare keys are allowed to be composed of only ASCII digits, e.g. 1234,
# but are always interpreted as strings."
# True for the characters allowed in a bare key: A-Z, a-z, 0-9, '_' and '-'.
@inline function isvalid_barekey_char(c::Char)
    return ('a' <= c <= 'z') || ('A' <= c <= 'Z') || isdigit(c) || c == '_' || c == '-'
end
# Parse a (possibly dotted) key. The key parts are collected in
# `l.dotted_keys`, which is cleared first and is also what gets returned
# on success.
function parse_key(l::Parser)
    empty!(l.dotted_keys)
    return _parse_key(l)
end
# Recursively add the parts of a dotted key to `l.dotted_keys`.
# Each part is either a basic string ("..."), a literal string ('...') or a
# bare key. Returns `l.dotted_keys` on success and a `ParserError` otherwise.
function _parse_key(l::Parser)
    skip_ws(l)
    # SPEC: "A bare key must be non-empty,"
    if isempty(l.dotted_keys) && accept(l, '=')
        return ParserError(ErrEmptyBareKey)
    end
    keyval = if accept(l, '"')
        @try parse_string_start(l, false)
    elseif accept(l, '\'')
        @try parse_string_start(l, true)
    else
        set_marker!(l)
        if accept_batch(l, isvalid_barekey_char)
            # A bare key may be followed by a dot, whitespace, ']' or '='.
            # Whitespace means space *or* tab, so use `iswhitespace` rather
            # than comparing against ' ' only; otherwise `key<TAB>= 1` is
            # rejected.
            next = peek(l)
            if !(next == '.' || iswhitespace(next) || next == ']' || next == '=')
                c = eat_char(l)
                return ParserError(ErrInvalidBareKeyCharacter, c)
            end
            String(take_substring(l))
        else
            c = eat_char(l)
            return ParserError(ErrInvalidBareKeyCharacter, c)
        end
    end
    push!(l.dotted_keys, keyval)
    # SPEC: "Whitespace around dot-separated parts is ignored."
    skip_ws(l)
    if accept(l, '.')
        skip_ws(l)
        @try _parse_key(l)
    end
    return l.dotted_keys
end
##########
# Values #
##########
# Parse any TOML value, dispatching on its first character. A sub-parser
# that returns `nothing` is reported as a generic value error; anything else
# (including a `ParserError`) is passed through unchanged.
function parse_value(l::Parser)
    parsed = if accept(l, '[')
        parse_array(l)
    elseif accept(l, '{')
        parse_inline_table(l)
    elseif accept(l, '"')
        parse_string_start(l, false)
    elseif accept(l, '\'')
        parse_string_start(l, true)
    elseif accept(l, 't')
        parse_bool(l, true)
    elseif accept(l, 'f')
        parse_bool(l, false)
    else
        parse_number_or_date_start(l)
    end
    parsed === nothing && return ParserError(ErrGenericValueError)
    return parsed
end
#########
# Array #
#########
# Append `el` to `v`, widening the element type when needed. Returns the
# vector holding the result: `v` itself when `el` fits its element type,
# otherwise a newly allocated vector.
function push!!(v::Vector, el)
    # Since these types are typically non-inferrable, they are a big invalidation risk,
    # and since it's used by the package-loading infrastructure the cost of invalidation
    # is high. Therefore, this is written to reduce the "exposed surface area": e.g., rather
    # than writing `T[el]` we write it as `push!(Vector{T}(undef, 1), el)` so that there
    # is no ambiguity about what types of objects will be created.
    elT = eltype(v)
    valT = typeof(el)
    # Fits as is: push in place
    if el isa elT || valT === elT
        push!(v, el::elT)
        return v
    end
    # First element of a still untyped vector: start a vector of exactly its type
    if elT === Union{}
        fresh = Vector{valT}(undef, 1)
        fresh[1] = el
        return fresh
    end
    # Widen: a plain type grows into a two-member Union, a Union falls back to Any
    widened = elT isa Union ? Any : Union{elT, typeof(el)}
    grown = Array{widened}(undef, length(v))
    copy!(grown, v)
    return push!(grown, el)
end
# Parse an array after its opening '[' has been eaten. The result is
# registered in `l.static_arrays` so that later statements cannot append to
# it. A trailing comma before ']' is accepted.
function parse_array(l::Parser)::Err{Vector}
    skip_ws_nl(l)
    array = Vector{Union{}}()
    if !accept(l, ']')
        while true
            v = @try parse_value(l)
            # TODO: Worth to function barrier this?
            array = push!!(array, v)
            # There can be an arbitrary number of newlines and comments
            # before a value and before the closing bracket.
            skip_ws_nl(l)
            comma = accept(l, ',')
            skip_ws_nl(l)
            accept(l, ']') && break
            comma || return ParserError(ErrExpectedCommaBetweenItemsArray)
        end
    end
    push!(l.static_arrays, array)
    return array
end
################
# Inline table #
################
# Parse an inline table after its opening '{' has been eaten. The table is
# registered in `l.inline_tables` up front so that no keys can be added to
# it from outside. A trailing comma is an error.
function parse_inline_table(l::Parser)::Err{TOMLDict}
    dict = TOMLDict()
    push!(l.inline_tables, dict)
    skip_ws(l)
    accept(l, '}') && return dict
    while true
        @try parse_entry(l, dict)
        # SPEC: No newlines are allowed between the curly braces unless they
        # are valid within a value.
        skip_ws(l)
        accept(l, '}') && return dict
        accept(l, ',') || return ParserError(ErrExpectedCommaBetweenItemsInlineTable)
        skip_ws(l)
        accept(l, '}') && return ParserError(ErrTrailingCommaInlineTable)
    end
end
###########
# Numbers #
###########
# Finish parsing `inf` after the 'i' has been eaten; `sgn` carries the sign.
# Returns `nothing` if the remaining letters do not match.
function parse_inf(l::Parser, sgn::Int)
    (accept(l, 'n') && accept(l, 'f')) || return nothing
    return sgn * Inf
end

# Finish parsing `nan` after the 'n' has been eaten.
# Returns `nothing` if the remaining letters do not match.
function parse_nan(l::Parser)
    (accept(l, 'a') && accept(l, 'n')) || return nothing
    return NaN
end
# Finish parsing a boolean literal. We have eaten a 't' if `v` is true,
# otherwise an 'f'. Returns `v` when the remaining letters match and
# `nothing` at the first mismatch.
function parse_bool(l::Parser, v::Bool)::Union{Bool, Nothing}
    remaining = v ? ('r', 'u', 'e') : ('a', 'l', 's', 'e')
    for c in remaining
        accept(l, c) || return nothing
    end
    return v
end
# Digit predicates for the non-decimal integer bases.
function isvalid_hex(c::Char)
    return isdigit(c) || 'a' <= c <= 'f' || 'A' <= c <= 'F'
end

function isvalid_oct(c::Char)
    return '0' <= c <= '7'
end

function isvalid_binary(c::Char)
    return '0' <= c <= '1'
end

# The digit predicates that `accept_batch_underscore` accepts.
const ValidSigs = Union{
    typeof(isvalid_hex),
    typeof(isvalid_oct),
    typeof(isvalid_binary),
    typeof(isdigit),
}
# This function eats things accepted by `f` but also allows eating `_` in between
# digits. Returns a tuple: whether it ate at least one character accepted by `f`,
# and whether it saw an underscore.
# `fail_if_underscore` says whether an underscore at the current position is an
# error; callers pass `false` when a digit has just been consumed before the
# call. An underscore that is not followed by a digit, or a run that ends in an
# underscore, yields a `ParserError`.
function accept_batch_underscore(l::Parser, f::ValidSigs, fail_if_underscore=true)::Err{Tuple{Bool, Bool}}
contains_underscore = false
at_least_one = false
last_underscore = false
while true
c = peek(l)
if c == '_'
contains_underscore = true
if fail_if_underscore
return ParserError(ErrUnderscoreNotSurroundedByDigits)
end
eat_char(l)
# A second underscore right after this one is not allowed
fail_if_underscore = true
last_underscore = true
else
# SPEC: "Each underscore must be surrounded by at least one digit on each side."
fail_if_underscore = false
if f(c)
at_least_one = true
eat_char(l)
else
# End of the run; it must not end with an underscore
if last_underscore
return ParserError(ErrTrailingUnderscoreNumber)
end
return at_least_one, contains_underscore
end
last_underscore = false
end
end
end
# Parse a value that starts like a number: an integer (decimal, hex, octal or
# binary), a float (including `inf` / `nan`), or a date / time, which also
# begins with digits. The marker is set at the start so that the literal text
# can later be handed to the `parse_*` helpers. Returns the parsed value, a
# `ParserError`, or `nothing` (from `parse_inf` / `parse_nan` on a mismatch).
function parse_number_or_date_start(l::Parser)
    set_marker!(l)
    sgn = 1
    parsed_sign = false
    if accept(l, '+')
        parsed_sign = true
    elseif accept(l, '-')
        parsed_sign = true
        sgn = -1
    end
    if accept(l, 'i')
        return parse_inf(l, sgn)
    elseif accept(l, 'n')
        return parse_nan(l)
    end

    if accept(l, '.')
        return ParserError(ErrLeadingDot)
    end

    # Zero is allowed to be followed by an end-of-value char, a base prefix
    # (x, o, b) or a dot
    readed_zero = false
    if accept(l, '0')
        readed_zero = true # Intentional bad grammar to remove the ambiguity in "read"...
        if ok_end_value(peek(l))
            return Int64(0)
        elseif accept(l, 'x')
            parsed_sign && return ParserError(ErrSignInNonBase10Number)
            ate, contains_underscore = @try accept_batch_underscore(l, isvalid_hex)
            ate && return parse_hex(l, contains_underscore)
        elseif accept(l, 'o')
            parsed_sign && return ParserError(ErrSignInNonBase10Number)
            ate, contains_underscore = @try accept_batch_underscore(l, isvalid_oct)
            ate && return parse_oct(l, contains_underscore)
        elseif accept(l, 'b')
            parsed_sign && return ParserError(ErrSignInNonBase10Number)
            ate, contains_underscore = @try accept_batch_underscore(l, isvalid_binary)
            ate && return parse_bin(l, contains_underscore)
        elseif accept(l, isdigit)
            # Zero followed by another digit is handed to the local-time parser
            return parse_local_time(l)
        end
    end

    # `read_underscore` accumulates whether ANY part of the literal contained
    # an underscore; it decides whether the text must be filtered before it
    # is handed to the float parser.
    read_underscore = false
    read_digit = accept(l, isdigit)
    if !readed_zero && !read_digit
        if peek(l) == EOF_CHAR
            return ParserError(ErrUnexpectedEofExpectedValue)
        else
            return ParserError(ErrUnexpectedStartOfValue)
        end
    end
    ate, contains_underscore = @try accept_batch_underscore(l, isdigit, readed_zero)
    read_underscore |= contains_underscore
    if (read_digit || ate) && ok_end_value(peek(l))
        return parse_int(l, contains_underscore)
    end
    # Done with integers here

    if !read_underscore
        # No underscores in date / times
        if peek(l) == '-'
            return parse_datetime(l)
        elseif peek(l) == ':'
            return parse_local_time(l)
        end
    end
    # Done with datetime / localtime here

    # can optionally read a . + digits and then exponent
    ate_dot = accept(l, '.')
    ate, contains_underscore = @try accept_batch_underscore(l, isdigit, true)
    if ate_dot && !ate
        return ParserError(ErrNoTrailingDigitAfterDot)
    end
    read_underscore |= contains_underscore
    if accept(l, x -> x == 'e' || x == 'E')
        accept(l, x-> x == '+' || x == '-')
        # SPEC: (which follows the same rules as decimal integer values but may include leading zeros)
        read_digit = accept_batch(l, isdigit)
        ate, exp_underscore = @try accept_batch_underscore(l, isdigit, !read_digit)
        # Accumulate rather than overwrite: underscores seen in the integer
        # or fractional part (e.g. `1_000e3`) must still be stripped below.
        read_underscore |= exp_underscore
    end
    if !ok_end_value(peek(l))
        eat_char(l)
        return ParserError(ErrGenericValueError)
    end
    return parse_float(l, read_underscore)
end
# Return the marked text as a SubString. When it contains underscores they
# are filtered out first, which materializes a new string; otherwise the
# substring of the input is returned as is.
function take_string_or_substring(l, contains_underscore)::SubString
    subs = take_substring(l)
    contains_underscore || return subs
    # Need to pass an AbstractString to `parse`, so materialize the filtered text
    return SubString(filter(!=('_'), subs))
end
# Convert the marked text to a Float64, reporting a generic value error if
# the text is not a valid float.
function parse_float(l::Parser, contains_underscore)::Err{Float64}
    text = take_string_or_substring(l, contains_underscore)
    parsed = Base.tryparse(Float64, text)
    return parsed === nothing ? ParserError(ErrGenericValueError) : parsed
end
# Generate `parse_int`, `parse_hex`, `parse_oct` and `parse_bin`.
# Each one takes the marked text (underscores stripped if needed) and picks
# the result type from the length of that text: up to `n1` characters it is
# parsed as `T1`, up to `n2` characters as `T2`, and as `BigInt` beyond that.
# NOTE(review): the thresholds presumably account for the maximum digit count
# of each type plus sign / base prefix — confirm before changing them.
# An `OverflowError` from `Base.parse` becomes `ErrOverflowError`; any other
# exception is treated as an internal parser bug.
for (name, T1, T2, n1, n2) in (("int", Int64, Int128, 17, 33),
("hex", UInt64, UInt128, 18, 34),
("oct", UInt64, UInt128, 24, 45),
("bin", UInt64, UInt128, 66, 130),
)
@eval function $(Symbol("parse_", name))(l::Parser, contains_underscore, base=nothing)::Err{Union{$(T1), $(T2), BigInt}}
s = take_string_or_substring(l, contains_underscore)
len = length(s)
v = try
if len ≤ $(n1)
Base.parse($(T1), s; base)
elseif $(n1) < len ≤ $(n2)
Base.parse($(T2), s; base)
else
Base.parse(BigInt, s; base)
end
catch e
e isa Base.OverflowError && return(ParserError(ErrOverflowError))
error("internal parser error: did not correctly discredit $(repr(s)) as an int")
end
return v
end
end
##########################
# Date / Time / DateTime #
##########################
# True if `c` may directly follow a complete value: whitespace, a line break,
# the start of a comment, end of input, or one of the closers / separators
# of the enclosing array or inline table.
function ok_end_value(c::Char)
    return iswhitespace(c) || c == '\n' || c == '\r' || c == '#' ||
           c == ',' || c == ']' || c == '}' || c == EOF_CHAR
end
#=
# https://tools.ietf.org/html/rfc3339
# Internet Protocols MUST generate four digit years in dates.
date-fullyear = 4DIGIT
date-month = 2DIGIT ; 01-12
date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on
; month/year
time-hour = 2DIGIT ; 00-23
time-minute = 2DIGIT ; 00-59
time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second
; rules
time-secfrac = "." 1*DIGIT
time-numoffset = ("+" / "-") time-hour ":" time-minute
time-offset = "Z" / time-numoffset
partial-time = time-hour ":" time-minute ":" time-second
[time-secfrac]
full-date = date-fullyear "-" date-month "-" date-mday
full-time = partial-time time-offset
date-time = full-date "T" full-time
=#
# Accept exactly two characters matching `f`. Returns `true` on success and
# a date/time `ParserError` otherwise.
function accept_two(l, f::F) where {F}
    accept_n(l, 2, f) && return true
    return ParserError(ErrParsingDateTime)
end
function parse_datetime(l)
# Year has already been eaten when we reach here
year = @try parse_int(l, false)
year in 0:9999 || return ParserError(ErrParsingDateTime)
# Month
accept(l, '-') || return ParserError(ErrParsingDateTime)
set_marker!(l)
@try accept_two(l, isdigit)
month = @try parse_int(l, false)
month in 1:12 || return ParserError(ErrParsingDateTime)
accept(l, '-') || return ParserError(ErrParsingDateTime)
# Day
set_marker!(l)
@try accept_two(l, isdigit)
day = @try parse_int(l, false)
# Verify the real range in the constructor below
day in 1:31 || return ParserError(ErrParsingDateTime)
# We might have a local date now
read_space = false
if ok_end_value(peek(l))
if (read_space = accept(l, ' '))
if !isdigit(peek(l))
return try_return_date(l, year, month, day)
end
else
return try_return_date(l, year, month, day)
end
end
if !read_space
accept(l, 'T') || accept(l, 't') || return ParserError(ErrParsingDateTime)
end
h, m, s, ms = @try _parse_local_time(l)
# Julia doesn't support offset times
if !accept(l, 'Z')
if accept(l, '+') || accept(l, '-')
return ParserError(ErrOffsetDateNotSupported)