From 891d2617ddbdfa265c4095b53103c010c98e6591 Mon Sep 17 00:00:00 2001
From: Martin Tournoij
Date: Mon, 4 Apr 2022 21:53:34 +0200
Subject: [PATCH] Don't error out if a multiline string ends with an incomplete UTF-8 sequence

---
 lex.go       | 5 +++++
 toml_test.go | 7 +------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/lex.go b/lex.go
index 63ef20f4..ce7f546b 100644
--- a/lex.go
+++ b/lex.go
@@ -128,6 +128,11 @@ func (lx lexer) getPos() Position {
 }
 
 func (lx *lexer) emit(typ itemType) {
+	// Needed for multiline strings ending with an incomplete UTF-8 sequence.
+	if lx.start > lx.pos {
+		lx.error(errLexUTF8{lx.input[lx.pos]})
+		return
+	}
 	lx.items <- item{typ: typ, pos: lx.getPos(), val: lx.current()}
 	lx.start = lx.pos
 }
diff --git a/toml_test.go b/toml_test.go
index d79d751c..9ebd1703 100644
--- a/toml_test.go
+++ b/toml_test.go
@@ -23,7 +23,7 @@ import (
 //
 // Filepaths are glob'd
 var errorTests = map[string][]string{
-	"encoding/bad-utf8-in*":         {"invalid UTF-8 byte"},
+	"encoding/bad-utf8*":            {"invalid UTF-8 byte"},
 	"encoding/utf16*":               {"files cannot contain NULL bytes; probably using UTF-16"},
 	"string/multiline-escape-space": {`invalid escape: '\ '`},
 }
@@ -282,11 +282,6 @@ func TestToml(t *testing.T) {
 			Parser:   parser{},
 			RunTests: runTests,
 			SkipTests: []string{
-				// This one is annoying to fix, and such an obscure edge case
-				// it's okay to leave it like this for now.
-				// https://github.com/BurntSushi/toml/issues/329
-				"invalid/encoding/bad-utf8-at-end",
-
 				// "15" in time.Parse() accepts both "1" and "01". The TOML
 				// specification says that times *must* start with a leading
 				// zero, but this requires writing out own datetime parser.