From e8292cc4b951ae0f859398f5dcadbd79ff2cf397 Mon Sep 17 00:00:00 2001 From: Brett Vickers Date: Sun, 7 Jul 2024 12:12:11 -0700 Subject: [PATCH] Default CharsetReader improvements When nil, the ReadSettings struct's CharsetReader field now causes the XML decoder to use a "pass-through" charset converter, passing the reader's data through without modification. This was already the default behavior when creating a new etree document with the NewDocument function, but now a default-constructed ReadSettings struct will result in the same default CharsetReader behavior. --- etree.go | 51 ++++++++++++++++++++++----------------------------- etree_test.go | 16 +++++++++------- 2 files changed, 31 insertions(+), 36 deletions(-) diff --git a/etree.go b/etree.go index c7939dd..fedf2ca 100644 --- a/etree.go +++ b/etree.go @@ -31,9 +31,14 @@ var ErrXML = errors.New("etree: invalid XML format") var cdataPrefix = []byte(" ` - charsetLabel := "" doc := newDocumentFromString2(t, s, ReadSettings{ CharsetReader: func(label string, input io.Reader) (io.Reader, error) { - charsetLabel = label - return &lowercaseCharsetReader{input}, nil + if label == "lowercase" { + return &lowercaseCharsetReader{input}, nil + } + return nil, errors.New("unknown charset") }, }) - if charsetLabel != "lowercase" { - t.Fatalf("etree: incorrect charset encoding, expected lowercase, got %s", charsetLabel) - } cases := []struct { path string @@ -772,9 +770,13 @@ func TestSortAttrs(t *testing.T) { checkStrEq(t, out, ``+"\n") } -func TestCharsetReaderEncoding(t *testing.T) { +func TestCharsetReaderDefaultSetting(t *testing.T) { + // Test encodings where the default pass-through charset conversion + // should work for common single-byte character encodings. cases := []string{ + ``, ``, + ``, ``, ``, }