From 08aae82d2f9c9269e77ae55384b0b6b9f1552fc9 Mon Sep 17 00:00:00 2001 From: awskii Date: Thu, 11 Apr 2024 14:22:21 +0100 Subject: [PATCH] release decompressor mmap on errors --- erigon-lib/seg/decompress.go | 33 ++++++----- erigon-lib/seg/decompress_test.go | 92 +++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 15 deletions(-) diff --git a/erigon-lib/seg/decompress.go b/erigon-lib/seg/decompress.go index f82572d7602..7fee25b2976 100644 --- a/erigon-lib/seg/decompress.go +++ b/erigon-lib/seg/decompress.go @@ -166,6 +166,10 @@ func NewDecompressor(compressedFilePath string) (d *Decompressor, err error) { if rec := recover(); rec != nil { err = fmt.Errorf("decompressing file: %s, %+v, trace: %s", compressedFilePath, rec, dbg.Stack()) } + if err != nil && d != nil { + d.Close() + d = nil + } }() d.f, err = os.Open(compressedFilePath) @@ -200,19 +204,19 @@ func NewDecompressor(compressedFilePath string) (d *Decompressor, err error) { var patternMaxDepth uint64 for dictPos < dictSize { - d, ns := binary.Uvarint(data[dictPos:]) - if d > maxAllowedDepth { - return nil, fmt.Errorf("dictionary is invalid: patternMaxDepth=%d", d) + depth, ns := binary.Uvarint(data[dictPos:]) + if depth > maxAllowedDepth { + return nil, fmt.Errorf("dictionary is invalid: patternMaxDepth=%d", depth) } - depths = append(depths, d) - if d > patternMaxDepth { - patternMaxDepth = d + depths = append(depths, depth) + if depth > patternMaxDepth { + patternMaxDepth = depth } dictPos += uint64(ns) l, n := binary.Uvarint(data[dictPos:]) dictPos += uint64(n) patterns = append(patterns, data[dictPos:dictPos+l]) - //fmt.Printf("depth = %d, pattern = [%x]\n", d, data[dictPos:dictPos+l]) + //fmt.Printf("depth = %d, pattern = [%x]\n", depth, data[dictPos:dictPos+l]) dictPos += l } @@ -241,13 +245,14 @@ func NewDecompressor(compressedFilePath string) (d *Decompressor, err error) { dictPos = 0 for dictPos < dictSize { - d, ns := binary.Uvarint(data[dictPos:]) - if d > maxAllowedDepth { - 
return nil, fmt.Errorf("dictionary is invalid: posMaxDepth=%d", d) + depth, ns := binary.Uvarint(data[dictPos:]) + if depth > maxAllowedDepth { + d.Close() + return nil, fmt.Errorf("dictionary is invalid: posMaxDepth=%d", depth) } - posDepths = append(posDepths, d) - if d > posMaxDepth { - posMaxDepth = d + posDepths = append(posDepths, depth) + if depth > posMaxDepth { + posMaxDepth = depth } dictPos += uint64(ns) pos, n := binary.Uvarint(data[dictPos:]) @@ -277,8 +282,6 @@ func NewDecompressor(compressedFilePath string) (d *Decompressor, err error) { d.wordsStart = pos + 8 + dictSize if d.Count() == 0 && dictSize == 0 && d.size > compressedMinSize { - d.Close() - return nil, fmt.Errorf("corrupted file: size %v but no words in it: %v", fName, datasize.ByteSize(d.size).HR()) } diff --git a/erigon-lib/seg/decompress_test.go b/erigon-lib/seg/decompress_test.go index b3fbc43f093..a39a45bf1f5 100644 --- a/erigon-lib/seg/decompress_test.go +++ b/erigon-lib/seg/decompress_test.go @@ -323,6 +323,98 @@ func TestUncompressed(t *testing.T) { } } +func TestDecompressor_OpenCorrupted(t *testing.T) { + t.Helper() + logger := log.New() + tmpDir := t.TempDir() + + t.Run("uncompressed", func(t *testing.T) { + file := filepath.Join(tmpDir, "unc") + c, err := NewCompressor(context.Background(), t.Name(), file, tmpDir, 1, 2, log.LvlDebug, logger) + require.NoError(t, err) + defer c.Close() + for k, w := range loremStrings { + if err = c.AddUncompressedWord([]byte(fmt.Sprintf("%s %d", w, k))); err != nil { + t.Fatal(err) + } + } + err = c.Compress() + require.NoError(t, err) + + d, err := NewDecompressor(file) + require.NoError(t, err) + d.Close() + + }) + + t.Run("uncompressed_empty", func(t *testing.T) { + file := filepath.Join(tmpDir, "unc_empty") + c, err := NewCompressor(context.Background(), t.Name(), file, tmpDir, 1, 2, log.LvlDebug, logger) + require.NoError(t, err) + defer c.Close() + err = c.Compress() + require.NoError(t, err) + + // this file is empty and its size will be 
32 bytes, it's not corrupted + d, err := NewDecompressor(file) + require.NoError(t, err) + d.Close() + }) + + t.Run("compressed", func(t *testing.T) { + file := filepath.Join(tmpDir, "comp") + c, err := NewCompressor(context.Background(), t.Name(), file, tmpDir, 1, 2, log.LvlDebug, logger) + require.NoError(t, err) + defer c.Close() + for k, w := range loremStrings { + if err = c.AddWord([]byte(fmt.Sprintf("%s %d", w, k))); err != nil { + t.Fatal(err) + } + } + err = c.Compress() + require.NoError(t, err) + + d, err := NewDecompressor(file) + require.NoError(t, err) + d.Close() + + }) + + t.Run("compressed_empty", func(t *testing.T) { + file := filepath.Join(tmpDir, "comp_empty") + c, err := NewCompressor(context.Background(), t.Name(), file, tmpDir, 1, 2, log.LvlDebug, logger) + require.NoError(t, err) + defer c.Close() + err = c.Compress() + require.NoError(t, err) + + d, err := NewDecompressor(file) + require.NoError(t, err) + d.Close() + }) + + t.Run("notExist", func(t *testing.T) { + file := filepath.Join(tmpDir, "comp_bad") + d, err := NewDecompressor(file) + require.Error(t, err, "file is not exist") + require.Nil(t, d) + }) + + t.Run("gibberish", func(t *testing.T) { + aux := make([]byte, rand.Intn(129)) + _, err := rand.Read(aux) + require.NoError(t, err) + + fpath := filepath.Join(tmpDir, "gibberish") + err = os.WriteFile(fpath, aux, 0644) + require.NoError(t, err) + + d, err := NewDecompressor(fpath) + require.Error(t, err, "file is some garbage or smaller 32 bytes. Or we got exactly 32 zeros from /rand") + require.Nil(t, d) + }) +} + const lorem = `Lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur