Skip to content

Commit

Permalink
release decompressor mmap on errors (#9917)
Browse files Browse the repository at this point in the history
  • Loading branch information
awskii authored Apr 12, 2024
1 parent 1cbdfe3 commit 400c9ba
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 15 deletions.
33 changes: 18 additions & 15 deletions erigon-lib/seg/decompress.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@ func NewDecompressor(compressedFilePath string) (d *Decompressor, err error) {
if rec := recover(); rec != nil {
err = fmt.Errorf("decompressing file: %s, %+v, trace: %s", compressedFilePath, rec, dbg.Stack())
}
if err != nil && d != nil {
d.Close()
d = nil
}
}()

d.f, err = os.Open(compressedFilePath)
Expand Down Expand Up @@ -200,19 +204,19 @@ func NewDecompressor(compressedFilePath string) (d *Decompressor, err error) {
var patternMaxDepth uint64

for dictPos < dictSize {
d, ns := binary.Uvarint(data[dictPos:])
if d > maxAllowedDepth {
return nil, fmt.Errorf("dictionary is invalid: patternMaxDepth=%d", d)
depth, ns := binary.Uvarint(data[dictPos:])
if depth > maxAllowedDepth {
return nil, fmt.Errorf("dictionary is invalid: patternMaxDepth=%depth", depth)
}
depths = append(depths, d)
if d > patternMaxDepth {
patternMaxDepth = d
depths = append(depths, depth)
if depth > patternMaxDepth {
patternMaxDepth = depth
}
dictPos += uint64(ns)
l, n := binary.Uvarint(data[dictPos:])
dictPos += uint64(n)
patterns = append(patterns, data[dictPos:dictPos+l])
//fmt.Printf("depth = %d, pattern = [%x]\n", d, data[dictPos:dictPos+l])
//fmt.Printf("depth = %d, pattern = [%x]\n", depth, data[dictPos:dictPos+l])
dictPos += l
}

Expand Down Expand Up @@ -241,13 +245,14 @@ func NewDecompressor(compressedFilePath string) (d *Decompressor, err error) {

dictPos = 0
for dictPos < dictSize {
d, ns := binary.Uvarint(data[dictPos:])
if d > maxAllowedDepth {
return nil, fmt.Errorf("dictionary is invalid: posMaxDepth=%d", d)
depth, ns := binary.Uvarint(data[dictPos:])
if depth > maxAllowedDepth {
d.Close()
return nil, fmt.Errorf("dictionary is invalid: posMaxDepth=%d", depth)
}
posDepths = append(posDepths, d)
if d > posMaxDepth {
posMaxDepth = d
posDepths = append(posDepths, depth)
if depth > posMaxDepth {
posMaxDepth = depth
}
dictPos += uint64(ns)
pos, n := binary.Uvarint(data[dictPos:])
Expand Down Expand Up @@ -277,8 +282,6 @@ func NewDecompressor(compressedFilePath string) (d *Decompressor, err error) {
d.wordsStart = pos + 8 + dictSize

if d.Count() == 0 && dictSize == 0 && d.size > compressedMinSize {
d.Close()

return nil, fmt.Errorf("corrupted file: size %v but no words in it: %v",
fName, datasize.ByteSize(d.size).HR())
}
Expand Down
92 changes: 92 additions & 0 deletions erigon-lib/seg/decompress_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,98 @@ func TestUncompressed(t *testing.T) {
}
}

// TestDecompressor_OpenCorrupted exercises NewDecompressor against several
// kinds of input files:
//
//   - files freshly produced by a Compressor (with and without words, added
//     via AddWord or AddUncompressedWord), which must open without error;
//   - a path that does not exist, which must yield an error and a nil
//     decompressor;
//   - a file filled with random bytes, which must yield an error and a nil
//     decompressor.
func TestDecompressor_OpenCorrupted(t *testing.T) {
	logger := log.New()
	tmpDir := t.TempDir()

	// Files written by the Compressor itself; every one of them is expected
	// to be opened successfully.
	validFiles := []struct {
		name       string // subtest name
		fileName   string // file created inside tmpDir
		compressed bool   // true: AddWord, false: AddUncompressedWord
		empty      bool   // true: no words are added before Compress
	}{
		{name: "uncompressed", fileName: "unc"},
		// this file is empty and its size will be 32 bytes, it's not corrupted
		{name: "uncompressed_empty", fileName: "unc_empty", empty: true},
		{name: "compressed", fileName: "comp", compressed: true},
		{name: "compressed_empty", fileName: "comp_empty", compressed: true, empty: true},
	}

	for _, tc := range validFiles {
		t.Run(tc.name, func(t *testing.T) {
			file := filepath.Join(tmpDir, tc.fileName)
			c, err := NewCompressor(context.Background(), t.Name(), file, tmpDir, 1, 2, log.LvlDebug, logger)
			require.NoError(t, err)
			defer c.Close()

			if !tc.empty {
				for k, w := range loremStrings {
					word := []byte(fmt.Sprintf("%s %d", w, k))
					if tc.compressed {
						err = c.AddWord(word)
					} else {
						err = c.AddUncompressedWord(word)
					}
					if err != nil {
						t.Fatal(err)
					}
				}
			}
			require.NoError(t, c.Compress())

			d, err := NewDecompressor(file)
			require.NoError(t, err)
			d.Close()
		})
	}

	t.Run("notExist", func(t *testing.T) {
		file := filepath.Join(tmpDir, "comp_bad")
		d, err := NewDecompressor(file)
		require.Error(t, err, "file is not exist")
		require.Nil(t, d)
	})

	t.Run("gibberish", func(t *testing.T) {
		// NOTE(review): both the length (0..128 bytes) and the content are
		// random, so this case is not strictly deterministic; the assertion
		// message below already acknowledges that an unlucky draw could
		// produce a file the decompressor accepts.
		aux := make([]byte, rand.Intn(129))
		_, err := rand.Read(aux)
		require.NoError(t, err)

		fpath := filepath.Join(tmpDir, "gibberish")
		err = os.WriteFile(fpath, aux, 0644)
		require.NoError(t, err)

		d, err := NewDecompressor(fpath)
		require.Error(t, err, "file is some garbage or smaller 32 bytes. Or we got exactly 32 zeros from /rand")
		require.Nil(t, d)
	})
}

const lorem = `Lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et
dolore magna aliqua Ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur
Expand Down

0 comments on commit 400c9ba

Please sign in to comment.