From 2d11484afb3b5da14bd70ca0fa10e26f923b4d08 Mon Sep 17 00:00:00 2001 From: Daniel Mai Date: Fri, 9 Oct 2020 18:07:22 -0700 Subject: [PATCH] [BREAKING] feat(bulk): Use snappy compression by default. Like #6616, add a --badger.compression flag to dgraph bulk which allows changing the compression algorithm. This changes the default from Zstd (level 1) to Snappy. $ dgraph bulk --help ... Flags: ... --badger.compression string [none, zstd:level, snappy] Specifies the compression algorithm and the compression level (if applicable) for the postings directory. none would disable compression, while zstd:1 would set zstd compression at level 1. (default "snappy") --- dgraph/cmd/bulk/loader.go | 3 +++ dgraph/cmd/bulk/reduce.go | 2 +- dgraph/cmd/bulk/run.go | 10 +++++++--- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/dgraph/cmd/bulk/loader.go b/dgraph/cmd/bulk/loader.go index dd848ebfa78..56d9d7ace67 100644 --- a/dgraph/cmd/bulk/loader.go +++ b/dgraph/cmd/bulk/loader.go @@ -32,6 +32,7 @@ import ( "time" "github.com/dgraph-io/badger/v2" + bo "github.com/dgraph-io/badger/v2/options" "github.com/dgraph-io/badger/v2/y" "github.com/dgraph-io/dgraph/chunker" @@ -77,6 +78,8 @@ type options struct { // ........... Badger options .......... // EncryptionKey is the key used for encryption. Enterprise only feature. EncryptionKey x.SensitiveByteSlice + // BadgerCompression is the compression algorithm to use while writing to badger. + BadgerCompression bo.CompressionType // BadgerCompressionlevel is the compression level to use while writing to badger. BadgerCompressionLevel int BlockCacheSize int64 diff --git a/dgraph/cmd/bulk/reduce.go b/dgraph/cmd/bulk/reduce.go index ab337b205ab..4c62d6c767e 100644 --- a/dgraph/cmd/bulk/reduce.go +++ b/dgraph/cmd/bulk/reduce.go @@ -154,7 +154,7 @@ func (r *reducer) createBadgerInternal(dir string, compression bool) *badger.DB opt.ZSTDCompressionLevel = 0 // Overwrite badger options based on the options provided by the user. 
if compression { - opt.Compression = bo.ZSTD + opt.Compression = r.state.opt.BadgerCompression opt.ZSTDCompressionLevel = r.state.opt.BadgerCompressionLevel } diff --git a/dgraph/cmd/bulk/run.go b/dgraph/cmd/bulk/run.go index 06bf3e6a8fc..9fa1a273d96 100644 --- a/dgraph/cmd/bulk/run.go +++ b/dgraph/cmd/bulk/run.go @@ -108,8 +108,10 @@ func init() { "Ignore UIDs in load files and assign new ones.") // Options around how to set up Badger. - flag.Int("badger.compression_level", 1, - "The compression level for Badger. A higher value uses more resources.") + flag.String("badger.compression", "snappy", + "[none, zstd:level, snappy] Specifies the compression algorithm and the compression"+ + " level (if applicable) for the postings directory. none would disable compression,"+ + " while zstd:1 would set zstd compression at level 1.") flag.Int64("badger.cache_mb", 64, "Total size of cache (in MB) per shard in reducer.") flag.String("badger.cache_percentage", "70,30", "Cache percentages summing up to 100 for various caches"+ @@ -120,6 +122,7 @@ func init() { } func run() { + ctype, clevel := x.ParseCompression(Bulk.Conf.GetString("badger.compression")) opt := options{ DataFiles: Bulk.Conf.GetString("files"), DataFormat: Bulk.Conf.GetString("format"), @@ -147,7 +150,8 @@ func run() { NewUids: Bulk.Conf.GetBool("new_uids"), ClientDir: Bulk.Conf.GetString("xidmap"), // Badger options - BadgerCompressionLevel: Bulk.Conf.GetInt("badger.compression_level"), + BadgerCompression: ctype, + BadgerCompressionLevel: clevel, } x.PrintVersion()