From 2d11484afb3b5da14bd70ca0fa10e26f923b4d08 Mon Sep 17 00:00:00 2001 From: Daniel Mai Date: Fri, 9 Oct 2020 18:07:22 -0700 Subject: [PATCH] [BREAKING] feat(bulk): Use snappy compression by default. Like #6616, add a --badger.compression flag to dgraph bulk which allows changing the compression algorithm. This changes the default from Zstd (level 1) to Snappy. $ dgraph bulk --help ... Flags: ... --badger.compression string [none, zstd:level, snappy] Specifies the compression algorithm and the compression level (if applicable) for the postings directory. none would disable compression, while zstd:1 would set zstd compression at level 1. (default "snappy") --- dgraph/cmd/bulk/loader.go | 3 +++ dgraph/cmd/bulk/reduce.go | 2 +- dgraph/cmd/bulk/run.go | 10 +++++++--- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/dgraph/cmd/bulk/loader.go b/dgraph/cmd/bulk/loader.go index dd848ebfa78..56d9d7ace67 100644 --- a/dgraph/cmd/bulk/loader.go +++ b/dgraph/cmd/bulk/loader.go @@ -32,6 +32,7 @@ import ( "time" "github.com/dgraph-io/badger/v2" + bo "github.com/dgraph-io/badger/v2/options" "github.com/dgraph-io/badger/v2/y" "github.com/dgraph-io/dgraph/chunker" @@ -77,6 +78,8 @@ type options struct { // ........... Badger options .......... // EncryptionKey is the key used for encryption. Enterprise only feature. EncryptionKey x.SensitiveByteSlice + // BadgerCompression is the compression algorithm to use while writing to badger. + BadgerCompression bo.CompressionType // BadgerCompressionlevel is the compression level to use while writing to badger. BadgerCompressionLevel int BlockCacheSize int64 diff --git a/dgraph/cmd/bulk/reduce.go b/dgraph/cmd/bulk/reduce.go index ab337b205ab..4c62d6c767e 100644 --- a/dgraph/cmd/bulk/reduce.go +++ b/dgraph/cmd/bulk/reduce.go @@ -154,7 +154,7 @@ func (r *reducer) createBadgerInternal(dir string, compression bool) *badger.DB opt.ZSTDCompressionLevel = 0 // Overwrite badger options based on the options provided by the user. 
if compression { - opt.Compression = bo.ZSTD + opt.Compression = r.state.opt.BadgerCompression opt.ZSTDCompressionLevel = r.state.opt.BadgerCompressionLevel } diff --git a/dgraph/cmd/bulk/run.go b/dgraph/cmd/bulk/run.go index 06bf3e6a8fc..9fa1a273d96 100644 --- a/dgraph/cmd/bulk/run.go +++ b/dgraph/cmd/bulk/run.go @@ -108,8 +108,10 @@ func init() { "Ignore UIDs in load files and assign new ones.") // Options around how to set up Badger. - flag.Int("badger.compression_level", 1, - "The compression level for Badger. A higher value uses more resources.") + flag.String("badger.compression", "snappy", + "[none, zstd:level, snappy] Specifies the compression algorithm and the compression"+ + " level (if applicable) for the postings directory. none would disable compression,"+ + " while zstd:1 would set zstd compression at level 1.") flag.Int64("badger.cache_mb", 64, "Total size of cache (in MB) per shard in reducer.") flag.String("badger.cache_percentage", "70,30", "Cache percentages summing up to 100 for various caches"+ @@ -120,6 +122,7 @@ func init() { } func run() { + ctype, clevel := x.ParseCompression(Bulk.Conf.GetString("badger.compression")) opt := options{ DataFiles: Bulk.Conf.GetString("files"), DataFormat: Bulk.Conf.GetString("format"), @@ -147,7 +150,8 @@ func run() { NewUids: Bulk.Conf.GetBool("new_uids"), ClientDir: Bulk.Conf.GetString("xidmap"), // Badger options - BadgerCompressionLevel: Bulk.Conf.GetInt("badger.compression_level"), + BadgerCompression: ctype, + BadgerCompressionLevel: clevel, } x.PrintVersion()