Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

splitstore shed utils #6811

Merged
merged 28 commits into from
Jul 26, 2021
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
0c68bcc
add splitstore rollback lotus-shed command
vyzo Jul 21, 2021
92b9d8c
quiet linter
vyzo Jul 21, 2021
36b209c
compact and gc coldstore after copying
vyzo Jul 21, 2021
e696a2c
fix newline in progres message
vyzo Jul 21, 2021
da66e7a
fix typo
vyzo Jul 21, 2021
33cdc90
fix typo
vyzo Jul 22, 2021
254c489
fix typo
vyzo Jul 22, 2021
ce6f410
add options to control compaction/gc of the coldstore and config rewr…
vyzo Jul 22, 2021
e317c83
quiet excessive badger logs
vyzo Jul 25, 2021
1918ffd
implement splitstore check
vyzo Jul 25, 2021
3d2ae43
add ChainCheckBlockstore API
vyzo Jul 25, 2021
c99dc3e
add splitstore check command
vyzo Jul 25, 2021
5b2e4d8
add permission tag to ChainCheckBlockstore API
vyzo Jul 25, 2021
21bb2bd
make gen
vyzo Jul 25, 2021
5718da2
handle newlines consistently in check output
vyzo Jul 25, 2021
5285a14
write check summary at the end
vyzo Jul 25, 2021
2dc72d5
satisfy linter who wants to be a spell checker in comments
vyzo Jul 25, 2021
c00b86e
stop the walk on missing references
vyzo Jul 25, 2021
a0d6fdb
add ChainBlockstoreInfo APIv1 endpoint
vyzo Jul 26, 2021
77604db
make gen
vyzo Jul 26, 2021
baaa9a7
add BlockstoreSize trait for reporting size
vyzo Jul 26, 2021
30e4b40
implement BlockstoreSize for badger
vyzo Jul 26, 2021
ce528a1
implement Info in splitstore
vyzo Jul 26, 2021
221dc70
add splitstore info command
vyzo Jul 26, 2021
74009bd
document lotus-shed splitstore utiilities in the README
vyzo Jul 26, 2021
2cfd73c
manually compute size when badger is being stupid
vyzo Jul 26, 2021
3c409d5
require admin for running checks on the splitstore.
vyzo Jul 26, 2021
1f6935f
make gen
vyzo Jul 26, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions api/api_full.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,13 @@ type FullNode interface {
// If oldmsgskip is set, messages from before the requested roots are also not included.
ChainExport(ctx context.Context, nroots abi.ChainEpoch, oldmsgskip bool, tsk types.TipSetKey) (<-chan []byte, error) //perm:read

// ChainCheckBlockstore performs an (asynchronous) health check on the chain/state blockstore
// if supported by the underlying implementation.
ChainCheckBlockstore(context.Context) error //perm:read
vyzo marked this conversation as resolved.
Show resolved Hide resolved

// ChainBlockstoreInfo returns some basic information about the blockstore
ChainBlockstoreInfo(context.Context) (map[string]interface{}, error) //perm:read

// MethodGroup: Beacon
// The Beacon method group contains methods for interacting with the random beacon (DRAND)

Expand Down
29 changes: 29 additions & 0 deletions api/mocks/mock_full.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions api/proxy_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 37 additions & 2 deletions blockstore/badger/blockstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"runtime"
"sync"

Expand Down Expand Up @@ -84,7 +86,8 @@ type Blockstore struct {
state int
viewers sync.WaitGroup

DB *badger.DB
DB *badger.DB
opts Options

prefixing bool
prefix []byte
Expand All @@ -95,6 +98,7 @@ var _ blockstore.Blockstore = (*Blockstore)(nil)
var _ blockstore.Viewer = (*Blockstore)(nil)
var _ blockstore.BlockstoreIterator = (*Blockstore)(nil)
var _ blockstore.BlockstoreGC = (*Blockstore)(nil)
var _ blockstore.BlockstoreSize = (*Blockstore)(nil)
var _ io.Closer = (*Blockstore)(nil)

// Open creates a new badger-backed blockstore, with the supplied options.
Expand All @@ -109,7 +113,7 @@ func Open(opts Options) (*Blockstore, error) {
return nil, fmt.Errorf("failed to open badger blockstore: %w", err)
}

bs := &Blockstore{DB: db}
bs := &Blockstore{DB: db, opts: opts}
if p := opts.Prefix; p != "" {
bs.prefixing = true
bs.prefix = []byte(p)
Expand Down Expand Up @@ -191,6 +195,37 @@ func (b *Blockstore) CollectGarbage() error {
return err
}

// Size returns the aggregate size of the blockstore.
//
// The figure normally comes from badger itself (LSM size plus value log
// size). When badger reports zero -- which it does on symlinked directories --
// the size is computed manually by summing the sizes of the entries found
// directly inside the configured directory (b.opts.Dir).
//
// An error is returned if b.access() fails, if the directory cannot be listed,
// or if an entry cannot be stat'ed for any reason other than having vanished.
func (b *Blockstore) Size() (int64, error) {
	if err := b.access(); err != nil {
		return 0, err
	}
	defer b.viewers.Done()

	lsm, vlog := b.DB.Size()
	size := lsm + vlog
	if size != 0 {
		return size, nil
	}

	// badger reports a 0 size on symlinked directories... sigh
	dir := b.opts.Dir
	entries, err := os.ReadDir(dir)
	if err != nil {
		return 0, fmt.Errorf("failed to read blockstore directory %s: %w", dir, err)
	}

	for _, e := range entries {
		path := filepath.Join(dir, e.Name())
		// os.Stat (rather than e.Info) so that symlinked entries are followed.
		finfo, err := os.Stat(path)
		if err != nil {
			if os.IsNotExist(err) {
				// The store is live: a file may be removed between the
				// directory listing and the stat. A vanished file contributes
				// nothing to the size, so skip it instead of failing the call.
				continue
			}
			return 0, fmt.Errorf("failed to stat %s: %w", path, err)
		}
		size += finfo.Size()
	}

	return size, nil
}

// View implements blockstore.Viewer, which leverages zero-copy read-only
// access to values.
func (b *Blockstore) View(cid cid.Cid, fn func([]byte) error) error {
Expand Down
5 changes: 5 additions & 0 deletions blockstore/blockstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ type BlockstoreGC interface {
CollectGarbage() error
}

// BlockstoreSize is a trait for on-disk blockstores that can report their size.
// It is optional: callers are expected to discover it with a type assertion on
// a blockstore value rather than require it.
type BlockstoreSize interface {
// Size returns the aggregate size of the blockstore (presumably in bytes --
// confirm against the implementation), or an error if it cannot be determined.
Size() (int64, error)
}

// WrapIDStore wraps the underlying blockstore in an "identity" blockstore.
// The ID store filters out all puts for blocks with CIDs using the "identity"
// hash function. It also extracts inlined blocks from CIDs using the identity
Expand Down
14 changes: 14 additions & 0 deletions blockstore/splitstore/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,17 @@ Compaction works transactionally with the following algorithm:
## Garbage Collection

TBD -- see [#6577](https://github.com/filecoin-project/lotus/issues/6577)

## Utilities

`lotus-shed` has a `splitstore` command which provides some utilities:

- `rollback` -- rolls back a splitstore installation.
This command copies the hotstore on top of the coldstore, and then deletes the splitstore
directory and associated metadata keys.
It can also optionally compact/gc the coldstore after the copy (with the `--gc-coldstore` flag)
and automatically rewrite the lotus config to disable splitstore (with the `--rewrite-config` flag).
Note: the node *must be stopped* before running this command.
- `check` -- asynchronously runs a basic health check on the splitstore.
The results are appended to `<lotus-repo>/datastore/splitstore/check.txt`.
- `info` -- prints some basic information about the splitstore.
4 changes: 3 additions & 1 deletion blockstore/splitstore/splitstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ type SplitStore struct {
compacting int32 // compaction/prune/warmup in progress
closing int32 // the splitstore is closing

cfg *Config
cfg *Config
path string

mx sync.Mutex
warmupEpoch abi.ChainEpoch // protected by mx
Expand Down Expand Up @@ -169,6 +170,7 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co
// and now we can make a SplitStore
ss := &SplitStore{
cfg: cfg,
path: path,
ds: ds,
cold: cold,
hot: hots,
Expand Down
150 changes: 150 additions & 0 deletions blockstore/splitstore/splitstore_check.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
package splitstore

import (
"fmt"
"os"
"path/filepath"
"sync/atomic"
"time"

"golang.org/x/xerrors"

cid "github.com/ipfs/go-cid"

bstore "github.com/filecoin-project/lotus/blockstore"
"github.com/filecoin-project/lotus/chain/types"
)

// Check starts an asynchronous health check of the splitstore; results are
// appended to <splitstore-path>/check.txt.
//
// The call itself only validates preconditions and then returns: it fails if
// the compaction lock is already taken, if the splitstore has never compacted
// (the check would not be meaningful), or if the splitstore is closing. The
// check proper runs in a background goroutine which holds the compaction lock
// until it finishes; its outcome is reported through the log and the output
// file, not through the returned error.
func (s *SplitStore) Check() error {
	s.headChangeMx.Lock()
	defer s.headChangeMx.Unlock()

	// the compacting flag doubles as a lock: holding it inhibits compaction
	// for as long as the health check is running
	if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) {
		return xerrors.Errorf("can't acquire compaction lock; compacting operation in progress")
	}
	release := func() { atomic.StoreInt32(&s.compacting, 0) }

	if s.compactionIndex == 0 {
		release()
		return xerrors.Errorf("splitstore hasn't compacted yet; health check is not meaningful")
	}

	// don't start anything if we are in the middle of closing
	if closingErr := s.checkClosing(); closingErr != nil {
		release()
		return closingErr
	}

	head := s.chain.GetHeaviestTipSet()
	go func() {
		// the background check owns the compaction lock from here on
		defer release()

		log.Info("checking splitstore health")
		began := time.Now()

		if checkErr := s.doCheck(head); checkErr != nil {
			log.Errorf("error checking splitstore health: %s", checkErr)
			return
		}

		log.Infow("health check done", "took", time.Since(began))
	}()

	return nil
}

// doCheck runs the actual health check against the given tipset and appends a
// report to <splitstore-path>/check.txt.
//
// It walks the chain from curTs with the boundary set CompactionBoundary
// epochs behind the current epoch. Every visited object that is not in the
// hotstore is reported: as a "cold object reference" when the coldstore has
// it, or as a "missing object reference" when neither store does. A summary
// with both counts is written at the end.
//
// An error is returned if the report file cannot be opened or if the walk
// fails; failures to write individual report lines are only logged.
func (s *SplitStore) doCheck(curTs *types.TipSet) error {
	currentEpoch := curTs.Height()
	boundaryEpoch := currentEpoch - CompactionBoundary

	reportPath := filepath.Join(s.path, "check.txt")
	report, err := os.OpenFile(reportPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
	if err != nil {
		return xerrors.Errorf("error opening check output file %s: %w", reportPath, err)
	}
	defer report.Close() //nolint:errcheck

	// emit appends a single newline-terminated line to the report
	emit := func(format string, args ...interface{}) {
		if _, werr := fmt.Fprintf(report, format+"\n", args...); werr != nil {
			log.Warnf("error writing check output: %s", werr)
		}
	}

	stamp, _ := time.Now().MarshalText()
	emit("---------------------------------------------")
	emit("start check at %s", stamp)
	emit("current epoch: %d", currentEpoch)
	emit("boundary epoch: %d", boundaryEpoch)
	emit("compaction index: %d", s.compactionIndex)
	emit("--")

	var coldCnt, missingCnt int64

	// visit classifies one object reached by the walk.
	// NOTE(review): errStopWalk presumably tells walkChain not to descend
	// further from this object -- confirm against walkChain.
	visit := func(c cid.Cid) error {
		if isUnitaryObject(c) {
			return errStopWalk
		}

		inHot, err := s.hot.Has(c)
		if err != nil {
			return xerrors.Errorf("error checking hotstore: %w", err)
		}
		if inHot {
			return nil
		}

		inCold, err := s.cold.Has(c)
		if err != nil {
			return xerrors.Errorf("error checking coldstore: %w", err)
		}

		if !inCold {
			missingCnt++
			emit("missing object reference: %s", c)
			return errStopWalk
		}

		coldCnt++
		emit("cold object reference: %s", c)
		return nil
	}

	if err := s.walkChain(curTs, boundaryEpoch, boundaryEpoch, visit); err != nil {
		err = xerrors.Errorf("error walking chain: %w", err)
		emit("ERROR: %s", err)
		return err
	}

	log.Infow("check done", "cold", coldCnt, "missing", missingCnt)
	emit("--")
	emit("cold: %d missing: %d", coldCnt, missingCnt)
	emit("DONE")

	return nil
}

// Info provides some basic information about the splitstore.
//
// The returned map always contains "base epoch", "warmup epoch" and
// "compactions". It additionally contains "hotstore size" when the hotstore
// implements bstore.BlockstoreSize and reports its size successfully; a
// failure to obtain the size is logged and the key is simply omitted.
func (s *SplitStore) Info() map[string]interface{} {
	// warmupEpoch is protected by mx, so take a snapshot under the lock rather
	// than reading it bare.
	s.mx.Lock()
	warmupEpoch := s.warmupEpoch
	s.mx.Unlock()

	info := make(map[string]interface{})
	info["base epoch"] = s.baseEpoch
	info["warmup epoch"] = warmupEpoch
	info["compactions"] = s.compactionIndex

	// size reporting is an optional trait of the hotstore
	if sizer, ok := s.hot.(bstore.BlockstoreSize); ok {
		size, err := sizer.Size()
		if err != nil {
			log.Warnf("error getting hotstore size: %s", err)
		} else {
			info["hotstore size"] = size
		}
	}

	return info
}
Binary file modified build/openrpc/full.json.gz
Binary file not shown.
Binary file modified build/openrpc/miner.json.gz
Binary file not shown.
Binary file modified build/openrpc/worker.json.gz
Binary file not shown.
1 change: 1 addition & 0 deletions cmd/lotus-shed/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ func main() {
actorCmd,
minerTypesCmd,
minerMultisigsCmd,
splitstoreCmd,
}

app := &cli.App{
Expand Down
Loading