From 034f65e9e8976f3ca7d9622e4c362ad9c26ad5f7 Mon Sep 17 00:00:00 2001 From: holisticode Date: Wed, 6 Feb 2019 06:16:43 -0500 Subject: [PATCH] swarm/storage: Get all chunk references for a given file (#19002) (cherry picked from commit 3eff652a7b606f25d43bef6ccb998b8e306f8a75) --- swarm/storage/filestore.go | 40 +++++++++++++++++++++++++++++++++ swarm/storage/filestore_test.go | 36 +++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/swarm/storage/filestore.go b/swarm/storage/filestore.go index 2d8d82d95a50..aebe03c1ef6b 100644 --- a/swarm/storage/filestore.go +++ b/swarm/storage/filestore.go @@ -19,6 +19,7 @@ package storage import ( "context" "io" + "sort" ) /* @@ -96,3 +97,42 @@ func (f *FileStore) Store(ctx context.Context, data io.Reader, size int64, toEnc func (f *FileStore) HashSize() int { return f.hashFunc().Size() } + +// GetAllReferences is a public API. This endpoint returns all chunk hashes (only) for a given file +func (f *FileStore) GetAllReferences(ctx context.Context, data io.Reader, toEncrypt bool) (addrs AddressCollection, err error) { + // create a special kind of putter, which will only store the references + putter := &HashExplorer{ + hasherStore: NewHasherStore(f.ChunkStore, f.hashFunc, toEncrypt), + References: make([]Reference, 0), + } + // do the actual splitting anyway, no way around it + _, _, err = PyramidSplit(ctx, data, putter, putter) + if err != nil { + return nil, err + } + // collect all references + addrs = NewAddressCollection(0) + for _, ref := range putter.References { + addrs = append(addrs, Address(ref)) + } + sort.Sort(addrs) + return addrs, nil +} + +// HashExplorer is a special kind of putter which will only store chunk references +type HashExplorer struct { + *hasherStore + References []Reference +} + +// Put will add just the chunk hashes to the HashExplorer's `References` +func (he *HashExplorer) Put(ctx context.Context, chunkData ChunkData) (Reference, error) { + // Need to do the actual Put, which returns the 
references + ref, err := he.hasherStore.Put(ctx, chunkData) + if err != nil { + return nil, err + } + // internally store the reference + he.References = append(he.References, ref) + return ref, nil +} diff --git a/swarm/storage/filestore_test.go b/swarm/storage/filestore_test.go index fb0f761a4a6d..2dbccdf114e2 100644 --- a/swarm/storage/filestore_test.go +++ b/swarm/storage/filestore_test.go @@ -173,3 +173,39 @@ func testFileStoreCapacity(toEncrypt bool, t *testing.T) { t.Fatalf("Comparison error after clearing memStore.") } } + +// TestGetAllReferences only tests that GetAllReferences returns an expected +// number of references for a given file +func TestGetAllReferences(t *testing.T) { + tdb, cleanup, err := newTestDbStore(false, false) + defer cleanup() + if err != nil { + t.Fatalf("init dbStore failed: %v", err) + } + db := tdb.LDBStore + memStore := NewMemStore(NewDefaultStoreParams(), db) + localStore := &LocalStore{ + memStore: memStore, + DbStore: db, + } + fileStore := NewFileStore(localStore, NewFileStoreParams()) + + checkRefs := func(dataSize int, expectedLen int) { + slice := testutil.RandomBytes(1, dataSize) + + addrs, err := fileStore.GetAllReferences(context.Background(), bytes.NewReader(slice), false) + if err != nil { + t.Fatal(err) + } + if len(addrs) != expectedLen { + t.Fatalf("Expected reference array length to be %d, but is %d", expectedLen, len(addrs)) + } + } + + // testRuns[i] and expectedLens[i] are dataSize and expected length respectively + testRuns := []int{1024, 8192, 16000, 30000, 1000000} + expectedLens := []int{1, 3, 5, 9, 248} + for i, r := range testRuns { + checkRefs(r, expectedLens[i]) + } +}