From 211e4e31874358187a5e1304937584b506e09135 Mon Sep 17 00:00:00 2001 From: Pedro Teixeira Date: Sun, 12 Nov 2017 11:39:52 +0000 Subject: [PATCH] feat: exporter maxDepth (#197) * big exporter overhaul: centralized dag resolving inside internal resolver stream * exporter: maxDepth instead of recursive flag * exporter: exposing name and depth * exporter: exposing dir size * tests: increased timeout for importing big file * tests: test exporter maxDepth * fixed linting error * exporter: exporting link size instead of node size to mimic go-ipfs * test: moving timeout def to the test top * tests: fixed this.timeout because arrow functions --- package.json | 1 - src/exporter/dir-flat.js | 32 +++++++------- src/exporter/dir-hamt-sharded.js | 47 ++++++++------------- src/exporter/file.js | 15 ++++--- src/exporter/index.js | 72 ++++++++++++++++++++++++-------- src/exporter/object.js | 37 +++++++--------- src/exporter/resolve.js | 56 +++++++++++++++++++++---- src/importer/tree-builder.js | 2 +- test/builder-dir-sharding.js | 2 +- test/exporter-subtree.js | 4 +- test/exporter.js | 59 ++++++++++++++++++++++---- test/importer.js | 6 ++- 12 files changed, 219 insertions(+), 114 deletions(-) diff --git a/package.json b/package.json index 50f540a8..fb2a8a0b 100644 --- a/package.json +++ b/package.json @@ -70,7 +70,6 @@ "pull-batch": "^1.0.0", "pull-block": "1.2.0", "pull-cat": "^1.1.11", - "pull-defer": "~0.2.2", "pull-pair": "^1.1.0", "pull-paramap": "^1.2.2", "pull-pause": "0.0.1", diff --git a/src/exporter/dir-flat.js b/src/exporter/dir-flat.js index 9392018e..9a75f6fa 100644 --- a/src/exporter/dir-flat.js +++ b/src/exporter/dir-flat.js @@ -1,38 +1,38 @@ 'use strict' const pull = require('pull-stream') -const paramap = require('pull-paramap') -const CID = require('cids') const cat = require('pull-cat') // Logic to export a unixfs directory. 
module.exports = dirExporter -function dirExporter (node, name, pathRest, ipldResolver, resolve, parent) { +function dirExporter (node, name, path, pathRest, resolve, size, dag, parent, depth) { const accepts = pathRest[0] const dir = { - path: name, - hash: node.multihash + name: name, + depth: depth, + path: path, + hash: node.multihash, + size: node.size, + type: 'dir' } const streams = [ pull( pull.values(node.links), pull.map((link) => ({ + depth: depth + 1, + size: link.size, + name: link.name, + path: path + '/' + link.name, + multihash: link.multihash, linkName: link.name, - path: name + '/' + link.name, - hash: link.multihash + pathRest: pathRest.slice(1), + type: 'dir' })), pull.filter((item) => accepts === undefined || item.linkName === accepts), - paramap((item, cb) => ipldResolver.get(new CID(item.hash), (err, n) => { - if (err) { - return cb(err) - } - - cb(null, resolve(n.value, accepts || item.path, pathRest, ipldResolver, name, parent)) - })), - pull.flatten() + resolve ) ] @@ -41,7 +41,5 @@ function dirExporter (node, name, pathRest, ipldResolver, resolve, parent) { streams.unshift(pull.values([dir])) } - pathRest.shift() - return cat(streams) } diff --git a/src/exporter/dir-hamt-sharded.js b/src/exporter/dir-hamt-sharded.js index 1bf81c51..6569d2a4 100644 --- a/src/exporter/dir-hamt-sharded.js +++ b/src/exporter/dir-hamt-sharded.js @@ -1,21 +1,23 @@ 'use strict' const pull = require('pull-stream') -const paramap = require('pull-paramap') -const CID = require('cids') const cat = require('pull-cat') const cleanHash = require('./clean-multihash') // Logic to export a unixfs directory. 
module.exports = shardedDirExporter -function shardedDirExporter (node, name, pathRest, ipldResolver, resolve, parent) { +function shardedDirExporter (node, name, path, pathRest, resolve, size, dag, parent, depth) { let dir - if (!parent || parent.path !== name) { - dir = [{ - path: name, - hash: cleanHash(node.multihash) - }] + if (!parent || (parent.path !== path)) { + dir = { + name: name, + depth: depth, + path: path, + hash: cleanHash(node.multihash), + size: node.size, + type: 'dir' + } } const streams = [ @@ -24,47 +26,32 @@ function shardedDirExporter (node, name, pathRest, ipldResolver, resolve, parent pull.map((link) => { // remove the link prefix (2 chars for the bucket index) const p = link.name.substring(2) - const pp = p ? name + '/' + p : name + const pp = p ? path + '/' + p : path let accept = true - let fromPathRest = false if (p && pathRest.length) { - fromPathRest = true accept = (p === pathRest[0]) } if (accept) { return { - fromPathRest: fromPathRest, + depth: depth + 1, name: p, path: pp, - hash: link.multihash, - pathRest: p ? pathRest.slice(1) : pathRest + multihash: link.multihash, + pathRest: p ? pathRest.slice(1) : pathRest, + parent: dir || parent } } else { return '' } }), pull.filter(Boolean), - paramap((item, cb) => ipldResolver.get(new CID(item.hash), (err, n) => { - if (err) { - return cb(err) - } - - cb( - null, - resolve( - n.value, - item.fromPathRest ? item.name : item.path, - item.pathRest, - ipldResolver, - (dir && dir[0]) || parent)) - })), - pull.flatten() + resolve ) ] if (!pathRest.length) { - streams.unshift(pull.values(dir)) + streams.unshift(pull.values([dir])) } return cat(streams) diff --git a/src/exporter/file.js b/src/exporter/file.js index 67763073..38259eb4 100644 --- a/src/exporter/file.js +++ b/src/exporter/file.js @@ -7,7 +7,7 @@ const pull = require('pull-stream') const paramap = require('pull-paramap') // Logic to export a single (possibly chunked) unixfs file. 
-module.exports = (node, name, pathRest, ipldResolver) => { +module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth) => { function getData (node) { try { const file = UnixFS.unmarshal(node.data) @@ -20,14 +20,14 @@ module.exports = (node, name, pathRest, ipldResolver) => { function visitor (node) { return pull( pull.values(node.links), - paramap((link, cb) => ipldResolver.get(new CID(link.multihash), cb)), + paramap((link, cb) => dag.get(new CID(link.multihash), cb)), pull.map((result) => result.value) ) } - const accepts = pathRest.shift() + const accepts = pathRest[0] - if (accepts !== undefined && accepts !== name) { + if (accepts !== undefined && accepts !== path) { return pull.empty() } @@ -38,9 +38,12 @@ module.exports = (node, name, pathRest, ipldResolver) => { const file = UnixFS.unmarshal(node.data) return pull.values([{ + depth: depth, content: content, - path: name, + name: name, + path: path, hash: node.multihash, - size: file.fileSize() + size: size || file.fileSize(), + type: 'file' }]) } diff --git a/src/exporter/index.js b/src/exporter/index.js index 08017c9f..14a174dd 100644 --- a/src/exporter/index.js +++ b/src/exporter/index.js @@ -2,9 +2,8 @@ const pull = require('pull-stream') const CID = require('cids') -const pullDefer = require('pull-defer') -const resolve = require('./resolve').resolve +const createResolver = require('./resolve').createResolver function pathBaseAndRest (path) { // Buffer -> raw multihash or CID in buffer @@ -36,28 +35,67 @@ function pathBaseAndRest (path) { } } -module.exports = (path, dag) => { +const defaultOptions = { + maxDepth: Infinity +} + +module.exports = (path, dag, _options) => { + const options = Object.assign({}, defaultOptions, _options) + + let dPath try { - path = pathBaseAndRest(path) + dPath = pathBaseAndRest(path) } catch (err) { return pull.error(err) } - const d = pullDefer.source() + const pathLengthToCut = join( + [dPath.base].concat(dPath.rest.slice(0, dPath.rest.length - 
1))).length - const cid = new CID(path.base) + return pull( + pull.values([{ + multihash: new CID(dPath.base), + name: dPath.base, + path: dPath.base, + pathRest: dPath.rest, + depth: 0 + }]), + createResolver(dag, options), + pull.filter(Boolean), + pull.map((node) => { + return { + depth: node.depth, + name: node.name, + path: finalPathFor(node), + size: node.size, + hash: node.hash || node.multihash, + content: node.content, + type: node.type + } + }) + ) - dag.get(cid, (err, node) => { - if (err) { - return pull.error(err) + function finalPathFor (node) { + if (!dPath.rest.length) { + return node.path } - d.resolve(pull.values([node])) - }) - return pull( - d, - pull.map((result) => result.value), - pull.map((node) => resolve(node, path.base, path.rest, dag)), - pull.flatten() - ) + let retPath = node.path.substring(pathLengthToCut) + if (retPath.charAt(0) === '/') { + retPath = retPath.substring(1) + } + if (!retPath) { + retPath = dPath.rest[dPath.rest.length - 1] || dPath.base + } + return retPath + } +} + +function join (paths) { + return paths.reduce((acc, path) => { + if (acc.length) { + acc += '/' + } + return acc + path + }, '') } diff --git a/src/exporter/object.js b/src/exporter/object.js index af24a970..f6383eab 100644 --- a/src/exporter/object.js +++ b/src/exporter/object.js @@ -2,34 +2,29 @@ const CID = require('cids') const pull = require('pull-stream') -const pullDefer = require('pull-defer') -module.exports = (node, name, pathRest, ipldResolver, resolve) => { +module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth) => { let newNode if (pathRest.length) { - const pathElem = pathRest.shift() + const pathElem = pathRest[0] newNode = node[pathElem] - const newName = name + '/' + pathElem - if (CID.isCID(newNode)) { - const d = pullDefer.source() - ipldResolver.get(sanitizeCID(newNode), (err, newNode) => { - if (err) { - d.resolve(pull.error(err)) - } else { - d.resolve(resolve(newNode.value, newName, pathRest, ipldResolver, 
node)) - } - }) - return d - } else if (newNode !== undefined) { - return resolve(newNode, newName, pathRest, ipldResolver, node) - } else { + const newName = path + '/' + pathElem + if (!newNode) { return pull.error('not found') } + const isCID = CID.isCID(newNode) + return pull( + pull.values([{ + depth: depth, + name: pathElem, + path: newName, + pathRest: pathRest.slice(1), + multihash: isCID && newNode, + object: !isCID && newNode, + parent: parent + }]), + resolve) } else { return pull.error(new Error('invalid node type')) } } - -function sanitizeCID (cid) { - return new CID(cid.version, cid.codec, cid.multihash) -} diff --git a/src/exporter/resolve.js b/src/exporter/resolve.js index 71b1067a..1aa8976f 100644 --- a/src/exporter/resolve.js +++ b/src/exporter/resolve.js @@ -2,6 +2,8 @@ const UnixFS = require('ipfs-unixfs') const pull = require('pull-stream') +const paramap = require('pull-paramap') +const CID = require('cids') const resolvers = { directory: require('./dir-flat'), @@ -11,17 +13,53 @@ const resolvers = { } module.exports = Object.assign({ - resolve: resolve, + createResolver: createResolver, typeOf: typeOf }, resolvers) -function resolve (node, hash, pathRest, ipldResolver, parentNode) { - const type = typeOf(node) - const resolver = resolvers[type] - if (!resolver) { - return pull.error(new Error('Unkown node type ' + type)) +function createResolver (dag, options, depth, parent) { + if (!depth) { + depth = 0 + } + + if (depth > options.maxDepth) { + return pull.map(identity) + } + + return pull( + paramap((item, cb) => { + if ((typeof item.depth) !== 'number') { + return pull.error(new Error('no depth')) + } + if (item.object) { + return cb(null, resolveItem(item.object, item)) + } + dag.get(new CID(item.multihash), (err, node) => { + if (err) { + return cb(err) + } + // const name = item.fromPathRest ? 
item.name : item.path + cb(null, resolveItem(node.value, item)) + }) + }), + pull.flatten(), + pull.filter(Boolean), + pull.filter((node) => node.depth <= options.maxDepth) + ) + + function resolveItem (node, item) { + return resolve(node, item.name, item.path, item.pathRest, item.size, dag, item.parent || parent, item.depth) + } + + function resolve (node, name, path, pathRest, size, dag, parentNode, depth) { + const type = typeOf(node) + const nodeResolver = resolvers[type] + if (!nodeResolver) { + return pull.error(new Error('Unkown node type ' + type)) + } + const resolveDeep = createResolver(dag, options, depth, node) + return nodeResolver(node, name, path, pathRest, resolveDeep, size, dag, parentNode, depth) } - return resolver(node, hash, pathRest, ipldResolver, resolve, parentNode) } function typeOf (node) { @@ -31,3 +69,7 @@ function typeOf (node) { return 'object' } } + +function identity (o) { + return o +} diff --git a/src/importer/tree-builder.js b/src/importer/tree-builder.js index c04cb0ac..eaf5101f 100644 --- a/src/importer/tree-builder.js +++ b/src/importer/tree-builder.js @@ -91,7 +91,7 @@ function createTreeBuilder (ipldResolver, _options) { // ---- Add to tree function addToTree (elem, callback) { - const pathElems = elem.path.split('/').filter(notEmpty) + const pathElems = (elem.path || '').split('/').filter(notEmpty) let parent = tree const lastIndex = pathElems.length - 1 diff --git a/test/builder-dir-sharding.js b/test/builder-dir-sharding.js index 203e63c2..f2573ae3 100644 --- a/test/builder-dir-sharding.js +++ b/test/builder-dir-sharding.js @@ -91,7 +91,7 @@ module.exports = (repo) => { expect(nodes[0].path).to.be.eql(expectedHash) expect(mh.toB58String(nodes[0].hash)).to.be.eql(expectedHash) expect(nodes[1].path).to.be.eql(expectedHash + '/b') - expect(nodes[1].size).to.be.eql(21) + expect(nodes[1].size).to.be.eql(29) pull( nodes[1].content, pull.collect(collected) diff --git a/test/exporter-subtree.js b/test/exporter-subtree.js index 
d2c59048..70b92e07 100644 --- a/test/exporter-subtree.js +++ b/test/exporter-subtree.js @@ -16,8 +16,8 @@ const exporter = unixFSEngine.exporter const smallFile = loadFixture(__dirname, 'fixtures/200Bytes.txt') module.exports = (repo) => { - describe('exporter', function () { - this.timeout(10 * 1000) + describe('exporter subtree', () => { + // this.timeout(10 * 1000) let ipldResolver diff --git a/test/exporter.js b/test/exporter.js index eacbbb21..39f72f7f 100644 --- a/test/exporter.js +++ b/test/exporter.js @@ -73,7 +73,8 @@ module.exports = (repo) => { ) }) - it('export a small file with links', (done) => { + it('export a small file with links', function (done) { + this.timeout(30 * 1000) const hash = 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q' pull( exporter(hash, ipldResolver), @@ -83,9 +84,10 @@ module.exports = (repo) => { fileEql(files[0], bigFile, done) }) ) - }).timeout(30 * 1000) + }) - it('export a small file with links using CID instead of multihash', (done) => { + it('export a small file with links using CID instead of multihash', function (done) { + this.timeout(30 * 1000) const cid = new CID('QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q') pull( @@ -96,9 +98,10 @@ module.exports = (repo) => { fileEql(files[0], bigFile, done) }) ) - }).timeout(30 * 1000) + }) - it('export a large file > 5mb', (done) => { + it('export a large file > 5mb', function (done) { + this.timeout(30 * 1000) const hash = 'QmRQgufjp9vLE8XK2LGKZSsPCFCF6e4iynCQtNB5X2HBKE' pull( exporter(hash, ipldResolver), @@ -109,16 +112,17 @@ module.exports = (repo) => { fileEql(files[0], null, done) }) ) - }).timeout(30 * 1000) + }) - it('export a directory', (done) => { + it('export a directory', function (done) { + this.timeout(30 * 1000) const hash = 'QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN' pull( exporter(hash, ipldResolver), pull.collect((err, files) => { - files.forEach(file => expect(file).to.have.property('hash')) expect(err).to.not.exist() + files.forEach(file => 
expect(file).to.have.property('hash')) expect( files.map((file) => file.path) @@ -149,7 +153,44 @@ module.exports = (repo) => { ) }) ) - }).timeout(30 * 1000) + }) + + it('export a directory one deep', function (done) { + this.timeout(30 * 1000) + const hash = 'QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN' + + pull( + exporter(hash, ipldResolver, { maxDepth: 1 }), + pull.collect((err, files) => { + expect(err).to.not.exist() + files.forEach(file => expect(file).to.have.property('hash')) + + expect( + files.map((file) => file.path) + ).to.be.eql([ + 'QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN', + 'QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/200Bytes.txt', + 'QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/dir-another', + 'QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/level-1' + ]) + + pull( + pull.values(files), + pull.map((file) => Boolean(file.content)), + pull.collect((err, contents) => { + expect(err).to.not.exist() + expect(contents).to.be.eql([ + false, + true, + false, + false + ]) + done() + }) + ) + }) + ) + }) it('returns an empty stream for dir', (done) => { const hash = 'QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn' diff --git a/test/importer.js b/test/importer.js index cdc74333..468f1299 100644 --- a/test/importer.js +++ b/test/importer.js @@ -298,7 +298,8 @@ module.exports = (repo) => { } }) - it('file bigger than a single chunk', (done) => { + it('file bigger than a single chunk', function (done) { + this.timeout(60 * 1000) pull( pull.values([{ path: '1.2MiB.txt', @@ -313,7 +314,8 @@ module.exports = (repo) => { ) }) - it('file bigger than a single chunk inside a dir', (done) => { + it('file bigger than a single chunk inside a dir', function (done) { + this.timeout(60 * 1000) pull( pull.values([{ path: 'foo-big/1.2MiB.txt',