This repository has been archived by the owner on Aug 12, 2020. It is now read-only.

feat: exporter maxDepth (#197)
* big exporter overhaul: centralized dag resolving inside internal resolver stream

* exporter: maxDepth instead of recursive flag

* exporter: exposing name and depth

* exporter: exposing dir size

* tests: increased timeout for importing big file

* tests: test exporter maxDepth

* fixed linting error

* exporter: exporting link size instead of node size to mimic go-ipfs

* test: moving timeout def to the test top

* tests: fixed this.timeout broken by arrow functions
pgte authored and daviddias committed Nov 12, 2017
1 parent e0b9da3 commit 211e4e3
Showing 12 changed files with 219 additions and 114 deletions.
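
The net effect for callers: the exporter now takes an options object with a maxDepth field (default Infinity) instead of a recursive flag, and every emitted entry exposes name, depth, path, size, hash and type. A minimal usage sketch, assuming the ipfs-unixfs-engine entry point of this era; rootHash and dag (an IPLD resolver instance) are placeholders, not names from this diff:

    const exporter = require('ipfs-unixfs-engine').exporter
    const pull = require('pull-stream')

    pull(
      // maxDepth: 1 lists only the root entry and its immediate children
      exporter(rootHash, dag, { maxDepth: 1 }),
      pull.collect((err, entries) => {
        if (err) throw err
        entries.forEach((entry) => {
          console.log(entry.depth, entry.type, entry.path, entry.size)
        })
      })
    )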
1 change: 0 additions & 1 deletion package.json
@@ -70,7 +70,6 @@
     "pull-batch": "^1.0.0",
     "pull-block": "1.2.0",
     "pull-cat": "^1.1.11",
-    "pull-defer": "~0.2.2",
     "pull-pair": "^1.1.0",
     "pull-paramap": "^1.2.2",
     "pull-pause": "0.0.1",
32 changes: 15 additions & 17 deletions src/exporter/dir-flat.js
@@ -1,38 +1,38 @@
 'use strict'
 
 const pull = require('pull-stream')
-const paramap = require('pull-paramap')
-const CID = require('cids')
 const cat = require('pull-cat')
 
 // Logic to export a unixfs directory.
 module.exports = dirExporter
 
-function dirExporter (node, name, pathRest, ipldResolver, resolve, parent) {
+function dirExporter (node, name, path, pathRest, resolve, size, dag, parent, depth) {
   const accepts = pathRest[0]
 
   const dir = {
-    path: name,
-    hash: node.multihash
+    name: name,
+    depth: depth,
+    path: path,
+    hash: node.multihash,
+    size: node.size,
+    type: 'dir'
   }
 
   const streams = [
     pull(
       pull.values(node.links),
       pull.map((link) => ({
+        depth: depth + 1,
+        size: link.size,
+        name: link.name,
+        path: path + '/' + link.name,
+        multihash: link.multihash,
         linkName: link.name,
-        path: name + '/' + link.name,
-        hash: link.multihash
+        pathRest: pathRest.slice(1),
+        type: 'dir'
       })),
       pull.filter((item) => accepts === undefined || item.linkName === accepts),
-      paramap((item, cb) => ipldResolver.get(new CID(item.hash), (err, n) => {
-        if (err) {
-          return cb(err)
-        }
-
-        cb(null, resolve(n.value, accepts || item.path, pathRest, ipldResolver, name, parent))
-      })),
-      pull.flatten()
+      resolve
     )
   ]

@@ -41,7 +41,5 @@ function dirExporter (node, name, pathRest, ipldResolver, resolve, parent) {
     streams.unshift(pull.values([dir]))
   }
 
-  pathRest.shift()
-
   return cat(streams)
 }
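
The bare resolve stream that replaces the inlined paramap + pull.flatten() above is created in src/exporter/resolve.js, which this page does not show. A conceptual sketch of what such a centralized resolver stream does, under assumed names and simplified dispatch; this is illustrative, not the repository's actual implementation:

    const pull = require('pull-stream')
    const paramap = require('pull-paramap')
    const CID = require('cids')

    // Exporters now emit plain descriptors
    // ({ multihash, name, path, pathRest, depth, ... }) and one shared
    // through-stream resolves them against the dag service.
    function createResolver (dag, options) {
      return pull(
        paramap((item, cb) => {
          // depth cap: prune anything deeper than options.maxDepth
          if (item.depth > options.maxDepth) {
            return cb(null, pull.empty())
          }
          // nothing left to fetch for this item
          if (!item.multihash) {
            return cb(null, pull.values([item]))
          }
          // the single, centralized dag lookup
          dag.get(new CID(item.multihash), (err, node) => {
            if (err) {
              return cb(err)
            }
            // a real implementation dispatches on the unixfs node type here
            cb(null, pull.values([Object.assign({}, item, { node: node.value })]))
          })
        }),
        // each descriptor may expand into a sub-stream of entries
        pull.flatten()
      )
    }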
47 changes: 17 additions & 30 deletions src/exporter/dir-hamt-sharded.js
@@ -1,21 +1,23 @@
 'use strict'
 
 const pull = require('pull-stream')
-const paramap = require('pull-paramap')
-const CID = require('cids')
 const cat = require('pull-cat')
 const cleanHash = require('./clean-multihash')
 
 // Logic to export a unixfs directory.
 module.exports = shardedDirExporter
 
-function shardedDirExporter (node, name, pathRest, ipldResolver, resolve, parent) {
+function shardedDirExporter (node, name, path, pathRest, resolve, size, dag, parent, depth) {
   let dir
-  if (!parent || parent.path !== name) {
-    dir = [{
-      path: name,
-      hash: cleanHash(node.multihash)
-    }]
+  if (!parent || (parent.path !== path)) {
+    dir = {
+      name: name,
+      depth: depth,
+      path: path,
+      hash: cleanHash(node.multihash),
+      size: node.size,
+      type: 'dir'
+    }
   }
 
   const streams = [
@@ -24,47 +26,32 @@ function shardedDirExporter (node, name, pathRest, ipldResolver, resolve, parent) {
       pull.map((link) => {
         // remove the link prefix (2 chars for the bucket index)
         const p = link.name.substring(2)
-        const pp = p ? name + '/' + p : name
+        const pp = p ? path + '/' + p : path
         let accept = true
-        let fromPathRest = false
 
         if (p && pathRest.length) {
-          fromPathRest = true
           accept = (p === pathRest[0])
         }
         if (accept) {
           return {
-            fromPathRest: fromPathRest,
+            depth: depth + 1,
             name: p,
             path: pp,
-            hash: link.multihash,
-            pathRest: p ? pathRest.slice(1) : pathRest
+            multihash: link.multihash,
+            pathRest: p ? pathRest.slice(1) : pathRest,
+            parent: dir || parent
           }
         } else {
           return ''
         }
       }),
       pull.filter(Boolean),
-      paramap((item, cb) => ipldResolver.get(new CID(item.hash), (err, n) => {
-        if (err) {
-          return cb(err)
-        }
-
-        cb(
-          null,
-          resolve(
-            n.value,
-            item.fromPathRest ? item.name : item.path,
-            item.pathRest,
-            ipldResolver,
-            (dir && dir[0]) || parent))
-      })),
-      pull.flatten()
+      resolve
     )
   ]
 
   if (!pathRest.length) {
-    streams.unshift(pull.values(dir))
+    streams.unshift(pull.values([dir]))
   }
 
   return cat(streams)
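
The substring(2) above strips the two-character bucket index that HAMT-sharded directories prepend to every link name. A small illustration with made-up link names (not taken from the diff):

    // Hypothetical sharded-directory link: '8F' is the bucket index,
    // 'hello.txt' is the real entry name.
    const link = { name: '8Fhello.txt' }
    const p = link.name.substring(2) // 'hello.txt'

    // Links that are only a bucket index point at deeper shard nodes;
    // for those p === '' and the child keeps the parent's path (pp === path).
    const shardLink = { name: 'F5' }
    console.log(shardLink.name.substring(2) === '') // true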
15 changes: 9 additions & 6 deletions src/exporter/file.js
@@ -7,7 +7,7 @@ const pull = require('pull-stream')
 const paramap = require('pull-paramap')
 
 // Logic to export a single (possibly chunked) unixfs file.
-module.exports = (node, name, pathRest, ipldResolver) => {
+module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth) => {
   function getData (node) {
     try {
       const file = UnixFS.unmarshal(node.data)
@@ -20,14 +20,14 @@ module.exports = (node, name, pathRest, ipldResolver) => {
   function visitor (node) {
     return pull(
       pull.values(node.links),
-      paramap((link, cb) => ipldResolver.get(new CID(link.multihash), cb)),
+      paramap((link, cb) => dag.get(new CID(link.multihash), cb)),
       pull.map((result) => result.value)
     )
   }
 
-  const accepts = pathRest.shift()
+  const accepts = pathRest[0]
 
-  if (accepts !== undefined && accepts !== name) {
+  if (accepts !== undefined && accepts !== path) {
     return pull.empty()
   }
 
@@ -38,9 +38,12 @@
 
   const file = UnixFS.unmarshal(node.data)
   return pull.values([{
+    depth: depth,
     content: content,
-    path: name,
+    name: name,
+    path: path,
     hash: node.multihash,
-    size: file.fileSize()
+    size: size || file.fileSize(),
+    type: 'file'
   }])
 }
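
Note size: size || file.fileSize() above: the parent now passes its link's size down (dir-flat.js emits size: link.size), mimicking what go-ipfs reports, and the unixfs payload size is only a fallback. A file entry's bytes arrive as a content pull-stream; a consumption sketch, where entry stands for one file item produced by the exporter (an assumed input, not defined in this diff):

    const pull = require('pull-stream')

    // `entry` is assumed to be a { type: 'file', content, ... } item
    // emitted by the exporter.
    pull(
      entry.content,
      pull.collect((err, chunks) => {
        if (err) throw err
        const data = Buffer.concat(chunks)
        console.log('%s: %d bytes', entry.path, data.length)
      })
    )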
72 changes: 55 additions & 17 deletions src/exporter/index.js
@@ -2,9 +2,8 @@
 
 const pull = require('pull-stream')
 const CID = require('cids')
-const pullDefer = require('pull-defer')
 
-const resolve = require('./resolve').resolve
+const createResolver = require('./resolve').createResolver
 
 function pathBaseAndRest (path) {
   // Buffer -> raw multihash or CID in buffer
@@ -36,28 +35,67 @@ function pathBaseAndRest (path) {
   }
 }
 
-module.exports = (path, dag) => {
+const defaultOptions = {
+  maxDepth: Infinity
+}
+
+module.exports = (path, dag, _options) => {
+  const options = Object.assign({}, defaultOptions, _options)
+
+  let dPath
   try {
-    path = pathBaseAndRest(path)
+    dPath = pathBaseAndRest(path)
   } catch (err) {
     return pull.error(err)
  }
 
-  const d = pullDefer.source()
-
-  const cid = new CID(path.base)
-
-  dag.get(cid, (err, node) => {
-    if (err) {
-      return pull.error(err)
-    }
-    d.resolve(pull.values([node]))
-  })
+  const pathLengthToCut = join(
+    [dPath.base].concat(dPath.rest.slice(0, dPath.rest.length - 1))).length
 
   return pull(
-    d,
-    pull.map((result) => result.value),
-    pull.map((node) => resolve(node, path.base, path.rest, dag)),
-    pull.flatten()
+    pull.values([{
+      multihash: new CID(dPath.base),
+      name: dPath.base,
+      path: dPath.base,
+      pathRest: dPath.rest,
+      depth: 0
+    }]),
+    createResolver(dag, options),
+    pull.filter(Boolean),
+    pull.map((node) => {
+      return {
+        depth: node.depth,
+        name: node.name,
+        path: finalPathFor(node),
+        size: node.size,
+        hash: node.hash || node.multihash,
+        content: node.content,
+        type: node.type
+      }
+    })
   )
+
+  function finalPathFor (node) {
+    if (!dPath.rest.length) {
+      return node.path
+    }
+
+    let retPath = node.path.substring(pathLengthToCut)
+    if (retPath.charAt(0) === '/') {
+      retPath = retPath.substring(1)
+    }
+    if (!retPath) {
+      retPath = dPath.rest[dPath.rest.length - 1] || dPath.base
+    }
+    return retPath
+  }
 }
+
+function join (paths) {
+  return paths.reduce((acc, path) => {
+    if (acc.length) {
+      acc += '/'
+    }
+    return acc + path
+  }, '')
+}
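
pathLengthToCut makes the reported paths relative to the deepest requested path segment: it measures the query path up to, but excluding, its last segment. A worked example with a hypothetical query QmRoot/a/b:

    // join() as defined above
    function join (paths) {
      return paths.reduce((acc, path) => (acc.length ? acc + '/' : acc) + path, '')
    }

    // Query: exporter('QmRoot/a/b', dag) -> base 'QmRoot', rest ['a', 'b']
    const base = 'QmRoot'
    const rest = ['a', 'b']
    const pathLengthToCut = join([base].concat(rest.slice(0, rest.length - 1))).length
    console.log(pathLengthToCut) // 8, the length of 'QmRoot/a'

    // A node resolved at 'QmRoot/a/b' is then reported with a relative path:
    let retPath = 'QmRoot/a/b'.substring(pathLengthToCut) // '/b'
    if (retPath.charAt(0) === '/') retPath = retPath.substring(1)
    console.log(retPath) // 'b'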
37 changes: 16 additions & 21 deletions src/exporter/object.js
@@ -2,34 +2,29 @@
 
 const CID = require('cids')
 const pull = require('pull-stream')
-const pullDefer = require('pull-defer')
 
-module.exports = (node, name, pathRest, ipldResolver, resolve) => {
+module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth) => {
   let newNode
   if (pathRest.length) {
-    const pathElem = pathRest.shift()
+    const pathElem = pathRest[0]
     newNode = node[pathElem]
-    const newName = name + '/' + pathElem
-    if (CID.isCID(newNode)) {
-      const d = pullDefer.source()
-      ipldResolver.get(sanitizeCID(newNode), (err, newNode) => {
-        if (err) {
-          d.resolve(pull.error(err))
-        } else {
-          d.resolve(resolve(newNode.value, newName, pathRest, ipldResolver, node))
-        }
-      })
-      return d
-    } else if (newNode !== undefined) {
-      return resolve(newNode, newName, pathRest, ipldResolver, node)
-    } else {
+    const newName = path + '/' + pathElem
+    if (!newNode) {
       return pull.error('not found')
     }
+    const isCID = CID.isCID(newNode)
+    return pull(
+      pull.values([{
+        depth: depth,
+        name: pathElem,
+        path: newName,
+        pathRest: pathRest.slice(1),
+        multihash: isCID && newNode,
+        object: !isCID && newNode,
+        parent: parent
+      }]),
+      resolve)
   } else {
     return pull.error(new Error('invalid node type'))
   }
 }
-
-function sanitizeCID (cid) {
-  return new CID(cid.version, cid.codec, cid.multihash)
-}
(Diffs for the remaining 6 of the 12 changed files are not shown.)
