
fix: build & export interop with go-ipfs for small file raw leaves
For files smaller than the max chunk size with raw leaves enabled, go-ipfs creates a single node that is a raw buffer. Prior to this PR, js-ipfs created a unixfs file node whose data was the raw buffer. This resulted in different hashes for the same file.

This PR changes the builder to do the same thing as go-ipfs, and adds a resolver to the exporter that allows it to export a node that is a single raw buffer (so that you can `ipfs cat [CID w codec raw]` as you can in go-ipfs). A usage sketch follows the changed-files summary below.

License: MIT
Signed-off-by: Alan Shaw <[email protected]>
alanshaw committed Aug 23, 2018
1 parent fef6e9b commit 11885fa
Showing 12 changed files with 164 additions and 94 deletions.
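
As a rough illustration of the behaviour described in the commit message, here is a minimal sketch of importing a small file with raw leaves and exporting it again by its CID. It assumes the module's pull-stream based `importer`/`exporter` entry points (as used in the tests below, and assuming the package is required as `ipfs-unixfs-engine`) and an already-configured `ipld` instance; the file content is made up.

```js
const pull = require('pull-stream')
const { importer, exporter } = require('ipfs-unixfs-engine')

// `ipld` is assumed to be an already-configured IPLD resolver instance.
pull(
  pull.values([{
    path: 'small.txt',
    content: pull.values([Buffer.from('hello world')])
  }]),
  // With rawLeaves enabled, a file smaller than the max chunk size is now
  // stored as a single raw block, so its CID matches what go-ipfs produces.
  importer(ipld, { rawLeaves: true }),
  pull.collect((err, files) => {
    if (err) throw err

    pull(
      // The new `raw` resolver lets the exporter handle a node that is
      // just a raw buffer rather than a unixfs file node.
      exporter(files[0].multihash, ipld),
      pull.collect((err, exported) => {
        if (err) throw err
        pull(
          exported[0].content,
          pull.collect((err, bufs) => {
            if (err) throw err
            console.log(Buffer.concat(bufs).toString()) // 'hello world'
          })
        )
      })
    )
  })
)
```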
52 changes: 7 additions & 45 deletions src/builder/reduce.js
@@ -13,51 +13,13 @@ module.exports = function reduce (file, ipld, options) {
if (leaves.length === 1 && leaves[0].single && options.reduceSingleLeafToSelf) {
const leaf = leaves[0]

if (options.leafType === 'file' && !options.rawLeaves) {
return callback(null, {
path: file.path,
multihash: leaf.multihash,
size: leaf.size,
leafSize: leaf.leafSize,
name: leaf.name
})
}

// we're using raw leaf nodes so we convert the node into a UnixFS `file` node.
return waterfall([
(cb) => ipld.get(leaf.cid, cb),
(result, cb) => {
// If result.value is a buffer, this is a raw leaf otherwise it's a dag-pb node
const data = Buffer.isBuffer(result.value) ? result.value : result.value.data
const fileNode = new UnixFS('file', data)

DAGNode.create(fileNode.marshal(), [], options.hashAlg, (error, node) => {
cb(error, { DAGNode: node, fileNode: fileNode })
})
},
(result, cb) => {
if (options.onlyHash) {
return cb(null, result)
}

let cid = new CID(result.DAGNode.multihash)

if (options.cidVersion === 1) {
cid = cid.toV1()
}

ipld.put(result.DAGNode, { cid }, (error) => cb(error, result))
},
(result, cb) => {
cb(null, {
path: file.path,
multihash: result.DAGNode.multihash,
size: result.DAGNode.size,
leafSize: result.fileNode.fileSize(),
name: leaf.name
})
}
], callback)
return callback(null, {
path: file.path,
multihash: leaf.multihash,
size: leaf.size,
leafSize: leaf.leafSize,
name: leaf.name
})
}

// create a parent node and add all the leaves
7 changes: 0 additions & 7 deletions src/exporter/clean-multihash.js

This file was deleted.

4 changes: 2 additions & 2 deletions src/exporter/dir-flat.js
@@ -6,14 +6,14 @@ const cat = require('pull-cat')
// Logic to export a unixfs directory.
module.exports = dirExporter

function dirExporter (node, name, path, pathRest, resolve, size, dag, parent, depth) {
function dirExporter (cid, node, name, path, pathRest, resolve, size, dag, parent, depth) {
const accepts = pathRest[0]

const dir = {
name: name,
depth: depth,
path: path,
hash: node.multihash,
hash: cid,
size: node.size,
type: 'dir'
}
5 changes: 2 additions & 3 deletions src/exporter/dir-hamt-sharded.js
@@ -2,19 +2,18 @@

const pull = require('pull-stream')
const cat = require('pull-cat')
const cleanHash = require('./clean-multihash')

// Logic to export a unixfs directory.
module.exports = shardedDirExporter

function shardedDirExporter (node, name, path, pathRest, resolve, size, dag, parent, depth) {
function shardedDirExporter (cid, node, name, path, pathRest, resolve, size, dag, parent, depth) {
let dir
if (!parent || (parent.path !== path)) {
dir = {
name: name,
depth: depth,
path: path,
hash: cleanHash(node.multihash),
hash: cid,
size: node.size,
type: 'dir'
}
23 changes: 23 additions & 0 deletions src/exporter/extract-data-from-block.js
@@ -0,0 +1,23 @@
'use strict'

module.exports = function extractDataFromBlock (block, streamPosition, begin, end) {
const blockLength = block.length

if (begin >= streamPosition + blockLength) {
// If begin is after the start of the block, return an empty block
// This can happen when internal nodes contain data
return Buffer.alloc(0)
}

if (end - streamPosition < blockLength) {
// If the end byte is in the current block, truncate the block to the end byte
block = block.slice(0, end - streamPosition)
}

if (begin > streamPosition && begin < (streamPosition + blockLength)) {
// If the start byte is in the current block, skip to the start byte
block = block.slice(begin - streamPosition)
}

return block
}
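
For reference, a rough sketch of how this helper behaves for a block at a given absolute stream position; the byte values below are made up for illustration.

```js
const extractDataFromBlock = require('./extract-data-from-block')

// A 10-byte block whose first byte sits at absolute stream position 100.
const block = Buffer.from('0123456789')

// Requested range [102, 107): skip 2 bytes into the block, keep 5 bytes.
extractDataFromBlock(block, 100, 102, 107) // => Buffer containing '23456'

// Requested range starts at or after the end of this block: nothing to take.
extractDataFromBlock(block, 100, 115, 120) // => empty Buffer
```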
27 changes: 3 additions & 24 deletions src/exporter/file.js
@@ -5,9 +5,10 @@ const UnixFS = require('ipfs-unixfs')
const CID = require('cids')
const pull = require('pull-stream')
const paramap = require('pull-paramap')
const extractDataFromBlock = require('./extract-data-from-block')

// Logic to export a single (possibly chunked) unixfs file.
module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth, offset, length) => {
module.exports = (cid, node, name, path, pathRest, resolve, size, dag, parent, depth, offset, length) => {
const accepts = pathRest[0]

if (accepts !== undefined && accepts !== path) {
@@ -48,7 +49,7 @@ module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth,
content: content,
name: name,
path: path,
hash: node.multihash,
hash: cid,
size: fileSize,
type: 'file'
}])
@@ -149,25 +150,3 @@ function streamBytes (dag, node, fileSize, offset, length) {
pull.filter(Boolean)
)
}

function extractDataFromBlock (block, streamPosition, begin, end) {
const blockLength = block.length

if (begin >= streamPosition + blockLength) {
// If begin is after the start of the block, return an empty block
// This can happen when internal nodes contain data
return Buffer.alloc(0)
}

if (end - streamPosition < blockLength) {
// If the end byte is in the current block, truncate the block to the end byte
block = block.slice(0, end - streamPosition)
}

if (begin > streamPosition && begin < (streamPosition + blockLength)) {
// If the start byte is in the current block, skip to the start byte
block = block.slice(begin - streamPosition)
}

return block
}
2 changes: 1 addition & 1 deletion src/exporter/object.js
@@ -3,7 +3,7 @@
const CID = require('cids')
const pull = require('pull-stream')

module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth) => {
module.exports = (cid, node, name, path, pathRest, resolve, size, dag, parent, depth) => {
let newNode
if (pathRest.length) {
const pathElem = pathRest[0]
57 changes: 57 additions & 0 deletions src/exporter/raw.js
@@ -0,0 +1,57 @@
'use strict'

const pull = require('pull-stream')
const extractDataFromBlock = require('./extract-data-from-block')

// Logic to export a single raw block
module.exports = (cid, node, name, path, pathRest, resolve, size, dag, parent, depth, offset, length) => {
const accepts = pathRest[0]

if (accepts !== undefined && accepts !== path) {
return pull.empty()
}

size = size || node.length

if (offset < 0) {
return pull.error(new Error('Offset must be greater than 0'))
}

if (offset > size) {
return pull.error(new Error('Offset must be less than the file size'))
}

if (length < 0) {
return pull.error(new Error('Length must be greater than or equal to 0'))
}

if (length === 0) {
return pull.once({
depth,
content: pull.once(Buffer.alloc(0)),
hash: cid,
name,
path,
size,
type: 'raw'
})
}

if (!offset) {
offset = 0
}

if (!length || (offset + length > size)) {
length = size - offset
}

return pull.once({
depth,
content: pull.once(extractDataFromBlock(node, 0, offset, offset + length)),
hash: cid,
name,
path,
size,
type: 'raw'
})
}
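
With this resolver registered (see resolve.js below), a CID whose block is a single raw buffer can be exported directly, including ranged reads. A rough sketch, assuming the exporter passes `offset`/`length` through its options (as the resolver code above suggests) and that `rawCid` and `ipld` are an existing raw-block CID and a configured IPLD instance:

```js
const pull = require('pull-stream')
const exporter = require('ipfs-unixfs-engine').exporter

// rawCid: CID of a block with codec `raw`, e.g. a small file imported with
// rawLeaves: true. offset/length select a byte range within the block.
pull(
  exporter(rawCid, ipld, { offset: 2, length: 5 }),
  pull.collect((err, files) => {
    if (err) throw err
    // files[0].type === 'raw'; its content stream yields the sliced bytes.
    pull(
      files[0].content,
      pull.collect((err, bufs) => {
        if (err) throw err
        console.log(Buffer.concat(bufs)) // 5 bytes starting at byte offset 2
      })
    )
  })
)
```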
25 changes: 16 additions & 9 deletions src/exporter/resolve.js
@@ -9,7 +9,8 @@ const resolvers = {
directory: require('./dir-flat'),
'hamt-sharded-directory': require('./dir-hamt-sharded'),
file: require('./file'),
object: require('./object')
object: require('./object'),
raw: require('./raw')
}

module.exports = Object.assign({
@@ -31,39 +32,45 @@ function createResolver (dag, options, depth, parent) {
if ((typeof item.depth) !== 'number') {
return pull.error(new Error('no depth'))
}

if (item.object) {
return cb(null, resolveItem(item.object, item, options.offset, options.length))
return cb(null, resolveItem(null, item.object, item, options.offset, options.length))
}
dag.get(new CID(item.multihash), (err, node) => {

const cid = new CID(item.multihash)

dag.get(cid, (err, node) => {
if (err) {
return cb(err)
}
// const name = item.fromPathRest ? item.name : item.path
cb(null, resolveItem(node.value, item, options.offset, options.length))
cb(null, resolveItem(cid, node.value, item, options.offset, options.length))
})
}),
pull.flatten(),
pull.filter(Boolean),
pull.filter((node) => node.depth <= options.maxDepth)
)

function resolveItem (node, item, offset, length) {
return resolve(node, item.name, item.path, item.pathRest, item.size, dag, item.parent || parent, item.depth, offset, length)
function resolveItem (cid, node, item, offset, length) {
return resolve(cid, node, item.name, item.path, item.pathRest, item.size, dag, item.parent || parent, item.depth, offset, length)
}

function resolve (node, name, path, pathRest, size, dag, parentNode, depth, offset, length) {
function resolve (cid, node, name, path, pathRest, size, dag, parentNode, depth, offset, length) {
const type = typeOf(node)
const nodeResolver = resolvers[type]
if (!nodeResolver) {
return pull.error(new Error('Unkown node type ' + type))
}
const resolveDeep = createResolver(dag, options, depth, node)
return nodeResolver(node, name, path, pathRest, resolveDeep, size, dag, parentNode, depth, offset, length)
return nodeResolver(cid, node, name, path, pathRest, resolveDeep, size, dag, parentNode, depth, offset, length)
}
}

function typeOf (node) {
if (Buffer.isBuffer(node.data)) {
if (Buffer.isBuffer(node)) {
return 'raw'
} else if (Buffer.isBuffer(node.data)) {
return UnixFS.unmarshal(node.data).type
} else {
return 'object'
2 changes: 1 addition & 1 deletion test/builder-dir-sharding.js
@@ -115,7 +115,7 @@ module.exports = (repo) => {
expect(nodes.length).to.be.eql(2)
const expectedHash = new CID(shardedHash).toBaseEncodedString()
expect(nodes[0].path).to.be.eql(expectedHash)
expect(nodes[0].hash).to.be.eql(expectedHash)
expect(new CID(nodes[0].hash).toBaseEncodedString()).to.be.eql(expectedHash)
expect(nodes[1].path).to.be.eql(expectedHash + '/b')
expect(nodes[1].size).to.be.eql(21)
pull(
33 changes: 32 additions & 1 deletion test/exporter.js
@@ -30,6 +30,7 @@ const exporter = unixFSEngine.exporter
const importer = unixFSEngine.importer

const bigFile = loadFixture('test/fixtures/1.2MiB.txt')
const smallFile = loadFixture('test/fixtures/200Bytes.txt')

module.exports = (repo) => {
describe('exporter', () => {
@@ -420,12 +421,42 @@ module.exports = (repo) => {
)
})

it('exports a large file > 5mb imported with raw leaves', function (done) {
it('exports a small file imported with raw leaves', function (done) {
this.timeout(30 * 1000)

pull(
pull.values([{
path: '200Bytes.txt',
content: pull.values([smallFile])
}]),
importer(ipld, {
rawLeaves: true
}),
pull.collect(collected)
)

function collected (err, files) {
expect(err).to.not.exist()
expect(files.length).to.equal(1)

pull(
exporter(files[0].multihash, ipld),
pull.collect((err, files) => {
expect(err).to.not.exist()
expect(new CID(files[0].hash).toBaseEncodedString()).to.equal('zb2rhXrz1gkCv8p4nUDZRohY6MzBE9C3HVTVDP72g6Du3SD9Q')

fileEql(files[0], smallFile, done)
})
)
}
})

it('exports a large file > 1mb imported with raw leaves', function (done) {
this.timeout(30 * 1000)

pull(
pull.values([{
path: '1.2MiB.txt',
content: pull.values([bigFile])
}]),
importer(ipld, {