Optimizing restore to avoid reading a newly created sparse file
gilbertchen committed Oct 22, 2018
1 parent 22a0b22 commit bfb4b44
Showing 1 changed file with 35 additions and 0 deletions: src/duplicacy_backupmanager.go
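
The change exploits a simple invariant: when the restore target is a brand-new (sparse) file, every byte reads back as zero, so the hash of a chunk depends only on its length. Chunk hashes can therefore be memoized by length instead of recomputed by reading zeros from disk. A minimal standalone sketch of that memoization follows (not the commit's code; SHA-256 stands in for the hasher returned by manager.config.NewFileHasher(), and all other names are hypothetical):

// A minimal sketch of why the memoization is sound: a freshly created file
// reads back as all zeros, so a chunk's hash depends only on its length.
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

// hashOfZeros returns the hex-encoded hash of n zero bytes.
func hashOfZeros(n int) string {
	h := sha256.New()
	h.Write(make([]byte, n))
	return hex.EncodeToString(h.Sum(nil))
}

func main() {
	known := make(map[int]string) // chunk length -> hash, like knownHashes
	for _, size := range []int{4096, 65536, 4096} {
		if hash, found := known[size]; found {
			// Same length seen before: reuse the hash, skip the read.
			fmt.Printf("cached:   %6d bytes -> %s\n", size, hash[:8])
			continue
		}
		hash := hashOfZeros(size)
		known[size] = hash
		fmt.Printf("computed: %6d bytes -> %s\n", size, hash[:8])
	}
}
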
@@ -1162,6 +1162,9 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
lengthMap := make(map[string]int)
var offset int64

// Whether the file is newly created (needed by the sparse file optimization)
isNewFile := false

existingFile, err = os.Open(fullPath)
if err != nil {
if os.IsNotExist(err) {
@@ -1196,6 +1199,7 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
LOG_ERROR("DOWNLOAD_OPEN", "Can't reopen the initial file just created: %v", err)
return false
}
isNewFile = true
}
} else {
LOG_TRACE("DOWNLOAD_OPEN", "Can't open the existing file: %v", err)
@@ -1208,6 +1212,9 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
}
}

// The key in this map is the number of zeroes. The value is the corresponding hash.
knownHashes := make(map[int]string)

fileHash := ""
if existingFile != nil {

@@ -1217,6 +1224,7 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
fileHasher := manager.config.NewFileHasher()
buffer := make([]byte, 64*1024)
err = nil
isSkipped := false
// We read one extra byte so the file hash will differ if the file to be restored is a
// truncated portion of the existing file
for i := entry.StartChunk; i <= entry.EndChunk+1; i++ {
@@ -1232,6 +1240,28 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
chunkSize = 1 // the size of extra chunk beyond EndChunk
}
count := 0

if isNewFile {
if hash, found := knownHashes[chunkSize]; found {
// We have read the same number of zeros before, so we just retrieve the hash from the map
existingChunks = append(existingChunks, hash)
existingLengths = append(existingLengths, chunkSize)
offsetMap[hash] = offset
lengthMap[hash] = chunkSize
offset += int64(chunkSize)
isSkipped = true
continue
}
}

if isSkipped {
_, err := existingFile.Seek(offset, 0)
if err != nil {
LOG_ERROR("DOWNLOAD_SEEK", "Failed to seek to offset %d: %v", offset, err)
}
isSkipped = false
}

for count < chunkSize {
n := chunkSize - count
if n > cap(buffer) {
@@ -1258,12 +1288,16 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
offsetMap[hash] = offset
lengthMap[hash] = chunkSize
offset += int64(chunkSize)
if isNewFile {
knownHashes[chunkSize] = hash
}
}

if err == io.EOF {
break
}
}

fileHash = hex.EncodeToString(fileHasher.Sum(nil))
} else {
// If it is not inplace, we want to reuse any chunks in the existing file regardless of their offsets, so
@@ -1290,6 +1324,7 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
}
}


for i := entry.StartChunk; i <= entry.EndChunk; i++ {
if _, found := offsetMap[chunkDownloader.taskList[i].chunkHash]; !found {
chunkDownloader.taskList[i].needed = true
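One subtlety in the diff above: skipping a cached chunk advances only the logical offset, not the operating system's file position, which is why the isSkipped flag forces a Seek before the next real read. A hypothetical standalone demo of that skip-then-seek pattern (none of these names are from duplicacy; error handling is minimal):

// Demo: after chunks are skipped via the length cache, the file position
// lags the logical offset and must be realigned before the next real read.
package main

import (
	"fmt"
	"io"
	"log"
	"os"
)

func main() {
	f, err := os.CreateTemp("", "sparse-demo")
	if err != nil {
		log.Fatal(err)
	}
	defer os.Remove(f.Name())
	defer f.Close()
	if err := f.Truncate(1 << 20); err != nil { // reads back as all zeros
		log.Fatal(err)
	}

	chunkSizes := []int{4096, 4096, 4096, 8192}
	known := map[int]bool{} // stands in for knownHashes (length -> cached)
	var offset int64
	skipped := false
	buf := make([]byte, 8192)

	for _, size := range chunkSizes {
		if known[size] {
			offset += int64(size) // reuse the cached hash; skip the read
			skipped = true
			continue
		}
		if skipped {
			// Realign the file position with the logical offset.
			if _, err := f.Seek(offset, io.SeekStart); err != nil {
				log.Fatal(err)
			}
			skipped = false
		}
		if _, err := io.ReadFull(f, buf[:size]); err != nil {
			log.Fatal(err)
		}
		known[size] = true // real code would hash buf[:size] and cache the hash
		offset += int64(size)
		fmt.Printf("read %d bytes, offset now %d\n", size, offset)
	}
}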
