From bfb4b44c0ab9afbbd33cd641c2dab49c3518b751 Mon Sep 17 00:00:00 2001 From: Gilbert Chen Date: Sun, 21 Oct 2018 22:43:24 -0400 Subject: [PATCH] Optimizating restore to avoid reading newly created sparse file --- src/duplicacy_backupmanager.go | 35 ++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/duplicacy_backupmanager.go b/src/duplicacy_backupmanager.go index 0b908dd5..f7f836cb 100644 --- a/src/duplicacy_backupmanager.go +++ b/src/duplicacy_backupmanager.go @@ -1162,6 +1162,9 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun lengthMap := make(map[string]int) var offset int64 + // If the file is newly created (needed by sparse file optimization) + isNewFile := false + existingFile, err = os.Open(fullPath) if err != nil { if os.IsNotExist(err) { @@ -1196,6 +1199,7 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun LOG_ERROR("DOWNLOAD_OPEN", "Can't reopen the initial file just created: %v", err) return false } + isNewFile = true } } else { LOG_TRACE("DOWNLOAD_OPEN", "Can't open the existing file: %v", err) @@ -1208,6 +1212,9 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun } } + // The key in this map is the number of zeroes. The value is the corresponding hash. + knownHashes := make(map[int]string) + fileHash := "" if existingFile != nil { @@ -1217,6 +1224,7 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun fileHasher := manager.config.NewFileHasher() buffer := make([]byte, 64*1024) err = nil + isSkipped := false // We set to read one more byte so the file hash will be different if the file to be restored is a // truncated portion of the existing file for i := entry.StartChunk; i <= entry.EndChunk+1; i++ { @@ -1232,6 +1240,28 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun chunkSize = 1 // the size of extra chunk beyond EndChunk } count := 0 + + if isNewFile { + if hash, found := knownHashes[chunkSize]; found { + // We have read the same number of zeros before, so we just retrieve the hash from the map + existingChunks = append(existingChunks, hash) + existingLengths = append(existingLengths, chunkSize) + offsetMap[hash] = offset + lengthMap[hash] = chunkSize + offset += int64(chunkSize) + isSkipped = true + continue + } + } + + if isSkipped { + _, err := existingFile.Seek(offset, 0) + if err != nil { + LOG_ERROR("DOWNLOAD_SEEK", "Failed to seek to offset %d: %v", offset, err) + } + isSkipped = false + } + for count < chunkSize { n := chunkSize - count if n > cap(buffer) { @@ -1258,12 +1288,16 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun offsetMap[hash] = offset lengthMap[hash] = chunkSize offset += int64(chunkSize) + if isNewFile { + knownHashes[chunkSize] = hash + } } if err == io.EOF { break } } + fileHash = hex.EncodeToString(fileHasher.Sum(nil)) } else { // If it is not inplace, we want to reuse any chunks in the existing file regardless their offets, so @@ -1290,6 +1324,7 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun } } + for i := entry.StartChunk; i <= entry.EndChunk; i++ { if _, found := offsetMap[chunkDownloader.taskList[i].chunkHash]; !found { chunkDownloader.taskList[i].needed = true