Optimizing restore to avoid reading a newly created sparse file
gilbertchen committed Oct 22, 2018
1 parent 22a0b22 commit bfb4b44
Showing 1 changed file with 35 additions and 0 deletions: src/duplicacy_backupmanager.go
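
The change exploits a simple invariant: when the restore target is a brand-new (sparse) file, every byte reads back as zero, so the hash of a chunk depends only on its length. Chunk hashes can therefore be memoized by length instead of recomputed by reading zeros from disk. A minimal standalone sketch of that memoization follows (not the commit's code; SHA-256 stands in for the hasher returned by manager.config.NewFileHasher(), and all other names are hypothetical):

// A minimal sketch of why the memoization is sound: a freshly created file
// reads back as all zeros, so a chunk's hash depends only on its length.
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

// hashOfZeros returns the hex-encoded hash of n zero bytes.
func hashOfZeros(n int) string {
	h := sha256.New()
	h.Write(make([]byte, n))
	return hex.EncodeToString(h.Sum(nil))
}

func main() {
	known := make(map[int]string) // chunk length -> hash, like knownHashes
	for _, size := range []int{4096, 65536, 4096} {
		if hash, found := known[size]; found {
			// Same length seen before: reuse the hash, skip the read.
			fmt.Printf("cached:   %6d bytes -> %s\n", size, hash[:8])
			continue
		}
		hash := hashOfZeros(size)
		known[size] = hash
		fmt.Printf("computed: %6d bytes -> %s\n", size, hash[:8])
	}
}
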
@@ -1162,6 +1162,9 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
lengthMap := make(map[string]int)
var offset int64

// Whether the file is newly created (needed by the sparse file optimization)
isNewFile := false

existingFile, err = os.Open(fullPath)
if err != nil {
if os.IsNotExist(err) {
@@ -1196,6 +1199,7 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
LOG_ERROR("DOWNLOAD_OPEN", "Can't reopen the initial file just created: %v", err)
return false
}
isNewFile = true
}
} else {
LOG_TRACE("DOWNLOAD_OPEN", "Can't open the existing file: %v", err)
@@ -1208,6 +1212,9 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
}
}

// The key in this map is the number of zeroes. The value is the corresponding hash.
knownHashes := make(map[int]string)

fileHash := ""
if existingFile != nil {

@@ -1217,6 +1224,7 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
fileHasher := manager.config.NewFileHasher()
buffer := make([]byte, 64*1024)
err = nil
isSkipped := false
// We read one extra byte so the file hash will differ if the file to be restored is a
// truncated portion of the existing file
for i := entry.StartChunk; i <= entry.EndChunk+1; i++ {
@@ -1232,6 +1240,28 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
chunkSize = 1 // the size of extra chunk beyond EndChunk
}
count := 0

if isNewFile {
if hash, found := knownHashes[chunkSize]; found {
// We have read the same number of zeros before, so we just retrieve the hash from the map
existingChunks = append(existingChunks, hash)
existingLengths = append(existingLengths, chunkSize)
offsetMap[hash] = offset
lengthMap[hash] = chunkSize
offset += int64(chunkSize)
isSkipped = true
continue
}
}

if isSkipped {
_, err := existingFile.Seek(offset, 0)
if err != nil {
LOG_ERROR("DOWNLOAD_SEEK", "Failed to seek to offset %d: %v", offset, err)
}
isSkipped = false
}

for count < chunkSize {
n := chunkSize - count
if n > cap(buffer) {
@@ -1258,12 +1288,16 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
offsetMap[hash] = offset
lengthMap[hash] = chunkSize
offset += int64(chunkSize)
if isNewFile {
knownHashes[chunkSize] = hash
}
}

if err == io.EOF {
break
}
}

fileHash = hex.EncodeToString(fileHasher.Sum(nil))
} else {
// If it is not inplace, we want to reuse any chunks in the existing file regardless of their offsets, so
@@ -1290,6 +1324,7 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
}
}


for i := entry.StartChunk; i <= entry.EndChunk; i++ {
if _, found := offsetMap[chunkDownloader.taskList[i].chunkHash]; !found {
chunkDownloader.taskList[i].needed = true
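One subtlety in the diff above: skipping a cached chunk advances only the logical offset, not the operating system's file position, which is why the isSkipped flag forces a Seek before the next real read. A hypothetical standalone demo of that skip-then-seek pattern (none of these names are from duplicacy; error handling is minimal):

// Demo: after chunks are skipped via the length cache, the file position
// lags the logical offset and must be realigned before the next real read.
package main

import (
	"fmt"
	"io"
	"log"
	"os"
)

func main() {
	f, err := os.CreateTemp("", "sparse-demo")
	if err != nil {
		log.Fatal(err)
	}
	defer os.Remove(f.Name())
	defer f.Close()
	if err := f.Truncate(1 << 20); err != nil { // reads back as all zeros
		log.Fatal(err)
	}

	chunkSizes := []int{4096, 4096, 4096, 8192}
	known := map[int]bool{} // stands in for knownHashes (length -> cached)
	var offset int64
	skipped := false
	buf := make([]byte, 8192)

	for _, size := range chunkSizes {
		if known[size] {
			offset += int64(size) // reuse the cached hash; skip the read
			skipped = true
			continue
		}
		if skipped {
			// Realign the file position with the logical offset.
			if _, err := f.Seek(offset, io.SeekStart); err != nil {
				log.Fatal(err)
			}
			skipped = false
		}
		if _, err := io.ReadFull(f, buf[:size]); err != nil {
			log.Fatal(err)
		}
		known[size] = true // real code would hash buf[:size] and cache the hash
		offset += int64(size)
		fmt.Printf("read %d bytes, offset now %d\n", size, offset)
	}
}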
