Use of org.syncany.chunk.Chunker.ChunkEnumeration in project syncany by syncany.
Class Deduper, method deduplicate.
/**
 * Deduplicates the given list of files according to the Syncany chunk algorithm.
 *
 * <p>A brief description of the algorithm (and further links to a detailed description)
 * are given in the {@link Deduper}.
 *
 * <p>Files are removed from the head of {@code files} one at a time. After each processed
 * file, the method checks the transaction limit: if the combined size of all multichunks
 * written so far reaches {@code maxTotalSize}, or the number of files taken from the list
 * reaches {@code maxNumberOfFiles}, the current multichunk (if any) is closed and the method
 * returns immediately. In that case {@code listener.onFinish()} is <b>not</b> called and the
 * remaining files stay in {@code files}. {@code onFinish()} is only called once the list has
 * been emptied.
 *
 * <p>Files rejected by {@code listener.onFileFilter(..)} still count towards the file limit,
 * but the limit check itself is skipped for them.
 *
 * @param files List of files to be deduplicated (will be modified!)
 * @param listener Listener to react to file/chunk/multichunk events, and to implement the chunk index
 * @throws IOException If a file cannot be read or an unexpected exception occurs
 */
public void deduplicate(List<File> files, DeduperListener listener) throws IOException {
    Chunk chunk = null;
    MultiChunk multiChunk = null;
    long totalMultiChunkSize = 0L;
    long totalNumFiles = 0L;

    // NOTE(review): if an exception escapes this method while a multichunk is open, the
    // multichunk is not closed here -- confirm whether callers clean up in that case.
    while (!files.isEmpty()) {
        File file = files.remove(0);
        totalNumFiles++;

        // Filter ignored files
        if (!listener.onFileFilter(file)) {
            continue;
        }

        // Decide whether to index the contents
        if (listener.onFileStart(file)) {
            // Create chunks from file
            ChunkEnumeration chunksEnum = chunker.createChunks(file);

            try {
                while (chunksEnum.hasMoreElements()) {
                    chunk = chunksEnum.nextElement();

                    // onChunk() returns true for a new chunk, false for an old (known) one;
                    // only new chunks are written to a multichunk.
                    if (listener.onChunk(chunk)) {
                        // - Check if multichunk full
                        if (multiChunk != null && multiChunk.isFull()) {
                            totalMultiChunkSize += multiChunk.getSize();
                            multiChunk.close();
                            listener.onMultiChunkClose(multiChunk);
                            multiChunk = null;
                        }

                        // - Open new multichunk if non-existent
                        if (multiChunk == null) {
                            MultiChunkId newMultiChunkId = listener.createNewMultiChunkId(chunk);
                            File multiChunkFile = listener.getMultiChunkFile(newMultiChunkId);
                            multiChunk = multiChunker.createMultiChunk(newMultiChunkId,
                                    transformer.createOutputStream(new FileOutputStream(multiChunkFile)));
                            listener.onMultiChunkOpen(multiChunk);
                        }

                        // - Add chunk data
                        multiChunk.write(chunk);
                        listener.onMultiChunkWrite(multiChunk, chunk);
                    }

                    // Old and new chunks alike are added to the file
                    listener.onFileAddChunk(file, chunk);
                }
            }
            finally {
                // Closing file is necessary -- also when chunking or a listener callback fails
                chunksEnum.close();
            }
        }

        // The file checksum is taken from the last chunk; null if no chunk was read for this file
        listener.onFileEnd(file, (chunk != null) ? chunk.getFileChecksum() : null);

        // Reset chunk (if folder after chunk, the folder would have a checksum b/c of chunk.getFileChecksum())
        chunk = null;

        // Check if we have reached the transaction limit
        long writtenSize = totalMultiChunkSize + ((multiChunk != null) ? multiChunk.getSize() : 0L);

        if (writtenSize >= maxTotalSize || totalNumFiles >= maxNumberOfFiles) {
            if (multiChunk != null) {
                multiChunk.close();
                listener.onMultiChunkClose(multiChunk);
            }

            // Early exit: remaining files stay in the list and onFinish() is not called
            return;
        }
    }

    // Close and add last multichunk
    if (multiChunk != null) {
        multiChunk.close();
        listener.onMultiChunkClose(multiChunk);
    }

    listener.onFinish();
}
Aggregations