Search in sources :

Example 1 with ChunkEnumeration

Use of org.syncany.chunk.Chunker.ChunkEnumeration in project syncany by syncany.

The method deduplicate of the class Deduper.

/**
 * Deduplicates the given list of files according to the Syncany chunk algorithm.
 *
 * <p>A brief description of the algorithm (and further links to a detailed description)
 * are given in the {@link Deduper}.
 *
 * <p>Files are removed from the head of the list one at a time. After each processed file,
 * the accumulated multichunk size and the number of files taken from the list are compared
 * against <code>maxTotalSize</code> and <code>maxNumberOfFiles</code>. Once either limit is
 * reached, the currently open multichunk (if any) is closed and the method returns early:
 * the remaining files stay in the list and {@link DeduperListener#onFinish()} is <b>not</b>
 * called. It is only called once the list has been fully consumed.
 *
 * @param files List of files to be deduplicated (will be modified!)
 * @param listener Listener to react to file/chunk/multichunk events, and to implement the chunk index
 * @throws IOException If a file cannot be read or an unexpected exception occurs
 */
public void deduplicate(List<File> files, DeduperListener listener) throws IOException {
    Chunk chunk = null;
    MultiChunk multiChunk = null;
    long totalMultiChunkSize = 0L;
    long totalNumFiles = 0L;
    while (!files.isEmpty()) {
        File file = files.remove(0);
        totalNumFiles++;
        // Filter ignored files (note: they still count towards the file limit,
        // but the limit itself is only checked after a non-ignored file)
        if (!listener.onFileFilter(file)) {
            continue;
        }
        // Decide whether to index the contents
        if (listener.onFileStart(file)) {
            // Create chunks from file
            ChunkEnumeration chunksEnum = chunker.createChunks(file);
            try {
                while (chunksEnum.hasMoreElements()) {
                    chunk = chunksEnum.nextElement();
                    // onChunk() returns true for a new chunk, which must be stored in a
                    // multichunk; an old (known) chunk is only linked to the file below
                    if (listener.onChunk(chunk)) {
                        // - Check if multichunk full
                        if (multiChunk != null && multiChunk.isFull()) {
                            totalMultiChunkSize += multiChunk.getSize();
                            multiChunk.close();
                            listener.onMultiChunkClose(multiChunk);
                            multiChunk = null;
                        }
                        // - Open new multichunk if non-existent
                        if (multiChunk == null) {
                            MultiChunkId newMultiChunkId = listener.createNewMultiChunkId(chunk);
                            File multiChunkFile = listener.getMultiChunkFile(newMultiChunkId);
                            FileOutputStream multiChunkOut = new FileOutputStream(multiChunkFile);
                            boolean multiChunkCreated = false;
                            try {
                                multiChunk = multiChunker.createMultiChunk(newMultiChunkId, transformer.createOutputStream(multiChunkOut));
                                multiChunkCreated = true;
                            } finally {
                                // Do not leak the raw file stream if wrapping it failed
                                if (!multiChunkCreated) {
                                    multiChunkOut.close();
                                }
                            }
                            listener.onMultiChunkOpen(multiChunk);
                        }
                        // - Add chunk data
                        multiChunk.write(chunk);
                        listener.onMultiChunkWrite(multiChunk, chunk);
                    }
                    // Both old and new chunks are added to the file's chunk list
                    listener.onFileAddChunk(file, chunk);
                }
            } finally {
                // Closing file is necessary, also if chunking or a listener callback fails!
                chunksEnum.close();
            }
        }
        // The last chunk of a file carries the checksum of the whole file
        if (chunk != null) {
            listener.onFileEnd(file, chunk.getFileChecksum());
        } else {
            listener.onFileEnd(file, null);
        }
        // Reset chunk (if folder after chunk, the folder would have a checksum b/c of chunk.getFileChecksum())
        chunk = null;
        // Check if we have reached the transaction limit
        boolean fileLimitReached = totalNumFiles >= maxNumberOfFiles;
        if (multiChunk != null) {
            // The open multichunk's size is not yet part of totalMultiChunkSize
            if (fileLimitReached || totalMultiChunkSize + multiChunk.getSize() >= maxTotalSize) {
                multiChunk.close();
                listener.onMultiChunkClose(multiChunk);
                return;
            }
        } else if (fileLimitReached || totalMultiChunkSize >= maxTotalSize) {
            return;
        }
    }
    // Close and add last multichunk
    if (multiChunk != null) {
        multiChunk.close();
        listener.onMultiChunkClose(multiChunk);
    }
    listener.onFinish();
}
Also used : ChunkEnumeration(org.syncany.chunk.Chunker.ChunkEnumeration) MultiChunkId(org.syncany.database.MultiChunkEntry.MultiChunkId) FileOutputStream(java.io.FileOutputStream) File(java.io.File)

Aggregations

File (java.io.File)1 FileOutputStream (java.io.FileOutputStream)1 ChunkEnumeration (org.syncany.chunk.Chunker.ChunkEnumeration)1 MultiChunkId (org.syncany.database.MultiChunkEntry.MultiChunkId)1