Search in sources :

Example 11 with GzipCompressorOutputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream in project flink by apache.

The class YarnTestArchiveJob, method archiveFilesInDirectory:

/**
 * Materializes the {@code srcFiles} fixture entries as files inside {@code directory},
 * archives the directory into a gzipped tarball at {@code target}, and finally deletes
 * the generated source files again.
 *
 * @param directory existing directory in which the fixture files are created and archived
 * @param target path of the {@code .tar.gz} file to create
 * @throws IOException if writing a fixture file, archiving, or the cleanup delete fails,
 *         or if {@code directory} cannot be listed
 */
private static void archiveFilesInDirectory(File directory, String target) throws IOException {
    // Write the in-memory fixture contents to disk so they can be archived.
    for (Map.Entry<String, String> entry : srcFiles.entrySet()) {
        Files.write(Paths.get(directory.getAbsolutePath(), entry.getKey()), entry.getValue().getBytes());
    }
    // listFiles() returns null (rather than throwing) if the path is not a readable
    // directory; fail fast with a clear message instead of an NPE in the loop below.
    File[] files = directory.listFiles();
    if (files == null) {
        throw new IOException("Unable to list files of directory " + directory.getAbsolutePath());
    }
    try (FileOutputStream fos = new FileOutputStream(target);
        GzipCompressorOutputStream gos = new GzipCompressorOutputStream(new BufferedOutputStream(fos));
        TarArchiveOutputStream taros = new TarArchiveOutputStream(gos)) {
        // GNU long-name extension: entry names longer than 100 chars would otherwise throw.
        taros.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);
        for (File f : files) {
            // Prefix entries with the directory name so the archive unpacks into a folder.
            taros.putArchiveEntry(new TarArchiveEntry(f, directory.getName() + File.separator + f.getName()));
            try (FileInputStream fis = new FileInputStream(f);
                BufferedInputStream bis = new BufferedInputStream(fis)) {
                IOUtils.copy(bis, taros);
                taros.closeArchiveEntry();
            }
        }
    }
    // Remove the fixture files now that they are captured in the archive.
    for (Map.Entry<String, String> entry : srcFiles.entrySet()) {
        Files.delete(Paths.get(directory.getAbsolutePath(), entry.getKey()));
    }
}
Also used : GzipCompressorOutputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream) BufferedInputStream(java.io.BufferedInputStream) FileOutputStream(java.io.FileOutputStream) TarArchiveOutputStream(org.apache.commons.compress.archivers.tar.TarArchiveOutputStream) HashMap(java.util.HashMap) Map(java.util.Map) BufferedOutputStream(java.io.BufferedOutputStream) File(java.io.File) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry) FileInputStream(java.io.FileInputStream)

Example 12 with GzipCompressorOutputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream in project alluxio by Alluxio.

The class BackupManager, method backup:

/**
 * Writes a backup of all registered masters' journal entries to the specified stream.
 *
 * <p>A reader task drains each master's journal into a bounded {@link LinkedBlockingQueue}
 * while a concurrent writer task streams the queued entries, gzip-compressed, onto
 * {@code os}. The queue bounds memory use and decouples reader and writer throughput.
 *
 * @param os the stream to write to; it is NOT closed by this method — ownership stays
 *        with the caller (only the gzip trailer is written via {@code finish()})
 * @param entryCount receives the total number of journal entries that were backed up
 * @throws IOException if writing the backup stream fails
 */
public void backup(OutputStream os, AtomicLong entryCount) throws IOException {
    // Create gZIP compressed stream as back-up stream.
    GzipCompressorOutputStream zipStream = new GzipCompressorOutputStream(os);
    // Keep a handle on the pool so it can be shut down afterwards; creating it inline
    // as before leaked 4 threads on every backup call.
    java.util.concurrent.ExecutorService executor =
        Executors.newFixedThreadPool(4, ThreadFactoryUtils.build("master-backup-%d", true));
    try {
        // Executor for taking backup.
        CompletionService<Boolean> completionService = new ExecutorCompletionService<>(executor);
        // List of active tasks.
        Set<Future<?>> activeTasks = new HashSet<>();
        // Entry queue will be used as a buffer and synchronization between readers and writer.
        // Use of {@link LinkedBlockingQueue} is preferred because of {@code #drainTo()} method,
        // using which all existing entries can be drained while allowing writes.
        // Processing/draining one-by-one using {@link ConcurrentLinkedQueue} proved to be
        // inefficient compared to draining with dedicated method.
        LinkedBlockingQueue<JournalEntry> journalEntryQueue = new LinkedBlockingQueue<>(ServerConfiguration.getInt(PropertyKey.MASTER_BACKUP_ENTRY_BUFFER_COUNT));
        // Whether buffering is still active.
        AtomicBoolean bufferingActive = new AtomicBoolean(true);
        // Start the timer for backup metrics.
        long startBackupTime = System.currentTimeMillis();
        // Submit master reader task.
        activeTasks.add(completionService.submit(() -> {
            try {
                for (Master master : mRegistry.getServers()) {
                    try (CloseableIterator<JournalEntry> it = master.getJournalEntryIterator()) {
                        while (it.hasNext()) {
                            journalEntryQueue.put(it.next());
                            if (Thread.interrupted()) {
                                throw new InterruptedException();
                            }
                        }
                    }
                }
                // Put termination entry for signaling the writer.
                journalEntryQueue.put(JournalEntry.newBuilder().setSequenceNumber(TERMINATION_SEQ).build());
                return true;
            } catch (InterruptedException ie) {
                LOG.info("Backup reader task interrupted");
                Thread.currentThread().interrupt();
                throw new RuntimeException("Thread interrupted while reading master state.", ie);
            } finally {
                // Signal reader completion.
                bufferingActive.set(false);
            }
        }));
        // Submit writer task.
        activeTasks.add(completionService.submit(() -> {
            try {
                List<JournalEntry> pendingEntries = new LinkedList<>();
                while (bufferingActive.get() || journalEntryQueue.size() > 0) {
                    // Drain pending entries.
                    if (0 == journalEntryQueue.drainTo(pendingEntries)) {
                        // No elements at the moment. Fall-back to blocking mode.
                        pendingEntries.add(journalEntryQueue.take());
                    }
                    if (Thread.interrupted()) {
                        throw new InterruptedException();
                    }
                    // Write entries to back-up stream.
                    for (JournalEntry journalEntry : pendingEntries) {
                        // Check for termination entry.
                        if (journalEntry.getSequenceNumber() == TERMINATION_SEQ) {
                            // Reading finished.
                            return true;
                        }
                        journalEntry.writeDelimitedTo(zipStream);
                        entryCount.incrementAndGet();
                    }
                    pendingEntries.clear();
                }
                return true;
            } catch (InterruptedException ie) {
                LOG.info("Backup writer task interrupted");
                // Continue interrupt chain.
                Thread.currentThread().interrupt();
                throw new RuntimeException("Thread interrupted while writing to backup stream.", ie);
            }
        }));
        // Wait until backup tasks are completed.
        safeWaitTasks(activeTasks, completionService);
        // Close timer and update entry count.
        mBackupTimeMs = System.currentTimeMillis() - startBackupTime;
        mBackupEntriesCount = entryCount.get();
    } finally {
        // Always reclaim the worker threads, even if a task failed.
        executor.shutdownNow();
    }
    // finish() instead of close() since close would close os, which is owned by the caller.
    zipStream.finish();
    LOG.info("Created backup with {} entries", entryCount.get());
}
Also used : GzipCompressorOutputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream) CloseableIterator(alluxio.resource.CloseableIterator) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) JournalEntry(alluxio.proto.journal.Journal.JournalEntry) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Future(java.util.concurrent.Future) LinkedList(java.util.LinkedList) List(java.util.List) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashSet(java.util.HashSet)

Example 13 with GzipCompressorOutputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream in project alluxio by Alluxio.

The class TarUtils, method writeTarGz:

/**
 * Creates a gzipped tar archive from the given path, streaming the data to the given
 * output stream. Directory entries are included; only regular files have content copied.
 *
 * @param dirPath the path to archive
 * @param output the output stream to write the data to; it is not closed by this method
 * @throws IOException if reading a file or writing to the stream fails
 * @throws InterruptedException if the current thread is interrupted while archiving
 */
public static void writeTarGz(Path dirPath, OutputStream output) throws IOException, InterruptedException {
    GzipCompressorOutputStream gzipOut = new GzipCompressorOutputStream(output);
    TarArchiveOutputStream tarOut = new TarArchiveOutputStream(gzipOut);
    try (final Stream<Path> walked = Files.walk(dirPath)) {
        for (Path entryPath : walked.collect(toList())) {
            // Allow cancellation between entries.
            if (Thread.interrupted()) {
                throw new InterruptedException();
            }
            File entryFile = entryPath.toFile();
            // Entry names are relative to the archive root.
            String entryName = dirPath.relativize(entryPath).toString();
            tarOut.putArchiveEntry(new TarArchiveEntry(entryFile, entryName));
            if (entryFile.isFile()) {
                try (InputStream contents = Files.newInputStream(entryPath)) {
                    IOUtils.copy(contents, tarOut);
                }
            }
            tarOut.closeArchiveEntry();
        }
    }
    // finish() rather than close(): 'output' belongs to the caller.
    tarOut.finish();
    gzipOut.finish();
}
Also used : Path(java.nio.file.Path) GzipCompressorOutputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) InputStream(java.io.InputStream) TarArchiveOutputStream(org.apache.commons.compress.archivers.tar.TarArchiveOutputStream) File(java.io.File) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry)

Example 14 with GzipCompressorOutputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream in project whirr by apache.

The class Tarball, method createFromDirectory:

/**
 * Creates a tarball from the source directory and writes it to the target file.
 *
 * @param sourceDirectory directory whose files will be added to the tarball
 * @param targetName      path of the tarball file that will be written
 * @throws IOException when an exception occurs on creating the tarball
 */
public static void createFromDirectory(String sourceDirectory, String targetName) throws IOException {
    // try-with-resources closes the streams in reverse declaration order even on failure.
    // The previous manual close chain leaked the remaining streams whenever an earlier
    // close() threw, and required null checks for every stream.
    try (FileOutputStream fileOutputStream = new FileOutputStream(new File(targetName));
        BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(fileOutputStream);
        GzipCompressorOutputStream gzipOutputStream = new GzipCompressorOutputStream(bufferedOutputStream);
        TarArchiveOutputStream tarArchiveOutputStream = new TarArchiveOutputStream(gzipOutputStream)) {
        addFilesInDirectory(tarArchiveOutputStream, sourceDirectory);
        // Write the tar end-of-archive record explicitly on the success path so any I/O
        // error surfaces here; the implicit close() below would otherwise hide it.
        tarArchiveOutputStream.finish();
    }
}
Also used : GzipCompressorOutputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream) FileOutputStream(java.io.FileOutputStream) TarArchiveOutputStream(org.apache.commons.compress.archivers.tar.TarArchiveOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) File(java.io.File)

Example 15 with GzipCompressorOutputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream in project hive by apache.

The class CompressionUtils, method tar:

/**
 * Archives all the given files under {@code parentDir} into the gzipped tarball
 * {@code outputFile}, which is also created under {@code parentDir}.
 *
 * @param parentDir directory containing the input files and receiving the tarball
 * @param inputFiles names, relative to {@code parentDir}, of the files to archive
 * @param outputFile name of the {@code .tar.gz} file to create under {@code parentDir}
 * @throws IOException if reading an input file or writing the archive fails
 */
public static void tar(String parentDir, String[] inputFiles, String outputFile) throws IOException {
    // try-with-resources guarantees both streams are closed on every path; the previous
    // version only closed the raw FileOutputStream on errors, leaking the tar/gzip wrappers.
    try (FileOutputStream out = new FileOutputStream(new File(parentDir, outputFile));
        TarArchiveOutputStream tOut =
            new TarArchiveOutputStream(new GzipCompressorOutputStream(new BufferedOutputStream(out)))) {
        // GNU long-name extension; loop-invariant, so set it once before adding entries.
        tOut.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);
        for (String inputFile : inputFiles) {
            File f = new File(parentDir, inputFile);
            tOut.putArchiveEntry(new TarArchiveEntry(f, f.getName()));
            try (FileInputStream input = new FileInputStream(f)) {
                // copy with 8K buffer; does not close the streams
                IOUtils.copy(input, tOut);
            }
            tOut.closeArchiveEntry();
        }
        // Closing tOut (via try-with-resources) finishes the archive and closes the
        // whole stream chain, including 'out'.
    }
}
Also used : GzipCompressorOutputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream) FileOutputStream(java.io.FileOutputStream) TarArchiveOutputStream(org.apache.commons.compress.archivers.tar.TarArchiveOutputStream) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry) FileInputStream(java.io.FileInputStream)

Aggregations

GzipCompressorOutputStream (org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream)33 TarArchiveOutputStream (org.apache.commons.compress.archivers.tar.TarArchiveOutputStream)24 File (java.io.File)16 FileOutputStream (java.io.FileOutputStream)13 Path (java.nio.file.Path)13 BufferedOutputStream (java.io.BufferedOutputStream)8 OutputStream (java.io.OutputStream)8 TarArchiveEntry (org.apache.commons.compress.archivers.tar.TarArchiveEntry)7 FileInputStream (java.io.FileInputStream)6 IOException (java.io.IOException)6 ZipOutputStream (java.util.zip.ZipOutputStream)5 ByteArrayOutputStream (java.io.ByteArrayOutputStream)4 ArchiveOutputStream (org.apache.commons.compress.archivers.ArchiveOutputStream)4 ArchiveEntry (org.apache.commons.compress.archivers.ArchiveEntry)3 Test (org.junit.Test)3 ZipEntry (java.util.zip.ZipEntry)2 Test (org.junit.jupiter.api.Test)2 JournalEntry (alluxio.proto.journal.Journal.JournalEntry)1 CloseableIterator (alluxio.resource.CloseableIterator)1 ByteString (com.google.protobuf.ByteString)1