Search in sources :

Example 6 with BZip2CompressorInputStream

use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in project logging-log4j2 by apache.

the class Bzip2CompressActionTest method testExecuteCompressesSourceFileToDestinationFile.

/**
 * Writes three known lines to a source file, compresses it with the "bzip2"
 * action and verifies that
 * <ul>
 * <li>the destination file is created and the source file is deleted,</li>
 * <li>the compressed bytes equal the expected bzip2 stream, and</li>
 * <li>decompressing the expected stream yields the original three lines.</li>
 * </ul>
 *
 * @throws IOException if the source file cannot be written or the destination cannot be read
 */
@Test
public void testExecuteCompressesSourceFileToDestinationFile() throws IOException {
    final String LINE1 = "Here is line 1. Random text: ABCDEFGHIJKLMNOPQRSTUVWXYZ\r\n";
    final String LINE2 = "Here is line 2. Random text: ABCDEFGHIJKLMNOPQRSTUVWXYZ\r\n";
    final String LINE3 = "Here is line 3. Random text: ABCDEFGHIJKLMNOPQRSTUVWXYZ\r\n";
    final File source = new File("target/compressme");
    try (FileWriter fw = new FileWriter(source, false)) {
        fw.write(LINE1);
        fw.write(LINE2);
        fw.write(LINE3);
        fw.flush();
    }
    final File destination = new File("target/compressme.bz2");
    // Remove any leftover from a previous (possibly failed) run.
    destination.delete();
    assertFalse("Destination should not exist yet", destination.exists());
    final boolean actual = CommonsCompressAction.execute("bzip2", source, destination, true);
    assertTrue("Bzip2CompressAction should have succeeded", actual);
    assertTrue("Destination should exist after Bzip2CompressAction", destination.exists());
    assertFalse("Source should have been deleted", source.exists());
    // Expected bzip2 stream for LINE1 + LINE2 + LINE3.
    final byte[] bz2 = new byte[] { (byte) 0x42, (byte) 0x5A, (byte) 0x68, (byte) 0x39, (byte) 0x31, (byte) 0x41, (byte) 0x59, (byte) 0x26, (byte) 0x53, (byte) 0x59, (byte) 0x9C, (byte) 0xE1, (byte) 0xE8, (byte) 0x2D, (byte) 0x00, (byte) 0x00, (byte) 0x1C, (byte) 0xDF, (byte) 0x80, (byte) 0x00, (byte) 0x12, (byte) 0x40, (byte) 0x01, (byte) 0x38, (byte) 0x10, (byte) 0x3F, (byte) 0xFF, (byte) 0xFF, (byte) 0xF0, (byte) 0x26, (byte) 0x27, (byte) 0x9C, (byte) 0x40, (byte) 0x20, (byte) 0x00, (byte) 0x70, (byte) 0x63, (byte) 0x4D, (byte) 0x06, (byte) 0x80, (byte) 0x19, (byte) 0x34, (byte) 0x06, (byte) 0x46, (byte) 0x9A, (byte) 0x18, (byte) 0x9A, (byte) 0x30, (byte) 0xCF, (byte) 0xFD, (byte) 0x55, (byte) 0x4D, (byte) 0x0D, (byte) 0x06, (byte) 0x9A, (byte) 0x0C, (byte) 0x40, (byte) 0x1A, (byte) 0x1A, (byte) 0x34, (byte) 0x34, (byte) 0xCD, (byte) 0x46, (byte) 0x05, (byte) 0x6B, (byte) 0x19, (byte) 0x92, (byte) 0x23, (byte) 0x5E, (byte) 0xB5, (byte) 0x2E, (byte) 0x79, (byte) 0x65, (byte) 0x41, (byte) 0x81, (byte) 0x33, (byte) 0x4B, (byte) 0x53, (byte) 0x5B, (byte) 0x62, (byte) 0x75, (byte) 0x0A, (byte) 0x14, (byte) 0xB6, (byte) 0xB7, (byte) 0x37, (byte) 0xB8, (byte) 0x38, (byte) 0xB9, (byte) 0x39, (byte) 0xBA, (byte) 0x2A, (byte) 0x4E, (byte) 0xEA, (byte) 0xEC, (byte) 0xEE, (byte) 0xAD, (byte) 0xE1, (byte) 0xE5, (byte) 0x63, (byte) 0xD3, (byte) 0x22, (byte) 0xE8, (byte) 0x90, (byte) 0x52, (byte) 0xA9, (byte) 0x7A, (byte) 0x68, (byte) 0x90, (byte) 0x5C, (byte) 0x82, (byte) 0x0B, (byte) 0x51, (byte) 0xBF, (byte) 0x24, (byte) 0x61, (byte) 0x7F, (byte) 0x17, (byte) 0x72, (byte) 0x45, (byte) 0x38, (byte) 0x50, (byte) 0x90, (byte) 0x9C, (byte) 0xE1, (byte) 0xE8, (byte) 0x2D };
    assertEquals(bz2.length, destination.length());
    // check the compressed contents
    try (FileInputStream fis = new FileInputStream(destination)) {
        final byte[] actualBz2 = new byte[bz2.length];
        int offset = 0;
        while (offset < actualBz2.length) {
            final int n = fis.read(actualBz2, offset, actualBz2.length - offset);
            if (n < 0) {
                // read() signals EOF with -1; adding that to offset would corrupt it.
                throw new IOException("Unexpected end of " + destination + " after " + offset + " bytes");
            }
            offset += n;
        }
        assertArrayEquals("Compressed data corrupt", bz2, actualBz2);
    }
    destination.delete();
    // uncompress
    try (BZip2CompressorInputStream bzin = new BZip2CompressorInputStream(new ByteArrayInputStream(bz2))) {
        final StringBuilder sb = new StringBuilder();
        final byte[] buf = new byte[1024];
        int n = 0;
        while ((n = bzin.read(buf, 0, buf.length)) > -1) {
            // The expected text is pure ASCII; decode explicitly rather than
            // relying on the platform default charset.
            sb.append(new String(buf, 0, n, java.nio.charset.StandardCharsets.US_ASCII));
        }
        assertEquals(LINE1 + LINE2 + LINE3, sb.toString());
    }
}
Also used : BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileWriter(java.io.FileWriter) File(java.io.File) FileInputStream(java.io.FileInputStream) Test(org.junit.Test)

Example 7 with BZip2CompressorInputStream

use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in project uPortal by Jasig.

the class JaxbPortalDataHandlerService method importDataArchive.

/**
 * Detects the media type of the given stream and re-dispatches the import
 * with the stream wrapped in the matching archive or decompressor stream.
 *
 * @param archive        resource the stream was opened from; its filename is passed to media type detection
 * @param resourceStream raw stream of the resource; wrapped in a {@link BufferedInputStream} unless it already is one
 * @param options        import options, passed through unchanged
 * @throws RuntimeException if the media type is not one of the handled types,
 *                          or if an {@link IOException} occurs while opening the wrapped stream
 */
protected void importDataArchive(Resource archive, InputStream resourceStream, BatchImportOptions options) {
    // Reuse the caller's buffering when it is already there, otherwise add our own.
    final BufferedInputStream markable = resourceStream instanceof BufferedInputStream
            ? (BufferedInputStream) resourceStream
            : new BufferedInputStream(resourceStream);
    try {
        // Allow up to 100MB to be buffered; exceeding this read limit is not handled.
        // TODO see if there is a buffered stream that will write to a file once the buffer fills up
        markable.mark(100 * 1024 * 1024);
        final MediaType detected = getMediaType(markable, archive.getFilename());

        // --- archive formats ---
        if (MT_JAVA_ARCHIVE.equals(detected)) {
            final ArchiveInputStream jar = new JarArchiveInputStream(markable);
            importDataArchive(archive, jar, options);
            return;
        }
        if (MediaType.APPLICATION_ZIP.equals(detected)) {
            final ArchiveInputStream zip = new ZipArchiveInputStream(markable);
            importDataArchive(archive, zip, options);
            return;
        }
        if (MT_CPIO.equals(detected)) {
            final ArchiveInputStream cpio = new CpioArchiveInputStream(markable);
            importDataArchive(archive, cpio, options);
            return;
        }
        if (MT_AR.equals(detected)) {
            final ArchiveInputStream ar = new ArArchiveInputStream(markable);
            importDataArchive(archive, ar, options);
            return;
        }
        if (MT_TAR.equals(detected)) {
            final ArchiveInputStream tar = new TarArchiveInputStream(markable);
            importDataArchive(archive, tar, options);
            return;
        }

        // --- compression formats ---
        if (MT_BZIP2.equals(detected)) {
            final CompressorInputStream bzip2 = new BZip2CompressorInputStream(markable);
            importDataArchive(archive, bzip2, options);
            return;
        }
        if (MT_GZIP.equals(detected)) {
            final CompressorInputStream gzip = new GzipCompressorInputStream(markable);
            importDataArchive(archive, gzip, options);
            return;
        }
        if (MT_PACK200.equals(detected)) {
            final CompressorInputStream pack200 = new Pack200CompressorInputStream(markable);
            importDataArchive(archive, pack200, options);
            return;
        }
        if (MT_XZ.equals(detected)) {
            final CompressorInputStream xz = new XZCompressorInputStream(markable);
            importDataArchive(archive, xz, options);
            return;
        }

        throw new RuntimeException("Unrecognized archive media type: " + detected);
    } catch (IOException e) {
        throw new RuntimeException("Could not load InputStream for resource: " + archive, e);
    } finally {
        IOUtils.closeQuietly(markable);
    }
}
Also used : JarArchiveInputStream(org.apache.commons.compress.archivers.jar.JarArchiveInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) ArArchiveInputStream(org.apache.commons.compress.archivers.ar.ArArchiveInputStream) CompressorInputStream(org.apache.commons.compress.compressors.CompressorInputStream) XZCompressorInputStream(org.apache.commons.compress.compressors.xz.XZCompressorInputStream) Pack200CompressorInputStream(org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) IOException(java.io.IOException) Pack200CompressorInputStream(org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) JarArchiveInputStream(org.apache.commons.compress.archivers.jar.JarArchiveInputStream) ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) CpioArchiveInputStream(org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream) ArArchiveInputStream(org.apache.commons.compress.archivers.ar.ArArchiveInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) BufferedInputStream(java.io.BufferedInputStream) MediaType(org.apache.tika.mime.MediaType) CpioArchiveInputStream(org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream) XZCompressorInputStream(org.apache.commons.compress.compressors.xz.XZCompressorInputStream)

Example 8 with BZip2CompressorInputStream

use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in project tika by apache.

the class ExtractReader method loadExtract.

/**
 * Loads the list of {@link Metadata} objects stored in an extract file.
 * <p>
 * The file name suffixes decide how the file is read: an optional compression
 * suffix selects a decompressing stream, and the txt/json suffix selects
 * between JSON deserialization and plain-text loading. For JSON extracts with
 * more than one entry, the list is reduced according to
 * {@code alterMetadataList}.
 *
 * @param extractFile path of the extract file to read
 * @return the loaded metadata list, or {@code null} if the compression suffix is not supported
 * @throws ExtractReaderException if the file is missing, has an unrecognized suffix, is empty,
 *                                falls outside the configured length limits, or cannot be read or parsed
 */
public List<Metadata> loadExtract(Path extractFile) throws ExtractReaderException {
    List<Metadata> metadataList = null;
    if (extractFile == null || !Files.isRegularFile(extractFile)) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.NO_EXTRACT_FILE);
    }
    FileSuffixes fileSuffixes = parseSuffixes(extractFile.getFileName().toString());
    if (fileSuffixes.txtOrJson == null) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.INCORRECT_EXTRACT_FILE_SUFFIX);
    }
    long length = -1L;
    try {
        length = Files.size(extractFile);
    } catch (IOException e) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
    }
    if (length == 0L) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.ZERO_BYTE_EXTRACT_FILE);
    }
    if (minExtractLength > IGNORE_LENGTH && length < minExtractLength) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_FILE_TOO_SHORT);
    }
    if (maxExtractLength > IGNORE_LENGTH && length > maxExtractLength) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_FILE_TOO_LONG);
    }
    Reader reader = null;
    InputStream is = null;
    // Opening and parsing share one try so that the finally block releases the
    // underlying stream on every exit path, including the early return below
    // and a failure inside a decompressor constructor.
    try {
        is = Files.newInputStream(extractFile);
        if (fileSuffixes.compression != null) {
            if (fileSuffixes.compression.equals("bz2")) {
                is = new BZip2CompressorInputStream(is);
            } else if (fileSuffixes.compression.equals("gz") || fileSuffixes.compression.equals("gzip")) {
                is = new GzipCompressorInputStream(is);
            } else if (fileSuffixes.compression.equals("zip")) {
                // NOTE(review): a "zip" suffix is read with the .Z (compress) decoder,
                // not a zip archive reader -- confirm this is intended.
                is = new ZCompressorInputStream(is);
            } else {
                LOG.warn("Can't yet process compression of type: {}", fileSuffixes.compression);
                return metadataList;
            }
        }
        reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
        if (fileSuffixes.txtOrJson.equals("json")) {
            metadataList = JsonMetadataList.fromJson(reader);
            if (alterMetadataList.equals(ALTER_METADATA_LIST.FIRST_ONLY) && metadataList.size() > 1) {
                // Keep only the first entry.
                while (metadataList.size() > 1) {
                    metadataList.remove(metadataList.size() - 1);
                }
            } else if (alterMetadataList.equals(ALTER_METADATA_LIST.CONCATENATE_CONTENT_INTO_FIRST) && metadataList.size() > 1) {
                // Join the content of every entry (space separated), store it on
                // the first entry, then drop all other entries.
                StringBuilder sb = new StringBuilder();
                Metadata containerMetadata = metadataList.get(0);
                for (int i = 0; i < metadataList.size(); i++) {
                    Metadata m = metadataList.get(i);
                    String c = m.get(RecursiveParserWrapper.TIKA_CONTENT);
                    if (c != null) {
                        sb.append(c);
                        sb.append(" ");
                    }
                }
                containerMetadata.set(RecursiveParserWrapper.TIKA_CONTENT, sb.toString());
                while (metadataList.size() > 1) {
                    metadataList.remove(metadataList.size() - 1);
                }
            }
        } else {
            metadataList = generateListFromTextFile(reader, fileSuffixes);
        }
    } catch (IOException e) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
    } catch (TikaException e) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_PARSE_EXCEPTION);
    } finally {
        IOUtils.closeQuietly(reader);
        IOUtils.closeQuietly(is);
    }
    return metadataList;
}
Also used : GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) TikaException(org.apache.tika.exception.TikaException) InputStreamReader(java.io.InputStreamReader) ZCompressorInputStream(org.apache.commons.compress.compressors.z.ZCompressorInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) Reader(java.io.Reader) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) BufferedReader(java.io.BufferedReader) ZCompressorInputStream(org.apache.commons.compress.compressors.z.ZCompressorInputStream)

Example 9 with BZip2CompressorInputStream

use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in project stanbol by apache.

the class RdfResourceImporter method importResource.

/**
 * Bulk loads an RDF resource into {@code destination}.
 * <p>
 * A trailing {@code gz} or {@code bz2} extension (case-insensitive) causes the
 * stream to be decompressed on the fly; the RDF format is then derived from
 * the remaining file name.
 *
 * @param is           stream providing the (possibly compressed) RDF data
 * @param resourceName name of the resource, used to detect compression and RDF format
 * @return {@link ResourceState#IGNORED} if no RDF format matches the file name,
 *         {@link ResourceState#ERROR} if parsing fails with a {@link RuntimeException},
 *         {@link ResourceState#LOADED} otherwise
 * @throws IOException if a decompressing stream cannot be opened
 */
@Override
public ResourceState importResource(InputStream is, String resourceName) throws IOException {
    String name = FilenameUtils.getName(resourceName);
    final String extension = FilenameUtils.getExtension(name);
    if ("gz".equalsIgnoreCase(extension)) {
        is = new GZIPInputStream(is);
        name = FilenameUtils.removeExtension(name);
        log.debug("   - from GZIP Archive");
    } else if ("bz2".equalsIgnoreCase(extension)) {
        // use true as 2nd param so concatenated bzip2 streams are fully
        // decompressed (see http://s.apache.org/QbK)
        is = new BZip2CompressorInputStream(is, true);
        name = FilenameUtils.removeExtension(name);
        log.debug("   - from BZip2 Archive");
    }
    // TODO: No Zip Files inside Zip Files supported :o( ^^
    Lang format = RDFLanguages.filenameToLang(name);
    if (format == null) {
        log.warn("ignore File {} because of unknown extension ", resourceName);
        return ResourceState.IGNORED;
    }
    log.info("    - bulk loading File {} using Format {}", resourceName, format);
    try {
        destination.startBulk();
        RiotReader.parse(is, format, null, destination);
    } catch (RuntimeException e) {
        // Report the cause instead of silently turning it into an ERROR state.
        log.error("Unable to bulk load File " + resourceName, e);
        return ResourceState.ERROR;
    } finally {
        destination.finishBulk();
    }
    return ResourceState.LOADED;
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) Lang(org.apache.jena.riot.Lang)

Example 10 with BZip2CompressorInputStream

use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in project stanbol by apache.

the class ConfigUtils method getArchiveInputStream.

/**
 * Wraps the given stream in an {@link ArchiveInputStream} matching the archive format.
 * <p>
 * If {@code solrArchiveName} has a file extension, the format is looked up in
 * {@code SUPPORTED_SOLR_ARCHIVE_FORMAT}; otherwise the name itself is taken as
 * the format. {@code "zip"} yields a zip archive stream, while {@code "gz"} and
 * {@code "bz2"} yield a tar archive stream over the decompressed data.
 *
 * @param solrArchiveName archive file name, or the bare format name
 * @param is              stream providing the archive data
 * @return the archive stream reading from {@code is}
 * @throws IOException           if a decompressing stream cannot be opened
 * @throws IllegalStateException if the resolved format is none of zip, gz or bz2
 */
public static ArchiveInputStream getArchiveInputStream(String solrArchiveName, InputStream is) throws IOException {
    final String extension = FilenameUtils.getExtension(solrArchiveName);
    final boolean hasExtension = extension != null && !extension.isEmpty();
    // Without an extension, assume the caller passed the format name directly.
    final String format = hasExtension ? SUPPORTED_SOLR_ARCHIVE_FORMAT.get(extension) : solrArchiveName;

    if ("zip".equals(format)) {
        return new ZipArchiveInputStream(is);
    }

    // Every other supported format is a compressed tar archive.
    final InputStream decompressed;
    if ("gz".equals(format)) {
        decompressed = new GZIPInputStream(is);
    } else if ("bz2".equals(format)) {
        decompressed = new BZip2CompressorInputStream(is);
    } else {
        throw new IllegalStateException("Unsupported compression format " + format + "!. " + "Please report this to stanbol-dev mailing list!");
    }
    return new TarArchiveInputStream(decompressed);
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream)

Aggregations

BZip2CompressorInputStream (org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream)19 FileInputStream (java.io.FileInputStream)11 IOException (java.io.IOException)6 InputStream (java.io.InputStream)6 GZIPInputStream (java.util.zip.GZIPInputStream)6 File (java.io.File)4 FileOutputStream (java.io.FileOutputStream)4 BufferedReader (java.io.BufferedReader)3 InputStreamReader (java.io.InputStreamReader)3 ZipArchiveInputStream (org.apache.commons.compress.archivers.zip.ZipArchiveInputStream)3 GzipCompressorInputStream (org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream)3 BufferedInputStream (java.io.BufferedInputStream)2 ByteBuffer (java.nio.ByteBuffer)2 ArchiveInputStream (org.apache.commons.compress.archivers.ArchiveInputStream)2 TarArchiveInputStream (org.apache.commons.compress.archivers.tar.TarArchiveInputStream)2 CompressorInputStream (org.apache.commons.compress.compressors.CompressorInputStream)2 CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec)2 Test (org.junit.Test)2 Configuration (com.alibaba.datax.common.util.Configuration)1 CompressException (com.alibaba.otter.node.etl.common.io.compress.exception.CompressException)1