
Example 1 with CompressorInputStream

Use of org.apache.commons.compress.compressors.CompressorInputStream in project logging-log4j2 by apache.

The class RollingAppenderSizeTest, method testAppender:

@Test
public void testAppender() throws Exception {
    final Path path = Paths.get(DIR, "rollingtest.log");
    if (Files.exists(path) && createOnDemand) {
        Assert.fail(String.format("Unexpected file: %s (%s bytes)", path, Files.getAttribute(path, "size")));
    }
    for (int i = 0; i < 500; ++i) {
        logger.debug("This is test message number " + i);
    }
    try {
        Thread.sleep(100);
    } catch (final InterruptedException ie) {
        // Ignore the error.
    }
    final File dir = new File(DIR);
    assertTrue("Directory not created", dir.exists() && dir.listFiles().length > 0);
    final File[] files = dir.listFiles();
    assertNotNull(files);
    assertThat(files, hasItemInArray(that(hasName(that(endsWith(fileExtension))))));
    final FileExtension ext = FileExtension.lookup(fileExtension);
    if (ext == null || FileExtension.ZIP == ext || FileExtension.PACK200 == ext) {
        // Apache Commons Compress cannot deflate zip? TODO test decompressing these formats
        return;
    }
    // Stop the context to make sure all files are compressed and closed. Trying to remedy failures in CI builds.
    if (!loggerContextRule.getLoggerContext().stop(30, TimeUnit.SECONDS)) {
        System.err.println("Could not stop cleanly " + loggerContextRule + " for " + this);
    }
    for (final File file : files) {
        if (file.getName().endsWith(fileExtension)) {
            CompressorInputStream in = null;
            try (FileInputStream fis = new FileInputStream(file)) {
                try {
                    in = new CompressorStreamFactory().createCompressorInputStream(ext.name().toLowerCase(), fis);
                } catch (final CompressorException ce) {
                    ce.printStackTrace();
                    fail("Error creating input stream from " + file.toString() + ": " + ce.getMessage());
                }
                final ByteArrayOutputStream baos = new ByteArrayOutputStream();
                assertNotNull("No input stream for " + file.getName(), in);
                try {
                    IOUtils.copy(in, baos);
                } catch (final Exception ex) {
                    ex.printStackTrace();
                    fail("Unable to decompress " + file.getAbsolutePath());
                }
                final String text = new String(baos.toByteArray(), Charset.defaultCharset());
                final String[] lines = text.split("[\\r\\n]+");
                for (final String line : lines) {
                    assertTrue(line.contains("DEBUG o.a.l.l.c.a.r.RollingAppenderSizeTest [main] This is test message number"));
                }
            } finally {
                Closer.close(in);
            }
        }
    }
}
Also used : Path(java.nio.file.Path) CompressorStreamFactory(org.apache.commons.compress.compressors.CompressorStreamFactory) CompressorInputStream(org.apache.commons.compress.compressors.CompressorInputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) FileInputStream(java.io.FileInputStream) CompressorException(org.apache.commons.compress.compressors.CompressorException) File(java.io.File) Test(org.junit.Test)
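
The early return above leaves the ZIP and Pack200 rollovers unverified (see the TODO in the comment). If the ZIP branch were to be covered, Commons Compress exposes ZIP through its archiver API rather than the compressor API; the following is only a minimal sketch under that assumption (the helper name readFirstZipEntry and the use of org.apache.commons.compress.archivers.zip.ZipFile are illustrative additions, not part of the original test):

// Illustrative helper: reads the first entry of a rolled ZIP file so its contents
// could be asserted the same way as the other compression formats in the test.
private static String readFirstZipEntry(final File file) throws IOException {
    // ZipFile and ZipArchiveEntry come from org.apache.commons.compress.archivers.zip
    try (ZipFile zipFile = new ZipFile(file)) {
        final ZipArchiveEntry entry = zipFile.getEntries().nextElement();
        try (InputStream entryIn = zipFile.getInputStream(entry)) {
            final ByteArrayOutputStream baos = new ByteArrayOutputStream();
            IOUtils.copy(entryIn, baos);
            return new String(baos.toByteArray(), Charset.defaultCharset());
        }
    }
}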

Example 2 with CompressorInputStream

Use of org.apache.commons.compress.compressors.CompressorInputStream in project DataX by alibaba.

The class UnstructuredStorageReaderUtil, method readFromStream:

public static void readFromStream(InputStream inputStream, String context, Configuration readerSliceConfig, RecordSender recordSender, TaskPluginCollector taskPluginCollector) {
    String compress = readerSliceConfig.getString(Key.COMPRESS, null);
    if (StringUtils.isBlank(compress)) {
        compress = null;
    }
    String encoding = readerSliceConfig.getString(Key.ENCODING, Constant.DEFAULT_ENCODING);
    // handle blank encoding: warn with the originally configured value, then fall back to the default
    if (StringUtils.isBlank(encoding)) {
        LOG.warn(String.format("The configured encoding is [%s]; using the default [%s]", encoding, Constant.DEFAULT_ENCODING));
        encoding = Constant.DEFAULT_ENCODING;
    }
    List<Configuration> column = readerSliceConfig.getListConfiguration(Key.COLUMN);
    // handle ["*"] -> [], null
    if (null != column && 1 == column.size() && "\"*\"".equals(column.get(0).toString())) {
        readerSliceConfig.set(Key.COLUMN, null);
        column = null;
    }
    BufferedReader reader = null;
    int bufferSize = readerSliceConfig.getInt(Key.BUFFER_SIZE, Constant.DEFAULT_BUFFER_SIZE);
    // compress logic
    try {
        if (null == compress) {
            reader = new BufferedReader(new InputStreamReader(inputStream, encoding), bufferSize);
        } else {
            // TODO compress
            if ("lzo_deflate".equalsIgnoreCase(compress)) {
                LzoInputStream lzoInputStream = new LzoInputStream(inputStream, new LzoDecompressor1x_safe());
                reader = new BufferedReader(new InputStreamReader(lzoInputStream, encoding));
            } else if ("lzo".equalsIgnoreCase(compress)) {
                LzoInputStream lzopInputStream = new ExpandLzopInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(lzopInputStream, encoding));
            } else if ("gzip".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new GzipCompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
            } else if ("bzip2".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new BZip2CompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
            } else if ("hadoop-snappy".equalsIgnoreCase(compress)) {
                CompressionCodec snappyCodec = new SnappyCodec();
                InputStream snappyInputStream = snappyCodec.createInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
            } else if ("framing-snappy".equalsIgnoreCase(compress)) {
                InputStream snappyInputStream = new SnappyFramedInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
            } else /*else if ("xz".equalsIgnoreCase(compress)) {
					CompressorInputStream compressorInputStream = new XZCompressorInputStream(
							inputStream);
					reader = new BufferedReader(new InputStreamReader(
							compressorInputStream, encoding));
				} else if ("ar".equalsIgnoreCase(compress)) {
					ArArchiveInputStream arArchiveInputStream = new ArArchiveInputStream(
							inputStream);
					reader = new BufferedReader(new InputStreamReader(
							arArchiveInputStream, encoding));
				} else if ("arj".equalsIgnoreCase(compress)) {
					ArjArchiveInputStream arjArchiveInputStream = new ArjArchiveInputStream(
							inputStream);
					reader = new BufferedReader(new InputStreamReader(
							arjArchiveInputStream, encoding));
				} else if ("cpio".equalsIgnoreCase(compress)) {
					CpioArchiveInputStream cpioArchiveInputStream = new CpioArchiveInputStream(
							inputStream);
					reader = new BufferedReader(new InputStreamReader(
							cpioArchiveInputStream, encoding));
				} else if ("dump".equalsIgnoreCase(compress)) {
					DumpArchiveInputStream dumpArchiveInputStream = new DumpArchiveInputStream(
							inputStream);
					reader = new BufferedReader(new InputStreamReader(
							dumpArchiveInputStream, encoding));
				} else if ("jar".equalsIgnoreCase(compress)) {
					JarArchiveInputStream jarArchiveInputStream = new JarArchiveInputStream(
							inputStream);
					reader = new BufferedReader(new InputStreamReader(
							jarArchiveInputStream, encoding));
				} else if ("tar".equalsIgnoreCase(compress)) {
					TarArchiveInputStream tarArchiveInputStream = new TarArchiveInputStream(
							inputStream);
					reader = new BufferedReader(new InputStreamReader(
							tarArchiveInputStream, encoding));
				}*/
            if ("zip".equalsIgnoreCase(compress)) {
                ZipCycleInputStream zipCycleInputStream = new ZipCycleInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(zipCycleInputStream, encoding), bufferSize);
            } else {
                throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.ILLEGAL_VALUE, String.format("Only the gzip, bzip2, zip, lzo, lzo_deflate, hadoop-snappy and framing-snappy compression formats are supported; the configured format [%s] is not supported", compress));
            }
        }
        UnstructuredStorageReaderUtil.doReadFromStream(reader, context, readerSliceConfig, recordSender, taskPluginCollector);
    } catch (UnsupportedEncodingException uee) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.OPEN_FILE_WITH_CHARSET_ERROR, String.format("Unsupported encoding: [%s]", encoding), uee);
    } catch (NullPointerException e) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.RUNTIME_EXCEPTION, "Runtime error, please contact us", e);
    }
    /* catch (ArchiveException e) {
        throw DataXException.asDataXException(
                UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR,
                String.format("Error reading the compressed file stream: [%s]", context), e);
    } */
    catch (IOException e) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR, String.format("Error reading the stream: [%s]", context), e);
    } finally {
        IOUtils.closeQuietly(reader);
    }
}
Also used : GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) Configuration(com.alibaba.datax.common.util.Configuration) CompressorInputStream(org.apache.commons.compress.compressors.CompressorInputStream) SnappyFramedInputStream(io.airlift.compress.snappy.SnappyFramedInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) SnappyCodec(io.airlift.compress.snappy.SnappyCodec)
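
The explicit if/else chain above exists because several of these codecs (lzo, lzo_deflate, hadoop-snappy, and the ZipCycleInputStream handling) live outside Commons Compress. For the formats Commons Compress does cover (gzip, bzip2, and xz as in the commented-out branch), the dispatch could in principle be collapsed into the factory's auto-detection. A minimal sketch, not DataX code; the class and method names are illustrative:

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import org.apache.commons.compress.compressors.CompressorException;
import org.apache.commons.compress.compressors.CompressorInputStream;
import org.apache.commons.compress.compressors.CompressorStreamFactory;

public final class AutoDetectReaderFactory {

    // Wraps a raw stream in a reader, letting Commons Compress sniff gzip/bzip2/xz from the
    // magic bytes. Auto-detection needs mark()/reset(), hence the BufferedInputStream wrapper.
    public static BufferedReader open(final InputStream rawStream, final String encoding, final int bufferSize)
            throws CompressorException, UnsupportedEncodingException {
        final InputStream buffered = new BufferedInputStream(rawStream);
        final CompressorInputStream decompressed =
                new CompressorStreamFactory().createCompressorInputStream(buffered);
        return new BufferedReader(new InputStreamReader(decompressed, encoding), bufferSize);
    }

    private AutoDetectReaderFactory() {
    }
}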

Example 3 with CompressorInputStream

Use of org.apache.commons.compress.compressors.CompressorInputStream in project uPortal by Jasig.

The class JaxbPortalDataHandlerService, method importDataArchive:

private void importDataArchive(Resource archive, InputStream resourceStream, BatchImportOptions options) {
    BufferedInputStream bufferedResourceStream = null;
    try {
        // Make sure the stream is buffered
        if (resourceStream instanceof BufferedInputStream) {
            bufferedResourceStream = (BufferedInputStream) resourceStream;
        } else {
            bufferedResourceStream = new BufferedInputStream(resourceStream);
        }
        // Buffer up to 100MB, bad things will happen if we bust this buffer.
        // TODO see if there is a buffered stream that will write to a file once the buffer
        // fills up
        bufferedResourceStream.mark(100 * 1024 * 1024);
        final MediaType type = getMediaType(bufferedResourceStream, archive.getFilename());
        if (MT_JAVA_ARCHIVE.equals(type)) {
            final ArchiveInputStream archiveStream = new JarArchiveInputStream(bufferedResourceStream);
            importDataArchive(archive, archiveStream, options);
        } else if (MediaType.APPLICATION_ZIP.equals(type)) {
            final ArchiveInputStream archiveStream = new ZipArchiveInputStream(bufferedResourceStream);
            importDataArchive(archive, archiveStream, options);
        } else if (MT_CPIO.equals(type)) {
            final ArchiveInputStream archiveStream = new CpioArchiveInputStream(bufferedResourceStream);
            importDataArchive(archive, archiveStream, options);
        } else if (MT_AR.equals(type)) {
            final ArchiveInputStream archiveStream = new ArArchiveInputStream(bufferedResourceStream);
            importDataArchive(archive, archiveStream, options);
        } else if (MT_TAR.equals(type)) {
            final ArchiveInputStream archiveStream = new TarArchiveInputStream(bufferedResourceStream);
            importDataArchive(archive, archiveStream, options);
        } else if (MT_BZIP2.equals(type)) {
            final CompressorInputStream compressedStream = new BZip2CompressorInputStream(bufferedResourceStream);
            importDataArchive(archive, compressedStream, options);
        } else if (MT_GZIP.equals(type)) {
            final CompressorInputStream compressedStream = new GzipCompressorInputStream(bufferedResourceStream);
            importDataArchive(archive, compressedStream, options);
        } else if (MT_PACK200.equals(type)) {
            final CompressorInputStream compressedStream = new Pack200CompressorInputStream(bufferedResourceStream);
            importDataArchive(archive, compressedStream, options);
        } else if (MT_XZ.equals(type)) {
            final CompressorInputStream compressedStream = new XZCompressorInputStream(bufferedResourceStream);
            importDataArchive(archive, compressedStream, options);
        } else {
            throw new RuntimeException("Unrecognized archive media type: " + type);
        }
    } catch (IOException e) {
        throw new RuntimeException("Could not load InputStream for resource: " + archive, e);
    } finally {
        IOUtils.closeQuietly(bufferedResourceStream);
    }
}
Also used : JarArchiveInputStream(org.apache.commons.compress.archivers.jar.JarArchiveInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) ArArchiveInputStream(org.apache.commons.compress.archivers.ar.ArArchiveInputStream) CompressorInputStream(org.apache.commons.compress.compressors.CompressorInputStream) XZCompressorInputStream(org.apache.commons.compress.compressors.xz.XZCompressorInputStream) Pack200CompressorInputStream(org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) IOException(java.io.IOException) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) CpioArchiveInputStream(org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream) BufferedInputStream(java.io.BufferedInputStream) MediaType(org.apache.tika.mime.MediaType)
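
Here the container format is decided up front with Tika's MediaType probe. Commons Compress can also do the sniffing itself; the sketch below is only illustrative (the class name is invented, and the static ArchiveStreamFactory.detect / CompressorStreamFactory.detect methods it relies on were added in Commons Compress 1.14):

import java.io.BufferedInputStream;
import java.io.InputStream;
import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.compressors.CompressorException;
import org.apache.commons.compress.compressors.CompressorStreamFactory;

final class ArchiveFormatProbe {

    // Returns the detected archive name ("ar", "cpio", "tar", "zip", ...), the detected
    // compressor name ("bzip2", "gz", "xz", "pack200", ...), or null if neither matches.
    // Both detect() calls peek at the magic bytes, so the stream must support mark().
    static String probe(final InputStream stream) {
        final InputStream in = stream.markSupported() ? stream : new BufferedInputStream(stream);
        try {
            return ArchiveStreamFactory.detect(in);
        } catch (final ArchiveException notAnArchive) {
            try {
                return CompressorStreamFactory.detect(in);
            } catch (final CompressorException notCompressed) {
                return null;
            }
        }
    }

    private ArchiveFormatProbe() {
    }
}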

Example 4 with CompressorInputStream

Use of org.apache.commons.compress.compressors.CompressorInputStream in project languagetool by languagetool-org.

The class WikipediaSentenceExtractor, method extract:

private void extract(Language language, String xmlDumpPath) throws IOException, CompressorException {
    try (FileInputStream fis = new FileInputStream(xmlDumpPath);
        BufferedInputStream bis = new BufferedInputStream(fis);
        CompressorInputStream input = new CompressorStreamFactory().createCompressorInputStream(bis)) {
        int sentenceCount = 0;
        WikipediaSentenceSource source = new WikipediaSentenceSource(input, language);
        while (source.hasNext()) {
            String sentence = source.next().getText();
            if (skipSentence(sentence)) {
                continue;
            }
            System.out.println(sentence);
            sentenceCount++;
            if (sentenceCount % 1000 == 0) {
                System.err.println("Exporting sentence #" + sentenceCount + "...");
            }
        }
    }
}
Also used : BufferedInputStream(java.io.BufferedInputStream) CompressorStreamFactory(org.apache.commons.compress.compressors.CompressorStreamFactory) CompressorInputStream(org.apache.commons.compress.compressors.CompressorInputStream) FileInputStream(java.io.FileInputStream)
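
Wikipedia dumps are normally bzip2-compressed, so the format could also be requested by name instead of sniffed, which drops the mark()/reset() requirement that the BufferedInputStream above exists to satisfy. A sketch of that variant (the method name is illustrative, not LanguageTool code; CompressorStreamFactory.BZIP2 is the factory's "bzip2" name constant):

// Illustrative variant of the try-with-resources header above: request bzip2 explicitly
// instead of auto-detecting it, so no mark()-capable wrapper stream is required.
private void extractBzip2(Language language, String xmlDumpPath) throws IOException, CompressorException {
    try (FileInputStream fis = new FileInputStream(xmlDumpPath);
        CompressorInputStream input = new CompressorStreamFactory()
                .createCompressorInputStream(CompressorStreamFactory.BZIP2, fis)) {
        WikipediaSentenceSource source = new WikipediaSentenceSource(input, language);
        while (source.hasNext()) {
            System.out.println(source.next().getText());
        }
    }
}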

Example 5 with CompressorInputStream

Use of org.apache.commons.compress.compressors.CompressorInputStream in project languagetool by languagetool-org.

The class CommonCrawlToNgram3, method indexInputFile:

private void indexInputFile() throws IOException, CompressorException {
    FileInputStream fin = new FileInputStream(input);
    BufferedInputStream in = new BufferedInputStream(fin);
    try (CompressorInputStream input = new CompressorStreamFactory().createCompressorInputStream(in)) {
        final byte[] buffer = new byte[8192];
        int n;
        while ((n = input.read(buffer)) != -1) {
            // TODO: not always correct, we need to wait for line end first?
            String buf = new String(buffer, 0, n);
            String[] lines = buf.split("\n");
            indexLine(lines);
        }
    }
    writeToDisk(1, unigramToCount);
    writeToDisk(2, bigramToCount);
    writeToDisk(3, trigramToCount);
}
Also used : CompressorStreamFactory(org.apache.commons.compress.compressors.CompressorStreamFactory) CompressorInputStream(org.apache.commons.compress.compressors.CompressorInputStream)
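
The TODO inside the read loop flags a real limitation: a fixed 8192-byte read can end in the middle of a line (or of a multi-byte character), so splitting each buffer on "\n" may pass partial lines to indexLine(). A minimal alternative sketch, not the project's code, assuming indexLine() accepts a String[] as in the loop above and that the input is UTF-8:

// Line-oriented variant of indexInputFile(): the BufferedReader handles line boundaries,
// so indexLine() never sees a line that was cut at a buffer boundary.
private void indexInputFileByLine() throws IOException, CompressorException {
    try (FileInputStream fin = new FileInputStream(input);
        BufferedInputStream in = new BufferedInputStream(fin);
        CompressorInputStream decompressed = new CompressorStreamFactory().createCompressorInputStream(in);
        BufferedReader reader = new BufferedReader(new InputStreamReader(decompressed, StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            indexLine(new String[] { line });
        }
    }
    writeToDisk(1, unigramToCount);
    writeToDisk(2, bigramToCount);
    writeToDisk(3, trigramToCount);
}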

Aggregations

CompressorInputStream (org.apache.commons.compress.compressors.CompressorInputStream): 9
CompressorStreamFactory (org.apache.commons.compress.compressors.CompressorStreamFactory): 7
BufferedInputStream (java.io.BufferedInputStream): 6
FileInputStream (java.io.FileInputStream): 4
CompressorException (org.apache.commons.compress.compressors.CompressorException): 4
BZip2CompressorInputStream (org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream): 3
GzipCompressorInputStream (org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream): 3
IOException (java.io.IOException): 2
Pack200CompressorInputStream (org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream): 2
XZCompressorInputStream (org.apache.commons.compress.compressors.xz.XZCompressorInputStream): 2
MediaType (org.apache.tika.mime.MediaType): 2
Configuration (com.alibaba.datax.common.util.Configuration): 1
SnappyCodec (io.airlift.compress.snappy.SnappyCodec): 1
SnappyFramedInputStream (io.airlift.compress.snappy.SnappyFramedInputStream): 1
BufferedReader (java.io.BufferedReader): 1
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1
File (java.io.File): 1
FileNotFoundException (java.io.FileNotFoundException): 1
InputStream (java.io.InputStream): 1
InputStreamReader (java.io.InputStreamReader): 1