Use of org.apache.commons.compress.compressors.CompressorInputStream in the project logging-log4j2 by Apache: class RollingAppenderSizeTest, method testAppender.
/**
 * Logs 500 messages through a size-based rolling appender, then verifies that
 * rolled files exist, carry the expected extension, and (for formats Commons
 * Compress can read back) decompress to the original DEBUG lines.
 *
 * @throws Exception on any I/O or logging failure; assertion failures report details
 */
@Test
public void testAppender() throws Exception {
    final Path path = Paths.get(DIR, "rollingtest.log");
    // With createOnDemand the base file must not pre-exist; a leftover file means a dirty test dir.
    if (Files.exists(path) && createOnDemand) {
        Assert.fail(String.format("Unexpected file: %s (%s bytes)", path, Files.getAttribute(path, "size")));
    }
    for (int i = 0; i < 500; ++i) {
        logger.debug("This is test message number " + i);
    }
    try {
        // Give the async rollover machinery a moment to finish writing.
        Thread.sleep(100);
    } catch (final InterruptedException ie) {
        // Restore the interrupt flag instead of swallowing the interruption.
        Thread.currentThread().interrupt();
    }
    final File dir = new File(DIR);
    // List once: listFiles() returns null on I/O error, so null-check before using length.
    final File[] files = dir.listFiles();
    assertNotNull("Directory not listable: " + dir, files);
    assertTrue("Directory not created", dir.exists() && files.length > 0);
    assertThat(files, hasItemInArray(that(hasName(that(endsWith(fileExtension))))));
    final FileExtension ext = FileExtension.lookup(fileExtension);
    if (ext == null || FileExtension.ZIP == ext || FileExtension.PACK200 == ext) {
        // Apache Commons Compress cannot deflate zip? TODO test decompressing these formats
        return;
    }
    // Stop the context to make sure all files are compressed and closed. Trying to remedy failures in CI builds.
    if (!loggerContextRule.getLoggerContext().stop(30, TimeUnit.SECONDS)) {
        System.err.println("Could not stop cleanly " + loggerContextRule + " for " + this);
    }
    for (final File file : files) {
        if (file.getName().endsWith(fileExtension)) {
            CompressorInputStream in = null;
            try (FileInputStream fis = new FileInputStream(file)) {
                try {
                    // Locale.ROOT keeps the codec name stable under e.g. the Turkish locale ("GZ" -> "gz").
                    in = new CompressorStreamFactory().createCompressorInputStream(
                            ext.name().toLowerCase(java.util.Locale.ROOT), fis);
                } catch (final CompressorException ce) {
                    ce.printStackTrace();
                    fail("Error creating input stream from " + file.toString() + ": " + ce.getMessage());
                }
                final ByteArrayOutputStream baos = new ByteArrayOutputStream();
                assertNotNull("No input stream for " + file.getName(), in);
                try {
                    IOUtils.copy(in, baos);
                } catch (final Exception ex) {
                    ex.printStackTrace();
                    fail("Unable to decompress " + file.getAbsolutePath());
                }
                final String text = new String(baos.toByteArray(), Charset.defaultCharset());
                final String[] lines = text.split("[\\r\\n]+");
                // Every decompressed line must be one of our DEBUG test messages.
                for (final String line : lines) {
                    assertTrue(line.contains("DEBUG o.a.l.l.c.a.r.RollingAppenderSizeTest [main] This is test message number"));
                }
            } finally {
                Closer.close(in);
            }
        }
    }
}
Use of org.apache.commons.compress.compressors.CompressorInputStream in the project DataX by Alibaba: class UnstructuredStorageReaderUtil, method readFromStream.
/**
 * Wraps the raw input stream with the decompression codec named by the
 * {@code compress} setting (or none), then hands a buffered reader to
 * {@code doReadFromStream} to emit records.
 *
 * @param inputStream         raw (possibly compressed) byte stream to read
 * @param context             description of the source, used in error messages
 * @param readerSliceConfig   slice configuration (compress, encoding, column, bufferSize)
 * @param recordSender        sink for parsed records
 * @param taskPluginCollector collector for dirty-data reporting
 */
public static void readFromStream(InputStream inputStream, String context, Configuration readerSliceConfig, RecordSender recordSender, TaskPluginCollector taskPluginCollector) {
    String compress = readerSliceConfig.getString(Key.COMPRESS, null);
    if (StringUtils.isBlank(compress)) {
        compress = null;
    }
    String encoding = readerSliceConfig.getString(Key.ENCODING, Constant.DEFAULT_ENCODING);
    // handle blank encoding
    if (StringUtils.isBlank(encoding)) {
        // Log BEFORE overwriting so the warning shows the user's (blank) value,
        // not the default it is being replaced with.
        LOG.warn(String.format("您配置的encoding为[%s], 使用默认值[%s]", encoding, Constant.DEFAULT_ENCODING));
        encoding = Constant.DEFAULT_ENCODING;
    }
    List<Configuration> column = readerSliceConfig.getListConfiguration(Key.COLUMN);
    // handle ["*"] -> [], null
    if (null != column && 1 == column.size() && "\"*\"".equals(column.get(0).toString())) {
        readerSliceConfig.set(Key.COLUMN, null);
        column = null;
    }
    BufferedReader reader = null;
    int bufferSize = readerSliceConfig.getInt(Key.BUFFER_SIZE, Constant.DEFAULT_BUFFER_SIZE);
    // Select the decompression wrapper for the configured codec, then read.
    try {
        if (null == compress) {
            reader = new BufferedReader(new InputStreamReader(inputStream, encoding), bufferSize);
        } else {
            if ("lzo_deflate".equalsIgnoreCase(compress)) {
                LzoInputStream lzoInputStream = new LzoInputStream(inputStream, new LzoDecompressor1x_safe());
                reader = new BufferedReader(new InputStreamReader(lzoInputStream, encoding));
            } else if ("lzo".equalsIgnoreCase(compress)) {
                // "lzo" here means the lzop container format.
                LzoInputStream lzopInputStream = new ExpandLzopInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(lzopInputStream, encoding));
            } else if ("gzip".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new GzipCompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
            } else if ("bzip2".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new BZip2CompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
            } else if ("hadoop-snappy".equalsIgnoreCase(compress)) {
                CompressionCodec snappyCodec = new SnappyCodec();
                InputStream snappyInputStream = snappyCodec.createInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
            } else if ("framing-snappy".equalsIgnoreCase(compress)) {
                InputStream snappyInputStream = new SnappyFramedInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
            } else if ("zip".equalsIgnoreCase(compress)) {
                ZipCycleInputStream zipCycleInputStream = new ZipCycleInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(zipCycleInputStream, encoding), bufferSize);
            } else {
                // Added the missing space between the codec list and "文件压缩格式".
                throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.ILLEGAL_VALUE, String.format("仅支持 gzip, bzip2, zip, lzo, lzo_deflate, hadoop-snappy, framing-snappy " + "文件压缩格式 , 不支持您配置的文件压缩格式: [%s]", compress));
            }
        }
        UnstructuredStorageReaderUtil.doReadFromStream(reader, context, readerSliceConfig, recordSender, taskPluginCollector);
    } catch (UnsupportedEncodingException uee) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.OPEN_FILE_WITH_CHARSET_ERROR, String.format("不支持的编码格式 : [%s]", encoding), uee);
    } catch (NullPointerException e) {
        // Deliberately translated into a DataX runtime error rather than crashing the task.
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.RUNTIME_EXCEPTION, "运行时错误, 请联系我们", e);
    } catch (IOException e) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR, String.format("流读取错误 : [%s]", context), e);
    } finally {
        IOUtils.closeQuietly(reader);
    }
}
Use of org.apache.commons.compress.compressors.CompressorInputStream in the project uPortal by Jasig: class JaxbPortalDataHandlerService, method importDataArchive.
/**
 * Sniffs the media type of the given stream and recursively unwraps it:
 * archive formats (jar/zip/cpio/ar/tar) are delegated to an ArchiveInputStream
 * overload, while single-file compressors (bzip2/gzip/pack200/xz) are unwrapped
 * and re-submitted to this method so nested formats (e.g. .tar.gz) resolve.
 *
 * NOTE(review): relies on mark/reset for getMediaType to rewind after sniffing;
 * the 100MB mark limit below bounds how much that sniffing may consume — confirm.
 */
private void importDataArchive(Resource archive, InputStream resourceStream, BatchImportOptions options) {
BufferedInputStream bufferedResourceStream = null;
try {
// Make sure the stream is buffered
if (resourceStream instanceof BufferedInputStream) {
// Already buffered; wrap-free reuse (closed in finally either way).
bufferedResourceStream = (BufferedInputStream) resourceStream;
} else {
bufferedResourceStream = new BufferedInputStream(resourceStream);
}
// Buffer up to 100MB, bad things will happen if we bust this buffer.
// TODO see if there is a buffered stream that will write to a file once the buffer
// fills up
bufferedResourceStream.mark(100 * 1024 * 1024);
final MediaType type = getMediaType(bufferedResourceStream, archive.getFilename());
// Archive containers: hand off to the ArchiveInputStream overload.
if (MT_JAVA_ARCHIVE.equals(type)) {
final ArchiveInputStream archiveStream = new JarArchiveInputStream(bufferedResourceStream);
importDataArchive(archive, archiveStream, options);
} else if (MediaType.APPLICATION_ZIP.equals(type)) {
final ArchiveInputStream archiveStream = new ZipArchiveInputStream(bufferedResourceStream);
importDataArchive(archive, archiveStream, options);
} else if (MT_CPIO.equals(type)) {
final ArchiveInputStream archiveStream = new CpioArchiveInputStream(bufferedResourceStream);
importDataArchive(archive, archiveStream, options);
} else if (MT_AR.equals(type)) {
final ArchiveInputStream archiveStream = new ArArchiveInputStream(bufferedResourceStream);
importDataArchive(archive, archiveStream, options);
} else if (MT_TAR.equals(type)) {
final ArchiveInputStream archiveStream = new TarArchiveInputStream(bufferedResourceStream);
importDataArchive(archive, archiveStream, options);
// Compressors: unwrap one layer and recurse into this InputStream overload.
} else if (MT_BZIP2.equals(type)) {
final CompressorInputStream compressedStream = new BZip2CompressorInputStream(bufferedResourceStream);
importDataArchive(archive, compressedStream, options);
} else if (MT_GZIP.equals(type)) {
final CompressorInputStream compressedStream = new GzipCompressorInputStream(bufferedResourceStream);
importDataArchive(archive, compressedStream, options);
} else if (MT_PACK200.equals(type)) {
final CompressorInputStream compressedStream = new Pack200CompressorInputStream(bufferedResourceStream);
importDataArchive(archive, compressedStream, options);
} else if (MT_XZ.equals(type)) {
final CompressorInputStream compressedStream = new XZCompressorInputStream(bufferedResourceStream);
importDataArchive(archive, compressedStream, options);
} else {
throw new RuntimeException("Unrecognized archive media type: " + type);
}
} catch (IOException e) {
throw new RuntimeException("Could not load InputStream for resource: " + archive, e);
} finally {
// Closing the buffered wrapper also closes the underlying resourceStream.
IOUtils.closeQuietly(bufferedResourceStream);
}
}
Use of org.apache.commons.compress.compressors.CompressorInputStream in the project languagetool by languagetool-org: class WikipediaSentenceExtractor, method extract.
/**
 * Streams sentences out of a compressed Wikipedia XML dump, printing each kept
 * sentence to stdout and a progress marker to stderr every 1000 sentences.
 *
 * @param language    language used to segment the dump into sentences
 * @param xmlDumpPath path to the compressed XML dump file
 * @throws IOException         if the dump cannot be read
 * @throws CompressorException if the compression format cannot be detected
 */
private void extract(Language language, String xmlDumpPath) throws IOException, CompressorException {
    try (FileInputStream dumpIn = new FileInputStream(xmlDumpPath);
         BufferedInputStream bufferedIn = new BufferedInputStream(dumpIn);
         CompressorInputStream decompressedIn = new CompressorStreamFactory().createCompressorInputStream(bufferedIn)) {
        WikipediaSentenceSource sentences = new WikipediaSentenceSource(decompressedIn, language);
        int exported = 0;
        while (sentences.hasNext()) {
            String text = sentences.next().getText();
            // Only count and emit sentences that pass the filter.
            if (!skipSentence(text)) {
                System.out.println(text);
                exported++;
                if (exported % 1000 == 0) {
                    System.err.println("Exporting sentence #" + exported + "...");
                }
            }
        }
    }
}
Use of org.apache.commons.compress.compressors.CompressorInputStream in the project languagetool by languagetool-org: class CommonCrawlToNgram3, method indexInputFile.
/**
 * Reads the compressed input file chunk by chunk, indexes each chunk's lines
 * into the n-gram counters, then flushes uni-/bi-/trigram counts to disk.
 *
 * @throws IOException         if the file cannot be read
 * @throws CompressorException if the compression format cannot be detected
 */
private void indexInputFile() throws IOException, CompressorException {
    // All three streams go in try-with-resources: previously fin/in were opened
    // outside the try, so they leaked whenever createCompressorInputStream threw.
    // The local is also renamed so it no longer shadows the 'input' field (the File).
    try (FileInputStream fin = new FileInputStream(input);
         BufferedInputStream in = new BufferedInputStream(fin);
         CompressorInputStream compressedIn = new CompressorStreamFactory().createCompressorInputStream(in)) {
        final byte[] buffer = new byte[8192];
        int n;
        while ((n = compressedIn.read(buffer)) != -1) {
            // TODO: not always correct, we need to wait for line end first?
            // A line spanning two reads is currently indexed as two lines.
            // NOTE(review): uses the platform default charset — confirm the dump's encoding.
            String buf = new String(buffer, 0, n);
            String[] lines = buf.split("\n");
            indexLine(lines);
        }
    }
    writeToDisk(1, unigramToCount);
    writeToDisk(2, bigramToCount);
    writeToDisk(3, trigramToCount);
}
Aggregations