use of org.tukaani.xz.XZInputStream in project caffeine by ben-manes.
the class AbstractTraceReader method readFile.
/**
 * Opens the file at the given path and returns a stream over its contents, unwrapping the data
 * when one of the supported container formats accepts it.
 *
 * <p>The formats are probed in a fixed order: XZ first, then whatever the compressor factory
 * detects, then whatever the archive factory detects. When every probe rejects the data, the
 * plain buffered stream is returned.
 *
 * @param filePath the location handed to {@code openFile}
 * @return the decoded stream, or the raw buffered stream if no format matched
 * @throws IOException if the file cannot be opened or the stream cannot be rewound
 */
private InputStream readFile(String filePath) throws IOException {
  BufferedInputStream buffered = new BufferedInputStream(openFile(filePath), BUFFER_SIZE);

  // Remember the start of the stream so that a rejected probe can rewind to it.
  buffered.mark(100);

  try {
    return new XZInputStream(buffered);
  } catch (IOException notXz) {
    // Not readable as XZ: rewind and try the next candidate.
    buffered.reset();
  }

  try {
    return new CompressorStreamFactory().createCompressorInputStream(buffered);
  } catch (CompressorException notCompressed) {
    // No compressor claimed the data: rewind and try the archive formats.
    buffered.reset();
  }

  try {
    return new ArchiveStreamFactory().createArchiveInputStream(buffered);
  } catch (ArchiveException notArchive) {
    // No archive format matched either: fall through to the raw stream.
    buffered.reset();
  }

  return buffered;
}
use of org.tukaani.xz.XZInputStream in project buck by facebook.
the class XzStepTest method testXzStep.
/**
 * Runs an {@code XzStep} over the test data file and verifies that decompressing the produced
 * {@code .xz} file yields exactly the bytes of the source file.
 */
@Test
public void testXzStep() throws IOException {
  final Path sourceFile =
      TestDataHelper.getTestDataScenario(this, "xz_with_rm_and_check").resolve("xzstep.data");
  final File destinationFile = tmp.newFile("xzstep.data.xz");
  ProjectFilesystem filesystem = new ProjectFilesystem(tmp.getRoot().toPath());

  // Compression level 1 keeps the test fast; "keep" is true so the source file is retained.
  int compressionLevel = 1;
  boolean keep = true;
  XzStep step =
      new XzStep(
          filesystem,
          sourceFile,
          destinationFile.toPath(),
          compressionLevel,
          keep,
          XZ.CHECK_CRC32);

  int exitCode = step.execute(TestExecutionContext.newInstance()).getExitCode();
  assertEquals(0, exitCode);

  ByteSource expected = PathByteSource.asByteSource(sourceFile);
  // Streams the step's output back through an XZ decoder each time it is opened.
  ByteSource actual =
      new ByteSource() {
        @Override
        public InputStream openStream() throws IOException {
          FileInputStream compressed = new FileInputStream(destinationFile);
          return new XZInputStream(compressed);
        }
      };
  assertTrue(
      "Decompressed file must be identical to original.", expected.contentEquals(actual));
}
use of org.tukaani.xz.XZInputStream in project languagetool by languagetool-org.
the class CommonCrawlToNgram method indexInputFile.
/**
 * Decompresses the XZ input file and feeds its text to {@code indexLine} in batches of complete
 * lines, calling {@code writeAndEvaluate} once before and once after the run.
 *
 * <p>The text is read through a {@code BufferedReader}, so neither a line nor a multi-byte
 * character is ever cut in half at a read-buffer boundary. All streams are opened inside the
 * try-with-resources header, so the file handle is released even when the XZ header cannot be
 * parsed.
 *
 * @throws IOException if the file cannot be opened, is not valid XZ data, or indexing fails
 */
void indexInputFile() throws IOException {
  // run now so we have a baseline
  writeAndEvaluate();
  // Upper bound on the number of lines handed to indexLine per call.
  final int linesPerBatch = 1000;
  // Fully qualified names are used for the reader classes so this method does not depend on
  // imports beyond those it already needed.
  // NOTE(review): the platform default charset is kept to match the previous
  // new String(byte[], int, int) decoding -- confirm whether UTF-8 should be forced instead.
  try (FileInputStream fin = new FileInputStream(input);
       BufferedInputStream in = new BufferedInputStream(fin);
       XZInputStream xzIn = new XZInputStream(in);
       java.io.BufferedReader reader = new java.io.BufferedReader(
           new java.io.InputStreamReader(xzIn, java.nio.charset.Charset.defaultCharset()))) {
    java.util.List<String> batch = new java.util.ArrayList<>(linesPerBatch);
    String line;
    while ((line = reader.readLine()) != null) {
      batch.add(line);
      if (batch.size() == linesPerBatch) {
        indexLine(batch.toArray(new String[0]));
        batch.clear();
      }
    }
    // Flush whatever is left after the last full batch.
    if (!batch.isEmpty()) {
      indexLine(batch.toArray(new String[0]));
    }
  }
  writeAndEvaluate();
}
use of org.tukaani.xz.XZInputStream in project nifi by apache.
the class CompressContent method onTrigger.
/**
 * Compresses or decompresses the content of one flow file, depending on the MODE property, and
 * routes it to REL_SUCCESS or REL_FAILURE.
 *
 * <p>The compression format comes from the COMPRESSION_FORMAT property; when that property equals
 * COMPRESSION_FORMAT_ATTRIBUTE the format is looked up from the flow file's mime type attribute.
 * After a successful run the mime type attribute is updated and, if UPDATE_FILENAME is set, the
 * format's file extension is appended to or stripped from the filename attribute.
 *
 * @param context supplies the processor property values
 * @param session used to fetch, rewrite and transfer the flow file
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
FlowFile flowFile = session.get();
// Nothing to process.
if (flowFile == null) {
return;
}
final ComponentLog logger = getLogger();
final long sizeBeforeCompression = flowFile.getSize();
final String compressionMode = context.getProperty(MODE).getValue();
String compressionFormatValue = context.getProperty(COMPRESSION_FORMAT).getValue();
// Resolve the format from the flow file's mime type attribute when configured to do so.
if (compressionFormatValue.equals(COMPRESSION_FORMAT_ATTRIBUTE)) {
final String mimeType = flowFile.getAttribute(CoreAttributes.MIME_TYPE.key());
if (mimeType == null) {
logger.error("No {} attribute exists for {}; routing to failure", new Object[] { CoreAttributes.MIME_TYPE.key(), flowFile });
session.transfer(flowFile, REL_FAILURE);
return;
}
compressionFormatValue = compressionFormatMimeTypeMap.get(mimeType);
// An unmapped mime type is passed through untouched.
if (compressionFormatValue == null) {
logger.info("Mime Type of {} is '{}', which does not indicate a supported Compression Format; routing to success without decompressing", new Object[] { flowFile, mimeType });
session.transfer(flowFile, REL_SUCCESS);
return;
}
}
// Final copy so the anonymous callback below can capture it.
final String compressionFormat = compressionFormatValue;
// Set by the callback in compress mode; read afterwards to update the mime type attribute.
final AtomicReference<String> mimeTypeRef = new AtomicReference<>(null);
final StopWatch stopWatch = new StopWatch(true);
// Extension appended to (compress) or stripped from (decompress) the filename attribute.
final String fileExtension;
switch(compressionFormat.toLowerCase()) {
case COMPRESSION_FORMAT_GZIP:
fileExtension = ".gz";
break;
case COMPRESSION_FORMAT_LZMA:
fileExtension = ".lzma";
break;
case COMPRESSION_FORMAT_XZ_LZMA2:
fileExtension = ".xz";
break;
case COMPRESSION_FORMAT_BZIP2:
fileExtension = ".bz2";
break;
case COMPRESSION_FORMAT_SNAPPY:
fileExtension = ".snappy";
break;
case COMPRESSION_FORMAT_SNAPPY_FRAMED:
fileExtension = ".sz";
break;
default:
fileExtension = "";
break;
}
try {
flowFile = session.write(flowFile, new StreamCallback() {
@Override
public void process(final InputStream rawIn, final OutputStream rawOut) throws IOException {
// Exactly one side is wrapped in a codec; the other side is the plain buffered stream.
final OutputStream compressionOut;
final InputStream compressionIn;
final OutputStream bufferedOut = new BufferedOutputStream(rawOut, 65536);
final InputStream bufferedIn = new BufferedInputStream(rawIn, 65536);
try {
if (MODE_COMPRESS.equalsIgnoreCase(compressionMode)) {
// Compress: read the raw input, write through a compressing output stream.
compressionIn = bufferedIn;
switch(compressionFormat.toLowerCase()) {
case COMPRESSION_FORMAT_GZIP:
final int compressionLevel = context.getProperty(COMPRESSION_LEVEL).asInteger();
compressionOut = new GZIPOutputStream(bufferedOut, compressionLevel);
mimeTypeRef.set("application/gzip");
break;
case COMPRESSION_FORMAT_LZMA:
compressionOut = new LzmaOutputStream.Builder(bufferedOut).build();
mimeTypeRef.set("application/x-lzma");
break;
case COMPRESSION_FORMAT_XZ_LZMA2:
compressionOut = new XZOutputStream(bufferedOut, new LZMA2Options());
mimeTypeRef.set("application/x-xz");
break;
case COMPRESSION_FORMAT_SNAPPY:
compressionOut = new SnappyOutputStream(bufferedOut);
mimeTypeRef.set("application/x-snappy");
break;
case COMPRESSION_FORMAT_SNAPPY_FRAMED:
compressionOut = new SnappyFramedOutputStream(bufferedOut);
mimeTypeRef.set("application/x-snappy-framed");
break;
// bzip2 shares the default branch: the stream is created by the factory from the format name,
// and the mime type is set to application/x-bzip2 for every format that reaches this branch.
case COMPRESSION_FORMAT_BZIP2:
default:
mimeTypeRef.set("application/x-bzip2");
compressionOut = new CompressorStreamFactory().createCompressorOutputStream(compressionFormat.toLowerCase(), bufferedOut);
break;
}
} else {
// Any other mode: read through a decompressing input stream, write the raw output.
compressionOut = bufferedOut;
switch(compressionFormat.toLowerCase()) {
case COMPRESSION_FORMAT_LZMA:
compressionIn = new LzmaInputStream(bufferedIn, new Decoder());
break;
case COMPRESSION_FORMAT_XZ_LZMA2:
compressionIn = new XZInputStream(bufferedIn);
break;
case COMPRESSION_FORMAT_BZIP2:
// need this two-arg constructor to support concatenated streams
compressionIn = new BZip2CompressorInputStream(bufferedIn, true);
break;
case COMPRESSION_FORMAT_GZIP:
compressionIn = new GzipCompressorInputStream(bufferedIn, true);
break;
case COMPRESSION_FORMAT_SNAPPY:
compressionIn = new SnappyInputStream(bufferedIn);
break;
case COMPRESSION_FORMAT_SNAPPY_FRAMED:
compressionIn = new SnappyFramedInputStream(bufferedIn);
break;
default:
compressionIn = new CompressorStreamFactory().createCompressorInputStream(compressionFormat.toLowerCase(), bufferedIn);
}
}
} catch (final Exception e) {
// Stream setup failed: close the buffered output and surface the cause as an IOException.
closeQuietly(bufferedOut);
throw new IOException(e);
}
// Copy everything from the input side to the output side; both streams are closed on exit.
try (final InputStream in = compressionIn;
final OutputStream out = compressionOut) {
final byte[] buffer = new byte[8192];
int len;
// The loop ends on the first read that returns zero or a negative count.
while ((len = in.read(buffer)) > 0) {
out.write(buffer, 0, len);
}
out.flush();
}
}
});
stopWatch.stop();
final long sizeAfterCompression = flowFile.getSize();
if (MODE_DECOMPRESS.equalsIgnoreCase(compressionMode)) {
// Decompressed content no longer has the compressed mime type; drop the attribute.
flowFile = session.removeAttribute(flowFile, CoreAttributes.MIME_TYPE.key());
if (context.getProperty(UPDATE_FILENAME).asBoolean()) {
final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
// Strip the extension only when the filename ends with it (case-insensitive on the filename).
if (filename.toLowerCase().endsWith(fileExtension)) {
flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), filename.substring(0, filename.length() - fileExtension.length()));
}
}
} else {
// Record the mime type chosen by the callback and, if requested, append the extension.
flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
if (context.getProperty(UPDATE_FILENAME).asBoolean()) {
final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), filename + fileExtension);
}
}
logger.info("Successfully {}ed {} using {} compression format; size changed from {} to {} bytes", new Object[] { compressionMode.toLowerCase(), flowFile, compressionFormat, sizeBeforeCompression, sizeAfterCompression });
session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getDuration(TimeUnit.MILLISECONDS));
session.transfer(flowFile, REL_SUCCESS);
} catch (final ProcessException e) {
// Only ProcessException is handled here: log it and route the flow file to failure.
logger.error("Unable to {} {} using {} compression format due to {}; routing to failure", new Object[] { compressionMode.toLowerCase(), flowFile, compressionFormat, e });
session.transfer(flowFile, REL_FAILURE);
}
}
use of org.tukaani.xz.XZInputStream in project ant by apache.
the class Unxz method extract.
/**
 * Expands the xz-compressed source resource into the destination file.
 *
 * <p>The work is skipped unless the source resource's last-modified time is later than the
 * destination's.
 *
 * @throws BuildException if reading the resource or writing the destination fails
 */
@Override
protected void extract() {
  if (srcResource.getLastModified() <= dest.lastModified()) {
    // Source is not newer than the destination: nothing to do.
    return;
  }
  log("Expanding " + srcResource.getName() + " to " + dest.getAbsolutePath());
  try (XZInputStream zIn = new XZInputStream(srcResource.getInputStream());
       OutputStream out = Files.newOutputStream(dest.toPath())) {
    byte[] chunk = new byte[BUFFER_SIZE];
    int read;
    // Copy until the decoder reports end of stream.
    while ((read = zIn.read(chunk, 0, chunk.length)) != -1) {
      out.write(chunk, 0, read);
    }
  } catch (IOException ioe) {
    throw new BuildException("Problem expanding xz " + ioe.getMessage(), ioe, getLocation());
  }
}
Aggregations