Use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in the logging-log4j2 project (Apache).
From class Bzip2CompressActionTest, method testExecuteCompressesSourceFileToDestinationFile:
/**
 * Verifies that {@code CommonsCompressAction.execute("bzip2", ...)} compresses the
 * source file into the expected bzip2 byte stream, deletes the source file, and
 * that the expected compressed bytes decompress back to the original text.
 *
 * @throws IOException if writing the fixture file or reading streams fails
 */
@Test
public void testExecuteCompressesSourceFileToDestinationFile() throws IOException {
    final String LINE1 = "Here is line 1. Random text: ABCDEFGHIJKLMNOPQRSTUVWXYZ\r\n";
    final String LINE2 = "Here is line 2. Random text: ABCDEFGHIJKLMNOPQRSTUVWXYZ\r\n";
    final String LINE3 = "Here is line 3. Random text: ABCDEFGHIJKLMNOPQRSTUVWXYZ\r\n";
    // Write the three-line fixture file that will be compressed.
    final File source = new File("target/compressme");
    try (FileWriter fw = new FileWriter(source, false)) {
        fw.write(LINE1);
        fw.write(LINE2);
        fw.write(LINE3);
        fw.flush();
    }
    final File destination = new File("target/compressme.bz2");
    // just in case a previous run left it behind
    destination.delete();
    assertFalse("Destination should not exist yet", destination.exists());
    final boolean actual = CommonsCompressAction.execute("bzip2", source, destination, true);
    assertEquals("Bzip2CompressAction should have succeeded", true, actual);
    assertTrue("Destination should exist after Bzip2CompressAction", destination.exists());
    assertFalse("Source should have been deleted", source.exists());
    // Expected bzip2 stream for the fixture (starts with the "BZh9" magic bytes).
    final byte[] bz2 = new byte[] { (byte) 0x42, (byte) 0x5A, (byte) 0x68, (byte) 0x39, (byte) 0x31, (byte) 0x41, (byte) 0x59, (byte) 0x26, (byte) 0x53, (byte) 0x59, (byte) 0x9C, (byte) 0xE1, (byte) 0xE8, (byte) 0x2D, (byte) 0x00, (byte) 0x00, (byte) 0x1C, (byte) 0xDF, (byte) 0x80, (byte) 0x00, (byte) 0x12, (byte) 0x40, (byte) 0x01, (byte) 0x38, (byte) 0x10, (byte) 0x3F, (byte) 0xFF, (byte) 0xFF, (byte) 0xF0, (byte) 0x26, (byte) 0x27, (byte) 0x9C, (byte) 0x40, (byte) 0x20, (byte) 0x00, (byte) 0x70, (byte) 0x63, (byte) 0x4D, (byte) 0x06, (byte) 0x80, (byte) 0x19, (byte) 0x34, (byte) 0x06, (byte) 0x46, (byte) 0x9A, (byte) 0x18, (byte) 0x9A, (byte) 0x30, (byte) 0xCF, (byte) 0xFD, (byte) 0x55, (byte) 0x4D, (byte) 0x0D, (byte) 0x06, (byte) 0x9A, (byte) 0x0C, (byte) 0x40, (byte) 0x1A, (byte) 0x1A, (byte) 0x34, (byte) 0x34, (byte) 0xCD, (byte) 0x46, (byte) 0x05, (byte) 0x6B, (byte) 0x19, (byte) 0x92, (byte) 0x23, (byte) 0x5E, (byte) 0xB5, (byte) 0x2E, (byte) 0x79, (byte) 0x65, (byte) 0x41, (byte) 0x81, (byte) 0x33, (byte) 0x4B, (byte) 0x53, (byte) 0x5B, (byte) 0x62, (byte) 0x75, (byte) 0x0A, (byte) 0x14, (byte) 0xB6, (byte) 0xB7, (byte) 0x37, (byte) 0xB8, (byte) 0x38, (byte) 0xB9, (byte) 0x39, (byte) 0xBA, (byte) 0x2A, (byte) 0x4E, (byte) 0xEA, (byte) 0xEC, (byte) 0xEE, (byte) 0xAD, (byte) 0xE1, (byte) 0xE5, (byte) 0x63, (byte) 0xD3, (byte) 0x22, (byte) 0xE8, (byte) 0x90, (byte) 0x52, (byte) 0xA9, (byte) 0x7A, (byte) 0x68, (byte) 0x90, (byte) 0x5C, (byte) 0x82, (byte) 0x0B, (byte) 0x51, (byte) 0xBF, (byte) 0x24, (byte) 0x61, (byte) 0x7F, (byte) 0x17, (byte) 0x72, (byte) 0x45, (byte) 0x38, (byte) 0x50, (byte) 0x90, (byte) 0x9C, (byte) 0xE1, (byte) 0xE8, (byte) 0x2D };
    assertEquals(bz2.length, destination.length());
    // check the compressed contents
    try (FileInputStream fis = new FileInputStream(destination)) {
        final byte[] actualBz2 = new byte[bz2.length];
        int offset = 0;
        while (offset < actualBz2.length) {
            final int n = fis.read(actualBz2, offset, actualBz2.length - offset);
            // Guard against premature EOF: the previous version added n to offset
            // unconditionally, so a -1 from read() would walk offset backwards and
            // loop forever instead of failing the test.
            assertTrue("Unexpected EOF while reading compressed file", n >= 0);
            offset += n;
        }
        assertArrayEquals("Compressed data corrupt", bz2, actualBz2);
    }
    destination.delete();
    // uncompress the expected bytes and confirm they round-trip to the fixture text
    try (BZip2CompressorInputStream bzin = new BZip2CompressorInputStream(new ByteArrayInputStream(bz2))) {
        final StringBuilder sb = new StringBuilder();
        final byte[] buf = new byte[1024];
        int n = 0;
        while ((n = bzin.read(buf, 0, buf.length)) > -1) {
            sb.append(new String(buf, 0, n));
        }
        assertEquals(LINE1 + LINE2 + LINE3, sb.toString());
    }
}
Use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in the uPortal project (Jasig).
From class JaxbPortalDataHandlerService, method importDataArchive:
/**
 * Detects the archive/compression format of {@code resourceStream} and delegates to the
 * matching {@code importDataArchive} overload with a suitable wrapping stream.
 * <p>
 * NOTE: each branch relies on static-type overload resolution — archive formats call the
 * {@code ArchiveInputStream} overload, compression formats call the
 * {@code CompressorInputStream} overload. Do not refactor the branches into a shared
 * stream-factory without preserving that dispatch.
 *
 * @param resourceStream raw input; wrapped in a {@link BufferedInputStream} if not already one
 * @throws RuntimeException if the media type is unrecognized or an I/O error occurs
 */
protected void importDataArchive(Resource archive, InputStream resourceStream, BatchImportOptions options) {
    BufferedInputStream bufferedResourceStream = null;
    try {
        //Make sure the stream is buffered (media-type sniffing needs mark/reset support)
        if (resourceStream instanceof BufferedInputStream) {
            bufferedResourceStream = (BufferedInputStream) resourceStream;
        } else {
            bufferedResourceStream = new BufferedInputStream(resourceStream);
        }
        //Buffer up to 100MB, bad things will happen if we bust this buffer.
        //TODO see if there is a buffered stream that will write to a file once the buffer fills up
        bufferedResourceStream.mark(100 * 1024 * 1024);
        final MediaType type = getMediaType(bufferedResourceStream, archive.getFilename());
        // Archive formats: wrap in the matching ArchiveInputStream implementation.
        if (MT_JAVA_ARCHIVE.equals(type)) {
            final ArchiveInputStream archiveStream = new JarArchiveInputStream(bufferedResourceStream);
            importDataArchive(archive, archiveStream, options);
        } else if (MediaType.APPLICATION_ZIP.equals(type)) {
            final ArchiveInputStream archiveStream = new ZipArchiveInputStream(bufferedResourceStream);
            importDataArchive(archive, archiveStream, options);
        } else if (MT_CPIO.equals(type)) {
            final ArchiveInputStream archiveStream = new CpioArchiveInputStream(bufferedResourceStream);
            importDataArchive(archive, archiveStream, options);
        } else if (MT_AR.equals(type)) {
            final ArchiveInputStream archiveStream = new ArArchiveInputStream(bufferedResourceStream);
            importDataArchive(archive, archiveStream, options);
        } else if (MT_TAR.equals(type)) {
            final ArchiveInputStream archiveStream = new TarArchiveInputStream(bufferedResourceStream);
            importDataArchive(archive, archiveStream, options);
        // Compression formats: wrap in a CompressorInputStream; the overload presumably
        // re-detects the inner (e.g. tar) format — confirm against the overload's javadoc.
        } else if (MT_BZIP2.equals(type)) {
            final CompressorInputStream compressedStream = new BZip2CompressorInputStream(bufferedResourceStream);
            importDataArchive(archive, compressedStream, options);
        } else if (MT_GZIP.equals(type)) {
            final CompressorInputStream compressedStream = new GzipCompressorInputStream(bufferedResourceStream);
            importDataArchive(archive, compressedStream, options);
        } else if (MT_PACK200.equals(type)) {
            final CompressorInputStream compressedStream = new Pack200CompressorInputStream(bufferedResourceStream);
            importDataArchive(archive, compressedStream, options);
        } else if (MT_XZ.equals(type)) {
            final CompressorInputStream compressedStream = new XZCompressorInputStream(bufferedResourceStream);
            importDataArchive(archive, compressedStream, options);
        } else {
            throw new RuntimeException("Unrecognized archive media type: " + type);
        }
    } catch (IOException e) {
        throw new RuntimeException("Could not load InputStream for resource: " + archive, e);
    } finally {
        // Closing the buffered wrapper also closes the caller-supplied resourceStream.
        IOUtils.closeQuietly(bufferedResourceStream);
    }
}
Use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in the tika project (Apache).
From class ExtractReader, method loadExtract:
/**
 * Loads the metadata list from an extract file, transparently decompressing
 * bz2/gz(ip) streams, and applies the configured {@code alterMetadataList} policy.
 *
 * @param extractFile path to the extract (json or txt, optionally compressed)
 * @return the metadata list, or {@code null} for an unsupported compression suffix
 * @throws ExtractReaderException for missing/empty/too-short/too-long files,
 *         I/O failures, or parse failures
 */
public List<Metadata> loadExtract(Path extractFile) throws ExtractReaderException {
    List<Metadata> metadataList = null;
    if (extractFile == null || !Files.isRegularFile(extractFile)) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.NO_EXTRACT_FILE);
    }
    FileSuffixes fileSuffixes = parseSuffixes(extractFile.getFileName().toString());
    if (fileSuffixes.txtOrJson == null) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.INCORRECT_EXTRACT_FILE_SUFFIX);
    }
    // (A second Files.isRegularFile check was removed here: it duplicated the
    // null-safe check performed above.)
    long length = -1L;
    try {
        length = Files.size(extractFile);
    } catch (IOException e) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
    }
    if (length == 0L) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.ZERO_BYTE_EXTRACT_FILE);
    }
    if (minExtractLength > IGNORE_LENGTH && length < minExtractLength) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_FILE_TOO_SHORT);
    }
    if (maxExtractLength > IGNORE_LENGTH && length > maxExtractLength) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_FILE_TOO_LONG);
    }
    Reader reader = null;
    InputStream is = null;
    try {
        is = Files.newInputStream(extractFile);
        if (fileSuffixes.compression != null) {
            if (fileSuffixes.compression.equals("bz2")) {
                is = new BZip2CompressorInputStream(is);
            } else if (fileSuffixes.compression.equals("gz") || fileSuffixes.compression.equals("gzip")) {
                is = new GzipCompressorInputStream(is);
            } else if (fileSuffixes.compression.equals("zip")) {
                // NOTE(review): ZCompressorInputStream handles Unix .Z (LZW) data, not
                // zip archives — this mapping looks suspect; confirm the intended format.
                is = new ZCompressorInputStream(is);
            } else {
                LOG.warn("Can't yet process compression of type: {}", fileSuffixes.compression);
                return metadataList;
            }
        }
        reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
    } catch (IOException e) {
        // The compressor constructors read header bytes and can throw after `is`
        // was opened; close it here or it leaks (the finally below is not reached).
        IOUtils.closeQuietly(is);
        throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
    }
    try {
        if (fileSuffixes.txtOrJson.equals("json")) {
            metadataList = JsonMetadataList.fromJson(reader);
            if (alterMetadataList.equals(ALTER_METADATA_LIST.FIRST_ONLY) && metadataList.size() > 1) {
                // Keep only the container document's metadata.
                while (metadataList.size() > 1) {
                    metadataList.remove(metadataList.size() - 1);
                }
            } else if (alterMetadataList.equals(ALTER_METADATA_LIST.CONCATENATE_CONTENT_INTO_FIRST) && metadataList.size() > 1) {
                // Fold the content of every entry (including the container's own)
                // into the container entry, then drop the embedded entries.
                // (The constant was previously reached via ALTER_METADATA_LIST.AS_IS.…;
                // static members should be accessed directly.)
                StringBuilder sb = new StringBuilder();
                Metadata containerMetadata = metadataList.get(0);
                for (int i = 0; i < metadataList.size(); i++) {
                    Metadata m = metadataList.get(i);
                    String c = m.get(RecursiveParserWrapper.TIKA_CONTENT);
                    if (c != null) {
                        sb.append(c);
                        sb.append(" ");
                    }
                }
                containerMetadata.set(RecursiveParserWrapper.TIKA_CONTENT, sb.toString());
                while (metadataList.size() > 1) {
                    metadataList.remove(metadataList.size() - 1);
                }
            }
        } else {
            metadataList = generateListFromTextFile(reader, fileSuffixes);
        }
    } catch (IOException e) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
    } catch (TikaException e) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_PARSE_EXCEPTION);
    } finally {
        IOUtils.closeQuietly(reader);
        IOUtils.closeQuietly(is);
    }
    return metadataList;
}
Use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in the stanbol project (Apache).
From class RdfResourceImporter, method importResource:
@Override
public ResourceState importResource(InputStream is, String resourceName) throws IOException {
String name = FilenameUtils.getName(resourceName);
if ("gz".equalsIgnoreCase(FilenameUtils.getExtension(name))) {
is = new GZIPInputStream(is);
name = FilenameUtils.removeExtension(name);
log.debug(" - from GZIP Archive");
} else if ("bz2".equalsIgnoreCase(FilenameUtils.getExtension(name))) {
is = new BZip2CompressorInputStream(is, //use true as 2nd param (see http://s.apache.org/QbK)
true);
name = FilenameUtils.removeExtension(name);
log.debug(" - from BZip2 Archive");
}
// TODO: No Zip Files inside Zip Files supported :o( ^^
Lang format = RDFLanguages.filenameToLang(name);
if (format == null) {
log.warn("ignore File {} because of unknown extension ");
return ResourceState.IGNORED;
} else {
log.info(" - bulk loading File {} using Format {}", resourceName, format);
try {
destination.startBulk();
RiotReader.parse(is, format, null, destination);
} catch (RuntimeException e) {
return ResourceState.ERROR;
} finally {
destination.finishBulk();
}
}
// }
return ResourceState.LOADED;
}
Use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in the stanbol project (Apache).
From class ConfigUtils, method getArchiveInputStream:
/**
 * Opens an {@link ArchiveInputStream} over {@code is} for the given Solr archive name.
 * The name may either already be a format token or a file name whose extension is
 * looked up in {@code SUPPORTED_SOLR_ARCHIVE_FORMAT}. Zip archives are returned
 * directly; gz/bz2 streams are decompressed and treated as tar archives.
 *
 * @throws IOException if a decompressor cannot be opened over the stream
 * @throws IllegalStateException for unsupported formats
 */
public static ArchiveInputStream getArchiveInputStream(String solrArchiveName, InputStream is) throws IOException {
    final String extension = FilenameUtils.getExtension(solrArchiveName);
    // No extension means the caller already passed a bare format token.
    final String archiveFormat = (extension == null || extension.isEmpty())
            ? solrArchiveName
            : SUPPORTED_SOLR_ARCHIVE_FORMAT.get(extension);
    // Null-safe "literal".equals(variable) comparisons: archiveFormat may be null
    // for unmapped extensions, which must fall through to the error branch.
    if ("zip".equals(archiveFormat)) {
        return new ZipArchiveInputStream(is);
    }
    final InputStream decompressed;
    if ("gz".equals(archiveFormat)) {
        decompressed = new GZIPInputStream(is);
    } else if ("bz2".equals(archiveFormat)) {
        decompressed = new BZip2CompressorInputStream(is);
    } else {
        throw new IllegalStateException("Unsupported compression format " + archiveFormat + "!. " + "Please report this to stanbol-dev mailing list!");
    }
    // Compressed formats always wrap a tar archive.
    return new TarArchiveInputStream(decompressed);
}
Aggregations