Use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project pinot by linkedin.
The class TarGzCompressionUtils, method unTarOneFile.
public static InputStream unTarOneFile(InputStream tarGzInputStream, final String filename) throws FileNotFoundException, IOException, ArchiveException {
    TarArchiveInputStream debInputStream = null;
    InputStream is = null;
    try {
        is = new GzipCompressorInputStream(tarGzInputStream);
        debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory().createArchiveInputStream("tar", is);
        TarArchiveEntry entry = null;
        while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
            if (entry.getName().contains(filename)) {
                ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
                IOUtils.copy(debInputStream, byteArrayOutputStream);
                return new ByteArrayInputStream(byteArrayOutputStream.toByteArray());
            }
        }
    } finally {
        IOUtils.closeQuietly(debInputStream);
        IOUtils.closeQuietly(is);
    }
    return null;
}
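A minimal usage sketch for this helper; the archive path and entry name below are hypothetical, and the checked exceptions (IOException, ArchiveException) are assumed to propagate to the caller:

// Hypothetical caller; "/tmp/segment.tar.gz" and "metadata.properties" are illustrative only.
try (InputStream tarGz = new FileInputStream("/tmp/segment.tar.gz")) {
    InputStream entryStream = TarGzCompressionUtils.unTarOneFile(tarGz, "metadata.properties");
    if (entryStream != null) {
        Properties props = new Properties(); // java.util.Properties
        props.load(entryStream); // the returned stream is backed by an in-memory byte array
    }
}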
Use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project pinot by linkedin.
The class TarGzCompressionUtils, method unTar.
/** Untars a gzip-compressed tar file into an output directory.
 * Each archive entry is written under the output directory, keeping
 * the entry's relative path from the archive.
 *
 * @param inputFile the input .tar.gz file
 * @param outputDir the output directory.
* @throws IOException
* @throws FileNotFoundException
*
* @return The {@link List} of {@link File}s with the untared content.
* @throws ArchiveException
*/
public static List<File> unTar(final File inputFile, final File outputDir) throws FileNotFoundException, IOException, ArchiveException {
    LOGGER.debug(String.format("Untaring %s to dir %s.", inputFile.getAbsolutePath(), outputDir.getAbsolutePath()));
    TarArchiveInputStream debInputStream = null;
    InputStream is = null;
    final List<File> untaredFiles = new LinkedList<File>();
    try {
        is = new GzipCompressorInputStream(new BufferedInputStream(new FileInputStream(inputFile)));
        debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory().createArchiveInputStream("tar", is);
        TarArchiveEntry entry = null;
        while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
            final File outputFile = new File(outputDir, entry.getName());
            if (entry.isDirectory()) {
                LOGGER.debug(String.format("Attempting to write output directory %s.", outputFile.getAbsolutePath()));
                if (!outputFile.exists()) {
                    LOGGER.debug(String.format("Attempting to create output directory %s.", outputFile.getAbsolutePath()));
                    if (!outputFile.mkdirs()) {
                        throw new IllegalStateException(String.format("Couldn't create directory %s.", outputFile.getAbsolutePath()));
                    }
                } else {
                    LOGGER.error("The directory already there. Deleting - " + outputFile.getAbsolutePath());
                    FileUtils.deleteDirectory(outputFile);
                }
            } else {
                LOGGER.debug(String.format("Creating output file %s.", outputFile.getAbsolutePath()));
                File directory = outputFile.getParentFile();
                if (!directory.exists()) {
                    directory.mkdirs();
                }
                OutputStream outputFileStream = null;
                try {
                    outputFileStream = new FileOutputStream(outputFile);
                    IOUtils.copy(debInputStream, outputFileStream);
                } finally {
                    IOUtils.closeQuietly(outputFileStream);
                }
            }
            untaredFiles.add(outputFile);
        }
    } finally {
        IOUtils.closeQuietly(debInputStream);
        IOUtils.closeQuietly(is);
    }
    return untaredFiles;
}
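A short usage sketch, again with hypothetical paths; the returned list can be used to locate the extracted files:

// Hypothetical usage; the paths are illustrative only.
File tarGzFile = new File("/tmp/segment.tar.gz");
File outputDir = new File("/tmp/segment-untarred");
List<File> untarred = TarGzCompressionUtils.unTar(tarGzFile, outputDir);
for (File f : untarred) {
    System.out.println("Extracted " + f.getAbsolutePath());
}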
Use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project karaf by apache.
The class RunMojo, method extractGzArchive.
private static void extractGzArchive(InputStream tarGz, File tar) throws IOException {
    BufferedInputStream in = new BufferedInputStream(tarGz);
    FileOutputStream out = new FileOutputStream(tar);
    GzipCompressorInputStream gzIn = new GzipCompressorInputStream(in);
    final byte[] buffer = new byte[1000];
    int n = 0;
    while (-1 != (n = gzIn.read(buffer))) {
        out.write(buffer, 0, n);
    }
    out.close();
    gzIn.close();
}
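The same gunzip-to-file logic can be written with try-with-resources so that both streams are also closed if the copy fails midway; the sketch below is an alternative under that assumption, not the project's own code:

// Alternative sketch (hypothetical, not the karaf code): decompress a .tar.gz stream to a plain .tar file.
private static void extractGzArchiveSafely(InputStream tarGz, File tar) throws IOException {
    try (InputStream gzIn = new GzipCompressorInputStream(new BufferedInputStream(tarGz));
         OutputStream out = new BufferedOutputStream(new FileOutputStream(tar))) {
        byte[] buffer = new byte[8192];
        int n;
        while ((n = gzIn.read(buffer)) != -1) {
            out.write(buffer, 0, n);
        }
    }
}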
Use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project tika by apache.
The class ExtractReader, method loadExtract.
public List<Metadata> loadExtract(Path extractFile) throws ExtractReaderException {
    List<Metadata> metadataList = null;
    if (extractFile == null || !Files.isRegularFile(extractFile)) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.NO_EXTRACT_FILE);
    }
    FileSuffixes fileSuffixes = parseSuffixes(extractFile.getFileName().toString());
    if (fileSuffixes.txtOrJson == null) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.INCORRECT_EXTRACT_FILE_SUFFIX);
    }
    if (!Files.isRegularFile(extractFile)) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.NO_EXTRACT_FILE);
    }
    long length = -1L;
    try {
        length = Files.size(extractFile);
    } catch (IOException e) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
    }
    if (length == 0L) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.ZERO_BYTE_EXTRACT_FILE);
    }
    if (minExtractLength > IGNORE_LENGTH && length < minExtractLength) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_FILE_TOO_SHORT);
    }
    if (maxExtractLength > IGNORE_LENGTH && length > maxExtractLength) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_FILE_TOO_LONG);
    }
    Reader reader = null;
    InputStream is = null;
    try {
        is = Files.newInputStream(extractFile);
        if (fileSuffixes.compression != null) {
            if (fileSuffixes.compression.equals("bz2")) {
                is = new BZip2CompressorInputStream(is);
            } else if (fileSuffixes.compression.equals("gz") || fileSuffixes.compression.equals("gzip")) {
                is = new GzipCompressorInputStream(is);
            } else if (fileSuffixes.compression.equals("zip")) {
                is = new ZCompressorInputStream(is);
            } else {
                LOG.warn("Can't yet process compression of type: {}", fileSuffixes.compression);
                return metadataList;
            }
        }
        reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
    } catch (IOException e) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
    }
    try {
        if (fileSuffixes.txtOrJson.equals("json")) {
            metadataList = JsonMetadataList.fromJson(reader);
            if (alterMetadataList.equals(ALTER_METADATA_LIST.FIRST_ONLY) && metadataList.size() > 1) {
                while (metadataList.size() > 1) {
                    metadataList.remove(metadataList.size() - 1);
                }
            } else if (alterMetadataList.equals(ALTER_METADATA_LIST.AS_IS.CONCATENATE_CONTENT_INTO_FIRST) && metadataList.size() > 1) {
                StringBuilder sb = new StringBuilder();
                Metadata containerMetadata = metadataList.get(0);
                for (int i = 0; i < metadataList.size(); i++) {
                    Metadata m = metadataList.get(i);
                    String c = m.get(RecursiveParserWrapper.TIKA_CONTENT);
                    if (c != null) {
                        sb.append(c);
                        sb.append(" ");
                    }
                }
                containerMetadata.set(RecursiveParserWrapper.TIKA_CONTENT, sb.toString());
                while (metadataList.size() > 1) {
                    metadataList.remove(metadataList.size() - 1);
                }
            }
        } else {
            metadataList = generateListFromTextFile(reader, fileSuffixes);
        }
    } catch (IOException e) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
    } catch (TikaException e) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_PARSE_EXCEPTION);
    } finally {
        IOUtils.closeQuietly(reader);
        IOUtils.closeQuietly(is);
    }
    return metadataList;
}
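The decompressor selection near the top of this method is a plain suffix-to-stream dispatch. The standalone sketch below isolates that idea with simplified, hypothetical suffix checks; it is not the ExtractReader API. Note that commons-compress's ZCompressorInputStream handles Unix compress (.Z) streams, not zip archives:

// Standalone sketch; suffix handling is simplified and hypothetical.
static InputStream openPossiblyCompressed(Path extractFile) throws IOException {
    InputStream is = Files.newInputStream(extractFile);
    String name = extractFile.getFileName().toString();
    if (name.endsWith(".bz2")) {
        return new BZip2CompressorInputStream(is);
    } else if (name.endsWith(".gz") || name.endsWith(".gzip")) {
        return new GzipCompressorInputStream(is);
    } else if (name.endsWith(".Z")) {
        return new ZCompressorInputStream(is); // Unix compress (.Z)
    }
    return is; // treat as uncompressed
}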
Use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project incubator-systemml by apache.
The class ValidateLicAndNotice, method extractFileFromTGZ.
/**
 * Extracts the specified file from a tgz file and stores it in the specified location.
 *
 * @param tgzFileName the name of the tgz file from which the file is to be extracted.
 * @param fileName the name of the file to be extracted.
 * @param strDestLoc the location to which the file will be extracted.
 * @param bFirstDirLevel whether to take the file from the first directory level only.
 * @return Success or Failure
*/
public static boolean extractFileFromTGZ(String tgzFileName, String fileName, String strDestLoc, boolean bFirstDirLevel) {
    boolean bRetCode = Constants.bFAILURE;
    TarArchiveInputStream tarIn = null;
    try {
        tarIn = new TarArchiveInputStream(new GzipCompressorInputStream(new BufferedInputStream(new FileInputStream(tgzFileName))));
    } catch (Exception e) {
        Utility.debugPrint(Constants.DEBUG_ERROR, "Exception in unzipping tar file: " + e);
        return bRetCode;
    }
    try {
        BufferedOutputStream bufOut = null;
        BufferedInputStream bufIn = null;
        TarArchiveEntry tarEntry = null;
        while ((tarEntry = tarIn.getNextTarEntry()) != null) {
            if (!tarEntry.getName().endsWith(fileName))
                continue;
            // Get file at root (in single directory) level. This is for License in root location.
            if (bFirstDirLevel && (tarEntry.getName().indexOf('/') != tarEntry.getName().lastIndexOf('/')))
                continue;
            bufIn = new BufferedInputStream(tarIn);
            int count;
            byte[] data = new byte[Constants.BUFFER];
            String strOutFileName = strDestLoc == null ? tarEntry.getName() : strDestLoc + "/" + fileName;
            FileOutputStream fos = new FileOutputStream(strOutFileName);
            bufOut = new BufferedOutputStream(fos, Constants.BUFFER);
            while ((count = bufIn.read(data, 0, Constants.BUFFER)) != -1) {
                bufOut.write(data, 0, count);
            }
            bufOut.flush();
            bufOut.close();
            bufIn.close();
            bRetCode = Constants.bSUCCESS;
            break;
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return bRetCode;
}
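A possible call site, with hypothetical archive and destination names; the return value is compared against the project's Constants.bSUCCESS flag as in the method above:

// Hypothetical usage; archive name, file name, and destination are illustrative only.
boolean ok = ValidateLicAndNotice.extractFileFromTGZ("systemml-src.tgz", "LICENSE", "/tmp/extracted", true);
if (ok != Constants.bSUCCESS) {
    System.err.println("Could not extract LICENSE from the archive.");
}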