Example 86 with TarArchiveInputStream

Use of org.apache.commons.compress.archivers.tar.TarArchiveInputStream in project nifi by apache.

The class TarUnpackerSequenceFileWriter, method processInputStream:

@Override
protected void processInputStream(final InputStream stream, final FlowFile tarArchivedFlowFile, final Writer writer) throws IOException {
    try (final TarArchiveInputStream tarIn = new TarArchiveInputStream(new BufferedInputStream(stream))) {
        TarArchiveEntry tarEntry;
        while ((tarEntry = tarIn.getNextTarEntry()) != null) {
            if (tarEntry.isDirectory()) {
                continue;
            }
            final String key = tarEntry.getName();
            final long fileSize = tarEntry.getSize();
            final InputStreamWritable inStreamWritable = new InputStreamWritable(tarIn, (int) fileSize);
            writer.append(new Text(key), inStreamWritable);
            logger.debug("Appending FlowFile {} to Sequence File", new Object[] { key });
        }
    }
}
Also used : TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) InputStreamWritable(org.apache.nifi.processors.hadoop.util.InputStreamWritable) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) Text(org.apache.hadoop.io.Text) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry)
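One detail worth noting in the example above: the entry size is narrowed from long to int before being handed to InputStreamWritable, so an entry larger than Integer.MAX_VALUE bytes would be truncated. A minimal sketch of a guard a caller might add (the helper is hypothetical and not part of the NiFi source):

// Hypothetical helper, not part of the NiFi code: fail fast when a tar entry is too
// large to be wrapped in InputStreamWritable, whose length parameter is an int.
static void checkEntryFits(String name, long size) throws IOException {
    if (size > Integer.MAX_VALUE) {
        throw new IOException("Tar entry " + name + " is too large to append (" + size + " bytes)");
    }
}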

Example 87 with TarArchiveInputStream

Use of org.apache.commons.compress.archivers.tar.TarArchiveInputStream in project Lucee by lucee.

The class CompressUtil, method extractTar:

private static void extractTar(Resource tarFile, Resource targetDir) throws IOException {
    if (!targetDir.exists() || !targetDir.isDirectory())
        throw new IOException(targetDir + " is not an existing directory");
    if (!tarFile.exists())
        throw new IOException(tarFile + " is not an existing file");
    if (tarFile.isDirectory()) {
        Resource[] files = tarFile.listResources(new ExtensionResourceFilter("tar"));
        if (files == null)
            throw new IOException("directory " + tarFile + " is empty");
        extract(FORMAT_TAR, files, targetDir);
        return;
    }
    // read the tar archive and extract its entries into the target directory
    TarArchiveInputStream tis = null;
    try {
        tis = new TarArchiveInputStream(IOUtil.toBufferedInputStream(tarFile.getInputStream()));
        TarArchiveEntry entry;
        int mode;
        while ((entry = tis.getNextTarEntry()) != null) {
            // print.ln(entry);
            Resource target = targetDir.getRealResource(entry.getName());
            if (entry.isDirectory()) {
                target.mkdirs();
            } else {
                Resource parent = target.getParentResource();
                if (!parent.exists())
                    parent.mkdirs();
                IOUtil.copy(tis, target, false);
            }
            target.setLastModified(entry.getModTime().getTime());
            mode = entry.getMode();
            if (mode > 0)
                target.setMode(mode);
        // tis.closeEntry() ;
        }
    } finally {
        IOUtil.closeEL(tis);
    }
}
Also used : TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) Resource(lucee.commons.io.res.Resource) ExtensionResourceFilter(lucee.commons.io.res.filter.ExtensionResourceFilter) IOException(java.io.IOException) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry) lucee.aprint(lucee.aprint)
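The Lucee snippet resolves each entry name directly against the target directory. When adapting this pattern, it is common to also validate the resolved path so that a crafted entry name such as "../../evil" cannot escape the extraction root ("zip slip"). A minimal sketch using plain java.nio.file paths rather than Lucee's Resource abstraction; the helper name is hypothetical:

import java.io.IOException;
import java.nio.file.Path;

final class TarPathGuard {
    // Resolve entryName under targetDir and reject anything that normalizes outside of it.
    static Path resolveSafely(Path targetDir, String entryName) throws IOException {
        Path resolved = targetDir.resolve(entryName).normalize();
        if (!resolved.startsWith(targetDir.normalize())) {
            throw new IOException("Entry is outside of the target directory: " + entryName);
        }
        return resolved;
    }
}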

Example 88 with TarArchiveInputStream

Use of org.apache.commons.compress.archivers.tar.TarArchiveInputStream in project linuxtools by eclipse.

The class CopyFromContainerCommandHandler, method performCopyFromContainer:

private void performCopyFromContainer(final IDockerConnection connection, final IDockerContainer container, final String target, final List<ContainerFileProxy> files) {
    final Job copyFromContainerJob = new Job(CommandMessages.getFormattedString(COPY_FROM_CONTAINER_JOB_TITLE, container.name())) {

        @Override
        protected IStatus run(final IProgressMonitor monitor) {
            monitor.beginTask(CommandMessages.getString(COPY_FROM_CONTAINER_JOB_TASK), files.size());
            try (Closeable token = ((DockerConnection) connection).getOperationToken()) {
                for (ContainerFileProxy proxy : files) {
                    if (monitor.isCanceled()) {
                        monitor.done();
                        return Status.CANCEL_STATUS;
                    }
                    try {
                        monitor.setTaskName(CommandMessages.getFormattedString(COPY_FROM_CONTAINER_JOB_SUBTASK, proxy.getFullPath()));
                        monitor.worked(1);
                        InputStream in = ((DockerConnection) connection).copyContainer(token, container.id(), proxy.getLink());
                        /*
                         * The input stream from copyContainer might be
                         * incomplete or non-blocking so we should wrap it
                         * in a stream that is guaranteed to block until
                         * data is available.
                         */
                        TarArchiveInputStream k = new TarArchiveInputStream(new BlockingInputStream(in));
                        TarArchiveEntry te = null;
                        while ((te = k.getNextTarEntry()) != null) {
                            long size = te.getSize();
                            IPath path = new Path(target);
                            path = path.append(te.getName());
                            File f = new File(path.toOSString());
                            if (te.isDirectory()) {
                                f.mkdir();
                                continue;
                            } else {
                                f.createNewFile();
                            }
                            FileOutputStream os = new FileOutputStream(f);
                            int bufferSize = ((int) size > 4096 ? 4096 : (int) size);
                            byte[] barray = new byte[bufferSize];
                            int result = -1;
                            while ((result = k.read(barray, 0, bufferSize)) > -1) {
                                if (monitor.isCanceled()) {
                                    monitor.done();
                                    k.close();
                                    os.close();
                                    return Status.CANCEL_STATUS;
                                }
                                os.write(barray, 0, result);
                            }
                            os.close();
                        }
                        k.close();
                    } catch (final DockerException e) {
                        Display.getDefault().syncExec(() -> MessageDialog.openError(PlatformUI.getWorkbench().getActiveWorkbenchWindow().getShell(), CommandMessages.getFormattedString(ERROR_COPYING_FROM_CONTAINER, proxy.getLink(), container.name()), e.getCause() != null ? e.getCause().getMessage() : e.getMessage()));
                    }
                }
            } catch (InterruptedException e) {
            // do nothing
            } catch (IOException e) {
                Activator.log(e);
            } catch (DockerException e1) {
                Activator.log(e1);
            } finally {
                monitor.done();
            }
            return Status.OK_STATUS;
        }
    };
    copyFromContainerJob.schedule();
}
Also used : IPath(org.eclipse.core.runtime.IPath) Path(org.eclipse.core.runtime.Path) DockerException(org.eclipse.linuxtools.docker.core.DockerException) IPath(org.eclipse.core.runtime.IPath) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) InputStream(java.io.InputStream) Closeable(java.io.Closeable) IOException(java.io.IOException) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry) IDockerConnection(org.eclipse.linuxtools.docker.core.IDockerConnection) DockerConnection(org.eclipse.linuxtools.internal.docker.core.DockerConnection) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) IProgressMonitor(org.eclipse.core.runtime.IProgressMonitor) ContainerFileProxy(org.eclipse.linuxtools.internal.docker.core.ContainerFileProxy) FileOutputStream(java.io.FileOutputStream) Job(org.eclipse.core.runtime.jobs.Job) File(java.io.File)
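The block comment in this example refers to BlockingInputStream, which is internal to linuxtools. Purely as an illustration of the idea it describes, the wrapper below retries zero-length reads so that TarArchiveInputStream only ever sees data or end-of-stream; this is an assumption about how such a wrapper could look, not the linuxtools implementation:

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;

// Illustrative sketch only: keep re-reading until the underlying stream yields
// at least one byte or reports end-of-stream, so callers never see a 0-byte read.
class SimpleBlockingInputStream extends FilterInputStream {
    SimpleBlockingInputStream(InputStream in) {
        super(in);
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if (len == 0) {
            return 0;
        }
        int n;
        do {
            n = in.read(b, off, len);
        } while (n == 0);
        return n;
    }
}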

Example 89 with TarArchiveInputStream

Use of org.apache.commons.compress.archivers.tar.TarArchiveInputStream in project mmtf-spark by sbl-sdsc.

The class JpredDataset, method getDataset:

/**
 * Gets the JPred 4/JNet (v.2.3.1) secondary structure dataset.
 *
 * @return secondary structure dataset
 * @throws IOException
 *             if file cannot be downloaded or read
 */
public static Dataset<Row> getDataset() throws IOException {
    List<Row> res = new ArrayList<Row>();
    URL u = new URL(URL);
    URLConnection conn = u.openConnection();
    InputStream in = conn.getInputStream();
    BufferedInputStream fin = new BufferedInputStream(in);
    GzipCompressorInputStream gzIn = new GzipCompressorInputStream(fin);
    TarArchiveInputStream tarIn = new TarArchiveInputStream(gzIn);
    TarArchiveEntry entry = null;
    Set<String> scopIDs = new HashSet<>();
    Map<String, String> sequences = new HashMap<String, String>();
    Map<String, String> secondaryStructures = new HashMap<String, String>();
    Map<String, String> trained = new HashMap<String, String>();
    while ((entry = (TarArchiveEntry) tarIn.getNextEntry()) != null) {
        if (entry.isDirectory()) {
            continue;
        }
        BufferedReader br = new BufferedReader(new InputStreamReader(tarIn));
        if (entry.getName().contains(".dssp")) {
            String scopID = br.readLine().substring(1);
            String secondaryStructure = br.readLine();
            secondaryStructure = secondaryStructure.replace("-", "C");
            secondaryStructures.put(scopID, secondaryStructure);
        } else if (entry.getName().contains(".fasta")) {
            String scopID = br.readLine().substring(1);
            String sequence = br.readLine();
            scopIDs.add(scopID);
            sequences.put(scopID, sequence);
            if (entry.getName().contains("training/"))
                trained.put(scopID, "true");
            else if (entry.getName().contains("blind/"))
                trained.put(scopID, "false");
        }
    }
    tarIn.close();
    Iterator<String> iter = scopIDs.iterator();
    while (iter.hasNext()) {
        String scopID = iter.next();
        res.add(RowFactory.create(scopID, sequences.get(scopID), secondaryStructures.get(scopID), trained.get(scopID)));
    }
    SparkSession spark = SparkSession.builder().getOrCreate();
    @SuppressWarnings("resource") JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
    JavaRDD<Row> data = sc.parallelize(res);
    return JavaRDDToDataset.getDataset(data, "scopID", "sequence", "secondaryStructure", "trained");
}
Also used : GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) SparkSession(org.apache.spark.sql.SparkSession) InputStreamReader(java.io.InputStreamReader) HashMap(java.util.HashMap) BufferedInputStream(java.io.BufferedInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) URL(java.net.URL) URLConnection(java.net.URLConnection) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) BufferedInputStream(java.io.BufferedInputStream) BufferedReader(java.io.BufferedReader) Row(org.apache.spark.sql.Row) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) HashSet(java.util.HashSet)
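A brief usage note for the example above: the returned Dataset exposes the four columns named in the final getDataset call, so a caller might, for instance, separate the training and blind subsets. The column names follow the example; everything else here is a hedged usage sketch:

// Assumes the SparkSession setup shown in the example above.
Dataset<Row> data = JpredDataset.getDataset();
data.printSchema();

// Rows flagged "false" in the trained column belong to the blind (held-out) set.
Dataset<Row> blind = data.filter("trained = 'false'");
blind.show(5, false);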

Example 90 with TarArchiveInputStream

Use of org.apache.commons.compress.archivers.tar.TarArchiveInputStream in project AmazeFileManager by TeamAmaze.

The class GzipExtractor, method extractWithFilter:

@Override
protected void extractWithFilter(@NonNull Filter filter) throws IOException {
    long totalBytes = 0;
    ArrayList<TarArchiveEntry> archiveEntries = new ArrayList<>();
    TarArchiveInputStream inputStream = new TarArchiveInputStream(new GzipCompressorInputStream(new FileInputStream(filePath)));
    TarArchiveEntry tarArchiveEntry;
    while ((tarArchiveEntry = inputStream.getNextTarEntry()) != null) {
        if (filter.shouldExtract(tarArchiveEntry.getName(), tarArchiveEntry.isDirectory())) {
            archiveEntries.add(tarArchiveEntry);
            totalBytes += tarArchiveEntry.getSize();
        }
    }
    listener.onStart(totalBytes, archiveEntries.get(0).getName());
    inputStream.close();
    inputStream = new TarArchiveInputStream(new GzipCompressorInputStream(new FileInputStream(filePath)));
    for (TarArchiveEntry entry : archiveEntries) {
        if (!listener.isCancelled()) {
            listener.onUpdate(entry.getName());
            // TAR is sequential, you need to walk all the way to the file you want
            while (entry.hashCode() != inputStream.getNextTarEntry().hashCode()) ;
            extractEntry(context, inputStream, entry, outputPath);
        }
    }
    inputStream.close();
    listener.onFinish();
}
Also used : TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) ArrayList(java.util.ArrayList) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry) FileInputStream(java.io.FileInputStream)
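As the comment in this example notes, a tar stream is strictly sequential, which is why the code walks the archive a second time to reach each selected entry. When the total byte count is not needed up front, the same commons-compress API also allows filtering and extracting in a single pass; the sketch below is a hedged alternative with hypothetical callback names, not the AmazeFileManager implementation:

import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;

final class SinglePassTarGzExtractor {
    interface EntryHandler {
        boolean shouldExtract(String name, boolean isDirectory);
        void extract(TarArchiveInputStream in, TarArchiveEntry entry) throws IOException;
    }

    // Walk the archive once; the stream is positioned at each entry's data
    // when the handler is invoked, so no second pass is required.
    static void extract(InputStream rawGzippedTar, EntryHandler handler) throws IOException {
        try (TarArchiveInputStream tarIn =
                new TarArchiveInputStream(new GzipCompressorInputStream(rawGzippedTar))) {
            TarArchiveEntry entry;
            while ((entry = tarIn.getNextTarEntry()) != null) {
                if (handler.shouldExtract(entry.getName(), entry.isDirectory())) {
                    handler.extract(tarIn, entry);
                }
            }
        }
    }
}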

Aggregations

TarArchiveInputStream (org.apache.commons.compress.archivers.tar.TarArchiveInputStream): 132 usages
TarArchiveEntry (org.apache.commons.compress.archivers.tar.TarArchiveEntry): 99 usages
File (java.io.File): 52 usages
IOException (java.io.IOException): 50 usages
FileInputStream (java.io.FileInputStream): 46 usages
GzipCompressorInputStream (org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream): 46 usages
InputStream (java.io.InputStream): 35 usages
FileOutputStream (java.io.FileOutputStream): 34 usages
BufferedInputStream (java.io.BufferedInputStream): 31 usages
ByteArrayInputStream (java.io.ByteArrayInputStream): 28 usages
Test (org.junit.Test): 23 usages
ArrayList (java.util.ArrayList): 20 usages
GZIPInputStream (java.util.zip.GZIPInputStream): 20 usages
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 19 usages
ArchiveEntry (org.apache.commons.compress.archivers.ArchiveEntry): 17 usages
OutputStream (java.io.OutputStream): 16 usages
Path (java.nio.file.Path): 16 usages
BufferedOutputStream (java.io.BufferedOutputStream): 12 usages
ArchiveStreamFactory (org.apache.commons.compress.archivers.ArchiveStreamFactory): 12 usages
TarArchiveOutputStream (org.apache.commons.compress.archivers.tar.TarArchiveOutputStream): 8 usages
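The classes above most often appear together in a .tar.gz extraction loop. A minimal self-contained sketch combining the most frequent ones (file names are placeholders, and entry names are trusted here; see the path-validation note under Example 87):

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;

public class TarGzExtractExample {
    public static void main(String[] args) throws IOException {
        File archive = new File("example.tar.gz"); // placeholder path
        File targetDir = new File("extracted");    // placeholder path
        try (TarArchiveInputStream tarIn = new TarArchiveInputStream(
                new GzipCompressorInputStream(
                        new BufferedInputStream(new FileInputStream(archive))))) {
            TarArchiveEntry entry;
            while ((entry = tarIn.getNextTarEntry()) != null) {
                File out = new File(targetDir, entry.getName());
                if (entry.isDirectory()) {
                    out.mkdirs();
                    continue;
                }
                out.getParentFile().mkdirs();
                try (FileOutputStream os = new FileOutputStream(out)) {
                    byte[] buffer = new byte[8192];
                    int read;
                    while ((read = tarIn.read(buffer)) != -1) {
                        os.write(buffer, 0, read);
                    }
                }
            }
        }
    }
}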