Search in sources :

Example 1 with NoCloseInputStream

use of com.github.lindenb.jvarkit.io.NoCloseInputStream in project jvarkit by lindenb.

The example below is the doWork method of the class VcfToBed.

/**
 * Entry point: writes the optional header, then scans the VCF input(s) into the output writer.
 *
 * <p>The input is selected from {@code args} as follows:
 * <ul>
 *   <li>a single argument ending with {@code .list}: every non-blank line of that file is
 *       passed to {@code scan(String, PrintWriter)};</li>
 *   <li>a single argument ending with {@code .zip}: every non-directory entry whose name ends
 *       with one of {@code FileExtensions.VCF_LIST} is scanned;</li>
 *   <li>a single argument ending with {@code .tar} or {@code .tar.gz}: same as zip, for
 *       readable tar entries;</li>
 *   <li>no argument: {@code scan(null, pw)} is called (logged as reading from stdin);</li>
 *   <li>otherwise: every argument is passed to {@code scan(String, PrintWriter)}.</li>
 * </ul>
 *
 * @param args the input paths, possibly empty
 * @return 0 on success, -1 if any exception was thrown (the exception is logged)
 */
@Override
public int doWork(final List<String> args) {
    if (!StringUtil.isBlank(this.maxLengthStr)) {
        this.maxLength = new DistanceParser.StringConverter().convert(this.maxLengthStr);
    }
    if (!StringUtil.isBlank(this.minLengthStr)) {
        this.minLength = new DistanceParser.StringConverter().convert(this.minLengthStr);
    }
    PrintWriter pw = null;
    try {
        if (this.faidx != null) {
            this.samSequenceDictionary = SequenceDictionaryUtils.extractRequired(this.faidx);
        }
        pw = super.openPathOrStdoutAsPrintWriter(this.outputFile);
        if (printHeader) {
            switch(this.outputFormat) {
                case bed:
                    {
                        pw.println("track name=vcf2bed type=bed description=\"__DESCRIPTION__\"");
                        break;
                    }
                case interval:
                    {
                        // the interval header is only emitted when a dictionary is available
                        if (this.samSequenceDictionary != null) {
                            final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
                            codec.encode(pw, new SAMFileHeader(this.samSequenceDictionary));
                        }
                        break;
                    }
                default:
                    {
                        throw new IllegalStateException("" + this.outputFormat);
                    }
            }
        }
        if (args.size() == 1 && args.get(0).endsWith(".list")) {
            final PrintWriter finalpw = pw;
            // Files.lines keeps the file open until the stream is closed: close it explicitly
            try (java.util.stream.Stream<String> lines = Files.lines(Paths.get(args.get(0)))) {
                lines.filter(L -> !StringUtil.isBlank(L)).forEach(L -> {
                    scan(L, finalpw);
                });
            }
        } else if (args.size() == 1 && args.get(0).endsWith(".zip")) {
            // both streams are closed (zip first) even when scanning an entry fails
            try (InputStream in = new BufferedInputStream(Files.newInputStream(Paths.get(args.get(0))));
                ZipInputStream zin = new ZipInputStream(in)) {
                for (; ; ) {
                    final ZipEntry entry = zin.getNextEntry();
                    if (entry == null)
                        break;
                    if (entry.isDirectory()) {
                        continue;
                    }
                    if (!FileExtensions.VCF_LIST.stream().anyMatch(X -> entry.getName().endsWith(X))) {
                        continue;
                    }
                    /* prevent zip from being closed */
                    final InputStream do_not_close_in = new NoCloseInputStream(zin);
                    try (VCFIterator iter = new VCFIteratorBuilder().open(do_not_close_in)) {
                        scan(args.get(0) + "!" + entry.getName(), iter, pw);
                    }
                }
            }
        } else if (args.size() == 1 && (args.get(0).endsWith(".tar") || args.get(0).endsWith(".tar.gz"))) {
            // resources are closed in reverse order: tar, then the (optional) gzip layer, then the file
            try (InputStream in = new BufferedInputStream(Files.newInputStream(Paths.get(args.get(0))));
                InputStream in2 = args.get(0).endsWith(".tar") ? in : new GZIPInputStream(in);
                TarArchiveInputStream tarin = new TarArchiveInputStream(in2)) {
                for (; ; ) {
                    final TarArchiveEntry entry = tarin.getNextTarEntry();
                    if (entry == null)
                        break;
                    if (!tarin.canReadEntryData(entry))
                        continue;
                    if (entry.isDirectory()) {
                        continue;
                    }
                    if (!FileExtensions.VCF_LIST.stream().anyMatch(X -> entry.getName().endsWith(X))) {
                        continue;
                    }
                    /* prevent tar from being closed */
                    final InputStream do_not_close_in = new NoCloseInputStream(tarin);
                    try (VCFIterator iter = new VCFIteratorBuilder().open(do_not_close_in)) {
                        scan(args.get(0) + "!" + entry.getName(), iter, pw);
                    }
                }
            }
        } else if (args.isEmpty()) {
            LOG.info("reading vcf from stdin");
            scan(null, pw);
        } else {
            final PrintWriter finalpw = pw;
            args.stream().forEach(L -> {
                scan(L, finalpw);
            });
        }
        if (!this.contigsNotFound.isEmpty()) {
            LOG.warn("The following contigs: " + String.join(",", this.contigsNotFound) + " were not found in the dictionaries.");
        }
        pw.flush();
        pw.close();
        // already closed: nothing left for the finally block to do
        pw = null;
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(pw);
    }
}
Also used : IntervalExtender(com.github.lindenb.jvarkit.samtools.util.IntervalExtender) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedInputStream(java.io.BufferedInputStream) ZipInputStream(java.util.zip.ZipInputStream) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) Function(java.util.function.Function) SAMFileHeader(htsjdk.samtools.SAMFileHeader) TreeSet(java.util.TreeSet) SAMTextHeaderCodec(htsjdk.samtools.SAMTextHeaderCodec) NoCloseInputStream(com.github.lindenb.jvarkit.io.NoCloseInputStream) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry) StringUtil(htsjdk.samtools.util.StringUtil) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Path(java.nio.file.Path) ZipEntry(java.util.zip.ZipEntry) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Locatable(htsjdk.samtools.util.Locatable) Files(java.nio.file.Files) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) VCFIteratorBuilder(htsjdk.variant.vcf.VCFIteratorBuilder) Collectors(java.util.stream.Collectors) List(java.util.List) Paths(java.nio.file.Paths) FileExtensions(htsjdk.samtools.util.FileExtensions) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) InputStream(java.io.InputStream) VCFIteratorBuilder(htsjdk.variant.vcf.VCFIteratorBuilder) 
GZIPInputStream(java.util.zip.GZIPInputStream) BufferedInputStream(java.io.BufferedInputStream) ZipInputStream(java.util.zip.ZipInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) NoCloseInputStream(com.github.lindenb.jvarkit.io.NoCloseInputStream) InputStream(java.io.InputStream) ZipEntry(java.util.zip.ZipEntry) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry) GZIPInputStream(java.util.zip.GZIPInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) ZipInputStream(java.util.zip.ZipInputStream) NoCloseInputStream(com.github.lindenb.jvarkit.io.NoCloseInputStream) SAMTextHeaderCodec(htsjdk.samtools.SAMTextHeaderCodec) BufferedInputStream(java.io.BufferedInputStream) SAMFileHeader(htsjdk.samtools.SAMFileHeader) VCFIterator(htsjdk.variant.vcf.VCFIterator) PrintWriter(java.io.PrintWriter)

Aggregations

Parameter (com.beust.jcommander.Parameter)1 NoCloseInputStream (com.github.lindenb.jvarkit.io.NoCloseInputStream)1 IntervalExtender (com.github.lindenb.jvarkit.samtools.util.IntervalExtender)1 SimpleInterval (com.github.lindenb.jvarkit.samtools.util.SimpleInterval)1 DistanceParser (com.github.lindenb.jvarkit.util.bio.DistanceParser)1 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)1 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)1 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)1 NoSplitter (com.github.lindenb.jvarkit.util.jcommander.NoSplitter)1 Program (com.github.lindenb.jvarkit.util.jcommander.Program)1 Logger (com.github.lindenb.jvarkit.util.log.Logger)1 SAMFileHeader (htsjdk.samtools.SAMFileHeader)1 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)1 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)1 SAMTextHeaderCodec (htsjdk.samtools.SAMTextHeaderCodec)1 CloserUtil (htsjdk.samtools.util.CloserUtil)1 FileExtensions (htsjdk.samtools.util.FileExtensions)1 Locatable (htsjdk.samtools.util.Locatable)1 StringUtil (htsjdk.samtools.util.StringUtil)1 VariantContext (htsjdk.variant.variantcontext.VariantContext)1