Search in sources :

Example 36 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VcfBurdenSplitter method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, File outorNull) {
    SortingCollection<KeyAndLine> sortingcollection = null;
    BufferedReader in = null;
    CloseableIterator<KeyAndLine> iter = null;
    PrintStream pw = null;
    PrintWriter allDiscardedLog = null;
    try {
        in = inputName == null ? IOUtils.openStreamForBufferedReader(stdin()) : IOUtils.openURIForBufferedReading(inputName);
        if (this.allFilteredFileOut != null) {
            allDiscardedLog = IOUtils.openFileForPrintWriter(this.allFilteredFileOut);
        }
        final VCFUtils.CodecAndHeader cah = VCFUtils.parseHeader(in);
        /**
         * find splitter by name
         */
        Splitter splitter = null;
        for (final Splitter s : this.splitters) {
            if (this.splitterName.equals(s.getName())) {
                splitter = s;
                break;
            }
        }
        if (splitter == null) {
            return wrapException("Cannot find a splitter named " + this.splitterName);
        }
        splitter.initialize(cah.header);
        LOG.info("splitter is " + splitter);
        pw = super.openFileOrStdoutAsPrintStream(outorNull);
        // read variants
        final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(cah.header);
        String prev_contig = null;
        for (; ; ) {
            final String line = in.readLine();
            final VariantContext variant = (line == null ? null : progess.watch(cah.codec.decode(line)));
            if (variant == null || !variant.getContig().equals(prev_contig)) {
                if (sortingcollection != null) {
                    sortingcollection.doneAdding();
                    iter = sortingcollection.iterator();
                    LOG.info("dumping data for CONTIG: \"" + prev_contig + "\"");
                    final EqualRangeIterator<KeyAndLine> eqiter = new EqualRangeIterator<>(iter, new Comparator<KeyAndLine>() {

                        @Override
                        public int compare(final KeyAndLine o1, final KeyAndLine o2) {
                            return o1.key.compareTo(o2.key);
                        }
                    });
                    while (eqiter.hasNext()) {
                        final List<KeyAndLine> buffer = eqiter.next();
                        final KeyAndLine first = buffer.get(0);
                        LOG.info(first.key);
                        final List<VariantContext> variants = new ArrayList<>(buffer.size());
                        boolean has_only_filtered = true;
                        for (final KeyAndLine kal : buffer) {
                            final VariantContext ctx = cah.codec.decode(kal.ctx);
                            variants.add(ctx);
                            if (isDebuggingVariant(ctx)) {
                                LOG.info("Adding variant to list for key " + kal.key + " " + shortName(ctx));
                            }
                            if (!ctx.getContig().equals(prev_contig)) {
                                eqiter.close();
                                return wrapException("illegal state");
                            }
                            if (!ctx.isFiltered() || this.acceptFiltered) {
                                has_only_filtered = false;
                            // break; NOOOONNN !!!
                            }
                        }
                        // all ctx are filtered
                        if (has_only_filtered) {
                            LOG.warn("ALL IS FILTERED in " + first.key);
                            if (allDiscardedLog != null) {
                                for (final VariantContext ctx : variants) {
                                    if (isDebuggingVariant(ctx)) {
                                        LOG.info("Variant " + shortName(ctx) + " is part of never filtered for " + first.key);
                                    }
                                    allDiscardedLog.println(String.join("\t", first.key, ctx.getContig(), String.valueOf(ctx.getStart()), ctx.getReference().getDisplayString(), ctx.getAlternateAllele(0).getDisplayString(), String.valueOf(ctx.getFilters())));
                                }
                            }
                            continue;
                        }
                        // save vcf file
                        final VariantContextWriter out = VCFUtils.createVariantContextWriterToOutputStream(IOUtils.uncloseableOutputStream(pw));
                        final VCFHeader header2 = addMetaData(new VCFHeader(cah.header));
                        header2.addMetaDataLine(new VCFHeaderLine(VCF_HEADER_SPLITKEY, first.key));
                        out.writeHeader(header2);
                        for (final VariantContext ctx : variants) {
                            if (isDebuggingVariant(ctx)) {
                                LOG.info("saving variant " + shortName(ctx) + " to final output with key=" + first.key);
                            }
                            out.add(ctx);
                        }
                        // yes because wrapped into IOUtils.encloseableOutputSream
                        out.close();
                        pw.flush();
                    }
                    eqiter.close();
                    iter.close();
                    iter = null;
                    // dispose sorting collection
                    sortingcollection.cleanup();
                    sortingcollection = null;
                }
                // EOF met
                if (variant == null)
                    break;
                prev_contig = variant.getContig();
            }
            if (sortingcollection == null) {
                /* create sorting collection for new contig */
                sortingcollection = SortingCollection.newInstance(KeyAndLine.class, new KeyAndLineCodec(), new KeyAndLineComparator(), this.writingSortingCollection.maxRecordsInRam, this.writingSortingCollection.getTmpPaths());
                sortingcollection.setDestructiveIteration(true);
            }
            if (variant.getAlternateAlleles().size() != 1) {
                return wrapException("Expected only one allele per variant. Please use VcfMultiToOneAllele https://github.com/lindenb/jvarkit/wiki/VcfMultiToOneAllele.");
            }
            // no check for ctx.ifFiltered here, we do this later.
            for (final String key : splitter.keys(variant)) {
                if (isDebuggingVariant(variant)) {
                    LOG.info("Adding variant with key " + key + " " + shortName(variant));
                }
                sortingcollection.add(new KeyAndLine(key, line));
            }
        }
        progess.finish();
        pw.flush();
        pw.close();
        pw = null;
        if (allDiscardedLog != null) {
            allDiscardedLog.flush();
            allDiscardedLog.close();
            allDiscardedLog = null;
        }
        return RETURN_OK;
    } catch (final Exception err) {
        return wrapException(err);
    } finally {
        CloserUtil.close(iter);
        if (sortingcollection != null)
            sortingcollection.cleanup();
        CloserUtil.close(in);
        CloserUtil.close(pw);
        CloserUtil.close(allDiscardedLog);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) PrintStream(java.io.PrintStream) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) IOException(java.io.IOException) BufferedReader(java.io.BufferedReader)

Example 37 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VcfFilterNotInPedigree method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter delegate) {
    final VariantContextWriter out = this.component.open(delegate);
    final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(in.getHeader()).logger(LOG);
    out.writeHeader(in.getHeader());
    while (in.hasNext()) {
        out.add(progess.watch(in.next()));
    }
    progess.finish();
    out.close();
    return 0;
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) DelegateVariantContextWriter(com.github.lindenb.jvarkit.util.vcf.DelegateVariantContextWriter) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter)

Example 38 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VcfInjectPedigree method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter delegate) {
    final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(in.getHeader()).logger(LOG);
    final VariantContextWriter out = this.component.open(delegate);
    out.writeHeader(in.getHeader());
    while (in.hasNext()) {
        out.add(progess.watch(in.next()));
    }
    progess.finish();
    out.close();
    return 0;
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) DelegateVariantContextWriter(com.github.lindenb.jvarkit.util.vcf.DelegateVariantContextWriter) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter)

Example 39 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class CaseControlPlot method doWork.

@Override
public int doWork(final List<String> args) {
    ArchiveFactory archiveFactory = null;
    VcfIterator in = null;
    VariantContextWriter teeVariantWriter = null;
    final List<CaseControlExtractor> excractors = new ArrayList<>();
    try {
        in = super.openVcfIterator(oneFileOrNull(args));
        final VCFHeader header = in.getHeader();
        excractors.addAll(parseConfigFile(header));
        final Pedigree pedigree;
        if (this.pedigreeFile != null) {
            pedigree = Pedigree.newParser().parse(this.pedigreeFile);
        } else {
            pedigree = Pedigree.newParser().parse(header);
        }
        if (pedigree == null || pedigree.isEmpty()) {
            LOG.error("No pedigree defined , or it is empty");
            return -1;
        }
        final Set<Pedigree.Person> casepersons = pedigree.getPersons().stream().filter(F -> F.isAffected() && header.getSampleNamesInOrder().indexOf(F.getId()) != -1).collect(Collectors.toSet());
        if (casepersons.isEmpty()) {
            LOG.error("No Affected individuals in pedigree/header");
            return -1;
        }
        final Set<Pedigree.Person> controlpersons = pedigree.getPersons().stream().filter(F -> F.isUnaffected() && header.getSampleNamesInOrder().indexOf(F.getId()) != -1).collect(Collectors.toSet());
        if (controlpersons.isEmpty()) {
            LOG.error("No Unaffected individuals in pedigree/header");
            return -1;
        }
        if (this.teeToStdout) {
            teeVariantWriter = super.openVariantContextWriter(null);
            teeVariantWriter.writeHeader(header);
        }
        archiveFactory = ArchiveFactory.open(this.outputDirOrZip);
        for (final CaseControlExtractor extractor : excractors) {
            extractor.pw = archiveFactory.openWriter(extractor.name);
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        while (in.hasNext()) {
            final VariantContext ctx = progress.watch(in.next());
            if (teeVariantWriter != null)
                teeVariantWriter.add(ctx);
            for (final CaseControlExtractor handler : excractors) {
                handler.visit(ctx, casepersons, controlpersons);
            }
        }
        for (final CaseControlExtractor handler : excractors) {
            handler.close();
        }
        progress.finish();
        if (teeVariantWriter != null) {
            teeVariantWriter.close();
            teeVariantWriter = null;
        }
        in.close();
        in = null;
        archiveFactory.close();
        archiveFactory = null;
        return RETURN_OK;
    } catch (final Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(in);
        CloserUtil.close(archiveFactory);
        CloserUtil.close(teeVariantWriter);
        for (final CaseControlExtractor handler : excractors) {
            handler.close();
        }
    }
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) StringUtil(htsjdk.samtools.util.StringUtil) Document(org.w3c.dom.Document) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Map(java.util.Map) Node(org.w3c.dom.Node) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) ScriptException(javax.script.ScriptException) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Set(java.util.Set) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) SimpleBindings(javax.script.SimpleBindings) List(java.util.List) Element(org.w3c.dom.Element) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) VariantContext(htsjdk.variant.variantcontext.VariantContext) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) ScriptException(javax.script.ScriptException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 40 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VcfBurdenFilterGenes method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
    final VCFHeader header = in.getHeader();
    try {
        final VCFHeader h2 = addMetaData(new VCFHeader(header));
        final VCFFilterHeaderLine filterControlsHeader;
        if (!StringUtil.isBlank(this.filterTag)) {
            filterControlsHeader = new VCFFilterHeaderLine(this.filterTag.trim(), "Genes not in list " + this.geneFile);
            h2.addMetaDataLine(filterControlsHeader);
        } else {
            filterControlsHeader = null;
        }
        final List<String> lookColumns = Arrays.asList("CCDS", "Feature", "ENSP", "Gene", "HGNC", "HGNC_ID", "SYMBOL", "RefSeq");
        final VepPredictionParser vepParser = new VepPredictionParserFactory(header).get();
        final AnnPredictionParser annParser = new AnnPredictionParserFactory(header).get();
        final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(header.getSequenceDictionary()).logger(LOG);
        out.writeHeader(h2);
        while (in.hasNext() && !out.checkError()) {
            final VariantContext ctx = progess.watch(in.next());
            boolean keep = false;
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            // not just set FILTER ?
            if (filterControlsHeader == null) {
                vcb.rmAttribute(vepParser.getTag());
                vcb.rmAttribute(annParser.getTag());
            }
            final List<String> newVepList = new ArrayList<>();
            for (final String predStr : ctx.getAttributeAsList(vepParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                final VepPredictionParser.VepPrediction pred = vepParser.parseOnePrediction(ctx, predStr);
                for (final String col : lookColumns) {
                    final String token = pred.getByCol(col);
                    if (!StringUtil.isBlank(token) && this.geneNames.contains(token)) {
                        newVepList.add(predStr);
                        keep = true;
                        break;
                    }
                }
            }
            final List<String> newEffList = new ArrayList<>();
            for (final String predStr : ctx.getAttributeAsList(annParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                final AnnPredictionParser.AnnPrediction pred = annParser.parseOnePrediction(predStr);
                final String token = pred.getGeneName();
                if (!StringUtil.isBlank(token) && this.geneNames.contains(token)) {
                    newEffList.add(predStr);
                    keep = true;
                    break;
                }
            }
            // not just set FILTER ?
            if (filterControlsHeader == null) {
                if (!newVepList.isEmpty())
                    vcb.attribute(vepParser.getTag(), newVepList);
                if (!newEffList.isEmpty())
                    vcb.attribute(annParser.getTag(), newEffList);
            }
            if (filterControlsHeader != null) {
                if (!keep) {
                    vcb.filter(filterControlsHeader.getID());
                } else if (!ctx.isFiltered()) {
                    vcb.passFilters();
                }
                out.add(vcb.make());
            } else {
                if (keep)
                    out.add(vcb.make());
            }
        }
        progess.finish();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
    }
}
Also used : AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StringUtil(htsjdk.samtools.util.StringUtil) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) Files(java.nio.file.Files) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Aggregations

VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)132 File (java.io.File)67 VCFHeader (htsjdk.variant.vcf.VCFHeader)65 VariantContext (htsjdk.variant.variantcontext.VariantContext)60 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)53 ArrayList (java.util.ArrayList)41 IOException (java.io.IOException)38 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)35 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)34 HashSet (java.util.HashSet)32 List (java.util.List)29 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)28 Allele (htsjdk.variant.variantcontext.Allele)28 Collectors (java.util.stream.Collectors)27 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)26 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)26 Parameter (com.beust.jcommander.Parameter)24 Logger (com.github.lindenb.jvarkit.util.log.Logger)24 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)23 Program (com.github.lindenb.jvarkit.util.jcommander.Program)23