Search in sources :

Example 1 with Term

Use of com.github.lindenb.jvarkit.util.so.SequenceOntologyTree.Term in project jvarkit by lindenb.

The method doWork of the class VCFCompare.

/**
 * Compares two VCF inputs and writes an HTML report of the comparison.
 *
 * <p>Every non-header line of both inputs is loaded into a disk-backed
 * {@code SortingCollection}, tagged with the index (0 or 1) of the input it
 * came from. The sorted records are then walked in groups that compare equal
 * under {@code LineAndFileComparator}; each group is handed to a list of
 * {@code Venn0} visitors (all variants, variants having an ID, QUAL filter,
 * one per variant type, one per Sequence Ontology term and prediction parser,
 * one per sample common to both headers). Finally the sample lists and each
 * visitor's result are written through an {@code XMLStreamWriter}.
 *
 * @param args the locations of the inputs; exactly two are required
 * @return {@code 0} on success, {@code -1} when the arguments are invalid or
 *         any exception is raised while reading, comparing or writing
 */
@Override
public int doWork(final List<String> args) {
    if (args.isEmpty()) {
        LOG.error("VCFs missing.");
        return -1;
    }
    if (args.size() != 2) {
        // Reported through LOG like every other diagnostic of this method.
        LOG.error("Illegal number or arguments. Expected two VCFs");
        return -1;
    }
    PrintWriter pw = null;
    XMLStreamWriter w = null;
    InputStream in = null;
    SortingCollection<LineAndFile> variants = null;
    // Declared here so that the finally clause can release it on failure.
    CloseableIterator<LineAndFile> iter = null;
    try {
        LineAndFileComparator varcmp = new LineAndFileComparator();
        variants = SortingCollection.newInstance(LineAndFile.class, new LineAndFileCodec(), varcmp, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        variants.setDestructiveIteration(true);
        /* read the header of each input and push all its variant lines in the sorting collection */
        for (int i = 0; i < 2; ++i) {
            this.inputs[i] = new Input();
            this.inputs[i].codec = VCFUtils.createDefaultVCFCodec();
            this.inputs[i].filename = args.get(i);
            LOG.info("Opening " + this.inputs[i].filename);
            in = IOUtils.openURIForReading(this.inputs[i].filename);
            final LineReader lr = new SynchronousLineReader(in);
            final LineIterator li = new LineIteratorImpl(lr);
            this.inputs[i].header = (VCFHeader) this.inputs[i].codec.readActualHeader(li);
            this.inputs[i].vepPredictionParser = new VepPredictionParserFactory(this.inputs[i].header).get();
            this.inputs[i].snpEffPredictionParser = new SnpEffPredictionParserFactory(this.inputs[i].header).get();
            this.inputs[i].annPredictionParser = new AnnPredictionParserFactory(this.inputs[i].header).get();
            while (li.hasNext()) {
                LineAndFile laf = new LineAndFile();
                laf.fileIdx = i;
                laf.line = li.next();
                variants.add(laf);
            }
            LOG.info("Done Reading " + this.inputs[i].filename);
            CloserUtil.close(li);
            CloserUtil.close(lr);
            CloserUtil.close(in);
            // Already released: keep the finally clause from closing it twice.
            in = null;
        }
        variants.doneAdding();
        LOG.info("Done Adding");
        /* samples present in both headers */
        Set<String> commonSamples = new TreeSet<String>(this.inputs[0].header.getSampleNamesInOrder());
        commonSamples.retainAll(this.inputs[1].header.getSampleNamesInOrder());
        /* build the list of visitors */
        List<Venn0> venn1List = new ArrayList<VCFCompare.Venn0>();
        venn1List.add(new Venn1("ALL"));
        venn1List.add(new Venn1("having ID") {

            @Override
            public VariantContext filter(VariantContext ctx, int fileIndex) {
                return ctx == null || !ctx.hasID() ? null : ctx;
            }
        });
        venn1List.add(new Venn1("QUAL greater 30") {

            @Override
            public VariantContext filter(VariantContext ctx, int fileIndex) {
                return ctx == null || !ctx.hasLog10PError() || ctx.getPhredScaledQual() < 30.0 ? null : ctx;
            }
        });
        for (VariantContext.Type t : VariantContext.Type.values()) {
            venn1List.add(new VennType(t));
        }
        /* one visitor per Sequence Ontology term and per prediction parser */
        for (SequenceOntologyTree.Term term : SequenceOntologyTree.getInstance().getTerms()) {
            venn1List.add(new VennPred("vep", term) {

                @Override
                Set<Term> terms(VariantContext ctx, int file_id) {
                    Set<Term> tt = new HashSet<SequenceOntologyTree.Term>();
                    for (VepPredictionParser.VepPrediction pred : VCFCompare.this.inputs[file_id].vepPredictionParser.getPredictions(ctx)) {
                        tt.addAll(pred.getSOTerms());
                    }
                    return tt;
                }
            });
            venn1List.add(new VennPred("SnpEff", term) {

                @Override
                Set<Term> terms(VariantContext ctx, int file_id) {
                    Set<Term> tt = new HashSet<SequenceOntologyTree.Term>();
                    for (SnpEffPredictionParser.SnpEffPrediction pred : VCFCompare.this.inputs[file_id].snpEffPredictionParser.getPredictions(ctx)) {
                        tt.addAll(pred.getSOTerms());
                    }
                    return tt;
                }
            });
            venn1List.add(new VennPred("ANN", term) {

                @Override
                Set<Term> terms(VariantContext ctx, int file_id) {
                    Set<Term> tt = new HashSet<SequenceOntologyTree.Term>();
                    for (AnnPredictionParser.AnnPrediction pred : VCFCompare.this.inputs[file_id].annPredictionParser.getPredictions(ctx)) {
                        tt.addAll(pred.getSOTerms());
                    }
                    return tt;
                }
            });
        }
        for (String s : commonSamples) {
            venn1List.add(new VennGType(s));
        }
        /* START : digest results ====================== */
        Counter<String> diff = new Counter<String>();
        List<LineAndFile> row = new ArrayList<LineAndFile>();
        iter = variants.iterator();
        for (; ; ) {
            /* rec stays null once the iterator is exhausted: it is the signal to flush the last row */
            LineAndFile rec = null;
            if (iter.hasNext()) {
                rec = iter.next();
            }
            /* end of data, or the new record does not compare equal to the current row: flush the row */
            if (rec == null || (!row.isEmpty() && varcmp.compare(row.get(0), rec) != 0)) {
                if (!row.isEmpty()) {
                    diff.incr("count.variations");
                    /* one slot per input file */
                    VariantContext[] contexes_init = new VariantContext[] { null, null };
                    for (LineAndFile var : row) {
                        if (contexes_init[var.fileIdx] != null) {
                            /* only the first record of each file is kept for a given row */
                            LOG.error("Duplicate context in " + inputs[var.fileIdx].filename + " : " + var.line);
                            continue;
                        }
                        contexes_init[var.fileIdx] = var.getContext();
                    }
                    for (Venn0 venn : venn1List) {
                        venn.visit(contexes_init);
                    }
                    row.clear();
                }
                if (rec == null)
                    break;
            }
            row.add(rec);
        }
        iter.close();
        iter = null;
        /* END : digest results ====================== */
        pw = super.openFileOrStdoutAsPrintWriter(outputFile);
        XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance();
        w = xmlfactory.createXMLStreamWriter(pw);
        w.writeStartElement("html");
        w.writeStartElement("body");
        /* specific samples */
        w.writeStartElement("div");
        w.writeStartElement("dl");
        /* i=0 and i=1: samples found only in that input; i=2: samples common to both */
        for (int i = 0; i < 3; ++i) {
            String title;
            Set<String> samples;
            switch(i) {
                case 0:
                case 1:
                    title = "Sample(s) for " + this.inputs[i].filename + ".";
                    samples = new TreeSet<String>(this.inputs[i].header.getSampleNamesInOrder());
                    samples.removeAll(commonSamples);
                    break;
                default:
                    title = "Common Sample(s).";
                    samples = new TreeSet<String>(commonSamples);
                    break;
            }
            w.writeStartElement("dt");
            w.writeCharacters(title);
            w.writeEndElement();
            w.writeStartElement("dd");
            w.writeStartElement("ol");
            for (String s : samples) {
                w.writeStartElement("li");
                w.writeCharacters(s);
                w.writeEndElement();
            }
            // ol
            w.writeEndElement();
            // dd
            w.writeEndElement();
        }
        // dl
        w.writeEndElement();
        // div
        w.writeEndElement();
        for (Venn0 v : venn1List) {
            v.write(w);
        }
        // body
        w.writeEndElement();
        // html
        w.writeEndElement();
        w.writeEndDocument();
        w.close();
        w = null;
        pw.flush();
        pw.close();
        pw = null;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        /* release whatever is still open after a failure; these are all null on the normal path */
        CloserUtil.close(iter);
        CloserUtil.close(in);
        CloserUtil.close(w);
        CloserUtil.close(pw);
        if (variants != null)
            variants.cleanup();
    }
    return 0;
}
Also used : Term(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree.Term) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) Set(java.util.Set) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) LineIterator(htsjdk.tribble.readers.LineIterator) Counter(com.github.lindenb.jvarkit.util.Counter) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) TreeSet(java.util.TreeSet) SynchronousLineReader(htsjdk.tribble.readers.SynchronousLineReader) LineReader(htsjdk.tribble.readers.LineReader) SynchronousLineReader(htsjdk.tribble.readers.SynchronousLineReader) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) LineIteratorImpl(htsjdk.tribble.readers.LineIteratorImpl) PrintWriter(java.io.PrintWriter) DataInputStream(java.io.DataInputStream) InputStream(java.io.InputStream) SnpEffPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParserFactory) Term(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree.Term) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException) SequenceOntologyTree(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Aggregations

Counter (com.github.lindenb.jvarkit.util.Counter)1 SequenceOntologyTree (com.github.lindenb.jvarkit.util.so.SequenceOntologyTree)1 Term (com.github.lindenb.jvarkit.util.so.SequenceOntologyTree.Term)1 AnnPredictionParserFactory (com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory)1 SnpEffPredictionParserFactory (com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParserFactory)1 VepPredictionParserFactory (com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)1 LineIterator (htsjdk.tribble.readers.LineIterator)1 LineIteratorImpl (htsjdk.tribble.readers.LineIteratorImpl)1 LineReader (htsjdk.tribble.readers.LineReader)1 SynchronousLineReader (htsjdk.tribble.readers.SynchronousLineReader)1 VariantContext (htsjdk.variant.variantcontext.VariantContext)1 DataInputStream (java.io.DataInputStream)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 PrintWriter (java.io.PrintWriter)1 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 Set (java.util.Set)1 TreeSet (java.util.TreeSet)1 XMLOutputFactory (javax.xml.stream.XMLOutputFactory)1