Search in sources :

Example 11 with LineIteratorImpl

use of htsjdk.tribble.readers.LineIteratorImpl in project jvarkit by lindenb.

The following example shows the method doWork of the class VCFCompare.

/**
 * Compares two VCF files.
 *
 * <p>Every data line of both inputs is loaded into a disk-backed
 * {@link SortingCollection} tagged with its file index, then iterated in
 * sorted order so that the same variant coming from both files lands in the
 * same "row". Each row is visited by a list of Venn-diagram collectors
 * (all variants, having ID, QUAL&gt;30, per variant type, per predicted SO
 * term for VEP/SnpEff/ANN, per common-sample genotype) and the result is
 * written as an HTML report.</p>
 *
 * @param args exactly two VCF file names or URIs
 * @return 0 on success, -1 on error
 */
@Override
public int doWork(final List<String> args) {
    if (args.isEmpty()) {
        LOG.error("VCFs missing.");
        return -1;
    }
    if (args.size() != 2) {
        // was System.err.println with a typo ("number or arguments"); use LOG.error for consistency with the branch above
        LOG.error("Illegal number of arguments. Expected two VCFs.");
        return -1;
    }
    PrintWriter pw = null;
    XMLStreamWriter w = null;
    InputStream in = null;
    SortingCollection<LineAndFile> variants = null;
    try {
        LineAndFileComparator varcmp = new LineAndFileComparator();
        // disk-backed collection holding every VCF data line of both files, sorted by variant
        variants = SortingCollection.newInstance(LineAndFile.class, new LineAndFileCodec(), varcmp, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        variants.setDestructiveIteration(true);
        for (int i = 0; i < 2; ++i) {
            this.inputs[i] = new Input();
            this.inputs[i].codec = VCFUtils.createDefaultVCFCodec();
            this.inputs[i].filename = args.get(i);
            LOG.info("Opening " + this.inputs[i].filename);
            in = IOUtils.openURIForReading(this.inputs[i].filename);
            final LineReader lr = new SynchronousLineReader(in);
            final LineIterator li = new LineIteratorImpl(lr);
            // readActualHeader consumes the meta/header lines, leaving only data lines in 'li'
            this.inputs[i].header = (VCFHeader) this.inputs[i].codec.readActualHeader(li);
            this.inputs[i].vepPredictionParser = new VepPredictionParserFactory(this.inputs[i].header).get();
            this.inputs[i].snpEffPredictionParser = new SnpEffPredictionParserFactory(this.inputs[i].header).get();
            this.inputs[i].annPredictionParser = new AnnPredictionParserFactory(this.inputs[i].header).get();
            while (li.hasNext()) {
                LineAndFile laf = new LineAndFile();
                laf.fileIdx = i;
                laf.line = li.next();
                variants.add(laf);
            }
            LOG.info("Done Reading " + this.inputs[i].filename);
            CloserUtil.close(li);
            CloserUtil.close(lr);
            CloserUtil.close(in);
            // mark the stream closed so the finally block does not close it twice
            in = null;
        }
        variants.doneAdding();
        LOG.info("Done Adding");
        // samples present in both headers; used by the per-genotype Venn diagrams
        Set<String> commonSamples = new TreeSet<String>(this.inputs[0].header.getSampleNamesInOrder());
        commonSamples.retainAll(this.inputs[1].header.getSampleNamesInOrder());
        List<Venn0> venn1List = new ArrayList<VCFCompare.Venn0>();
        venn1List.add(new Venn1("ALL"));
        venn1List.add(new Venn1("having ID") {

            @Override
            public VariantContext filter(VariantContext ctx, int fileIndex) {
                return ctx == null || !ctx.hasID() ? null : ctx;
            }
        });
        venn1List.add(new Venn1("QUAL greater 30") {

            @Override
            public VariantContext filter(VariantContext ctx, int fileIndex) {
                return ctx == null || !ctx.hasLog10PError() || ctx.getPhredScaledQual() < 30.0 ? null : ctx;
            }
        });
        // one Venn diagram per variant type (SNP, INDEL, ...)
        for (VariantContext.Type t : VariantContext.Type.values()) {
            venn1List.add(new VennType(t));
        }
        // one Venn diagram per Sequence-Ontology term and per prediction tool
        for (SequenceOntologyTree.Term term : SequenceOntologyTree.getInstance().getTerms()) {
            venn1List.add(new VennPred("vep", term) {

                @Override
                Set<Term> terms(VariantContext ctx, int file_id) {
                    Set<Term> tt = new HashSet<SequenceOntologyTree.Term>();
                    for (VepPredictionParser.VepPrediction pred : VCFCompare.this.inputs[file_id].vepPredictionParser.getPredictions(ctx)) {
                        tt.addAll(pred.getSOTerms());
                    }
                    return tt;
                }
            });
            venn1List.add(new VennPred("SnpEff", term) {

                @Override
                Set<Term> terms(VariantContext ctx, int file_id) {
                    Set<Term> tt = new HashSet<SequenceOntologyTree.Term>();
                    for (SnpEffPredictionParser.SnpEffPrediction pred : VCFCompare.this.inputs[file_id].snpEffPredictionParser.getPredictions(ctx)) {
                        tt.addAll(pred.getSOTerms());
                    }
                    return tt;
                }
            });
            venn1List.add(new VennPred("ANN", term) {

                @Override
                Set<Term> terms(VariantContext ctx, int file_id) {
                    Set<Term> tt = new HashSet<SequenceOntologyTree.Term>();
                    for (AnnPredictionParser.AnnPrediction pred : VCFCompare.this.inputs[file_id].annPredictionParser.getPredictions(ctx)) {
                        tt.addAll(pred.getSOTerms());
                    }
                    return tt;
                }
            });
        }
        for (String s : commonSamples) {
            venn1List.add(new VennGType(s));
        }
        /* START : digest results ====================== */
        // NOTE(review): 'diff' is only ever incremented, never reported — possibly vestigial
        Counter<String> diff = new Counter<String>();
        List<LineAndFile> row = new ArrayList<LineAndFile>();
        CloseableIterator<LineAndFile> iter = variants.iterator();
        for (; ; ) {
            LineAndFile rec = null;
            if (iter.hasNext()) {
                rec = iter.next();
            }
            // flush the current row when the iterator is exhausted or a different variant starts
            if (rec == null || (!row.isEmpty() && varcmp.compare(row.get(0), rec) != 0)) {
                if (!row.isEmpty()) {
                    diff.incr("count.variations");
                    // contexes_init[k] is the variant as seen in file k (null if absent from that file)
                    VariantContext[] contexes_init = new VariantContext[] { null, null };
                    for (LineAndFile var : row) {
                        if (contexes_init[var.fileIdx] != null) {
                            LOG.error("Duplicate context in " + inputs[var.fileIdx].filename + " : " + var.line);
                            continue;
                        }
                        contexes_init[var.fileIdx] = var.getContext();
                    }
                    for (Venn0 venn : venn1List) {
                        venn.visit(contexes_init);
                    }
                    row.clear();
                }
                if (rec == null)
                    break;
            }
            row.add(rec);
        }
        iter.close();
        /* END : digest results ====================== */
        pw = super.openFileOrStdoutAsPrintWriter(outputFile);
        XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance();
        w = xmlfactory.createXMLStreamWriter(pw);
        w.writeStartElement("html");
        w.writeStartElement("body");
        /* specific samples */
        w.writeStartElement("div");
        w.writeStartElement("dl");
        // i==0/1 : samples private to each input ; i==2 : common samples
        for (int i = 0; i < 3; ++i) {
            String title;
            Set<String> samples;
            switch(i) {
                case 0:
                case 1:
                    title = "Sample(s) for " + this.inputs[i].filename + ".";
                    samples = new TreeSet<String>(this.inputs[i].header.getSampleNamesInOrder());
                    samples.removeAll(commonSamples);
                    break;
                default:
                    title = "Common Sample(s).";
                    samples = new TreeSet<String>(commonSamples);
                    break;
            }
            w.writeStartElement("dt");
            w.writeCharacters(title);
            w.writeEndElement();
            w.writeStartElement("dd");
            w.writeStartElement("ol");
            for (String s : samples) {
                w.writeStartElement("li");
                w.writeCharacters(s);
                w.writeEndElement();
            }
            w.writeEndElement();
            w.writeEndElement();
        }
        // dl
        w.writeEndElement();
        // div
        w.writeEndElement();
        for (Venn0 v : venn1List) {
            v.write(w);
        }
        // body
        w.writeEndElement();
        // html
        w.writeEndElement();
        w.writeEndDocument();
        w.close();
        w = null;
        pw.flush();
        pw.close();
        pw = null;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(w);
        CloserUtil.close(pw);
        // close the current input stream if an exception interrupted the reading loop (was leaked before)
        CloserUtil.close(in);
        if (variants != null)
            variants.cleanup();
    }
    return 0;
}
Also used : Term(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree.Term) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) Set(java.util.Set) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) LineIterator(htsjdk.tribble.readers.LineIterator) Counter(com.github.lindenb.jvarkit.util.Counter) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) TreeSet(java.util.TreeSet) SynchronousLineReader(htsjdk.tribble.readers.SynchronousLineReader) LineReader(htsjdk.tribble.readers.LineReader) SynchronousLineReader(htsjdk.tribble.readers.SynchronousLineReader) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) LineIteratorImpl(htsjdk.tribble.readers.LineIteratorImpl) PrintWriter(java.io.PrintWriter) DataInputStream(java.io.DataInputStream) InputStream(java.io.InputStream) SnpEffPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParserFactory) Term(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree.Term) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException) SequenceOntologyTree(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Example 12 with LineIteratorImpl

use of htsjdk.tribble.readers.LineIteratorImpl in project jvarkit by lindenb.

The following example shows the method doWork of the class FixVCF.

/**
 * Copies a VCF stream to a temporary gzipped file while collecting the FILTER
 * and INFO keys that appear in the body but are missing from the header, adds
 * the missing header lines, then re-reads the temporary file with the fixed
 * header and forwards every variant to the given writer.
 *
 * @param filenameIn original file name, recorded in the added header meta line
 * @param vcfStream  the VCF content to fix (this method does not close it)
 * @param w          destination writer; receives the fixed header and all variants
 * @return 0 on success, -1 if a body line could not be decoded
 * @throws IOException on I/O error
 */
private int doWork(String filenameIn, InputStream vcfStream, VariantContextWriter w) throws IOException {
    final AbstractVCFCodec vcfCodec = VCFUtils.createDefaultVCFCodec();
    LineIterator r = new LineIteratorImpl(new SynchronousLineReader(vcfStream));
    final VCFHeader header = (VCFHeader) vcfCodec.readActualHeader(r);
    // samples names have been changed by picard api and reordered !!!
    // re-create the original order
    List<String> sampleNamesInSameOrder = new ArrayList<String>(header.getSampleNamesInOrder().size());
    for (int col = 0; col < header.getSampleNamesInOrder().size(); ++col) {
        for (String sample : header.getSampleNameToOffset().keySet()) {
            if (header.getSampleNameToOffset().get(sample) == col) {
                sampleNamesInSameOrder.add(sample);
                break;
            }
        }
    }
    if (sampleNamesInSameOrder.size() != header.getSampleNamesInOrder().size()) {
        throw new IllegalStateException();
    }
    VCFHeader h2 = new VCFHeader(header.getMetaDataInInputOrder(), sampleNamesInSameOrder);
    File tmp = IOUtil.newTempFile("tmp", ".vcf.gz", new File[] { tmpDir });
    tmp.deleteOnExit();
    PrintWriter pw = new PrintWriter(new GZIPOutputStream(new FileOutputStream(tmp)));
    while (r.hasNext()) {
        String line = r.next();
        pw.println(line);
        VariantContext ctx = null;
        try {
            ctx = vcfCodec.decode(line);
        } catch (Exception err) {
            pw.close();
            LOG.error(line);
            LOG.error(err);
            return -1;
        }
        // declare every FILTER used in the body but absent from the header
        for (String f : ctx.getFilters()) {
            if (h2.getFilterHeaderLine(f) != null)
                continue;
            // if(f.equals(VCFConstants.PASSES_FILTERS_v4)) continue; hum...
            if (f.isEmpty() || f.equals(VCFConstants.UNFILTERED))
                continue;
            LOG.info("Fixing missing Filter:" + f);
            h2.addMetaDataLine(new VCFFilterHeaderLine(f));
        }
        // declare every INFO tag used in the body but absent from the header
        for (String tag : ctx.getAttributes().keySet()) {
            if (h2.getInfoHeaderLine(tag) != null)
                continue;
            LOG.info("Fixing missing INFO:" + tag);
            h2.addMetaDataLine(new VCFInfoHeaderLine(tag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "undefined. Saved by " + getClass()));
        }
    }
    pw.flush();
    pw.close();
    // PrintWriter swallows IOExceptions: fail loudly if the temporary gzip is incomplete
    if (pw.checkError()) {
        throw new IOException("I/O error while writing temporary VCF " + tmp);
    }
    pw = null;
    // fixed typo: "frm" -> "from"
    LOG.info("re-reading VCF from tmpFile:" + tmp);
    h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName(), "Saved VCF FILTER AND INFO from " + filenameIn));
    // save header in memory
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    VariantContextWriter w2 = VCFUtils.createVariantContextWriterToOutputStream(baos);
    w2.writeHeader(h2);
    w2.close();
    baos.close();
    // reopen tmp file: prepend the in-memory fixed header to the gzipped body
    @SuppressWarnings("resource") VcfIterator in = new VcfIteratorImpl(new SequenceInputStream(new ByteArrayInputStream(baos.toByteArray()), new GZIPInputStream(new FileInputStream(tmp))));
    w.writeHeader(h2);
    while (in.hasNext()) {
        w.add(in.next());
    }
    in.close();
    tmp.delete();
    return 0;
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) LineIterator(htsjdk.tribble.readers.LineIterator) GZIPInputStream(java.util.zip.GZIPInputStream) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) GZIPOutputStream(java.util.zip.GZIPOutputStream) SynchronousLineReader(htsjdk.tribble.readers.SynchronousLineReader) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) LineIteratorImpl(htsjdk.tribble.readers.LineIteratorImpl) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) VcfIteratorImpl(com.github.lindenb.jvarkit.util.vcf.VcfIteratorImpl) ByteArrayOutputStream(java.io.ByteArrayOutputStream) AbstractVCFCodec(htsjdk.variant.vcf.AbstractVCFCodec) IOException(java.io.IOException) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) FileInputStream(java.io.FileInputStream) SequenceInputStream(java.io.SequenceInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileOutputStream(java.io.FileOutputStream) File(java.io.File)

Example 13 with LineIteratorImpl

use of htsjdk.tribble.readers.LineIteratorImpl in project jvarkit by lindenb.

The following example shows the method workUsingSortingCollection of the class VCFMerge.

/**
 * Merges the user-supplied VCF files using a disk-backed SortingCollection.
 *
 * <p>Each file's data lines are loaded into the collection tagged with the
 * file index (optionally filtered on a user region), the collection is then
 * iterated in sorted order, rows of records describing the same variant are
 * grouped, and each group is merged and written to the output.</p>
 *
 * <p>All input dictionaries must be non-empty and identical to the first
 * file's dictionary, otherwise an exception is thrown.</p>
 *
 * @return RETURN_OK on success, -1 on error
 */
private int workUsingSortingCollection() {
    VariantContextWriter w = null;
    SortingCollection<VariantOfFile> array = null;
    InputStream in = null;
    CloseableIterator<VariantOfFile> iter = null;
    try {
        final List<String> IN = new ArrayList<String>(this.userVcfFiles);
        // union of all sample names / header meta lines across the inputs
        final Set<String> genotypeSampleNames = new TreeSet<String>();
        final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
        array = SortingCollection.newInstance(VariantOfFile.class, new VariantCodec(), new VariantComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        array.setDestructiveIteration(true);
        for (int fileIndex = 0; fileIndex < IN.size(); ++fileIndex) {
            final String vcfFile = IN.get(fileIndex);
            LOG.info("reading from " + vcfFile + " " + (fileIndex + 1) + "/" + IN.size());
            final VCFHandler handler = new VCFHandler(vcfFile);
            vcfHandlers.add(handler);
            in = IOUtils.openURIForReading(vcfFile);
            final LineReader lr = new SynchronousLineReader(in);
            final LineIterator lit = new LineIteratorImpl(lr);
            // consumes the header lines; only data lines remain in 'lit'
            handler.header = (VCFHeader) handler.vcfCodec.readActualHeader(lit);
            final SAMSequenceDictionary dict1 = handler.header.getSequenceDictionary();
            if (dict1 == null)
                throw new RuntimeException("dictionary missing in " + vcfFile);
            if (dict1.isEmpty())
                throw new RuntimeException("dictionary is Empty in " + vcfFile);
            genotypeSampleNames.addAll(handler.header.getSampleNamesInOrder());
            metaData.addAll(handler.header.getMetaDataInInputOrder());
            // the first file's dictionary is the reference; all others must match it
            if (fileIndex == 0) {
                this.global_dictionary = dict1;
            } else if (!SequenceUtil.areSequenceDictionariesEqual(global_dictionary, dict1)) {
                throw new JvarkitException.DictionariesAreNotTheSame(global_dictionary, dict1);
            }
            // predicate selecting the variants to keep: restricted to the user
            // region if one was given, otherwise accepts everything
            final Predicate<VariantOfFile> accept;
            if (!StringUtil.isBlank(VCFMerge.this.regionStr)) {
                final IntervalParser intervalParser = new IntervalParser(dict1);
                intervalParser.setContigNameIsWholeContig(true);
                final Interval rgn = intervalParser.parse(VCFMerge.this.regionStr);
                accept = (VOL) -> {
                    final VariantContext ctx = VOL.parse();
                    return rgn.intersects(new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd()));
                };
            } else {
                accept = (VOL) -> true;
            }
            while (lit.hasNext()) {
                final VariantOfFile vof = new VariantOfFile();
                vof.fileIndex = fileIndex;
                vof.line = lit.next();
                if (!accept.test(vof))
                    continue;
                array.add(vof);
            }
            // closing the stream also releases the wrapping reader/iterator
            in.close();
            in = null;
        }
        array.doneAdding();
        LOG.info("merging..." + vcfHandlers.size() + " vcfs");
        /* CREATE THE NEW VCH Header */
        VCFHeader mergeHeader = null;
        if (this.doNotMergeRowLines) {
            metaData.add(NO_MERGE_INFO_HEADER);
        }
        mergeHeader = new VCFHeader(metaData, genotypeSampleNames);
        // create the context writer
        w = super.openVariantContextWriter(outputFile);
        w.writeHeader(mergeHeader);
        iter = array.iterator();
        // 'row' accumulates consecutive records describing the same variant
        final List<VariantOfFile> row = new ArrayList<VariantOfFile>();
        for (; ; ) {
            VariantOfFile var = null;
            if (iter.hasNext()) {
                var = iter.next();
            } else {
                LOG.info("end of iteration");
                // flush the last pending row before leaving the loop
                if (!row.isEmpty()) {
                    for (final VariantContext merged : buildContextFromVariantOfFiles(mergeHeader, row)) {
                        w.add(merged);
                    }
                }
                break;
            }
            // a new variant starts: merge and emit the completed row
            if (!row.isEmpty() && !row.get(0).same(var)) {
                for (final VariantContext merged : buildContextFromVariantOfFiles(mergeHeader, row)) {
                    w.add(merged);
                }
                row.clear();
            }
            row.add(var);
        }
        // normal-path cleanup; the finally block tolerates the nulled references
        CloserUtil.close(w);
        w = null;
        array.cleanup();
        array = null;
        CloserUtil.close(iter);
        iter = null;
        LOG.info("done");
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        this.userVcfFiles.clear();
        CloserUtil.close(w);
        CloserUtil.close(in);
        CloserUtil.close(iter);
        if (array != null)
            array.cleanup();
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) LineIterator(htsjdk.tribble.readers.LineIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) TreeSet(java.util.TreeSet) SynchronousLineReader(htsjdk.tribble.readers.SynchronousLineReader) LineReader(htsjdk.tribble.readers.LineReader) SynchronousLineReader(htsjdk.tribble.readers.SynchronousLineReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) LineIteratorImpl(htsjdk.tribble.readers.LineIteratorImpl) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) DataInputStream(java.io.DataInputStream) InputStream(java.io.InputStream) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) IOException(java.io.IOException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Interval(htsjdk.samtools.util.Interval)

Example 14 with LineIteratorImpl

use of htsjdk.tribble.readers.LineIteratorImpl in project jvarkit by lindenb.

The following example shows the method scan of the class WorldMapGenome.

/**
 * Reads tab-delimited records (chrom, start, end, country, ...) from the
 * input and, for each record whose chromosome and country are both known,
 * draws a faint curved link between the country's centroid on the world map
 * and the position of the interval's midpoint on the circular genome.
 *
 * @param g     target graphics context; composite and stroke are restored after each curve
 * @param input tab-delimited data; lines with fewer than 4 columns are skipped with a warning
 * @throws IOException declared for callers; the iterator itself surfaces read errors as runtime exceptions
 */
private void scan(Graphics2D g, InputStream input) throws IOException {
    // countries already reported as unknown, so each is logged only once
    Set<String> unknownC = new HashSet<String>();
    Pattern tab = Pattern.compile("[\t]");
    LineIterator in = new LineIteratorImpl(new SynchronousLineReader(input));
    while (in.hasNext()) {
        String line = in.next();
        String[] tokens = tab.split(line, 5);
        if (tokens.length < 4) {
            LOG.warning("Ignoring " + line);
            continue;
        }
        SAMSequenceRecord rec = this.context.getDictionary().getSequence(tokens[0]);
        if (rec == null) {
            LOG.warning("unknown chromosome " + tokens[0]);
            continue;
        }
        // normalize: lowercase, spaces removed, to match the country2shape keys
        String country = tokens[3].toLowerCase().replaceAll("[ ]", "");
        Shape shape = this.country2shape.get(country);
        if (shape == null) {
            if (!unknownC.contains(country)) {
                unknownC.add(country);
                LOG.warning("unknown country " + country);
            }
            continue;
        }
        seen.incr(country);
        int midpos = (Integer.parseInt(tokens[1]) + Integer.parseInt(tokens[2])) / 2;
        // country center
        Point2D.Double pt1 = new Point2D.Double(shape.getBounds2D().getCenterX(), shape.getBounds2D().getCenterY());
        // circle point
        Point2D pt3 = this.context.convertPositionToPoint(tokens[0], midpos, getRadiusInt());
        double angle = this.context.convertPositionToRadian(rec, midpos);
        // was 'double angle2 = angle -= Math.PI / 10.0;' — the compound assignment was
        // pointless since 'angle' is never used afterwards
        double angle2 = angle - Math.PI / 10.0;
        double distance13 = context.getCenter().distance(new Point2D.Double((pt1.getX() + pt3.getX()) / 2.0, (pt1.getY() + pt3.getY()) / 2.0));
        // mid (control) point of the quadratic curve.
        // BUGFIX: the Y coordinate previously used the center X coordinate
        // (getCenterX), skewing every curve's control point horizontally.
        Point2D pt2 = new Point2D.Double(context.getCenter().getX() + distance13 * Math.cos(angle2), context.getCenter().getY() + distance13 * Math.sin(angle2));
        Composite old = g.getComposite();
        Stroke olds = g.getStroke();
        g.setStroke(new BasicStroke(0.8f));
        // very low alpha so that many overlapping links build up density
        g.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, 0.02f));
        g.setColor(Color.DARK_GRAY);
        GeneralPath p = new GeneralPath();
        p.moveTo(pt1.getX(), pt1.getY());
        p.quadTo(pt2.getX(), pt2.getY(), pt3.getX(), pt3.getY());
        // NOTE(review): closePath() adds a straight segment back to pt1, so the drawn
        // figure is a closed sliver rather than a bare curve — confirm this is intended
        p.closePath();
        g.draw(p);
        g.setComposite(old);
        g.setStroke(olds);
    }
    CloserUtil.close(in);
}
Also used : BasicStroke(java.awt.BasicStroke) Pattern(java.util.regex.Pattern) Stroke(java.awt.Stroke) BasicStroke(java.awt.BasicStroke) Shape(java.awt.Shape) AlphaComposite(java.awt.AlphaComposite) Composite(java.awt.Composite) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) LineIterator(htsjdk.tribble.readers.LineIterator) SynchronousLineReader(htsjdk.tribble.readers.SynchronousLineReader) LineIteratorImpl(htsjdk.tribble.readers.LineIteratorImpl) HashSet(java.util.HashSet)

Aggregations

LineIteratorImpl (htsjdk.tribble.readers.LineIteratorImpl)14 LineIterator (htsjdk.tribble.readers.LineIterator)13 LineReader (htsjdk.tribble.readers.LineReader)9 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)7 Test (org.testng.annotations.Test)7 SynchronousLineReader (htsjdk.tribble.readers.SynchronousLineReader)5 IOException (java.io.IOException)5 VariantContext (htsjdk.variant.variantcontext.VariantContext)4 HashSet (java.util.HashSet)4 VCFHeader (htsjdk.variant.vcf.VCFHeader)3 PrintWriter (java.io.PrintWriter)3 ArrayList (java.util.ArrayList)3 TreeSet (java.util.TreeSet)3 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)2 AbstractVCFCodec (htsjdk.variant.vcf.AbstractVCFCodec)2 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)2 DataInputStream (java.io.DataInputStream)2 InputStream (java.io.InputStream)2 JvarkitException (com.github.lindenb.jvarkit.lang.JvarkitException)1 Counter (com.github.lindenb.jvarkit.util.Counter)1