Search in sources :

Example 51 with VCFInfoHeaderLine

use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.

the class RDFVcfWriter method writeHeader.

public void writeHeader(VCFHeader header, URI source) {
    if (this.header != null)
        throw new RuntimeException("Header was already written");
    this.header = header;
    this.source = source;
    if (this.source == null)
        this.source = URI.create("urn:source/id" + (++id_generator));
    try {
        writeStartDocument();
        this.w.writeStartElement(PFX, "Source", NS);
        this.w.writeAttribute("rdf", RDF, "about", this.source.toString());
        this.w.writeStartElement("dc", "title", DC);
        this.w.writeCharacters(this.source.toString());
        // dc:title
        this.w.writeEndElement();
        this.w.writeEndElement();
        SAMSequenceDictionary dict = header.getSequenceDictionary();
        if (dict != null) {
            for (SAMSequenceRecord ssr : dict.getSequences()) {
                this.w.writeStartElement(PFX, "Chromosome", NS);
                this.w.writeAttribute("rdf", RDF, "about", "urn:chromosome/" + ssr.getSequenceName());
                this.w.writeStartElement("dc", "title", DC);
                this.w.writeCharacters(ssr.getSequenceName());
                // dc:title
                this.w.writeEndElement();
                this.w.writeStartElement(PFX, "length", NS);
                datatype("int");
                this.w.writeCharacters(String.valueOf(ssr.getSequenceLength()));
                // length
                this.w.writeEndElement();
                this.w.writeStartElement(PFX, "index", NS);
                datatype("int");
                this.w.writeCharacters(String.valueOf(ssr.getSequenceIndex()));
                // length
                this.w.writeEndElement();
                // Chromosome
                this.w.writeEndElement();
            }
        }
        key2infoHandler.put(SnpEffPredictionParser.getDefaultTag(), new SnpEffHandler());
        key2infoHandler.put(VepPredictionParser.getDefaultTag(), new VepHandler());
        key2infoHandler.put(VCFPredictions.TAG, new MyPredictionHandler());
        for (VCFInfoHeaderLine h : header.getInfoHeaderLines()) {
            RDFVcfInfoHandler handler = key2infoHandler.get(h.getID());
            if (handler == null) {
                LOG.info("creating default handler for INFO:" + h.getID());
                handler = createDefaultRdfVcfInfoHandlerFor(h);
                key2infoHandler.put(handler.getKey(), handler);
            }
            handler.init(h);
        }
        for (VCFFilterHeaderLine h : header.getFilterLines()) {
            this.w.writeStartElement(PFX, "Filter", NS);
            this.w.writeAttribute("rdf", RDF, "about", "urn:filter/" + h.getKey());
            this.w.writeStartElement("dc", "title", DC);
            this.w.writeCharacters(h.getKey());
            // dc:title
            this.w.writeEndElement();
            this.w.writeStartElement("dc", "description", DC);
            this.w.writeCharacters(h.getValue());
            // dc:title
            this.w.writeEndElement();
            // Filter
            this.w.writeEndElement();
        }
        // Sample
        for (String sample : header.getSampleNamesInOrder()) {
            this.w.writeStartElement(PFX, "Sample", NS);
            this.w.writeAttribute("rdf", RDF, "about", "urn:sample/" + sample);
            this.w.writeStartElement("dc", "title", DC);
            this.w.writeCharacters(sample);
            // dc:title
            this.w.writeEndElement();
            // rdf:RDF
            this.w.writeEndElement();
        }
    } catch (Exception e) {
        throw new RuntimeException("close failed", e);
    }
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException)

Example 52 with VCFInfoHeaderLine

use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.

the class InfoTreeModel method getCSQCols.

private List<String> getCSQCols(VCFHeader header) {
    VCFInfoHeaderLine ihl = header.getInfoHeaderLine("CSQ");
    if (ihl == null)
        return Collections.emptyList();
    String description = ihl.getDescription();
    String chunck = " Format:";
    int i = description.indexOf(chunck);
    if (i == -1) {
        LOG.warn("Cannot find " + chunck + " in " + description);
        return Collections.emptyList();
    }
    description = description.substring(i + chunck.length()).replaceAll("[ \'\\.\\(\\)]+", "").trim();
    String[] tokens = pipeVep.split(description);
    ArrayList<String> L = new ArrayList<String>(tokens.length);
    for (String s : tokens) {
        if (s.trim().isEmpty())
            continue;
        L.add(s);
    }
    return L;
}
Also used : ArrayList(java.util.ArrayList) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine)

Example 53 with VCFInfoHeaderLine

use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.

the class InfoTreeModel method getEFFCols.

private List<String> getEFFCols(VCFHeader header) {
    VCFInfoHeaderLine ihl = header.getInfoHeaderLine("EFF");
    if (ihl == null)
        return Collections.emptyList();
    String description = ihl.getDescription();
    String chunck = "Format:";
    int i = description.indexOf(chunck);
    if (i == -1) {
        LOG.warn("Cannot find " + chunck + " in " + description);
        return Collections.emptyList();
    }
    description = description.substring(i + chunck.length()).replace('(', '|').replaceAll("[ \'\\.)\\[\\]]+", "").trim();
    String[] tokens = pipeSnpEff.split(description);
    ArrayList<String> L = new ArrayList<String>(tokens.length);
    for (String s : tokens) {
        if (s.trim().isEmpty())
            continue;
        L.add(s);
    }
    return L;
}
Also used : ArrayList(java.util.ArrayList) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine)

Example 54 with VCFInfoHeaderLine

use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.

the class Biostar140111 method parseGenotypes.

/**
 * parses NCBI GT output
 */
private void parseGenotypes(XMLEventReader r, OutputStream outStream) throws Exception {
    final Pattern dnaRegex = Pattern.compile("[ATGCatgc]+");
    final QName attInId = new QName("indId");
    Set<String> samples = new TreeSet<>();
    Set<VCFHeaderLine> metaData = new HashSet<>();
    metaData.add(new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.String, "Genotype"));
    metaData.add(new VCFFormatHeaderLine("DP", 1, VCFHeaderLineType.Integer, "Depth"));
    metaData.add(new VCFInfoHeaderLine("SnpInfoObserved", 1, VCFHeaderLineType.String, "SnpInfo : Oberved"));
    VCFHeader header = null;
    VariantContextWriter w = null;
    while (r.hasNext()) {
        XMLEvent evt = r.peek();
        if (evt.isStartElement()) {
            StartElement startE = evt.asStartElement();
            String localName = startE.getName().getLocalPart();
            if (localName.equals("SnpInfo")) {
                if (header == null) {
                    header = new VCFHeader(metaData, samples);
                    w = VCFUtils.createVariantContextWriterToOutputStream(outStream);
                    w.writeHeader(header);
                }
                SnpInfo snpInfo = this.unmarshaller.unmarshal(r, SnpInfo.class).getValue();
                if (snpInfo.getSnpLoc().isEmpty()) {
                    LOG.warning("no snploc for rs" + snpInfo.getRsId());
                }
                for (SnpLoc snpLoc : snpInfo.getSnpLoc()) {
                    int chromStart;
                    try {
                        chromStart = Integer.parseInt(snpLoc.getStart());
                    } catch (Exception e) {
                        LOG.warning("bad start in rs" + snpInfo.getRsId() + " " + snpLoc.getStart());
                        continue;
                    }
                    chromStart++;
                    String contigAllele = snpLoc.getContigAllele();
                    if (contigAllele == null || !dnaRegex.matcher(contigAllele).matches()) {
                        LOG.warning("bad contigAllele in rs" + snpInfo.getRsId() + " " + contigAllele);
                        continue;
                    }
                    if (!"fwd".equals(snpLoc.getRsOrientToChrom())) {
                        contigAllele = AcidNucleics.reverseComplement(contigAllele);
                    }
                    Allele ref = Allele.create(contigAllele, true);
                    Map<String, Genotype> sample2genotype = new HashMap<>();
                    for (SsInfo ssinfo : snpInfo.getSsInfo()) {
                        boolean revcomp = !"fwd".equals(snpLoc.getRsOrientToChrom());
                        if (!"fwd".equals(ssinfo.getSsOrientToRs()))
                            revcomp = !revcomp;
                        for (ByPop byPop : ssinfo.getByPop()) {
                            for (GTypeByInd gt : byPop.getGTypeByInd()) {
                                String sample = String.valueOf(gt.getIndId());
                                if (!samples.contains(sample)) {
                                    LOG.warning("Undefined sample:" + sample);
                                    continue;
                                }
                                boolean ok = true;
                                String[] tokens = gt.getGtype().split("[/]");
                                if (tokens.length == 1) {
                                    tokens = new String[] { tokens[0], tokens[0] };
                                } else if (tokens.length != 2) {
                                    LOG.warning("Bad genotypes in sample:" + sample + " " + gt.getGtype());
                                    continue;
                                }
                                List<Allele> sampleAlleles = new ArrayList<>(2);
                                for (int i = 0; i < tokens.length; ++i) {
                                    if (revcomp)
                                        tokens[i] = AcidNucleics.reverseComplement(tokens[i]);
                                    if (!dnaRegex.matcher(tokens[i]).matches()) {
                                        ok = false;
                                        break;
                                    }
                                    sampleAlleles.add(tokens[i].equalsIgnoreCase(contigAllele) ? ref : Allele.create(tokens[i], false));
                                }
                                if (!ok)
                                    continue;
                                GenotypeBuilder gb = new GenotypeBuilder(sample, sampleAlleles);
                                sample2genotype.put(sample, gb.make());
                            }
                        }
                    }
                    Set<Allele> alleles = new HashSet<>();
                    alleles.add(ref);
                    for (String sample : samples) {
                        if (!sample2genotype.containsKey(sample)) {
                            sample2genotype.put(sample, GenotypeBuilder.createMissing(sample, 2));
                        } else {
                            alleles.addAll(sample2genotype.get(sample).getAlleles());
                        }
                    }
                    VariantContextBuilder vcb = new VariantContextBuilder("dbsnp", snpLoc.getChrom(), chromStart, chromStart + ref.getBaseString().length() - 1, alleles);
                    if (snpInfo.getObserved() != null) {
                        vcb.attribute("SnpInfoObserved", VCFUtils.escapeInfoField(snpInfo.getObserved()));
                    }
                    vcb.genotypes(sample2genotype.values());
                    vcb.id("rs" + snpInfo.getRsId());
                    w.add(vcb.make());
                }
            } else if (localName.equals("Individual")) {
                if (header != null)
                    throw new XMLStreamException("Error got " + localName + " after genotypes", evt.getLocation());
                Attribute att = startE.getAttributeByName(attInId);
                if (att == null)
                    throw new XMLStreamException("Cannot get " + attInId, evt.getLocation());
                samples.add(att.getValue());
                // consumme
                r.next();
            } else {
                r.next();
            }
        } else {
            // consumme
            r.next();
        }
    }
    if (w == null)
        throw new IOException("No Genotype was found");
    w.close();
}
Also used : ByPop(gov.nih.nlm.ncbi.dbsnp.gt.SnpInfo.SsInfo.ByPop) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SnpInfo(gov.nih.nlm.ncbi.dbsnp.gt.SnpInfo) HashMap(java.util.HashMap) Attribute(javax.xml.stream.events.Attribute) ArrayList(java.util.ArrayList) GTypeByInd(gov.nih.nlm.ncbi.dbsnp.gt.SnpInfo.SsInfo.ByPop.GTypeByInd) SnpLoc(gov.nih.nlm.ncbi.dbsnp.gt.SnpInfo.SnpLoc) TreeSet(java.util.TreeSet) SsInfo(gov.nih.nlm.ncbi.dbsnp.gt.SnpInfo.SsInfo) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Pattern(java.util.regex.Pattern) QName(javax.xml.namespace.QName) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) IOException(java.io.IOException) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException) StartElement(javax.xml.stream.events.StartElement) Allele(htsjdk.variant.variantcontext.Allele) XMLStreamException(javax.xml.stream.XMLStreamException) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) XMLEvent(javax.xml.stream.events.XMLEvent)

Example 55 with VCFInfoHeaderLine

use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.

the class CaseControlJfx method doWork.

@Override
public int doWork(final Stage primaryStage, final List<String> args) {
    final VariantPartition partition;
    Pedigree pedigree = null;
    VcfIterator in = null;
    try {
        switch(this.partitionType) {
            case variantType:
                partition = new VariantTypePartition();
                break;
            case chromosome:
                partition = new ChromosomePartition();
                break;
            case autosomes:
                partition = new SexualContigPartition();
                break;
            case qual:
                partition = new QualPartition();
                break;
            case vqslod:
                partition = new VQSLODPartition();
                break;
            case typeFilter:
                partition = new TypeAndFilterPartiton();
                break;
            case distance:
                partition = new DisanceToDiagonalPartiton();
                break;
            case n_alts:
                partition = new NAltsPartition();
                break;
            default:
                throw new IllegalStateException(this.partitionType.name());
        }
        if (args.isEmpty()) {
            in = VCFUtils.createVcfIteratorStdin();
            primaryStage.setTitle(CaseControlJfx.class.getSimpleName());
        } else if (args.size() == 1) {
            in = VCFUtils.createVcfIterator(args.get(0));
            primaryStage.setTitle(args.get(0));
        } else {
            LOG.error("Illegal Number of arguments: " + args);
            return -1;
        }
        if (this.pedigreeFile != null) {
            pedigree = Pedigree.newParser().parse(this.pedigreeFile);
        } else {
            pedigree = Pedigree.newParser().parse(in.getHeader());
        }
        if (this.controlTag != null) {
            final VCFInfoHeaderLine infoHeaderLine = in.getHeader().getInfoHeaderLine(this.controlTag);
            if (infoHeaderLine == null) {
                LOG.error("No such attribute in the VCF header: " + this.controlTag);
                return -1;
            }
        }
        int count = 0;
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
        while (in.hasNext() && (this.limit_to_N_variants < 0 || count < this.limit_to_N_variants)) {
            final VariantContext ctx = progress.watch(in.next());
            if (this.ignore_ctx_filtered && ctx.isFiltered())
                continue;
            ++count;
            final List<Allele> alternates = ctx.getAlternateAlleles();
            for (int alt_idx = 0; alt_idx < alternates.size(); ++alt_idx) {
                final Allele alt = alternates.get(alt_idx);
                final Double[] mafs = { null, null };
                for (int i = 0; i < 2; ++i) {
                    if (i == 1 && this.controlTag != null) {
                        if (ctx.hasAttribute(this.controlTag)) {
                            try {
                                final List<Double> dvals = ctx.getAttributeAsDoubleList(this.controlTag, Double.NaN);
                                if (alt_idx < dvals.size() && dvals.get(alt_idx) != null) {
                                    final double d = dvals.get(alt_idx);
                                    if (!Double.isNaN(d) && d >= 0 && d <= 1.0)
                                        mafs[1] = d;
                                }
                            } catch (NumberFormatException err) {
                            }
                        }
                    } else {
                        final MafCalculator mafCalculator = new MafCalculator(alt, ctx.getContig());
                        mafCalculator.setNoCallIsHomRef(no_call_is_homref);
                        for (Pedigree.Person person : (i == 0 ? pedigree.getAffected() : pedigree.getUnaffected())) {
                            if (selectSamples.equals(SelectSamples.males) && !person.isMale())
                                continue;
                            if (selectSamples.equals(SelectSamples.females) && !person.isFemale())
                                continue;
                            final Genotype genotype = ctx.getGenotype(person.getId());
                            if (genotype == null)
                                continue;
                            if (ignore_gt_filtered && genotype.isFiltered())
                                continue;
                            mafCalculator.add(genotype, person.isMale());
                        }
                        if (!mafCalculator.isEmpty()) {
                            mafs[i] = mafCalculator.getMaf();
                        }
                    }
                }
                if (mafs[0] == null || mafs[1] == null)
                    continue;
                final XYChart.Data<Number, Number> data = new XYChart.Data<Number, Number>(mafs[0], mafs[1]);
                if (this.add_tooltip && this.outputFile == null) {
                    data.setExtraValue(ctx.getContig() + ":" + ctx.getStart());
                }
                partition.add(ctx, pedigree, data);
            }
        }
        progress.finish();
        in.close();
        in = null;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
    }
    final NumberAxis xAxis = new NumberAxis(0.0, 1.0, 0.1);
    xAxis.setLabel("Cases");
    final NumberAxis yAxis = new NumberAxis(0.0, 1.0, 0.1);
    yAxis.setLabel("Controls" + (this.controlTag == null ? "" : "[" + this.controlTag + "]"));
    final ScatterChart<Number, Number> chart = new ScatterChart<>(xAxis, yAxis);
    for (final XYChart.Series<Number, Number> series : partition.getSeries()) {
        chart.getData().add(series);
    }
    String title = "Case/Control";
    if (!args.isEmpty()) {
        title = args.get(0);
        int slash = title.lastIndexOf("/");
        if (slash != -1)
            title = title.substring(slash + 1);
        if (title.endsWith(".vcf.gz"))
            title = title.substring(0, title.length() - 7);
        if (title.endsWith(".vcf"))
            title = title.substring(0, title.length() - 4);
    }
    if (userTitle != null)
        title = userTitle;
    chart.setTitle(title);
    chart.setAnimated(false);
    chart.setLegendSide(this.legendSide);
    final VBox root = new VBox();
    MenuBar menuBar = new MenuBar();
    Menu menu = new Menu("File");
    MenuItem item = new MenuItem("Save image as...");
    item.setOnAction(AE -> {
        doMenuSave(chart);
    });
    menu.getItems().add(item);
    menu.getItems().add(new SeparatorMenuItem());
    item = new MenuItem("Quit");
    item.setOnAction(AE -> {
        Platform.exit();
    });
    menu.getItems().add(item);
    menuBar.getMenus().add(menu);
    root.getChildren().add(menuBar);
    final BorderPane contentPane = new BorderPane();
    contentPane.setCenter(chart);
    root.getChildren().add(contentPane);
    Rectangle2D screen = Screen.getPrimary().getVisualBounds();
    double minw = Math.max(Math.min(screen.getWidth(), screen.getHeight()) - 50, 50);
    chart.setPrefSize(minw, minw);
    final Scene scene = new Scene(root, minw, minw);
    primaryStage.setScene(scene);
    if (this.outputFile != null) {
        primaryStage.setOnShown(WE -> {
            LOG.info("saving as " + this.outputFile);
            try {
                saveImageAs(chart, this.outputFile);
            } catch (IOException err) {
                LOG.error(err);
                System.exit(-1);
            }
            Platform.exit();
        });
    }
    primaryStage.show();
    if (this.outputFile == null) {
        // http://stackoverflow.com/questions/14117867
        for (final XYChart.Series<Number, Number> series : partition.getSeries()) {
            for (XYChart.Data<Number, Number> d : series.getData()) {
                if (dataOpacity >= 0 && dataOpacity < 1.0) {
                    d.getNode().setStyle(d.getNode().getStyle() + "-fx-opacity:0.3;");
                }
                if (this.add_tooltip) {
                    final Tooltip tooltip = new Tooltip();
                    tooltip.setText(String.format("%s (%f / %f)", String.valueOf(d.getExtraValue()), d.getXValue().doubleValue(), d.getYValue().doubleValue()));
                    Tooltip.install(d.getNode(), tooltip);
                }
            }
        }
    }
    return 0;
}
Also used : BorderPane(javafx.scene.layout.BorderPane) NumberAxis(javafx.scene.chart.NumberAxis) ScatterChart(javafx.scene.chart.ScatterChart) VariantContext(htsjdk.variant.variantcontext.VariantContext) MenuBar(javafx.scene.control.MenuBar) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Menu(javafx.scene.control.Menu) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Tooltip(javafx.scene.control.Tooltip) Rectangle2D(javafx.geometry.Rectangle2D) Genotype(htsjdk.variant.variantcontext.Genotype) MenuItem(javafx.scene.control.MenuItem) SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) IOException(java.io.IOException) SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) Scene(javafx.scene.Scene) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) IOException(java.io.IOException) Allele(htsjdk.variant.variantcontext.Allele) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) XYChart(javafx.scene.chart.XYChart) VBox(javafx.scene.layout.VBox)

Aggregations

VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)55 VCFHeader (htsjdk.variant.vcf.VCFHeader)49 VariantContext (htsjdk.variant.variantcontext.VariantContext)37 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)37 ArrayList (java.util.ArrayList)34 HashSet (java.util.HashSet)32 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)31 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)25 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)25 Allele (htsjdk.variant.variantcontext.Allele)22 IOException (java.io.IOException)20 File (java.io.File)19 Genotype (htsjdk.variant.variantcontext.Genotype)17 GenotypeBuilder (htsjdk.variant.variantcontext.GenotypeBuilder)17 Set (java.util.Set)17 HashMap (java.util.HashMap)16 List (java.util.List)16 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)14 VCFFilterHeaderLine (htsjdk.variant.vcf.VCFFilterHeaderLine)14 VCFFormatHeaderLine (htsjdk.variant.vcf.VCFFormatHeaderLine)14