Search in sources :

Example 1 with AFExtractor

use of com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory.AFExtractor in project jvarkit by lindenb.

the class CaseControlJfx method doWork.

@Override
public int doWork(final List<String> args) {
    final VariantPartition partition;
    Pedigree pedigree = null;
    VCFIterator in = null;
    try {
        switch(this.partitionType) {
            case variantType:
                partition = new VariantTypePartition();
                break;
            case chromosome:
                partition = new ChromosomePartition();
                break;
            case autosomes:
                partition = new SexualContigPartition();
                break;
            case qual:
                partition = new QualPartition();
                break;
            case vqslod:
                partition = new VQSLODPartition();
                break;
            case typeFilter:
                partition = new TypeAndFilterPartiton();
                break;
            case distance:
                partition = new DisanceToDiagonalPartiton();
                break;
            case n_alts:
                partition = new NAltsPartition();
                break;
            default:
                throw new IllegalStateException(this.partitionType.name());
        }
        in = openVCFIterator(oneFileOrNull(args));
        if (this.pedigreeFile != null) {
            pedigree = Pedigree.newParser().parse(this.pedigreeFile);
        } else {
            pedigree = Pedigree.newParser().parse(in.getHeader());
        }
        final AFExtractor controlAFExtractor;
        if (!StringUtil.isBlank(this.controlFields)) {
            final List<AFExtractor> extractors = new AFExtractorFactory().parseFieldExtractors(this.controlFields);
            if (extractors.size() != 1) {
                LOG.error("extractor list should have size==1 . got " + extractors);
                return -1;
            }
            controlAFExtractor = extractors.get(0);
            if (!controlAFExtractor.validateHeader(in.getHeader())) {
                LOG.error("Invalid : " + controlAFExtractor);
                return -1;
            }
        } else {
            controlAFExtractor = null;
        }
        int count = 0;
        final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(in.getHeader()).logger(LOG).build();
        while (in.hasNext() && (this.limit_to_N_variants < 0 || count < this.limit_to_N_variants)) {
            final VariantContext ctx = progress.apply(in.next());
            if (this.ignore_ctx_filtered && ctx.isFiltered())
                continue;
            ++count;
            final List<Allele> alternates = ctx.getAlternateAlleles();
            for (int alt_idx = 0; alt_idx < alternates.size(); ++alt_idx) {
                final Allele alt = alternates.get(alt_idx);
                final Double[] mafs = { null, null };
                for (int i = 0; i < 2; ++i) {
                    if (i == 1 && controlAFExtractor != null) {
                        final List<Double> dvals = controlAFExtractor.parse(ctx);
                        if (alt_idx < dvals.size() && dvals.get(alt_idx) != null) {
                            final double d = dvals.get(alt_idx);
                            if (!Double.isNaN(d) && d >= 0 && d <= 1.0)
                                mafs[1] = d;
                        }
                    } else {
                        final MafCalculator mafCalculator = new MafCalculator(alt, ctx.getContig());
                        mafCalculator.setNoCallIsHomRef(no_call_is_homref);
                        for (Pedigree.Person person : (i == 0 ? pedigree.getAffected() : pedigree.getUnaffected())) {
                            if (selectSamples.equals(SelectSamples.males) && !person.isMale())
                                continue;
                            if (selectSamples.equals(SelectSamples.females) && !person.isFemale())
                                continue;
                            final Genotype genotype = ctx.getGenotype(person.getId());
                            if (genotype == null)
                                continue;
                            if (ignore_gt_filtered && genotype.isFiltered())
                                continue;
                            mafCalculator.add(genotype, person.isMale());
                        }
                        if (!mafCalculator.isEmpty()) {
                            mafs[i] = mafCalculator.getMaf();
                        }
                    }
                }
                if (mafs[0] == null || mafs[1] == null)
                    continue;
                final XYChart.Data<Number, Number> data = new XYChart.Data<Number, Number>(mafs[0], mafs[1]);
                partition.add(ctx, pedigree, data);
            }
        }
        progress.close();
        in.close();
        in = null;
        final NumberAxis xAxis = new NumberAxis(0.0, 1.0, 0.1);
        xAxis.setLabel("Cases");
        final NumberAxis yAxis = new NumberAxis(0.0, 1.0, 0.1);
        yAxis.setLabel("Controls" + (StringUtil.isBlank(this.controlFields) ? "" : "[" + this.controlFields + "]"));
        final ScatterChart<Number, Number> chart = new ScatterChart<>(xAxis, yAxis);
        for (final XYChart.Series<Number, Number> series : partition.getSeries()) {
            chart.getData().add(series);
        }
        String title = "Case/Control";
        if (!args.isEmpty()) {
            title = args.get(0);
            int slash = title.lastIndexOf("/");
            if (slash != -1)
                title = title.substring(slash + 1);
            if (title.endsWith(".vcf.gz"))
                title = title.substring(0, title.length() - 7);
            if (title.endsWith(".vcf"))
                title = title.substring(0, title.length() - 4);
        }
        if (userTitle != null)
            title = userTitle;
        chart.setTitle(title);
        chart.setAnimated(false);
        // chart.setLegendSide(this.legendSide);
        final RExporter rExporter = new RExporter();
        final PrintWriter pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        rExporter.exportToR(pw, chart);
        pw.flush();
        pw.close();
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
    }
    return 0;
}
Also used : NumberAxis(com.github.lindenb.jvarkit.chart.NumberAxis) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) ScatterChart(com.github.lindenb.jvarkit.chart.ScatterChart) VariantContext(htsjdk.variant.variantcontext.VariantContext) AFExtractorFactory(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory) VCFIterator(htsjdk.variant.vcf.VCFIterator) PrintWriter(java.io.PrintWriter) Genotype(htsjdk.variant.variantcontext.Genotype) RExporter(com.github.lindenb.jvarkit.chart.RExporter) Allele(htsjdk.variant.variantcontext.Allele) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) AFExtractor(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory.AFExtractor) XYChart(com.github.lindenb.jvarkit.chart.XYChart)

Example 2 with AFExtractor

use of com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory.AFExtractor in project jvarkit by lindenb.

the class CaseControlPlot method parseConfigFile.

private List<CaseControlExtractor> parseConfigFile(final VCFHeader header) throws Exception {
    Document dom = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(this.xmlFile);
    Element root = dom.getDocumentElement();
    if (root == null || !root.getNodeName().equals("config"))
        throw new JvarkitException.XmlDomError(root, "Root is note <config>");
    final Map<String, JSPredigate> id2variantFilter = new HashMap<>();
    // first pass, collect filters
    for (Node n1 = root.getFirstChild(); n1 != null; n1 = n1.getNextSibling()) {
        if (n1.getNodeType() != Node.ELEMENT_NODE)
            continue;
        Element e1 = Element.class.cast(n1);
        if (!e1.getNodeName().equals("filter"))
            continue;
        final String filterid = e1.getAttribute("id");
        if (StringUtil.isBlank(filterid))
            throw new JvarkitException.XmlDomError(e1, "@id missing");
        if (id2variantFilter.containsKey(filterid))
            throw new JvarkitException.XmlDomError(e1, "duplicate filter id : " + filterid);
        final String expression = e1.getTextContent();
        if (StringUtil.isBlank(expression))
            throw new JvarkitException.XmlDomError(e1, "expression missing");
        id2variantFilter.put(filterid, new JSPredigate(header, expression));
    }
    // second pass collect maf extractors
    final Map<String, MafExtractor> id2mafExtractor = new HashMap<>();
    for (Node n1 = root.getFirstChild(); n1 != null; n1 = n1.getNextSibling()) {
        if (n1.getNodeType() != Node.ELEMENT_NODE)
            continue;
        Element e1 = Element.class.cast(n1);
        if (!e1.getNodeName().equals("maf"))
            continue;
        final String mafid = e1.getAttribute("id");
        if (StringUtil.isBlank(mafid))
            throw new JvarkitException.XmlDomError(e1, "@id missing");
        if (id2mafExtractor.containsKey(mafid))
            throw new JvarkitException.XmlDomError(e1, "duplicate maf id : " + mafid);
        final MafExtractor mafExtractor;
        if (e1.hasAttribute("attribute")) {
            final String tag = e1.getAttribute("attribute");
            if (StringUtil.isBlank(tag))
                throw new JvarkitException.XmlDomError(e1, "@attribute is empty");
            final List<AFExtractor> extractors = new AFExtractorFactory().parseFieldExtractors(tag);
            if (extractors.size() != 1) {
                throw new JvarkitException.XmlDomError(e1, "expected one AF extractor but got " + extractors);
            }
            final DefaultMafExtractor at = new DefaultMafExtractor(extractors.get(0));
            mafExtractor = at;
        } else {
            final GenotypeMafExtractor at = new GenotypeMafExtractor();
            mafExtractor = at;
        }
        id2mafExtractor.put(mafid, mafExtractor);
    }
    final List<CaseControlExtractor> excractors = new ArrayList<>();
    // parse handlers
    for (Node n1 = root.getFirstChild(); n1 != null; n1 = n1.getNextSibling()) {
        if (n1.getNodeType() != Node.ELEMENT_NODE)
            continue;
        Element e1 = Element.class.cast(n1);
        if (!e1.getNodeName().equals("handler"))
            continue;
        final CaseControlExtractor extractor = new CaseControlExtractor();
        extractor.name = e1.getAttribute("name");
        if (StringUtil.isBlank(extractor.name))
            throw new JvarkitException.XmlDomError(e1, "@name missing");
        extractor.name = this.prefix + extractor.name;
        if (excractors.stream().filter(F -> F.getName().equals(extractor.name)).findAny().isPresent()) {
            throw new JvarkitException.XmlDomError(e1, "duplicate name" + extractor.name);
        }
        for (Node n2 = n1.getFirstChild(); n2 != null; n2 = n2.getNextSibling()) {
            if (n2.getNodeType() != Node.ELEMENT_NODE)
                continue;
            Element e2 = Element.class.cast(n2);
            if (e2.getNodeName().equals("filter")) {
                final Predicate<VariantContext> expr;
                if (e2.hasAttribute("ref")) {
                    final String filterid = e2.getAttribute("ref");
                    JSPredigate predicate = id2variantFilter.get(filterid);
                    if (predicate == null) {
                        throw new JvarkitException.XmlDomError(e2, "no such filter:" + filterid);
                    }
                    expr = predicate;
                } else {
                    final String expressionstr = e2.getTextContent();
                    if (StringUtil.isBlank(expressionstr))
                        throw new JvarkitException.XmlDomError(e2, "expression missing");
                    expr = new JSPredigate(header, expressionstr);
                }
                extractor.variantPredicate = extractor.variantPredicate.and(expr);
            } else if (e2.getNodeName().equals("case") || e2.getNodeName().equals("ctrl")) {
                final MafExtractor mafextractor;
                if (e2.hasAttribute("ref")) {
                    final String mafid = e2.getAttribute("ref");
                    if (!id2mafExtractor.containsKey(e2.getAttribute(mafid))) {
                        throw new JvarkitException.XmlDomError(e2, "no such mafextractor:" + mafid);
                    }
                    mafextractor = id2mafExtractor.get(mafid);
                } else {
                    final GenotypeMafExtractor genotypeMafExtractor = new GenotypeMafExtractor();
                    mafextractor = genotypeMafExtractor;
                }
                if (e2.getNodeName().equals("case")) {
                    extractor.caseExtractor = mafextractor;
                } else {
                    extractor.ctrlExtractor = mafextractor;
                }
            } else {
                LOG.error("unknown XML element " + e2.getNodeName());
            }
        }
        excractors.add(extractor);
    }
    return excractors;
}
Also used : HashMap(java.util.HashMap) Element(org.w3c.dom.Element) Node(org.w3c.dom.Node) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Document(org.w3c.dom.Document) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) AFExtractorFactory(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory) AFExtractor(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory.AFExtractor)

Example 3 with AFExtractor

use of com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory.AFExtractor in project jvarkit by lindenb.

the class VcfAfInfoFilter method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VCFIterator in, final VariantContextWriter out) {
    try {
        if (this.vcf_gnomad_nfe) {
            this.user_fields_str += ",gnomad_exome_AF_NFE,gnomad_genome_AF_NFE";
            this.user_fields_str += ",gnomad_exome_AC_NFE/gnomad_exome_AN_NFE," + "gnomad_genome_AC_NFE/gnomad_genome_AN_NFE";
        }
        if (!StringUtil.isBlank(this.deprecated_user_af_fields)) {
            LOG.warn("-af use is deprecated.");
            this.user_fields_str += "," + String.join(" , ", this.deprecated_user_af_fields.split("[,; \t\n]+"));
        }
        if (!StringUtil.isBlank(this.deprecated_user_af_fields)) {
            LOG.warn("-af use is deprecated.");
            this.user_fields_str += "," + String.join(" , ", this.deprecated_user_af_fields.split("[,; \t\n]+"));
        }
        if (!StringUtil.isBlank(this.deprecated_user_ac_an_fields)) {
            LOG.warn("--acn use is deprecated.");
            final String[] array = this.deprecated_user_ac_an_fields.split("[,; \t\n]+");
            int i = 0;
            while (i < array.length) {
                String s = array[i];
                if (StringUtil.isBlank(s)) {
                    i++;
                    continue;
                }
                if (s.contains("*")) {
                    this.user_fields_str += ";" + s;
                } else {
                    final String acf = s;
                    i++;
                    while (i < array.length) {
                        s = array[i];
                        if (StringUtil.isBlank(s)) {
                            i++;
                            continue;
                        }
                        break;
                    }
                    if (i == array.length) {
                        LOG.error("missing AN for " + acf + " in " + this.deprecated_user_ac_an_fields);
                        return -1;
                    }
                    final String anf = s;
                    this.user_fields_str += ";" + acf + "/" + anf;
                }
            }
        }
        final AFExtractorFactory afExtractorFactory = new AFExtractorFactory();
        final VCFHeader header = in.getHeader();
        final List<AFExtractor> afExtractors = new ArrayList<>(afExtractorFactory.parseFieldExtractors(this.user_fields_str));
        afExtractors.removeIf(T -> {
            if (!T.validateHeader(header) && !this.ignore_INFO_field_validation) {
                LOG.warn("Ignoring " + T + " because it's not valid.");
                return true;
            }
            return false;
        });
        if (afExtractors.isEmpty()) {
            LOG.warn("No extractor was defined !");
        }
        final Set<VCFHeaderLine> headerLines = new HashSet<>();
        final VCFFilterHeaderLine noAltVariantFilter;
        if (!StringUtil.isBlank(this.filterAllAltInGnomad)) {
            noAltVariantFilter = new VCFFilterHeaderLine(this.filterAllAltInGnomad, "All ALT alleles don't pass the " + this.user_af_minimum + " < gnomad treshold AF < " + this.user_af_maximum);
            headerLines.add(noAltVariantFilter);
        } else {
            noAltVariantFilter = null;
        }
        VCFStandardHeaderLines.addStandardFormatLines(headerLines, true, VCFConstants.GENOTYPE_FILTER_KEY);
        JVarkitVersion.getInstance().addMetaData(getClass().getSimpleName(), header);
        this.recalculator.setHeader(header);
        headerLines.stream().forEach(H -> header.addMetaDataLine(H));
        final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(header).logger(LOG).build();
        out.writeHeader(header);
        while (in.hasNext()) {
            final VariantContext ctx = progress.apply(in.next());
            if (!ctx.isVariant()) {
                out.add(ctx);
                continue;
            }
            final List<Allele> alt_alleles = ctx.getAlternateAlleles();
            final Set<Allele> ok_alleles = new HashSet<>(alt_alleles);
            ok_alleles.remove(Allele.SPAN_DEL);
            for (final AFExtractor afExtractor : afExtractors) {
                if (ok_alleles.isEmpty())
                    break;
                final List<Double> afo_list = afExtractor.parse(ctx);
                if (afo_list == null)
                    continue;
                if (afo_list.size() != alt_alleles.size()) {
                    LOG.warn("in " + ctx.getContig() + ":" + ctx.getStart() + ":" + ctx.getReference() + " illegal number of AF values " + afExtractor);
                }
                for (int x = 0; x < afo_list.size() && x < alt_alleles.size(); ++x) {
                    final Double af = afo_list.get(x);
                    if (af == null)
                        continue;
                    if (af.doubleValue() < this.user_af_minimum || af.doubleValue() > this.user_af_maximum) {
                        if (this.filter_for_any_allele) {
                            ok_alleles.clear();
                        } else {
                            ok_alleles.remove(alt_alleles.get(x));
                        }
                    }
                }
            }
            if (ok_alleles.isEmpty()) {
                if (noAltVariantFilter != null) {
                    out.add(new VariantContextBuilder(ctx).filter(noAltVariantFilter.getID()).make());
                }
                continue;
            }
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            final List<Genotype> genotypes = new ArrayList<>(ctx.getNSamples());
            for (final Genotype gt : ctx.getGenotypes()) {
                if (gt.isNoCall() || gt.isHomRef()) {
                    genotypes.add(gt);
                    continue;
                }
                boolean got_good_allele = false;
                boolean got_bad_alt = false;
                for (final Allele gta : gt.getAlleles()) {
                    if (!gta.isCalled() || gta.equals(Allele.SPAN_DEL)) {
                        continue;
                    } else if (gta.isReference() || ok_alleles.contains(gta)) {
                        got_good_allele = true;
                    } else if (alt_alleles.contains(gta)) {
                        got_bad_alt = true;
                    }
                }
                if (got_good_allele || !got_bad_alt) {
                    genotypes.add(gt);
                    continue;
                }
                if (StringUtil.isBlank(this.genotypeFilter)) {
                    genotypes.add(GenotypeBuilder.createMissing(gt.getSampleName(), gt.getPloidy()));
                } else {
                    genotypes.add(new GenotypeBuilder(gt).filter(this.genotypeFilter).make());
                }
            }
            vcb.genotypes(genotypes);
            out.add(this.recalculator.apply(vcb.make()));
        }
        progress.close();
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) AFExtractorFactory(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) AFExtractor(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory.AFExtractor)

Aggregations

AFExtractorFactory (com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory)3 AFExtractor (com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory.AFExtractor)3 VariantContext (htsjdk.variant.variantcontext.VariantContext)3 ProgressFactory (com.github.lindenb.jvarkit.util.log.ProgressFactory)2 Allele (htsjdk.variant.variantcontext.Allele)2 Genotype (htsjdk.variant.variantcontext.Genotype)2 ArrayList (java.util.ArrayList)2 NumberAxis (com.github.lindenb.jvarkit.chart.NumberAxis)1 RExporter (com.github.lindenb.jvarkit.chart.RExporter)1 ScatterChart (com.github.lindenb.jvarkit.chart.ScatterChart)1 XYChart (com.github.lindenb.jvarkit.chart.XYChart)1 JvarkitException (com.github.lindenb.jvarkit.lang.JvarkitException)1 Pedigree (com.github.lindenb.jvarkit.util.Pedigree)1 GenotypeBuilder (htsjdk.variant.variantcontext.GenotypeBuilder)1 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)1 VCFFilterHeaderLine (htsjdk.variant.vcf.VCFFilterHeaderLine)1 VCFHeader (htsjdk.variant.vcf.VCFHeader)1 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)1 VCFIterator (htsjdk.variant.vcf.VCFIterator)1 PrintWriter (java.io.PrintWriter)1