Search in sources :

Example 1 with AFExtractorFactory

use of com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory in project jvarkit by lindenb.

the class VcfStrechToSvg method doWork.

@Override
public int doWork(final List<String> args) {
    try {
        this.afExtractor = new AFExtractorFactory().parseFieldExtractor(this.afExtractorFactoryStr);
        final String input = super.oneAndOnlyOneFile(args);
        if (!this.bamListPath.isEmpty()) {
            LOG.info("reading bam list");
            for (Path bamPath : IOUtils.unrollPaths(this.bamListPath)) {
                try (SamReader sr = openSamReader(bamPath)) {
                    final SAMFileHeader hdr = sr.getFileHeader();
                    for (final String sn : hdr.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.toSet())) {
                        if (this.sample2bam.containsKey(sn)) {
                            LOG.info("duplicate sample in bam " + bamPath + " and " + this.sample2bam.get(sn));
                            return -1;
                        }
                        this.sample2bam.put(sn, bamPath);
                    }
                }
            }
        }
        try (VCFReader r = VCFReaderFactory.makeDefault().open(input, true)) {
            final VCFHeader header = r.getHeader();
            this.afExtractor.validateHeader(header);
            final String searchFormat;
            switch(this.useFormat) {
                case PL:
                    searchFormat = VCFConstants.GENOTYPE_PL_KEY;
                    break;
                // through
                case AD:
                default:
                    searchFormat = VCFConstants.GENOTYPE_ALLELE_DEPTHS;
                    break;
            }
            if (header.getFormatHeaderLine(searchFormat) == null) {
                LOG.error("FORMAT/" + searchFormat + " undefined in " + input);
                return -1;
            }
            if (!header.hasGenotypingData()) {
                LOG.error("No genotype in input");
                return -1;
            }
            final SAMSequenceDictionary dict = header.getSequenceDictionary();
            if (dict != null && this.refFaidx != null) {
                SequenceUtil.assertSequenceDictionariesEqual(dict, SequenceDictionaryUtils.extractRequired(this.refFaidx));
            }
            if (this.gff3Path != null) {
                LOG.info("reading gtf" + this.gff3Path);
                try (GtfReader gtfReader = new GtfReader(this.gff3Path)) {
                    if (dict != null)
                        gtfReader.setContigNameConverter(ContigNameConverter.fromOneDictionary(dict));
                    gtfReader.getAllGenes().forEach(G -> {
                        this.all_genes.put(new Interval(G), G);
                    });
                }
            }
            try (BufferedReader br = IOUtils.openPathForBufferedReading(this.bedFile)) {
                try (PrintWriter manifest = (this.manifestPath == null ? new PrintWriter(new NullOuputStream()) : IOUtils.openPathForPrintWriter(this.manifestPath))) {
                    try (ArchiveFactory out = ArchiveFactory.open(this.outputFile)) {
                        final BedLineCodec codec = new BedLineCodec();
                        br.lines().map(L -> codec.decode(L)).filter(B -> B != null).forEach(B -> {
                            run(out, B, header, r, manifest);
                        });
                    }
                    manifest.flush();
                }
            }
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
    }
}
Also used : Path(java.nio.file.Path) Transformer(javax.xml.transform.Transformer) Text(org.w3c.dom.Text) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) VCFHeader(htsjdk.variant.vcf.VCFHeader) StreamResult(javax.xml.transform.stream.StreamResult) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) SVG(com.github.lindenb.jvarkit.util.svg.SVG) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Hyperlink(com.github.lindenb.jvarkit.net.Hyperlink) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) Document(org.w3c.dom.Document) Map(java.util.Map) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) SAMException(htsjdk.samtools.SAMException) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) AFExtractorFactory(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) ToDoubleFunction(java.util.function.ToDoubleFunction) VariantContext(htsjdk.variant.variantcontext.VariantContext) GZIPOutputStream(java.util.zip.GZIPOutputStream) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) Genotype(htsjdk.variant.variantcontext.Genotype) DOMSource(javax.xml.transform.dom.DOMSource) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) OptionalDouble(java.util.OptionalDouble) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Node(org.w3c.dom.Node) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) VCFConstants(htsjdk.variant.vcf.VCFConstants) OutputStream(java.io.OutputStream) Locatable(htsjdk.samtools.util.Locatable) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DecimalFormat(java.text.DecimalFormat) VCFReader(htsjdk.variant.vcf.VCFReader) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) Element(org.w3c.dom.Element) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) DocumentBuilder(javax.xml.parsers.DocumentBuilder) DynamicParameter(com.beust.jcommander.DynamicParameter) BufferedReader(java.io.BufferedReader) TransformerFactory(javax.xml.transform.TransformerFactory) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) SamReader(htsjdk.samtools.SamReader) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) AFExtractorFactory(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory) VCFReader(htsjdk.variant.vcf.VCFReader) BufferedReader(java.io.BufferedReader) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) SAMFileHeader(htsjdk.samtools.SAMFileHeader) VCFHeader(htsjdk.variant.vcf.VCFHeader) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Interval(htsjdk.samtools.util.Interval) PrintWriter(java.io.PrintWriter)

Example 2 with AFExtractorFactory

use of com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory in project jvarkit by lindenb.

the class CaseControlJfx method doWork.

@Override
public int doWork(final List<String> args) {
    final VariantPartition partition;
    Pedigree pedigree = null;
    VCFIterator in = null;
    try {
        switch(this.partitionType) {
            case variantType:
                partition = new VariantTypePartition();
                break;
            case chromosome:
                partition = new ChromosomePartition();
                break;
            case autosomes:
                partition = new SexualContigPartition();
                break;
            case qual:
                partition = new QualPartition();
                break;
            case vqslod:
                partition = new VQSLODPartition();
                break;
            case typeFilter:
                partition = new TypeAndFilterPartiton();
                break;
            case distance:
                partition = new DisanceToDiagonalPartiton();
                break;
            case n_alts:
                partition = new NAltsPartition();
                break;
            default:
                throw new IllegalStateException(this.partitionType.name());
        }
        in = openVCFIterator(oneFileOrNull(args));
        if (this.pedigreeFile != null) {
            pedigree = Pedigree.newParser().parse(this.pedigreeFile);
        } else {
            pedigree = Pedigree.newParser().parse(in.getHeader());
        }
        final AFExtractor controlAFExtractor;
        if (!StringUtil.isBlank(this.controlFields)) {
            final List<AFExtractor> extractors = new AFExtractorFactory().parseFieldExtractors(this.controlFields);
            if (extractors.size() != 1) {
                LOG.error("extractor list should have size==1 . got " + extractors);
                return -1;
            }
            controlAFExtractor = extractors.get(0);
            if (!controlAFExtractor.validateHeader(in.getHeader())) {
                LOG.error("Invalid : " + controlAFExtractor);
                return -1;
            }
        } else {
            controlAFExtractor = null;
        }
        int count = 0;
        final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(in.getHeader()).logger(LOG).build();
        while (in.hasNext() && (this.limit_to_N_variants < 0 || count < this.limit_to_N_variants)) {
            final VariantContext ctx = progress.apply(in.next());
            if (this.ignore_ctx_filtered && ctx.isFiltered())
                continue;
            ++count;
            final List<Allele> alternates = ctx.getAlternateAlleles();
            for (int alt_idx = 0; alt_idx < alternates.size(); ++alt_idx) {
                final Allele alt = alternates.get(alt_idx);
                final Double[] mafs = { null, null };
                for (int i = 0; i < 2; ++i) {
                    if (i == 1 && controlAFExtractor != null) {
                        final List<Double> dvals = controlAFExtractor.parse(ctx);
                        if (alt_idx < dvals.size() && dvals.get(alt_idx) != null) {
                            final double d = dvals.get(alt_idx);
                            if (!Double.isNaN(d) && d >= 0 && d <= 1.0)
                                mafs[1] = d;
                        }
                    } else {
                        final MafCalculator mafCalculator = new MafCalculator(alt, ctx.getContig());
                        mafCalculator.setNoCallIsHomRef(no_call_is_homref);
                        for (Pedigree.Person person : (i == 0 ? pedigree.getAffected() : pedigree.getUnaffected())) {
                            if (selectSamples.equals(SelectSamples.males) && !person.isMale())
                                continue;
                            if (selectSamples.equals(SelectSamples.females) && !person.isFemale())
                                continue;
                            final Genotype genotype = ctx.getGenotype(person.getId());
                            if (genotype == null)
                                continue;
                            if (ignore_gt_filtered && genotype.isFiltered())
                                continue;
                            mafCalculator.add(genotype, person.isMale());
                        }
                        if (!mafCalculator.isEmpty()) {
                            mafs[i] = mafCalculator.getMaf();
                        }
                    }
                }
                if (mafs[0] == null || mafs[1] == null)
                    continue;
                final XYChart.Data<Number, Number> data = new XYChart.Data<Number, Number>(mafs[0], mafs[1]);
                partition.add(ctx, pedigree, data);
            }
        }
        progress.close();
        in.close();
        in = null;
        final NumberAxis xAxis = new NumberAxis(0.0, 1.0, 0.1);
        xAxis.setLabel("Cases");
        final NumberAxis yAxis = new NumberAxis(0.0, 1.0, 0.1);
        yAxis.setLabel("Controls" + (StringUtil.isBlank(this.controlFields) ? "" : "[" + this.controlFields + "]"));
        final ScatterChart<Number, Number> chart = new ScatterChart<>(xAxis, yAxis);
        for (final XYChart.Series<Number, Number> series : partition.getSeries()) {
            chart.getData().add(series);
        }
        String title = "Case/Control";
        if (!args.isEmpty()) {
            title = args.get(0);
            int slash = title.lastIndexOf("/");
            if (slash != -1)
                title = title.substring(slash + 1);
            if (title.endsWith(".vcf.gz"))
                title = title.substring(0, title.length() - 7);
            if (title.endsWith(".vcf"))
                title = title.substring(0, title.length() - 4);
        }
        if (userTitle != null)
            title = userTitle;
        chart.setTitle(title);
        chart.setAnimated(false);
        // chart.setLegendSide(this.legendSide);
        final RExporter rExporter = new RExporter();
        final PrintWriter pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        rExporter.exportToR(pw, chart);
        pw.flush();
        pw.close();
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
    }
    return 0;
}
Also used : NumberAxis(com.github.lindenb.jvarkit.chart.NumberAxis) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) ScatterChart(com.github.lindenb.jvarkit.chart.ScatterChart) VariantContext(htsjdk.variant.variantcontext.VariantContext) AFExtractorFactory(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory) VCFIterator(htsjdk.variant.vcf.VCFIterator) PrintWriter(java.io.PrintWriter) Genotype(htsjdk.variant.variantcontext.Genotype) RExporter(com.github.lindenb.jvarkit.chart.RExporter) Allele(htsjdk.variant.variantcontext.Allele) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) AFExtractor(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory.AFExtractor) XYChart(com.github.lindenb.jvarkit.chart.XYChart)

Example 3 with AFExtractorFactory

use of com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory in project jvarkit by lindenb.

the class CaseControlPlot method parseConfigFile.

private List<CaseControlExtractor> parseConfigFile(final VCFHeader header) throws Exception {
    Document dom = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(this.xmlFile);
    Element root = dom.getDocumentElement();
    if (root == null || !root.getNodeName().equals("config"))
        throw new JvarkitException.XmlDomError(root, "Root is note <config>");
    final Map<String, JSPredigate> id2variantFilter = new HashMap<>();
    // first pass, collect filters
    for (Node n1 = root.getFirstChild(); n1 != null; n1 = n1.getNextSibling()) {
        if (n1.getNodeType() != Node.ELEMENT_NODE)
            continue;
        Element e1 = Element.class.cast(n1);
        if (!e1.getNodeName().equals("filter"))
            continue;
        final String filterid = e1.getAttribute("id");
        if (StringUtil.isBlank(filterid))
            throw new JvarkitException.XmlDomError(e1, "@id missing");
        if (id2variantFilter.containsKey(filterid))
            throw new JvarkitException.XmlDomError(e1, "duplicate filter id : " + filterid);
        final String expression = e1.getTextContent();
        if (StringUtil.isBlank(expression))
            throw new JvarkitException.XmlDomError(e1, "expression missing");
        id2variantFilter.put(filterid, new JSPredigate(header, expression));
    }
    // second pass collect maf extractors
    final Map<String, MafExtractor> id2mafExtractor = new HashMap<>();
    for (Node n1 = root.getFirstChild(); n1 != null; n1 = n1.getNextSibling()) {
        if (n1.getNodeType() != Node.ELEMENT_NODE)
            continue;
        Element e1 = Element.class.cast(n1);
        if (!e1.getNodeName().equals("maf"))
            continue;
        final String mafid = e1.getAttribute("id");
        if (StringUtil.isBlank(mafid))
            throw new JvarkitException.XmlDomError(e1, "@id missing");
        if (id2mafExtractor.containsKey(mafid))
            throw new JvarkitException.XmlDomError(e1, "duplicate maf id : " + mafid);
        final MafExtractor mafExtractor;
        if (e1.hasAttribute("attribute")) {
            final String tag = e1.getAttribute("attribute");
            if (StringUtil.isBlank(tag))
                throw new JvarkitException.XmlDomError(e1, "@attribute is empty");
            final List<AFExtractor> extractors = new AFExtractorFactory().parseFieldExtractors(tag);
            if (extractors.size() != 1) {
                throw new JvarkitException.XmlDomError(e1, "expected one AF extractor but got " + extractors);
            }
            final DefaultMafExtractor at = new DefaultMafExtractor(extractors.get(0));
            mafExtractor = at;
        } else {
            final GenotypeMafExtractor at = new GenotypeMafExtractor();
            mafExtractor = at;
        }
        id2mafExtractor.put(mafid, mafExtractor);
    }
    final List<CaseControlExtractor> excractors = new ArrayList<>();
    // parse handlers
    for (Node n1 = root.getFirstChild(); n1 != null; n1 = n1.getNextSibling()) {
        if (n1.getNodeType() != Node.ELEMENT_NODE)
            continue;
        Element e1 = Element.class.cast(n1);
        if (!e1.getNodeName().equals("handler"))
            continue;
        final CaseControlExtractor extractor = new CaseControlExtractor();
        extractor.name = e1.getAttribute("name");
        if (StringUtil.isBlank(extractor.name))
            throw new JvarkitException.XmlDomError(e1, "@name missing");
        extractor.name = this.prefix + extractor.name;
        if (excractors.stream().filter(F -> F.getName().equals(extractor.name)).findAny().isPresent()) {
            throw new JvarkitException.XmlDomError(e1, "duplicate name" + extractor.name);
        }
        for (Node n2 = n1.getFirstChild(); n2 != null; n2 = n2.getNextSibling()) {
            if (n2.getNodeType() != Node.ELEMENT_NODE)
                continue;
            Element e2 = Element.class.cast(n2);
            if (e2.getNodeName().equals("filter")) {
                final Predicate<VariantContext> expr;
                if (e2.hasAttribute("ref")) {
                    final String filterid = e2.getAttribute("ref");
                    JSPredigate predicate = id2variantFilter.get(filterid);
                    if (predicate == null) {
                        throw new JvarkitException.XmlDomError(e2, "no such filter:" + filterid);
                    }
                    expr = predicate;
                } else {
                    final String expressionstr = e2.getTextContent();
                    if (StringUtil.isBlank(expressionstr))
                        throw new JvarkitException.XmlDomError(e2, "expression missing");
                    expr = new JSPredigate(header, expressionstr);
                }
                extractor.variantPredicate = extractor.variantPredicate.and(expr);
            } else if (e2.getNodeName().equals("case") || e2.getNodeName().equals("ctrl")) {
                final MafExtractor mafextractor;
                if (e2.hasAttribute("ref")) {
                    final String mafid = e2.getAttribute("ref");
                    if (!id2mafExtractor.containsKey(e2.getAttribute(mafid))) {
                        throw new JvarkitException.XmlDomError(e2, "no such mafextractor:" + mafid);
                    }
                    mafextractor = id2mafExtractor.get(mafid);
                } else {
                    final GenotypeMafExtractor genotypeMafExtractor = new GenotypeMafExtractor();
                    mafextractor = genotypeMafExtractor;
                }
                if (e2.getNodeName().equals("case")) {
                    extractor.caseExtractor = mafextractor;
                } else {
                    extractor.ctrlExtractor = mafextractor;
                }
            } else {
                LOG.error("unknown XML element " + e2.getNodeName());
            }
        }
        excractors.add(extractor);
    }
    return excractors;
}
Also used : HashMap(java.util.HashMap) Element(org.w3c.dom.Element) Node(org.w3c.dom.Node) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Document(org.w3c.dom.Document) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) AFExtractorFactory(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory) AFExtractor(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory.AFExtractor)

Example 4 with AFExtractorFactory

use of com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory in project jvarkit by lindenb.

the class VcfAfInfoFilter method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VCFIterator in, final VariantContextWriter out) {
    try {
        if (this.vcf_gnomad_nfe) {
            this.user_fields_str += ",gnomad_exome_AF_NFE,gnomad_genome_AF_NFE";
            this.user_fields_str += ",gnomad_exome_AC_NFE/gnomad_exome_AN_NFE," + "gnomad_genome_AC_NFE/gnomad_genome_AN_NFE";
        }
        if (!StringUtil.isBlank(this.deprecated_user_af_fields)) {
            LOG.warn("-af use is deprecated.");
            this.user_fields_str += "," + String.join(" , ", this.deprecated_user_af_fields.split("[,; \t\n]+"));
        }
        if (!StringUtil.isBlank(this.deprecated_user_af_fields)) {
            LOG.warn("-af use is deprecated.");
            this.user_fields_str += "," + String.join(" , ", this.deprecated_user_af_fields.split("[,; \t\n]+"));
        }
        if (!StringUtil.isBlank(this.deprecated_user_ac_an_fields)) {
            LOG.warn("--acn use is deprecated.");
            final String[] array = this.deprecated_user_ac_an_fields.split("[,; \t\n]+");
            int i = 0;
            while (i < array.length) {
                String s = array[i];
                if (StringUtil.isBlank(s)) {
                    i++;
                    continue;
                }
                if (s.contains("*")) {
                    this.user_fields_str += ";" + s;
                } else {
                    final String acf = s;
                    i++;
                    while (i < array.length) {
                        s = array[i];
                        if (StringUtil.isBlank(s)) {
                            i++;
                            continue;
                        }
                        break;
                    }
                    if (i == array.length) {
                        LOG.error("missing AN for " + acf + " in " + this.deprecated_user_ac_an_fields);
                        return -1;
                    }
                    final String anf = s;
                    this.user_fields_str += ";" + acf + "/" + anf;
                }
            }
        }
        final AFExtractorFactory afExtractorFactory = new AFExtractorFactory();
        final VCFHeader header = in.getHeader();
        final List<AFExtractor> afExtractors = new ArrayList<>(afExtractorFactory.parseFieldExtractors(this.user_fields_str));
        afExtractors.removeIf(T -> {
            if (!T.validateHeader(header) && !this.ignore_INFO_field_validation) {
                LOG.warn("Ignoring " + T + " because it's not valid.");
                return true;
            }
            return false;
        });
        if (afExtractors.isEmpty()) {
            LOG.warn("No extractor was defined !");
        }
        final Set<VCFHeaderLine> headerLines = new HashSet<>();
        final VCFFilterHeaderLine noAltVariantFilter;
        if (!StringUtil.isBlank(this.filterAllAltInGnomad)) {
            noAltVariantFilter = new VCFFilterHeaderLine(this.filterAllAltInGnomad, "All ALT alleles don't pass the " + this.user_af_minimum + " < gnomad treshold AF < " + this.user_af_maximum);
            headerLines.add(noAltVariantFilter);
        } else {
            noAltVariantFilter = null;
        }
        VCFStandardHeaderLines.addStandardFormatLines(headerLines, true, VCFConstants.GENOTYPE_FILTER_KEY);
        JVarkitVersion.getInstance().addMetaData(getClass().getSimpleName(), header);
        this.recalculator.setHeader(header);
        headerLines.stream().forEach(H -> header.addMetaDataLine(H));
        final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(header).logger(LOG).build();
        out.writeHeader(header);
        while (in.hasNext()) {
            final VariantContext ctx = progress.apply(in.next());
            if (!ctx.isVariant()) {
                out.add(ctx);
                continue;
            }
            final List<Allele> alt_alleles = ctx.getAlternateAlleles();
            final Set<Allele> ok_alleles = new HashSet<>(alt_alleles);
            ok_alleles.remove(Allele.SPAN_DEL);
            for (final AFExtractor afExtractor : afExtractors) {
                if (ok_alleles.isEmpty())
                    break;
                final List<Double> afo_list = afExtractor.parse(ctx);
                if (afo_list == null)
                    continue;
                if (afo_list.size() != alt_alleles.size()) {
                    LOG.warn("in " + ctx.getContig() + ":" + ctx.getStart() + ":" + ctx.getReference() + " illegal number of AF values " + afExtractor);
                }
                for (int x = 0; x < afo_list.size() && x < alt_alleles.size(); ++x) {
                    final Double af = afo_list.get(x);
                    if (af == null)
                        continue;
                    if (af.doubleValue() < this.user_af_minimum || af.doubleValue() > this.user_af_maximum) {
                        if (this.filter_for_any_allele) {
                            ok_alleles.clear();
                        } else {
                            ok_alleles.remove(alt_alleles.get(x));
                        }
                    }
                }
            }
            if (ok_alleles.isEmpty()) {
                if (noAltVariantFilter != null) {
                    out.add(new VariantContextBuilder(ctx).filter(noAltVariantFilter.getID()).make());
                }
                continue;
            }
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            final List<Genotype> genotypes = new ArrayList<>(ctx.getNSamples());
            for (final Genotype gt : ctx.getGenotypes()) {
                if (gt.isNoCall() || gt.isHomRef()) {
                    genotypes.add(gt);
                    continue;
                }
                boolean got_good_allele = false;
                boolean got_bad_alt = false;
                for (final Allele gta : gt.getAlleles()) {
                    if (!gta.isCalled() || gta.equals(Allele.SPAN_DEL)) {
                        continue;
                    } else if (gta.isReference() || ok_alleles.contains(gta)) {
                        got_good_allele = true;
                    } else if (alt_alleles.contains(gta)) {
                        got_bad_alt = true;
                    }
                }
                if (got_good_allele || !got_bad_alt) {
                    genotypes.add(gt);
                    continue;
                }
                if (StringUtil.isBlank(this.genotypeFilter)) {
                    genotypes.add(GenotypeBuilder.createMissing(gt.getSampleName(), gt.getPloidy()));
                } else {
                    genotypes.add(new GenotypeBuilder(gt).filter(this.genotypeFilter).make());
                }
            }
            vcb.genotypes(genotypes);
            out.add(this.recalculator.apply(vcb.make()));
        }
        progress.close();
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) AFExtractorFactory(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) AFExtractor(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory.AFExtractor)

Aggregations

AFExtractorFactory (com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory)4 VariantContext (htsjdk.variant.variantcontext.VariantContext)4 AFExtractor (com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory.AFExtractor)3 Genotype (htsjdk.variant.variantcontext.Genotype)3 ProgressFactory (com.github.lindenb.jvarkit.util.log.ProgressFactory)2 Allele (htsjdk.variant.variantcontext.Allele)2 VCFHeader (htsjdk.variant.vcf.VCFHeader)2 ArrayList (java.util.ArrayList)2 DynamicParameter (com.beust.jcommander.DynamicParameter)1 Parameter (com.beust.jcommander.Parameter)1 NumberAxis (com.github.lindenb.jvarkit.chart.NumberAxis)1 RExporter (com.github.lindenb.jvarkit.chart.RExporter)1 ScatterChart (com.github.lindenb.jvarkit.chart.ScatterChart)1 XYChart (com.github.lindenb.jvarkit.chart.XYChart)1 ArchiveFactory (com.github.lindenb.jvarkit.io.ArchiveFactory)1 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)1 NullOuputStream (com.github.lindenb.jvarkit.io.NullOuputStream)1 FractionConverter (com.github.lindenb.jvarkit.jcommander.converter.FractionConverter)1 JvarkitException (com.github.lindenb.jvarkit.lang.JvarkitException)1 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)1