Search in sources :

Example 1 with AnnPredictionParser

use of com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser in project jvarkit by lindenb.

the class VcfBurdenFilterGenes method doVcfToVcf.

/**
 * Streams the input VCF and restricts it to the genes listed in {@code this.geneNames},
 * using the VEP and ANN functional predictions carried by each variant.
 *
 * <p>Two modes, selected by {@code this.filterTag}:
 * <ul>
 *   <li><b>FILTER mode</b> (non-blank {@code filterTag}): every variant is written. A variant
 *       with no prediction in a listed gene gets the FILTER; a matching variant that was not
 *       already filtered is explicitly set to PASS. Prediction attributes are left untouched.</li>
 *   <li><b>Rewrite mode</b> (blank {@code filterTag}): the VEP and ANN attributes are replaced
 *       by the subset of predictions hitting a listed gene, and variants without any such
 *       prediction are not written.</li>
 * </ul>
 *
 * @param inputName name of the input (not used by this method)
 * @param in        source of variants; always closed before returning
 * @param out       destination writer; the loop stops early if it reports an error
 * @return {@code RETURN_OK} on success, {@code -1} if an exception was caught (and logged)
 */
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
    final VCFHeader header = in.getHeader();
    try {
        final VCFHeader h2 = addMetaData(new VCFHeader(header));
        final VCFFilterHeaderLine filterControlsHeader;
        if (!StringUtil.isBlank(this.filterTag)) {
            filterControlsHeader = new VCFFilterHeaderLine(this.filterTag.trim(), "Genes not in list " + this.geneFile);
            h2.addMetaDataLine(filterControlsHeader);
        } else {
            filterControlsHeader = null;
        }
        // VEP columns whose value is compared against the gene list
        final List<String> lookColumns = Arrays.asList("CCDS", "Feature", "ENSP", "Gene", "HGNC", "HGNC_ID", "SYMBOL", "RefSeq");
        final VepPredictionParser vepParser = new VepPredictionParserFactory(header).get();
        final AnnPredictionParser annParser = new AnnPredictionParserFactory(header).get();
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary()).logger(LOG);
        out.writeHeader(h2);
        while (in.hasNext() && !out.checkError()) {
            final VariantContext ctx = progress.watch(in.next());
            boolean keep = false;
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            // rewrite mode: drop the original predictions, the matching ones are re-added below
            if (filterControlsHeader == null) {
                vcb.rmAttribute(vepParser.getTag());
                vcb.rmAttribute(annParser.getTag());
            }
            final List<String> newVepList = new ArrayList<>();
            for (final String predStr : ctx.getAttributeAsList(vepParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                final VepPredictionParser.VepPrediction pred = vepParser.parseOnePrediction(ctx, predStr);
                for (final String col : lookColumns) {
                    final String token = pred.getByCol(col);
                    if (!StringUtil.isBlank(token) && this.geneNames.contains(token)) {
                        newVepList.add(predStr);
                        keep = true;
                        // one matching column is enough for this prediction; go to the next prediction
                        break;
                    }
                }
            }
            final List<String> newEffList = new ArrayList<>();
            for (final String predStr : ctx.getAttributeAsList(annParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                final AnnPredictionParser.AnnPrediction pred = annParser.parseOnePrediction(predStr);
                final String token = pred.getGeneName();
                if (!StringUtil.isBlank(token) && this.geneNames.contains(token)) {
                    // Deliberately no 'break' here: every matching ANN prediction must be
                    // collected, exactly as for VEP, otherwise rewrite mode would silently
                    // discard all matching predictions but the first one.
                    newEffList.add(predStr);
                    keep = true;
                }
            }
            if (filterControlsHeader != null) {
                // FILTER mode: always emit the variant, only its FILTER column changes
                if (!keep) {
                    vcb.filter(filterControlsHeader.getID());
                } else if (!ctx.isFiltered()) {
                    vcb.passFilters();
                }
                out.add(vcb.make());
            } else {
                // rewrite mode: re-attach the surviving predictions and drop unmatched variants
                if (!newVepList.isEmpty())
                    vcb.attribute(vepParser.getTag(), newVepList);
                if (!newEffList.isEmpty())
                    vcb.attribute(annParser.getTag(), newEffList);
                if (keep)
                    out.add(vcb.make());
            }
        }
        progress.finish();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
    }
}
Also used : AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StringUtil(htsjdk.samtools.util.StringUtil) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) Files(java.nio.file.Files) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) 
VCFHeader(htsjdk.variant.vcf.VCFHeader) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Example 2 with AnnPredictionParser

use of com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser in project jvarkit by lindenb.

the class VCFComposite method doWork.

/**
 * Entry point: reads a VCF (file/URI given in {@code args}, or stdin), groups the accepted
 * variants by gene within each contig, and hands each per-gene group to the disease model
 * created by {@code createModel()}, which writes to the output.
 *
 * <p>If {@code listModels} is set, only the names and descriptions of the available
 * {@code Type} values are printed and the method returns.
 *
 * <p>Variants are buffered in a {@code SortingCollection} keyed by
 * {@code contig + "_" + geneKey}; the collection is flushed to the model each time the contig
 * changes and once more at end of input.
 *
 * @param args command-line arguments remaining after option parsing (at most one input)
 * @return {@code 0} on success, {@code -1} on error (missing affected/unaffected samples in the
 *         pedigree, or any exception, which is logged)
 */
@Override
public int doWork(final List<String> args) {
    PrintWriter out = null;
    // declared outside the try block so that 'finally' can release them on every exit path
    LineIterator r = null;
    SortingCollection<GeneAndVariant> sorting = null;
    try {
        out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        if (listModels) {
            for (final Type t : Type.values()) {
                out.println(t.name());
                out.println("\t" + t.getDescription());
            }
            out.flush();
            return 0;
        }
        this.pedigree = Pedigree.newParser().parse(pedigreeFile);
        if (this.pedigree.getAffected().isEmpty()) {
            LOG.error("No Affected sample in " + this.pedigreeFile);
            return -1;
        }
        if (this.pedigree.getUnaffected().isEmpty()) {
            LOG.error("No Unaffected sample in " + this.pedigreeFile);
            return -1;
        }
        final DiseaseModel model = this.createModel();
        final String inputName = super.oneFileOrNull(args);
        r = (inputName == null ? IOUtils.openStreamForLineIterator(stdin()) : IOUtils.openURIForLineIterator(inputName));
        final VCFCodec codec = new VCFCodec();
        final VCFHeader header = (VCFHeader) codec.readActualHeader(r);
        final AnnPredictionParser annParser = new AnnPredictionParserFactory(header).get();
        final VepPredictionParser vepParser = new VepPredictionParserFactory(header).get();
        String prevContig = null;
        for (; ; ) {
            String line;
            final VariantContext ctx;
            if (r.hasNext()) {
                line = r.next();
                ctx = codec.decode(line);
            } else {
                // end of input: a null ctx triggers the final dump below
                line = null;
                ctx = null;
            }
            if (ctx == null || !ctx.getContig().equals(prevContig)) {
                // contig boundary (or EOF): flush everything buffered for the previous contig
                if (sorting != null) {
                    LOG.debug("Dump contig " + prevContig);
                    sorting.doneAdding();
                    CloseableIterator<GeneAndVariant> iter2 = sorting.iterator();
                    // groups consecutive records sharing the same gene key
                    EqualRangeIterator<GeneAndVariant> eqiter = new EqualRangeIterator<>(iter2, (A, B) -> A.gene.compareTo(B.gene));
                    while (eqiter.hasNext()) {
                        final List<GeneAndVariant> variants = eqiter.next();
                        model.scan(variants.get(0).gene, variants.stream().map(L -> codec.decode(L.ctxLine)).collect(Collectors.toList()), out);
                    }
                    eqiter.close();
                    iter2.close();
                    sorting.cleanup();
                }
                sorting = null;
                if (ctx == null)
                    break;
                prevContig = ctx.getContig();
            }
            if (!ctx.isVariant())
                continue;
            if (!acceptFiltered && ctx.isFiltered())
                continue;
            if (!acceptID && ctx.hasID())
                continue;
            if (!model.accept(ctx))
                continue;
            // gene keys are prefixed with the contig name
            final Set<String> geneKeys = new HashSet<>();
            for (final AnnPredictionParser.AnnPrediction pred : annParser.getPredictions(ctx)) {
                geneKeys.addAll(pred.getGeneKeys().stream().map(S -> ctx.getContig() + "_" + S).collect(Collectors.toSet()));
            }
            for (final VepPredictionParser.VepPrediction pred : vepParser.getPredictions(ctx)) {
                geneKeys.addAll(pred.getGeneKeys().stream().map(S -> ctx.getContig() + "_" + S).collect(Collectors.toSet()));
            }
            if (sorting == null) {
                // lazily created: one collection per contig, ordered by gene then by raw VCF line
                sorting = SortingCollection.newInstance(GeneAndVariant.class, new GeneAndVariantCodec(), (A, B) -> {
                    int i = A.gene.compareTo(B.gene);
                    if (i != 0)
                        return i;
                    return A.ctxLine.compareTo(B.ctxLine);
                }, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
                sorting.setDestructiveIteration(true);
            }
            // the same variant line is stored once per gene it is predicted to affect
            for (final String gk : geneKeys) {
                final GeneAndVariant gav = new GeneAndVariant();
                gav.gene = gk;
                gav.ctxLine = line;
                sorting.add(gav);
            }
        }
        out.flush();
        out.close();
        out = null;
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        // 'sorting' is only non-null here when the loop was aborted by an exception
        if (sorting != null) {
            sorting.cleanup();
        }
        CloserUtil.close(r);
        CloserUtil.close(out);
    }
}
Also used : AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) Genotype(htsjdk.variant.variantcontext.Genotype) DataInputStream(java.io.DataInputStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) LineIterator(htsjdk.tribble.readers.LineIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) ParametersDelegate(com.beust.jcommander.ParametersDelegate) HashSet(java.util.HashSet) ContigPosRef(com.github.lindenb.jvarkit.util.vcf.ContigPosRef) DataOutputStream(java.io.DataOutputStream) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VCFCodec(htsjdk.variant.vcf.VCFCodec) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SortingCollection(htsjdk.samtools.util.SortingCollection) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContext(htsjdk.variant.variantcontext.VariantContext) LineIterator(htsjdk.tribble.readers.LineIterator) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) 
AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) HashSet(java.util.HashSet) VCFCodec(htsjdk.variant.vcf.VCFCodec) IOException(java.io.IOException) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Example 3 with AnnPredictionParser

use of com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser in project jvarkit by lindenb.

the class TestNg01 method testVcfFilterSo.

/**
 * Runs {@code VcfFilterSequenceOntology} three times on {@code VCF01} for the accession
 * {@code SO:0001583} and checks the ANN predictions of the produced VCF:
 * <ol>
 *   <li>{@code -A acn}: every output variant has at least one prediction whose SO term
 *       belongs to the term set;</li>
 *   <li>{@code -A acn --rmatt --invert}: no output variant has such a prediction;</li>
 *   <li>{@code -A acn --rmatt}: the output contains at least one variant.</li>
 * </ol>
 *
 * @throws IOException declared for the helpers reading the output VCF
 */
@Test
public void testVcfFilterSo() throws IOException {
    final File output = new File(TEST_RESULTS_DIR, "jeter.filrerso.vcf");
    final AnnPredictionParser parser = new AnnPredictionParserFactory().createDefaultParser();
    final SequenceOntologyTree tree = SequenceOntologyTree.getInstance();
    final String acn = "SO:0001583";
    final SequenceOntologyTree.Term term = tree.getTermByAcn(acn);
    // must be verified BEFORE the term is dereferenced, otherwise an unknown accession
    // surfaces as a NullPointerException instead of a test failure
    Assert.assertNotNull(term);
    final Set<SequenceOntologyTree.Term> terms = term.getAllDescendants();
    Assert.assertTrue(terms.size() > 1);
    // the set is expected to include the queried term itself
    Assert.assertTrue(terms.contains(term));

    // TestNG convention is assertEquals(actual, expected)
    Assert.assertEquals(new VcfFilterSequenceOntology().instanceMain(new String[] { "-o", output.getPath(), "-A", acn, VCF01 }), 0);
    streamVcf(output).forEach(V -> {
        Assert.assertTrue(parser.getPredictions(V).stream().flatMap(P -> P.getSOTerms().stream()).anyMatch(T -> terms.contains(T)));
    });

    Assert.assertEquals(new VcfFilterSequenceOntology().instanceMain(new String[] { "-o", output.getPath(), "-A", acn, "--rmatt", "--invert", VCF01 }), 0);
    streamVcf(output).forEach(V -> {
        Assert.assertFalse(parser.getPredictions(V).stream().flatMap(P -> P.getSOTerms().stream()).anyMatch(T -> terms.contains(T)));
    });

    Assert.assertEquals(new VcfFilterSequenceOntology().instanceMain(new String[] { "-o", output.getPath(), "-A", acn, "--rmatt", VCF01 }), 0);
    Assert.assertTrue(streamVcf(output).findAny().isPresent());
    Assert.assertTrue(output.delete());
}
Also used : AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) Arrays(java.util.Arrays) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) VcfFilterJdk(com.github.lindenb.jvarkit.tools.vcffilterjs.VcfFilterJdk) Test(org.testng.annotations.Test) VcfBurdenFisherH(com.github.lindenb.jvarkit.tools.burden.VcfBurdenFisherH) Vcf2Xml(com.github.lindenb.jvarkit.tools.vcf2xml.Vcf2Xml) VcfInjectPedigree(com.github.lindenb.jvarkit.tools.burden.VcfInjectPedigree) VcfToTable(com.github.lindenb.jvarkit.tools.misc.VcfToTable) ReferenceGenomeFactory(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceGenomeFactory) VcfMultiToOneAllele(com.github.lindenb.jvarkit.tools.misc.VcfMultiToOneAllele) SAXParser(javax.xml.parsers.SAXParser) VcfNoCallToHomRef(com.github.lindenb.jvarkit.tools.misc.VcfNoCallToHomRef) VcfBurdenFisherV(com.github.lindenb.jvarkit.tools.burden.VcfBurdenFisherV) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) VcfBurdenFilterGenes(com.github.lindenb.jvarkit.tools.burden.VcfBurdenFilterGenes) Set(java.util.Set) PadEmptyFastq(com.github.lindenb.jvarkit.tools.misc.PadEmptyFastq) VcfMultiToOne(com.github.lindenb.jvarkit.tools.onesamplevcf.VcfMultiToOne) VcfOffsetsIndexFactory(com.github.lindenb.jvarkit.tools.vcflist.VcfOffsetsIndexFactory) Stream(java.util.stream.Stream) VcfBurdenFilterExac(com.github.lindenb.jvarkit.tools.burden.VcfBurdenFilterExac) VcfMoveFiltersToInfo(com.github.lindenb.jvarkit.tools.burden.VcfMoveFiltersToInfo) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Bam2Raster(com.github.lindenb.jvarkit.tools.bam2graphics.Bam2Raster) SortVcfOnInfo(com.github.lindenb.jvarkit.tools.sortvcfonref.SortVcfOnInfo) TrapIndexer(com.github.lindenb.jvarkit.tools.trap.TrapIndexer) IterableAdapter(htsjdk.samtools.util.IterableAdapter) VcfTrap(com.github.lindenb.jvarkit.tools.trap.VcfTrap) ArrayList(java.util.ArrayList) 
FixVcfMissingGenotypes(com.github.lindenb.jvarkit.tools.misc.FixVcfMissingGenotypes) Assert(org.testng.Assert) SequenceOntologyTree(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) VcfRebase(com.github.lindenb.jvarkit.tools.vcfrebase.VcfRebase) StreamSupport(java.util.stream.StreamSupport) Properties(java.util.Properties) Files(java.nio.file.Files) VCFBigWig(com.github.lindenb.jvarkit.tools.vcfbigwig.VCFBigWig) VcfGnomad(com.github.lindenb.jvarkit.tools.gnomad.VcfGnomad) IOException(java.io.IOException) File(java.io.File) DefaultHandler(org.xml.sax.helpers.DefaultHandler) VcfSetSequenceDictionary(com.github.lindenb.jvarkit.tools.misc.VcfSetSequenceDictionary) VcfCreateDictionary(com.github.lindenb.jvarkit.tools.misc.VcfCreateDictionary) NgsFilesSummary(com.github.lindenb.jvarkit.tools.ngsfiles.NgsFilesSummary) VcfBurdenRscriptV(com.github.lindenb.jvarkit.tools.burden.VcfBurdenRscriptV) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) BufferedReader(java.io.BufferedReader) GroupByGene(com.github.lindenb.jvarkit.tools.groupbygene.GroupByGene) VCFFamilies(com.github.lindenb.jvarkit.tools.vcftrios.VCFFamilies) ReferenceGenome(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceGenome) Algorithms(com.github.lindenb.jvarkit.util.Algorithms) Biostar84452(com.github.lindenb.jvarkit.tools.biostar.Biostar84452) VCFTrios(com.github.lindenb.jvarkit.tools.vcftrios.VCFTrios) URL(java.net.URL) LowResBam2Raster(com.github.lindenb.jvarkit.tools.bam2graphics.LowResBam2Raster) VCFBed(com.github.lindenb.jvarkit.tools.vcfbed.VCFBed) Random(java.util.Random) VcfToSql(com.github.lindenb.jvarkit.tools.vcf2sql.VcfToSql) MiniCaller(com.github.lindenb.jvarkit.tools.calling.MiniCaller) GoUtils(com.github.lindenb.jvarkit.tools.misc.GoUtils) FindAVariation(com.github.lindenb.jvarkit.tools.misc.FindAVariation) 
VcfXmlAmalgamation(com.github.lindenb.jvarkit.tools.vcfamalgation.VcfXmlAmalgamation) ImageIO(javax.imageio.ImageIO) BamToSql(com.github.lindenb.jvarkit.tools.misc.BamToSql) Gff2KnownGene(com.github.lindenb.jvarkit.tools.misc.Gff2KnownGene) VCFFixIndels(com.github.lindenb.jvarkit.tools.vcffixindels.VCFFixIndels) VCFStripAnnotations(com.github.lindenb.jvarkit.tools.vcfstripannot.VCFStripAnnotations) BufferedImage(java.awt.image.BufferedImage) Predicate(java.util.function.Predicate) BeforeClass(org.testng.annotations.BeforeClass) Collectors(java.util.stream.Collectors) IlluminaReadName(com.github.lindenb.jvarkit.tools.misc.IlluminaReadName) List(java.util.List) BackLocate(com.github.lindenb.jvarkit.tools.backlocate.BackLocate) VcfToSvg(com.github.lindenb.jvarkit.tools.misc.VcfToSvg) VariantContext(htsjdk.variant.variantcontext.VariantContext) Pattern(java.util.regex.Pattern) FastaSequenceReader(com.github.lindenb.jvarkit.util.bio.fasta.FastaSequenceReader) VcfList(com.github.lindenb.jvarkit.tools.vcflist.VcfList) SamReaderFactory(htsjdk.samtools.SamReaderFactory) VcfFilterNotInPedigree(com.github.lindenb.jvarkit.tools.burden.VcfFilterNotInPedigree) VcfFilterSequenceOntology(com.github.lindenb.jvarkit.tools.vcffilterso.VcfFilterSequenceOntology) FindAllCoverageAtPosition(com.github.lindenb.jvarkit.tools.misc.FindAllCoverageAtPosition) VcfToRdf(com.github.lindenb.jvarkit.tools.vcf2rdf.VcfToRdf) DataProvider(org.testng.annotations.DataProvider) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) SAXParserFactory(javax.xml.parsers.SAXParserFactory) VcfLoopOverGenes(com.github.lindenb.jvarkit.tools.burden.VcfLoopOverGenes) VcfToHilbert(com.github.lindenb.jvarkit.tools.hilbert.VcfToHilbert) VcfStats(com.github.lindenb.jvarkit.tools.vcfstats.VcfStats) VcfRemoveUnusedAlt(com.github.lindenb.jvarkit.tools.misc.VcfRemoveUnusedAlt) FileInputStream(java.io.FileInputStream) 
CaseControlCanvas(com.github.lindenb.jvarkit.tools.burden.CaseControlCanvas) VcfCompareCallers(com.github.lindenb.jvarkit.tools.vcfcmp.VcfCompareCallers) ReferenceContig(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceContig) SamReader(htsjdk.samtools.SamReader) FastqShuffle(com.github.lindenb.jvarkit.tools.fastq.FastqShuffle) BamTile(com.github.lindenb.jvarkit.tools.misc.BamTile) VcfBurdenMAF(com.github.lindenb.jvarkit.tools.burden.VcfBurdenMAF) FastaSequence(com.github.lindenb.jvarkit.util.bio.fasta.FastaSequence) ConcatSam(com.github.lindenb.jvarkit.tools.misc.ConcatSam) Bam2Wig(com.github.lindenb.jvarkit.tools.bam2wig.Bam2Wig) FileReader(java.io.FileReader) VcfFilterSequenceOntology(com.github.lindenb.jvarkit.tools.vcffilterso.VcfFilterSequenceOntology) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) SequenceOntologyTree(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree) File(java.io.File) Test(org.testng.annotations.Test)

Example 4 with AnnPredictionParser

use of com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser in project jvarkit by lindenb.

the class VcfStage method buildAnnTableRow.

/**
 * Creates the table displaying ANN predictions, one row per prediction.
 *
 * <p>Columns are only declared when the parser reports itself as valid; otherwise the table
 * has no column. In both cases the table shows a placeholder label while it has no content.
 *
 * @param parser the ANN prediction parser whose validity decides whether columns are created
 * @return a new, unpopulated table view
 */
private TableView<AnnPredictionParser.AnnPrediction> buildAnnTableRow(final AnnPredictionParser parser) {
    final TableView<AnnPredictionParser.AnnPrediction> annTable = new TableView<>();
    annTable.setPlaceholder(new Label("No ANN prediction available"));
    // nothing to declare when the parser is not usable
    if (!parser.isValid()) {
        return annTable;
    }
    annTable.getColumns().add(makeColumn("SO", pred -> pred.getSOTermsString()));
    annTable.getColumns().add(makeColumn("Allele", pred -> pred.getAllele()));
    annTable.getColumns().add(makeColumn("Impact", pred -> pred.getPutativeImpact()));
    annTable.getColumns().add(makeColumn("GeneName", pred -> pred.getGeneName()));
    annTable.getColumns().add(makeColumn("GeneId", pred -> pred.getGeneId()));
    annTable.getColumns().add(makeColumn("Feature", pred -> pred.getFeatureType()));
    annTable.getColumns().add(makeColumn("FeatureId", pred -> pred.getFeatureId()));
    annTable.getColumns().add(makeColumn("Biotype", pred -> pred.getTranscriptBioType()));
    annTable.getColumns().add(makeColumn("HGVsc", pred -> pred.getHGVSc()));
    annTable.getColumns().add(makeColumn("Rank", pred -> pred.getRank()));
    annTable.getColumns().add(makeColumn("cDNA-pos", pred -> pred.getCDNAPos()));
    annTable.getColumns().add(makeColumn("CDS-pos", pred -> pred.getCDSPos()));
    annTable.getColumns().add(makeColumn("AA-pos", pred -> pred.getAAPos()));
    annTable.getColumns().add(makeColumn("Distance", pred -> pred.getDistance()));
    annTable.getColumns().add(makeColumn("Msg", pred -> pred.getMessages()));
    return annTable;
}
Also used : Arrays(java.util.Arrays) VCFHeader(htsjdk.variant.vcf.VCFHeader) ChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.ChartFactory) VariantContextChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.VariantContextChartFactory) ScrollPane(javafx.scene.control.ScrollPane) TabPane(javafx.scene.control.TabPane) ReadOnlyObjectWrapper(javafx.beans.property.ReadOnlyObjectWrapper) VariantDepthChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.VariantDepthChartFactory) Map(java.util.Map) AlleleFrequencyChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.AlleleFrequencyChartFactory) CloserUtil(htsjdk.samtools.util.CloserUtil) Rectangle2D(javafx.geometry.Rectangle2D) SplitPane(javafx.scene.control.SplitPane) PropertyValueFactory(javafx.scene.control.cell.PropertyValueFactory) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) GraphicsContext(javafx.scene.canvas.GraphicsContext) AFByPopulationChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.AFByPopulationChartFactory) TiTvChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.TiTvChartFactory) Set(java.util.Set) Screen(javafx.stage.Screen) CellDataFeatures(javafx.scene.control.TableColumn.CellDataFeatures) ArcType(javafx.scene.shape.ArcType) Separator(javafx.scene.control.Separator) PieChart(javafx.scene.chart.PieChart) BooleanProperty(javafx.beans.property.BooleanProperty) FlowPane(javafx.scene.layout.FlowPane) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) CheckBoxTableCell(javafx.scene.control.cell.CheckBoxTableCell) ObservableList(javafx.collections.ObservableList) BorderPane(javafx.scene.layout.BorderPane) Genotype(htsjdk.variant.variantcontext.Genotype) CloseableIterator(htsjdk.samtools.util.CloseableIterator) OutputType(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder.OutputType) FXCollections(javafx.collections.FXCollections) TextFlow(javafx.scene.text.TextFlow) 
Supplier(java.util.function.Supplier) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) TabClosingPolicy(javafx.scene.control.TabPane.TabClosingPolicy) SequenceOntologyTree(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree) VCFConstants(htsjdk.variant.vcf.VCFConstants) Counter(com.github.lindenb.jvarkit.util.Counter) Color(javafx.scene.paint.Color) CheckBox(javafx.scene.control.CheckBox) IOException(java.io.IOException) AFBySexChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.AFBySexChartFactory) File(java.io.File) Menu(javafx.scene.control.Menu) FileChooser(javafx.stage.FileChooser) Tab(javafx.scene.control.Tab) CompiledScript(javax.script.CompiledScript) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) ObservableValue(javafx.beans.value.ObservableValue) VariantTypeChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.VariantTypeChartFactory) EventHandler(javafx.event.EventHandler) Button(javafx.scene.control.Button) Allele(htsjdk.variant.variantcontext.Allele) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VBox(javafx.scene.layout.VBox) AlertType(javafx.scene.control.Alert.AlertType) ContextMenu(javafx.scene.control.ContextMenu) WindowEvent(javafx.stage.WindowEvent) TableView(javafx.scene.control.TableView) Orientation(javafx.geometry.Orientation) Alert(javafx.scene.control.Alert) HBox(javafx.scene.layout.HBox) TextField(javafx.scene.control.TextField) PatternSyntaxException(java.util.regex.PatternSyntaxException) MenuItem(javafx.scene.control.MenuItem) Predicate(java.util.function.Predicate) VariantQualChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.VariantQualChartFactory) GenotypeType(htsjdk.variant.variantcontext.GenotypeType) Font(javafx.scene.text.Font) Collectors(java.util.stream.Collectors) 
SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) Text(javafx.scene.text.Text) List(java.util.List) Paint(javafx.scene.paint.Paint) Term(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree.Term) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Optional(java.util.Optional) VariantContext(htsjdk.variant.variantcontext.VariantContext) Pattern(java.util.regex.Pattern) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Scene(javafx.scene.Scene) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) ButtonType(javafx.scene.control.ButtonType) Function(java.util.function.Function) TableColumn(javafx.scene.control.TableColumn) Interval(htsjdk.samtools.util.Interval) Insets(javafx.geometry.Insets) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory) Callback(javafx.util.Callback) GenotypeTypeChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.GenotypeTypeChartFactory) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) Label(javafx.scene.control.Label) ActionEvent(javafx.event.ActionEvent) SimpleBooleanProperty(javafx.beans.property.SimpleBooleanProperty) SpinnerValueFactory(javafx.scene.control.SpinnerValueFactory) ExtensionFilter(javafx.stage.FileChooser.ExtensionFilter) Collections(java.util.Collections) Label(javafx.scene.control.Label) TableView(javafx.scene.control.TableView)

Example 5 with AnnPredictionParser

use of com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser in project jvarkit by lindenb.

the class VcfBurdenGoEnrichment method doWork.

/**
 * Computes, for every GO term, ref/alt genotype counts in affected and unaffected
 * individuals and writes one tab-delimited line per term with its Fisher value.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>parse the GO tree from {@code readingGo.goUri} and create one {@code Node} per term,
 *       linking each node to the nodes of the terms its relations point to;</li>
 *   <li>read {@code geneFile} (lines of {@code gene<TAB>term}; empty lines and lines starting
 *       with {@code #} are skipped) to associate genes with nodes;</li>
 *   <li>scan the VCF: gene identifiers are collected from the VEP and ANN predictions of each
 *       variant accepted by {@code variantFilter}, and the genotype counts are passed to
 *       {@code Node.visit} for every node associated with one of these genes;</li>
 *   <li>print the nodes sorted by ascending {@code fisher()} value.</li>
 * </ol>
 *
 * <p>All opened resources (gene file reader, VCF iterator, output writer) are released on
 * every exit path, including early returns and exceptions.
 *
 * @param args command line arguments; handed to {@code oneFileOrNull} to select the VCF input
 * @return {@code 0} on success, {@code -1} if an option is missing, an input is invalid or any
 *         exception is raised (the error is logged)
 */
@Override
public int doWork(final List<String> args) {
    if (StringUtil.isBlank(this.readingGo.goUri)) {
        LOG.error("Undefined GOs uri.");
        return -1;
    }
    if (this.geneFile == null || !this.geneFile.exists()) {
        LOG.error("Undefined gene file option.");
        return -1;
    }
    // Declared outside the try block so that the finally clause can release it whatever the exit path.
    VcfIterator iter = null;
    try {
        final GoTree gotree = this.readingGo.createParser().setIgnoreDbXRef(true).parse(this.readingGo.goUri);
        final List<GoTree.Term> pending = new ArrayList<>(gotree.getTerms());
        final Map<GoTree.Term, Node> term2node = new HashMap<>();
        // Build the node tree: a term can only be converted once all the terms it is related to
        // already have a node, so the pending list is scanned repeatedly until it is empty.
        while (!pending.isEmpty()) {
            final int pendingBefore = pending.size();
            int i = 0;
            while (i < pending.size()) {
                final GoTree.Term t = pending.get(i);
                if (!t.hasRelations()) {
                    term2node.put(t, new Node(t));
                    pending.remove(i);
                } else if (t.getRelations().stream().allMatch(L -> term2node.containsKey(L.getTo()))) {
                    final Node n = new Node(t);
                    n.parents.addAll(t.getRelations().stream().map(L -> term2node.get(L.getTo())).collect(Collectors.toSet()));
                    term2node.put(t, n);
                    pending.remove(i);
                } else {
                    i++;
                }
            }
            // A full pass that placed nothing will never make progress (relation to a term that is
            // not in the tree, or cyclic relations): fail instead of looping forever.
            if (pending.size() == pendingBefore) {
                LOG.error("Cannot build the GO tree: " + pendingBefore + " term(s) are related to an unknown term or are part of a cycle.");
                return -1;
            }
        }
        final Set<String> unknownAcn = new HashSet<>();
        final Map<String, Set<Node>> gene2node = new HashMap<>();
        // try-with-resources: the reader is closed on normal completion, early return and exception.
        try (BufferedReader r = IOUtils.openFileForBufferedReading(this.geneFile)) {
            String line;
            while ((line = r.readLine()) != null) {
                if (line.isEmpty() || line.startsWith("#")) {
                    continue;
                }
                final int tab = line.indexOf('\t');
                if (tab == -1) {
                    LOG.error("tab missing in " + line + " of " + this.geneFile);
                    return -1;
                }
                final String gene = line.substring(0, tab).trim();
                if (StringUtil.isBlank(gene)) {
                    LOG.error("Emtpy gene in " + line);
                    return -1;
                }
                // using getTermByName because synonyms were found in GOA
                final String termAcn = line.substring(tab + 1).trim();
                // already reported as unknown: skip silently
                if (unknownAcn.contains(termAcn)) {
                    continue;
                }
                final GoTree.Term term = gotree.getTermByName(termAcn);
                if (term == null) {
                    unknownAcn.add(termAcn);
                    LOG.warning("Don't know this GO term in " + line + " of " + this.geneFile + ". Could be obsolete, synonym, go specific division. Skipping.");
                    continue;
                }
                final Node node = term2node.get(term);
                if (node == null) {
                    LOG.error("Don't know this node in " + line + " of " + this.geneFile);
                    return -1;
                }
                final Set<Node> nodes = gene2node.computeIfAbsent(gene, K -> new HashSet<>());
                // Count the gene only the first time it is attached to this node, so that a
                // repeated (gene, term) line does not inflate the number of genes of the node.
                if (nodes.add(node)) {
                    node.numGenes++;
                }
            }
        }
        // clean up
        unknownAcn.clear();
        iter = openVcfIterator(oneFileOrNull(args));
        final VCFHeader header = iter.getHeader();
        final VepPredictionParser vepParser = new VepPredictionParserFactory(header).get();
        final AnnPredictionParser annParser = new AnnPredictionParserFactory(header).get();
        final Set<Pedigree.Person> persons;
        if (this.pedFile != null) {
            final Pedigree pedigree = Pedigree.newParser().parse(this.pedFile);
            persons = new Pedigree.CaseControlExtractor().extract(header, pedigree);
        } else {
            persons = new Pedigree.CaseControlExtractor().extract(header);
        }
        final Set<Pedigree.Person> affected = persons.stream().filter(P -> P.isAffected()).collect(Collectors.toSet());
        final Set<Pedigree.Person> unaffected = persons.stream().filter(P -> P.isUnaffected()).collect(Collectors.toSet());
        if (affected.isEmpty()) {
            LOG.error("No Affected individual");
            return -1;
        }
        if (unaffected.isEmpty()) {
            LOG.error("No unaffected individual");
            return -1;
        }
        // VEP columns that may carry a gene identifier
        final List<String> lookColumns = Arrays.asList("CCDS", "Feature", "ENSP", "Gene", "HGNC", "HGNC_ID", "SYMBOL", "RefSeq");
        final Predicate<Genotype> isWildGenotype = G -> G != null && G.isHomRef();
        final Predicate<Genotype> isAltGenotype = G -> G != null && G.isCalled() && !G.isHomRef();
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        while (iter.hasNext()) {
            final VariantContext ctx = progress.watch(iter.next());
            if (!this.variantFilter.test(ctx)) {
                continue;
            }
            final Set<String> genes = new HashSet<>();
            for (final String predStr : ctx.getAttributeAsList(vepParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                final VepPredictionParser.VepPrediction pred = vepParser.parseOnePrediction(ctx, predStr);
                for (final String col : lookColumns) {
                    final String token = pred.getByCol(col);
                    if (!StringUtil.isBlank(token)) {
                        genes.add(token);
                    }
                }
            }
            for (final String predStr : ctx.getAttributeAsList(annParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                final AnnPredictionParser.AnnPrediction pred = annParser.parseOnePrediction(predStr);
                final String token = pred.getGeneName();
                if (!StringUtil.isBlank(token)) {
                    genes.add(token);
                }
            }
            if (genes.isEmpty()) {
                continue;
            }
            final Set<Node> nodes = genes.stream().filter(G -> gene2node.containsKey(G)).flatMap(G -> gene2node.get(G).stream()).collect(Collectors.toSet());
            if (nodes.isEmpty()) {
                continue;
            }
            final long unaffected_alt = unaffected.stream().map(P -> ctx.getGenotype(P.getId())).filter(G -> this.genotypeFilter.test(ctx, G)).filter(isAltGenotype).count();
            final long affected_alt = affected.stream().map(P -> ctx.getGenotype(P.getId())).filter(G -> this.genotypeFilter.test(ctx, G)).filter(isAltGenotype).count();
            /* not informative: nobody carries an ALT genotype */
            if (unaffected_alt + affected_alt == 0L) {
                continue;
            }
            final long affected_ref = affected.stream().map(P -> ctx.getGenotype(P.getId())).filter(G -> this.genotypeFilter.test(ctx, G)).filter(isWildGenotype).count();
            final long unaffected_ref = unaffected.stream().map(P -> ctx.getGenotype(P.getId())).filter(G -> this.genotypeFilter.test(ctx, G)).filter(isWildGenotype).count();
            // NOTE(review): the flags are reset before visiting, presumably so that a node reachable
            // through several paths is counted once per variant -- confirm in Node.visit.
            nodes.stream().forEach(N -> N.resetVisitedFlag());
            nodes.stream().forEach(N -> N.visit(unaffected_ref, unaffected_alt, affected_ref, affected_alt));
        }
        // Release the input before the output phase; the reference is cleared first so that the
        // finally clause never closes the same iterator a second time.
        final VcfIterator consumed = iter;
        iter = null;
        consumed.close();
        progress.finish();
        LOG.info("Calculating Fisher and dumping.. please wait");
        try (PrintWriter pw = super.openFileOrStdoutAsPrintWriter(this.outputFile)) {
            pw.println("#go_term\tfisher\tname\tgo_term_depth\tcount_genes_in_this_node" + "\tunaffected_ref_gt" + "\tunaffected_alt_gt" + "\taffected_ref_gt" + "\taffected_alt_gt");
            term2node.values().stream().filter(N -> this.show_never_seeen_term || N.sum() > 0L).sorted((n1, n2) -> Double.compare(n1.fisher(), n2.fisher())).forEach(N -> {
                pw.print(N.goTerm.getAcn());
                pw.print('\t');
                pw.print(N.fisher());
                pw.print("\t");
                // spaces, quotes, commas and hyphens in the term name are collapsed into '_'
                pw.print(N.goTerm.getName().replaceAll("[ \',\\-]+", "_"));
                pw.print("\t");
                pw.print(N.goTerm.getMinDepth());
                pw.print('\t');
                pw.print(N.numGenes);
                pw.print('\t');
                pw.print(N.unaffected_ref);
                pw.print('\t');
                pw.print(N.unaffected_alt);
                pw.print('\t');
                pw.print(N.affected_ref);
                pw.print('\t');
                pw.print(N.affected_alt);
                pw.println();
            });
            pw.flush();
        }
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        // Only reached with a non-null iterator on an early return or an exception.
        if (iter != null) {
            try {
                iter.close();
            } catch (final Exception closeErr) {
                LOG.warning("Cannot close the VCF input: " + closeErr.getMessage());
            }
        }
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) Arrays(java.util.Arrays) JexlVariantPredicate(com.github.lindenb.jvarkit.util.vcf.JexlVariantPredicate) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BiPredicate(java.util.function.BiPredicate) StringUtil(htsjdk.samtools.util.StringUtil) FisherExactTest(com.github.lindenb.jvarkit.math.stats.FisherExactTest) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory) PrintWriter(java.io.PrintWriter) JexlGenotypePredicate(com.github.lindenb.jvarkit.util.vcf.JexlGenotypePredicate) GoTree(com.github.lindenb.jvarkit.util.go.GoTree) Predicate(java.util.function.Predicate) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VariantContext(htsjdk.variant.variantcontext.VariantContext) BufferedReader(java.io.BufferedReader) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) 
VariantContext(htsjdk.variant.variantcontext.VariantContext) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) PrintWriter(java.io.PrintWriter) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Genotype(htsjdk.variant.variantcontext.Genotype) GoTree(com.github.lindenb.jvarkit.util.go.GoTree) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) BufferedReader(java.io.BufferedReader) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Aggregations

AnnPredictionParser (com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser)5 AnnPredictionParserFactory (com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory)5 VariantContext (htsjdk.variant.variantcontext.VariantContext)5 VCFHeader (htsjdk.variant.vcf.VCFHeader)5 File (java.io.File)5 List (java.util.List)5 Set (java.util.Set)5 Collectors (java.util.stream.Collectors)5 VepPredictionParser (com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser)4 VepPredictionParserFactory (com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)4 CloserUtil (htsjdk.samtools.util.CloserUtil)4 ArrayList (java.util.ArrayList)4 Arrays (java.util.Arrays)4 Predicate (java.util.function.Predicate)4 Parameter (com.beust.jcommander.Parameter)3 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)3 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)3 Program (com.github.lindenb.jvarkit.util.jcommander.Program)3 Logger (com.github.lindenb.jvarkit.util.log.Logger)3 CloseableIterator (htsjdk.samtools.util.CloseableIterator)3