Search in sources :

Example 1 with VepPrediction

use of com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction in project jvarkit by lindenb.

The method read of the class VcfToSql.

/**
 * Reads one VCF file and calls {@code insert} on the table objects, passing
 * {@code outputWriter}, for: the four bases A/C/G/T, the file itself, every sample,
 * every FILTER header line, every contig of the sequence dictionary and then, for each
 * variant, its reference and alternate alleles, its filters and its genotypes.
 *
 * <p>The VCF iterator is always closed, including when an exception interrupts the scan.
 *
 * @param filename the VCF file to read
 * @throws IOException if a variant uses a filter that was not registered from the VCF
 *         header, or if closing the VCF iterator fails
 * @throws RuntimeException if the VCF header has no sequence dictionary
 */
private void read(File filename) throws IOException {
    /* insert ATGC */
    this.alleleTable.insert(outputWriter, null, "A");
    this.alleleTable.insert(outputWriter, null, "C");
    this.alleleTable.insert(outputWriter, null, "G");
    this.alleleTable.insert(outputWriter, null, "T");
    /* insert this file */
    this.vcfFileTable.insert(outputWriter, null, filename);
    final SelectStmt vcffile_id = new SelectStmt(this.vcfFileTable);
    final Map<String, SelectStmt> sample2sampleid = new HashMap<String, SelectStmt>();
    final Map<String, SelectStmt> filter2filterid = new HashMap<String, SelectStmt>();
    final Map<String, SelectStmt> chrom2chromId = new HashMap<String, SelectStmt>();
    final VcfIterator r = VCFUtils.createVcfIteratorFromFile(filename);
    try {
        final VCFHeader header = r.getHeader();
        /* parse samples */
        for (final String sampleName : header.getSampleNamesInOrder()) {
            this.sampleTable.insert(outputWriter, null, sampleName);
            final SelectStmt sample_id = new SelectStmt(this.sampleTable, "name", sampleName);
            sample2sampleid.put(sampleName, sample_id);
            this.sample2fileTable.insert(outputWriter, null, vcffile_id, sample_id);
        }
        /* parse filters */
        for (final VCFFilterHeaderLine filter : header.getFilterLines()) {
            this.filterTable.insert(outputWriter, null, vcffile_id, filter.getID(), filter.getValue());
            filter2filterid.put(filter.getID(), new SelectStmt(this.filterTable, "name", filter.getID()));
        }
        /* PASS is only registered for lookup here; no row is inserted for it in this method */
        filter2filterid.put(VCFConstants.PASSES_FILTERS_v4, new SelectStmt(this.filterTable, "name", VCFConstants.PASSES_FILTERS_v4));
        final SAMSequenceDictionary dict = header.getSequenceDictionary();
        if (dict == null) {
            throw new RuntimeException("dictionary missing in VCF");
        }
        /* parse sequence dict */
        for (final SAMSequenceRecord ssr : dict.getSequences()) {
            this.chromosomeTable.insert(outputWriter, null, vcffile_id, ssr.getSequenceName(), ssr.getSequenceLength());
            chrom2chromId.put(ssr.getSequenceName(), new SelectStmt(this.chromosomeTable, "name", ssr.getSequenceName()));
        }
        final VepPredictionParser vepPredictionParser = new VepPredictionParserFactory(header).get();
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        int nVariants = 0;
        while (r.hasNext()) {
            /* stop as soon as the output writer reports an error */
            if (this.outputWriter.checkError())
                break;
            final VariantContext ctx = progress.watch(r.next());
            ++nVariants;
            final String refBases = ctx.getReference().getBaseString();
            /* insert ref allele */
            this.alleleTable.insert(outputWriter, null, refBases);
            /* insert variant */
            this.variantTable.insert(outputWriter, null, vcffile_id, nVariants, chrom2chromId.get(ctx.getContig()), ctx.getStart(), (ctx.hasID() ? ctx.getID() : null), new SelectStmt(this.alleleTable, "bases", refBases), (ctx.hasLog10PError() ? ctx.getPhredScaledQual() : null));
            final SelectStmt variant_id = new SelectStmt(variantTable);
            /* insert alternate alleles */
            for (final Allele alt : ctx.getAlternateAlleles()) {
                /* insert alt allele */
                this.alleleTable.insert(outputWriter, null, alt.getBaseString());
                this.variant2altTable.insert(outputWriter, null, variant_id, new SelectStmt(this.alleleTable, "bases", alt.getBaseString()));
            }
            /* insert filters */
            for (final String filter : ctx.getFilters()) {
                final SelectStmt filter_id = filter2filterid.get(filter);
                if (filter_id == null) {
                    throw new IOException("VCF Error: filter " + filter + " is not defined in the VCF header.");
                }
                this.variant2filters.insert(outputWriter, null, variant_id, filter_id);
            }
            if (!this.ignore_info) {
                for (final VepPrediction pred : vepPredictionParser.getPredictions(ctx)) {
                    /* VEP predictions are parsed but not stored: the code inserting the
                       prediction rows and their SO terms is currently disabled. */
                }
            }
            /* insert genotypes */
            for (final String sampleName : sample2sampleid.keySet()) {
                final Genotype g = ctx.getGenotype(sampleName);
                if (!g.isAvailable() || g.isNoCall())
                    continue;
                /* guard on ploidy: getAllele(1) does not exist for a haploid call, store null instead */
                final int ploidy = g.getPloidy();
                final SelectStmt allele1 = (g.isCalled() && ploidy > 0) ? new SelectStmt(this.alleleTable, "bases", g.getAllele(0).getBaseString()) : null;
                final SelectStmt allele2 = (g.isCalled() && ploidy > 1) ? new SelectStmt(this.alleleTable, "bases", g.getAllele(1).getBaseString()) : null;
                genotypeTable.insert(outputWriter, null, variant_id, sample2sampleid.get(sampleName), allele1, allele2, g.hasDP() ? g.getDP() : null, g.hasGQ() ? g.getGQ() : null);
            }
        }
    } finally {
        /* always release the iterator, even when the header or a variant is rejected */
        r.close();
    }
}
Also used : VepPrediction(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Allele(htsjdk.variant.variantcontext.Allele) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Example 2 with VepPrediction

use of com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction in project jvarkit by lindenb.

The method doWork of the class VCFComparePredictions.

/**
 * Loads the variants of every input file into a sorting collection, then walks the sorted
 * records, grouping consecutive records that compare equal on {@code getContigPosRef()}.
 * For each group it prints one line per discordance found between pairs of records
 * (number of variants, VEP / SnpEff prediction counts, VEP / SnpEff SO terms), or a
 * single {@code PASS} line when nothing was reported.
 *
 * @param args the input file names; must not be empty
 * @return 0 on success, -1 if {@code args} is empty or any exception was caught
 */
@Override
public int doWork(List<String> args) {
    PrintWriter out = null;
    SortingCollection<LineAndFile> variants = null;
    /* declared here so that the finally block can release it if the loop fails */
    CloseableIterator<LineAndFile> iter = null;
    try {
        if (args.isEmpty()) {
            LOG.error("Illegal number of arguments");
            return -1;
        }
        out = super.openFileOrStdoutAsPrintWriter(super.outputFile);
        variants = SortingCollection.newInstance(LineAndFile.class, new AbstractVCFCompareBase.LineAndFileCodec(), new AbstractVCFCompareBase.LineAndFileComparator(), super.sortingCollectionArgs.getMaxRecordsInRam(), super.sortingCollectionArgs.getTmpPaths());
        variants.setDestructiveIteration(true);
        for (final String filename : args) {
            LOG.info("Reading from " + filename);
            Input input = super.put(variants, filename);
            LOG.info("end reading " + input.filename);
        }
        /* one pair of prediction parsers per input, indexed like this.inputs */
        List<PredictionTuple> predictionTuples = new ArrayList<PredictionTuple>(super.inputs.size());
        for (AbstractVCFCompareBase.Input input : this.inputs) {
            PredictionTuple predictionTuple = new PredictionTuple();
            predictionTuple.snpEffPredictionParser = new SnpEffPredictionParserFactory(input.codecAndHeader.header).get();
            predictionTuple.vepPredictionParser = new VepPredictionParserFactory(input.codecAndHeader.header).get();
            predictionTuples.add(predictionTuple);
        }
        /* records of the group currently being accumulated */
        List<AbstractVCFCompareBase.LineAndFile> row = new ArrayList<LineAndFile>(super.inputs.size());
        iter = variants.iterator();
        final Comparator<LineAndFile> posCompare = (A, B) -> A.getContigPosRef().compareTo(B.getContigPosRef());
        for (; ; ) {
            /* rec stays null once the iterator is exhausted: this flushes the last group */
            LineAndFile rec = null;
            if (iter.hasNext()) {
                rec = iter.next();
            }
            if (rec == null || (!row.isEmpty() && posCompare.compare(row.get(0), rec) != 0)) {
                if (!row.isEmpty()) {
                    boolean printed = false;
                    VariantContext ctx = row.get(0).getContext();
                    if (row.size() != this.inputs.size()) {
                        startLine(out, ctx);
                        out.println("\tDiscordant number of variants");
                        printed = true;
                    }
                    /* compare every unordered pair (i, j) of records in the group */
                    for (int i = 0; i + 1 < row.size(); ++i) {
                        Input input1 = this.inputs.get(row.get(i).fileIdx);
                        VariantContext ctx1 = row.get(i).getContext();
                        PredictionTuple predtuple1 = predictionTuples.get(row.get(i).fileIdx);
                        List<VepPrediction> vepPredictions1 = predtuple1.vepPredictionParser.getPredictions(ctx1);
                        List<SnpEffPrediction> snpEffPredictions1 = predtuple1.snpEffPredictionParser.getPredictions(ctx1);
                        Set<SequenceOntologyTree.Term> so_vep_1 = getVepSoTerms(predtuple1.vepPredictionParser, ctx1);
                        Set<SequenceOntologyTree.Term> so_snpeff_1 = getSnpEffSoTerms(predtuple1.snpEffPredictionParser, ctx1);
                        for (int j = i + 1; j < row.size(); ++j) {
                            Input input2 = this.inputs.get(row.get(j).fileIdx);
                            VariantContext ctx2 = row.get(j).getContext();
                            PredictionTuple predtuple2 = predictionTuples.get(row.get(j).fileIdx);
                            List<VepPrediction> vepPredictions2 = predtuple2.vepPredictionParser.getPredictions(ctx2);
                            List<SnpEffPrediction> snpEffPredictions2 = predtuple2.snpEffPredictionParser.getPredictions(ctx2);
                            Set<SequenceOntologyTree.Term> so_vep_2 = getVepSoTerms(predtuple2.vepPredictionParser, ctx2);
                            Set<SequenceOntologyTree.Term> so_snpeff_2 = getSnpEffSoTerms(predtuple2.snpEffPredictionParser, ctx2);
                            if (vepPredictions1.size() != vepPredictions2.size()) {
                                startLine(out, ctx);
                                out.print("\tVEP discordant transcripts count");
                                out.print("\t" + input1.filename + ":" + vepPredictions1.size());
                                out.print("\t" + input2.filename + ":" + vepPredictions2.size());
                                out.println();
                                printed = true;
                            }
                            if (snpEffPredictions1.size() != snpEffPredictions2.size()) {
                                startLine(out, ctx);
                                out.print("\tSNPEFF discordant transcripts count");
                                out.print("\t" + input1.filename + ":" + snpEffPredictions1.size());
                                out.print("\t" + input2.filename + ":" + snpEffPredictions2.size());
                                out.println();
                                printed = true;
                            }
                            if (!unshared(so_vep_1, so_vep_2).isEmpty()) {
                                startLine(out, ctx);
                                out.print("\tVEP discordant SO:terms");
                                printDiscordantSO(out, input1, so_vep_1, input2, so_vep_2);
                                printed = true;
                            }
                            if (!unshared(so_snpeff_1, so_snpeff_2).isEmpty()) {
                                startLine(out, ctx);
                                out.print("\tSNPEFF discordant SO:terms");
                                printDiscordantSO(out, input1, so_snpeff_1, input2, so_snpeff_2);
                                printed = true;
                            }
                        }
                    }
                    if (!printed) {
                        startLine(out, ctx);
                        out.println("\tPASS");
                    }
                    row.clear();
                }
                if (rec == null)
                    break;
            }
            row.add(rec);
        }
        iter.close();
        /* already closed: keep the finally block from closing it a second time */
        iter = null;
        out.flush();
        out.close();
        out = null;
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        /* release the iterator before cleaning up the collection it reads from */
        CloserUtil.close(iter);
        CloserUtil.close(out);
        try {
            if (variants != null)
                variants.cleanup();
        } catch (Exception ignored) {
            /* best-effort cleanup of the sorting collection: a failure here must not
               replace the status already returned by this method */
        }
    }
}
Also used : PrintWriter(java.io.PrintWriter) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SortingCollection(htsjdk.samtools.util.SortingCollection) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) SnpEffPrediction(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParser.SnpEffPrediction) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SnpEffPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParserFactory) List(java.util.List) SequenceOntologyTree(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree) VepPrediction(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction) VariantContext(htsjdk.variant.variantcontext.VariantContext) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) Comparator(java.util.Comparator) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory) SnpEffPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParser) CloserUtil(htsjdk.samtools.util.CloserUtil) VepPrediction(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) PrintWriter(java.io.PrintWriter) SnpEffPrediction(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParser.SnpEffPrediction) SnpEffPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParserFactory) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Example 3 with VepPrediction

use of com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction in project jvarkit by lindenb.

The method getVariantKeys of the class VcfGeneSplitter.

/**
 * Builds the set of grouping keys for a variant from its VEP predictions.
 *
 * <p>Each key has the form {@code PREFIX_contig_value}. A key is added for every non-empty
 * HGNC, Ensembl gene, feature, symbol and ENSP value of a prediction, unless the
 * corresponding {@code ignore*} flag of this instance is set. A feature additionally
 * yields a {@code REFSEQ} key when it starts with {@code XM_} or {@code NM_}, or an
 * {@code ENST} key when it starts with {@code ENST}; both are skipped when
 * {@code ignoreFeature} is set.
 *
 * @param vepPredictionParser parser used to extract the VEP predictions of {@code ctx}
 * @param ctx the variant
 * @return the keys found for this variant; empty if there is none
 */
private Set<String> getVariantKeys(final VepPredictionParser vepPredictionParser, final VariantContext ctx) {
    final Set<String> keys = new HashSet<>();
    for (final VepPrediction pred : vepPredictionParser.getPredictions(ctx)) {
        String s = pred.getHGNC();
        if (!isEmpty(s) && !this.ignoreHgnc) {
            keys.add(String.format("HGNC_%s_%s", ctx.getContig(), s));
        }
        s = pred.getEnsemblGene();
        if (!isEmpty(s) && !this.ignoreEnsg) {
            keys.add(String.format("ENSG_%s_%s", ctx.getContig(), s));
        }
        /* pred.getEnsemblTranscript() is not used: same as the feature handled below */
        s = pred.getFeature();
        if (!isEmpty(s) && !this.ignoreFeature) {
            keys.add(String.format("FEATURE_%s_%s", ctx.getContig(), s));
            if ((s.startsWith("XM_") || s.startsWith("NM_")) && !this.ignoreRefSeq) {
                keys.add(String.format("REFSEQ_%s_%s", ctx.getContig(), s));
            } else if (s.startsWith("ENST") && !this.ignoreEnst) {
                /* Ensembl transcript ids have no underscore after the prefix (ENST00000...),
                   so the former test on "ENST_" could never match */
                keys.add(String.format("ENST_%s_%s", ctx.getContig(), s));
            }
        }
        s = pred.getSymbol();
        if (!isEmpty(s) && !this.ignoreSymbol) {
            keys.add(String.format("SYMBOL_%s_%s", ctx.getContig(), s));
        }
        s = pred.getENSP();
        if (!isEmpty(s) && !this.ignoreENSP) {
            keys.add(String.format("ENSP_%s_%s", ctx.getContig(), s));
        }
    }
    return keys;
}
Also used : VepPrediction(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction) HashSet(java.util.HashSet)

Example 4 with VepPrediction

use of com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction in project jvarkit by lindenb.

The method scanVCF of the class VcfToRdf.

/**
 * Scans a VCF and emits RDF statements for its header and for each variant.
 *
 * <p>For every variant a {@code vcf:Variant} resource is emitted; alternate alleles,
 * filters and genotypes are emitted only when the matching {@code print*} flag of this
 * instance is set. The scan stops early when the writer {@code w} reports an error.
 *
 * @param filein the VCF file to read, or {@code null} to read from standard input
 * @throws IOException wrapping any exception raised while reading or emitting
 */
private void scanVCF(final File filein) throws IOException {
    VcfIterator vcfIn = null;
    try {
        final URI source = (filein != null ? filein.toURI() : null);
        if (filein == null) {
            vcfIn = VCFUtils.createVcfIteratorStdin();
        } else {
            vcfIn = VCFUtils.createVcfIteratorFromFile(filein);
        }
        final VCFHeader header = vcfIn.getHeader();
        final VepPredictionParser vepPredictionParser = new VepPredictionParserFactory(header).get();
        writeHeader(header, source);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        while (vcfIn.hasNext()) {
            if (this.w.checkError()) {
                LOG.warn("I/O interruption");
                break;
            }
            final VariantContext ctx = progress.watch(vcfIn.next());
            final String contig = ctx.getContig();
            final int start = ctx.getStart();
            final String refBases = ctx.getReference().getBaseString();
            final String variantId = (ctx.hasID() ? ctx.getID() : null);
            final Double qual = (ctx.hasLog10PError() ? ctx.getPhredScaledQual() : null);

            /* the variant itself */
            final URI variant = URI.create("urn:variant/" + contig + ":" + start + ":" + refBases);
            emit(variant, "rdf:type", "vcf:Variant", "vcf:chrom", URI.create("urn:chrom/" + contig), "vcf:position", start, "vcf:ref", refBases, "vcf:id", variantId, "vcf:qual", qual);

            /* alternate alleles */
            if (this.printAlleles) {
                for (final Allele alt : ctx.getAlternateAlleles()) {
                    emit(variant, "vcf:alt", alt.getBaseString());
                }
            }

            /* filters */
            if (this.printFilters) {
                for (final String filterName : ctx.getFilters()) {
                    emit(variant, "vcf:filter", URI.create("urn:filter/" + filterName));
                }
            }

            /* VEP predictions */
            if (this.printVep) {
                for (final VepPrediction prediction : vepPredictionParser.getPredictions(ctx)) {
                    /* Nothing is emitted here: the code that built a vep:Prediction resource
                       (allele, SO terms, Ensembl transcript/gene/protein links) is disabled. */
                }
            }

            /* genotypes */
            if (this.printGenotypes) {
                for (final String sample : ctx.getSampleNames()) {
                    final Genotype g = ctx.getGenotype(sample);
                    final List<Object> props = new ArrayList<>();
                    props.add("vcf:sample");
                    props.add(URI.create("urn:sample/" + sample));
                    props.add("vcf:variant");
                    props.add(variant);
                    props.add("rdf:type");
                    props.add("vcf:Genotype");
                    if (g.hasDP()) {
                        props.add("vcf:dp");
                        props.add(g.getDP());
                    }
                    if (g.hasGQ()) {
                        props.add("vcf:gq");
                        props.add(g.getGQ());
                    }
                    if (g.isCalled()) {
                        /* extra rdf:type for het / hom calls; none when the call is neither */
                        String zygosityType = null;
                        if (g.isHet()) {
                            zygosityType = g.isHetNonRef() ? "vcf:HetNonRefGenotype" : "vcf:HetGenotype";
                        } else if (g.isHom()) {
                            zygosityType = g.isHomRef() ? "vcf:HomRefGenotype" : "vcf:HomVarGenotype";
                        }
                        if (zygosityType != null) {
                            props.add("rdf:type");
                            props.add(zygosityType);
                        }
                        for (final Allele a : g.getAlleles()) {
                            props.add("vcf:allele");
                            props.add(a.getBaseString());
                        }
                    }
                    final URI genotype = URI.create("urn:gt/" + contig + ":" + start + ":" + refBases + ":" + sample);
                    emit(genotype, props.toArray());
                }
            }
        }
        vcfIn.close();
        vcfIn = null;
        progress.finish();
    } catch (final Exception e) {
        throw new IOException(e);
    } finally {
        CloserUtil.close(vcfIn);
    }
}
Also used : VepPrediction(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) IOException(java.io.IOException) URI(java.net.URI) IOException(java.io.IOException) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Allele(htsjdk.variant.variantcontext.Allele) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VCFHeader(htsjdk.variant.vcf.VCFHeader) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Example 5 with VepPrediction

use of com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction in project jvarkit by lindenb.

The method filterVcfIterator of the class VcfGeneOntology.

/**
 * Annotates the variants read from {@code in} with GO terms and writes them to the
 * writer opened on {@code outputFile}.
 *
 * <p>Gene names are collected from the SnpEff and VEP predictions of each variant and
 * restricted to the keys of {@code name2go}. Each remaining gene with at least one term
 * produces one {@code symbol|acn&acn...} entry stored under the {@code TAG} attribute.
 * Variants without any entry are written unchanged unless {@code removeIfNoGo} is set.
 * When {@code goTermToFilter} is set, a variant that fails the (possibly inverted)
 * descendant test is either flagged with {@code filterName} or, when no filter name is
 * set, dropped.
 *
 * @param in the source of variants; it is not closed by this method
 * @throws IOException declared by the signature; none is thrown directly in this body
 */
private void filterVcfIterator(final VcfIterator in) throws IOException {
    VariantContextWriter writer = null;
    try {
        final VCFHeader inputHeader = in.getHeader();
        final VCFHeader outputHeader = new VCFHeader(inputHeader);
        outputHeader.addMetaDataLine(new VCFInfoHeaderLine(TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "GO terms from GO " + GO + " and GOA=" + GOA));
        final String toolName = getClass().getSimpleName();
        outputHeader.addMetaDataLine(new VCFHeaderLine(toolName + "CmdLine", String.valueOf(getProgramCommandLine())));
        outputHeader.addMetaDataLine(new VCFHeaderLine(toolName + "Version", String.valueOf(getVersion())));
        outputHeader.addMetaDataLine(new VCFHeaderLine(toolName + "HtsJdkVersion", HtsjdkVersion.getVersion()));
        outputHeader.addMetaDataLine(new VCFHeaderLine(toolName + "HtsJdkHome", HtsjdkVersion.getHome()));
        if (filterName != null) {
            final String inversion = (inverse_filter ? " not descendant of " : "");
            outputHeader.addMetaDataLine(new VCFFilterHeaderLine(filterName, "Flag  GO terms " + inversion + " the provided GO terms"));
        }
        writer = super.openVariantContextWriter(outputFile);
        writer.writeHeader(outputHeader);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(inputHeader.getSequenceDictionary());
        final SnpEffPredictionParser snpEffPredictionParser = new SnpEffPredictionParserFactory().header(inputHeader).get();
        final VepPredictionParser vepPredictionParser = new VepPredictionParserFactory().header(inputHeader).get();
        while (in.hasNext()) {
            if (System.out.checkError()) {
                break;
            }
            final VariantContext ctx = progress.watch(in.next());

            /* gene names attached to this variant, from SnpEff then from VEP */
            final Set<String> geneNames = new HashSet<String>();
            for (final SnpEffPrediction pred : snpEffPredictionParser.getPredictions(ctx)) {
                final String name = pred.getGeneName();
                if (name != null && !name.isEmpty()) {
                    geneNames.add(name);
                }
            }
            for (final VepPrediction pred : vepPredictionParser.getPredictions(ctx)) {
                final String[] candidates = { pred.getGeneName(), pred.getGene(), pred.getHGNC() };
                for (final String name : candidates) {
                    if (name != null && !name.isEmpty()) {
                        geneNames.add(name);
                    }
                }
            }
            /* keep only the names that have GO terms */
            geneNames.retainAll(this.name2go.keySet());

            boolean matchesUserTerm = false;
            /* one "symbol|acn&acn..." entry per gene */
            final List<String> entries = new ArrayList<String>();
            for (final String geneName : geneNames) {
                final Set<GoTree.Term> geneTerms = this.name2go.get(geneName);
                if (geneTerms == null || geneTerms.isEmpty()) {
                    continue;
                }
                final StringBuilder entry = new StringBuilder(geneName);
                entry.append("|");
                String separator = "";
                for (final GoTree.Term geneTerm : geneTerms) {
                    final String acn = geneTerm.getAcn();
                    /* test the user's terms only until a first match has been seen */
                    if (!matchesUserTerm && this.goTermToFilter != null) {
                        for (final GoTree.Term userTerm : this.goTermToFilter) {
                            if (userTerm.hasDescendant(acn)) {
                                matchesUserTerm = true;
                                break;
                            }
                        }
                    }
                    entry.append(separator).append(acn);
                    separator = "&";
                }
                entries.add(entry.toString());
            }

            /* no GO term at all: pass the variant through, or drop it */
            if (entries.isEmpty()) {
                if (!removeIfNoGo) {
                    writer.add(ctx);
                }
                continue;
            }

            final VariantContextBuilder builder = new VariantContextBuilder(ctx);
            if (this.goTermToFilter != null) {
                /* rejected when the match status equals the inversion flag */
                final boolean rejected = this.inverse_filter ? matchesUserTerm : !matchesUserTerm;
                if (rejected) {
                    if (this.filterName == null) {
                        /* no filter name: the variant is removed */
                        continue;
                    }
                    /* keep the variant but add the FILTER */
                    final Set<String> filters = new HashSet<String>(ctx.getFilters());
                    filters.add(this.filterName);
                    builder.filters(filters);
                }
            }
            /* attach the GO entries */
            builder.attribute(TAG, entries);
            writer.add(builder.make());
        }
        progress.finish();
        writer.close();
        writer = null;
    } finally {
        CloserUtil.close(writer);
        writer = null;
    }
}
Also used : VepPrediction(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) SnpEffPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParser) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SnpEffPrediction(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParser.SnpEffPrediction) GoTree(com.github.lindenb.jvarkit.util.go.GoTree) SnpEffPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParserFactory) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Aggregations

VepPrediction (com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction)5 VepPredictionParser (com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser)4 VepPredictionParserFactory (com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)4 VariantContext (htsjdk.variant.variantcontext.VariantContext)4 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)3 VCFHeader (htsjdk.variant.vcf.VCFHeader)3 ArrayList (java.util.ArrayList)3 HashSet (java.util.HashSet)3 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)2 SnpEffPredictionParser (com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParser)2 SnpEffPrediction (com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParser.SnpEffPrediction)2 SnpEffPredictionParserFactory (com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParserFactory)2 Allele (htsjdk.variant.variantcontext.Allele)2 Genotype (htsjdk.variant.variantcontext.Genotype)2 VCFFilterHeaderLine (htsjdk.variant.vcf.VCFFilterHeaderLine)2 IOException (java.io.IOException)2 GoTree (com.github.lindenb.jvarkit.util.go.GoTree)1 Program (com.github.lindenb.jvarkit.util.jcommander.Program)1 Logger (com.github.lindenb.jvarkit.util.log.Logger)1 SequenceOntologyTree (com.github.lindenb.jvarkit.util.so.SequenceOntologyTree)1