Search in sources :

Example 1 with VepPredictionParserFactory

use of com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory in project jvarkit by lindenb.

the class VcfBurdenFilterGenes method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
    final VCFHeader header = in.getHeader();
    try {
        final VCFHeader h2 = addMetaData(new VCFHeader(header));
        final VCFFilterHeaderLine filterControlsHeader;
        if (!StringUtil.isBlank(this.filterTag)) {
            filterControlsHeader = new VCFFilterHeaderLine(this.filterTag.trim(), "Genes not in list " + this.geneFile);
            h2.addMetaDataLine(filterControlsHeader);
        } else {
            filterControlsHeader = null;
        }
        final List<String> lookColumns = Arrays.asList("CCDS", "Feature", "ENSP", "Gene", "HGNC", "HGNC_ID", "SYMBOL", "RefSeq");
        final VepPredictionParser vepParser = new VepPredictionParserFactory(header).get();
        final AnnPredictionParser annParser = new AnnPredictionParserFactory(header).get();
        final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(header.getSequenceDictionary()).logger(LOG);
        out.writeHeader(h2);
        while (in.hasNext() && !out.checkError()) {
            final VariantContext ctx = progess.watch(in.next());
            boolean keep = false;
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            // not just set FILTER ?
            if (filterControlsHeader == null) {
                vcb.rmAttribute(vepParser.getTag());
                vcb.rmAttribute(annParser.getTag());
            }
            final List<String> newVepList = new ArrayList<>();
            for (final String predStr : ctx.getAttributeAsList(vepParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                final VepPredictionParser.VepPrediction pred = vepParser.parseOnePrediction(ctx, predStr);
                for (final String col : lookColumns) {
                    final String token = pred.getByCol(col);
                    if (!StringUtil.isBlank(token) && this.geneNames.contains(token)) {
                        newVepList.add(predStr);
                        keep = true;
                        break;
                    }
                }
            }
            final List<String> newEffList = new ArrayList<>();
            for (final String predStr : ctx.getAttributeAsList(annParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                final AnnPredictionParser.AnnPrediction pred = annParser.parseOnePrediction(predStr);
                final String token = pred.getGeneName();
                if (!StringUtil.isBlank(token) && this.geneNames.contains(token)) {
                    newEffList.add(predStr);
                    keep = true;
                    break;
                }
            }
            // not just set FILTER ?
            if (filterControlsHeader == null) {
                if (!newVepList.isEmpty())
                    vcb.attribute(vepParser.getTag(), newVepList);
                if (!newEffList.isEmpty())
                    vcb.attribute(annParser.getTag(), newEffList);
            }
            if (filterControlsHeader != null) {
                if (!keep) {
                    vcb.filter(filterControlsHeader.getID());
                } else if (!ctx.isFiltered()) {
                    vcb.passFilters();
                }
                out.add(vcb.make());
            } else {
                if (keep)
                    out.add(vcb.make());
            }
        }
        progess.finish();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
    }
}
Also used : AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StringUtil(htsjdk.samtools.util.StringUtil) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) Files(java.nio.file.Files) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Example 2 with VepPredictionParserFactory

use of com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory in project jvarkit by lindenb.

the class VcfToSql method read.

private void read(File filename) throws IOException {
    /* insert ATGC */
    this.alleleTable.insert(outputWriter, null, "A");
    this.alleleTable.insert(outputWriter, null, "C");
    this.alleleTable.insert(outputWriter, null, "G");
    this.alleleTable.insert(outputWriter, null, "T");
    /* insert this sample */
    this.vcfFileTable.insert(outputWriter, null, filename);
    final SelectStmt vcffile_id = new SelectStmt(this.vcfFileTable);
    final Map<String, SelectStmt> sample2sampleid = new HashMap<String, SelectStmt>();
    final Map<String, SelectStmt> filter2filterid = new HashMap<String, SelectStmt>();
    final Map<String, SelectStmt> chrom2chromId = new HashMap<String, SelectStmt>();
    final VcfIterator r = VCFUtils.createVcfIteratorFromFile(filename);
    final VCFHeader header = r.getHeader();
    /* parse samples */
    for (final String sampleName : header.getSampleNamesInOrder()) {
        this.sampleTable.insert(outputWriter, null, sampleName);
        SelectStmt sample_id = new SelectStmt(this.sampleTable, "name", sampleName);
        sample2sampleid.put(sampleName, sample_id);
        this.sample2fileTable.insert(outputWriter, null, vcffile_id, sample_id);
    }
    /* parse filters */
    for (final VCFFilterHeaderLine filter : header.getFilterLines()) {
        this.filterTable.insert(outputWriter, null, vcffile_id, filter.getID(), filter.getValue());
        filter2filterid.put(filter.getID(), new SelectStmt(this.filterTable, "name", filter.getID()));
    }
    filter2filterid.put(VCFConstants.PASSES_FILTERS_v4, new SelectStmt(this.filterTable, "name", VCFConstants.PASSES_FILTERS_v4));
    final SAMSequenceDictionary dict = header.getSequenceDictionary();
    if (dict == null) {
        throw new RuntimeException("dictionary missing in VCF");
    }
    /* parse sequence dict */
    for (final SAMSequenceRecord ssr : dict.getSequences()) {
        this.chromosomeTable.insert(outputWriter, null, vcffile_id, ssr.getSequenceName(), ssr.getSequenceLength());
        chrom2chromId.put(ssr.getSequenceName(), new SelectStmt(this.chromosomeTable, "name", ssr.getSequenceName()));
    }
    VepPredictionParser vepPredictionParser = new VepPredictionParserFactory(header).get();
    SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
    int nVariants = 0;
    while (r.hasNext()) {
        if (this.outputWriter.checkError())
            break;
        VariantContext var = progress.watch(r.next());
        ++nVariants;
        /* insert ref allele */
        this.alleleTable.insert(outputWriter, null, var.getReference().getBaseString());
        /* insert variant */
        this.variantTable.insert(outputWriter, null, vcffile_id, nVariants, chrom2chromId.get(var.getContig()), var.getStart(), (var.hasID() ? var.getID() : null), new SelectStmt(this.alleleTable, "bases", var.getReference().getBaseString()), (var.hasLog10PError() ? var.getPhredScaledQual() : null));
        SelectStmt variant_id = new SelectStmt(variantTable);
        /* insert alternate alleles */
        for (Allele alt : var.getAlternateAlleles()) {
            /* insert alt allele */
            this.alleleTable.insert(outputWriter, null, alt.getBaseString());
            this.variant2altTable.insert(outputWriter, null, variant_id, new SelectStmt(this.alleleTable, "bases", alt.getBaseString()));
        }
        /* insert filters */
        for (final String filter : var.getFilters()) {
            if (filter2filterid.get(filter) == null) {
                throw new IOException("VCF Error: filter " + filter + " is not defined in the VCF header.");
            }
            this.variant2filters.insert(outputWriter, null, variant_id, filter2filterid.get(filter));
        }
        if (!this.ignore_info) {
            for (final VepPrediction pred : vepPredictionParser.getPredictions(var)) {
            /*
					vepPrediction.insert(
							outputWriter,
							null,
							variant_id,
							pred.getEnsemblGene(),
							pred.getEnsemblTranscript(),
							pred.getEnsemblProtein(),
							pred.getSymbol()
							);
					SelectStmt pred_id = new SelectStmt(vepPrediction);
			
					for(SequenceOntologyTree.Term t: pred.getSOTerms())
						{
						String term=t.getAcn().replace(':', '_');
						soTermTable.insert(
								outputWriter,
								null,
								term,
								t.getAcn()
								);//for bioportal compatibility
						SelectStmt term_id = new SelectStmt(soTermTable,"acn",term);
						
						vepPrediction2so.insert(
							outputWriter,
							null,
							pred_id,
							term_id
							);
						}
					*/
            }
        }
        /* insert genotypes */
        for (final String sampleName : sample2sampleid.keySet()) {
            final Genotype g = var.getGenotype(sampleName);
            if (!g.isAvailable() || g.isNoCall())
                continue;
            genotypeTable.insert(outputWriter, null, variant_id, sample2sampleid.get(sampleName), g.isCalled() ? new SelectStmt(this.alleleTable, "bases", g.getAllele(0).getBaseString()) : null, g.isCalled() ? new SelectStmt(this.alleleTable, "bases", g.getAllele(1).getBaseString()) : null, g.hasDP() ? g.getDP() : null, g.hasGQ() ? g.getGQ() : null);
        }
    }
    r.close();
}
Also used : VepPrediction(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Allele(htsjdk.variant.variantcontext.Allele) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Example 3 with VepPredictionParserFactory

use of com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory in project jvarkit by lindenb.

the class VCFComparePredictions method doWork.

@Override
public int doWork(List<String> args) {
    PrintWriter out = null;
    SortingCollection<LineAndFile> variants = null;
    try {
        if (args.isEmpty()) {
            LOG.error("Illegal number of arguments");
            return -1;
        }
        out = super.openFileOrStdoutAsPrintWriter(super.outputFile);
        variants = SortingCollection.newInstance(LineAndFile.class, new AbstractVCFCompareBase.LineAndFileCodec(), new AbstractVCFCompareBase.LineAndFileComparator(), super.sortingCollectionArgs.getMaxRecordsInRam(), super.sortingCollectionArgs.getTmpPaths());
        variants.setDestructiveIteration(true);
        for (final String filename : args) {
            LOG.info("Reading from " + filename);
            Input input = super.put(variants, filename);
            LOG.info("end reading " + input.filename);
        }
        List<PredictionTuple> predictionTuples = new ArrayList<PredictionTuple>(super.inputs.size());
        for (AbstractVCFCompareBase.Input input : this.inputs) {
            PredictionTuple predictionTuple = new PredictionTuple();
            predictionTuple.snpEffPredictionParser = new SnpEffPredictionParserFactory(input.codecAndHeader.header).get();
            predictionTuple.vepPredictionParser = new VepPredictionParserFactory(input.codecAndHeader.header).get();
            predictionTuples.add(predictionTuple);
        }
        List<AbstractVCFCompareBase.LineAndFile> row = new ArrayList<LineAndFile>(super.inputs.size());
        CloseableIterator<LineAndFile> iter = variants.iterator();
        final Comparator<LineAndFile> posCompare = (A, B) -> A.getContigPosRef().compareTo(B.getContigPosRef());
        for (; ; ) {
            LineAndFile rec = null;
            if (iter.hasNext()) {
                rec = iter.next();
            }
            if (rec == null || (!row.isEmpty() && posCompare.compare(row.get(0), rec) != 0)) {
                if (!row.isEmpty()) {
                    boolean printed = false;
                    VariantContext ctx = row.get(0).getContext();
                    if (row.size() != this.inputs.size()) {
                        startLine(out, ctx);
                        out.println("\tDiscordant number of variants");
                        printed = true;
                    }
                    for (int i = 0; i + 1 < row.size(); ++i) {
                        Input input1 = this.inputs.get(row.get(i).fileIdx);
                        VariantContext ctx1 = row.get(i).getContext();
                        PredictionTuple predtuple1 = predictionTuples.get(row.get(i).fileIdx);
                        List<VepPrediction> vepPredictions1 = predtuple1.vepPredictionParser.getPredictions(ctx1);
                        List<SnpEffPrediction> snpEffPredictions1 = predtuple1.snpEffPredictionParser.getPredictions(ctx1);
                        Set<SequenceOntologyTree.Term> so_vep_1 = getVepSoTerms(predtuple1.vepPredictionParser, ctx1);
                        Set<SequenceOntologyTree.Term> so_snpeff_1 = getSnpEffSoTerms(predtuple1.snpEffPredictionParser, ctx1);
                        for (int j = i + 1; j < row.size(); ++j) {
                            Input input2 = this.inputs.get(row.get(j).fileIdx);
                            VariantContext ctx2 = row.get(j).getContext();
                            PredictionTuple predtuple2 = predictionTuples.get(row.get(j).fileIdx);
                            List<VepPrediction> vepPredictions2 = predtuple2.vepPredictionParser.getPredictions(ctx2);
                            List<SnpEffPrediction> snpEffPredictions2 = predtuple2.snpEffPredictionParser.getPredictions(ctx2);
                            Set<SequenceOntologyTree.Term> so_vep_2 = getVepSoTerms(predtuple2.vepPredictionParser, ctx2);
                            Set<SequenceOntologyTree.Term> so_snpeff_2 = getSnpEffSoTerms(predtuple2.snpEffPredictionParser, ctx2);
                            if (vepPredictions1.size() != vepPredictions2.size()) {
                                startLine(out, ctx);
                                out.print("\tVEP discordant transcripts count");
                                out.print("\t" + input1.filename + ":" + vepPredictions1.size());
                                out.print("\t" + input2.filename + ":" + vepPredictions2.size());
                                out.println();
                                printed = true;
                            }
                            if (snpEffPredictions1.size() != snpEffPredictions2.size()) {
                                startLine(out, ctx);
                                out.print("\tSNPEFF discordant transcripts count");
                                out.print("\t" + input1.filename + ":" + snpEffPredictions1.size());
                                out.print("\t" + input2.filename + ":" + snpEffPredictions2.size());
                                out.println();
                                printed = true;
                            }
                            if (!unshared(so_vep_1, so_vep_2).isEmpty()) {
                                startLine(out, ctx);
                                out.print("\tVEP discordant SO:terms");
                                printDiscordantSO(out, input1, so_vep_1, input2, so_vep_2);
                                printed = true;
                            }
                            if (!unshared(so_snpeff_1, so_snpeff_2).isEmpty()) {
                                startLine(out, ctx);
                                out.print("\tSNPEFF discordant SO:terms");
                                printDiscordantSO(out, input1, so_snpeff_1, input2, so_snpeff_2);
                                printed = true;
                            }
                        }
                    }
                    if (!printed) {
                        startLine(out, ctx);
                        out.println("\tPASS");
                    }
                    row.clear();
                }
                if (rec == null)
                    break;
            }
            row.add(rec);
        }
        iter.close();
        out.flush();
        out.close();
        out = null;
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
        try {
            if (variants != null)
                variants.cleanup();
        } catch (Exception err) {
        }
    }
}
Also used : PrintWriter(java.io.PrintWriter) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SortingCollection(htsjdk.samtools.util.SortingCollection) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) SnpEffPrediction(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParser.SnpEffPrediction) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SnpEffPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParserFactory) List(java.util.List) SequenceOntologyTree(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree) VepPrediction(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction) VariantContext(htsjdk.variant.variantcontext.VariantContext) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) Comparator(java.util.Comparator) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory) SnpEffPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParser) CloserUtil(htsjdk.samtools.util.CloserUtil) VepPrediction(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) PrintWriter(java.io.PrintWriter) SnpEffPrediction(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParser.SnpEffPrediction) SnpEffPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParserFactory) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Example 4 with VepPredictionParserFactory

use of com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory in project jvarkit by lindenb.

the class VcfTools method init.

public void init(final VCFHeader header) {
    this.header = header;
    this.snpEffPredictionParser = new SnpEffPredictionParserFactory().header(header).get();
    this.vepPredictionParser = new VepPredictionParserFactory().header(header).get();
    this.annPredictionParser = new AnnPredictionParserFactory(header).get();
}
Also used : AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) SnpEffPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParserFactory) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Example 5 with VepPredictionParserFactory

use of com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory in project jvarkit by lindenb.

the class VcfBurden method doWork2.

private int doWork2(List<String> args) {
    ZipOutputStream zout = null;
    FileOutputStream fout = null;
    VcfIterator in = null;
    try {
        if (args.isEmpty()) {
            LOG.info("reading from stdin.");
            in = VCFUtils.createVcfIteratorStdin();
        } else if (args.size() == 1) {
            String filename = args.get(0);
            LOG.info("reading from " + filename);
            in = VCFUtils.createVcfIterator(filename);
        } else {
            LOG.error("Illegal number of arguments.");
            return -1;
        }
        if (outputFile == null) {
            LOG.error("undefined output");
            return -1;
        } else if (!outputFile.getName().endsWith(".zip")) {
            LOG.error("output " + outputFile + " should end with .zip");
            return -1;
        } else {
            fout = new FileOutputStream(outputFile);
            zout = new ZipOutputStream(fout);
        }
        final List<String> samples = in.getHeader().getSampleNamesInOrder();
        final VCFHeader header = in.getHeader();
        String prev_chrom = null;
        final VepPredictionParser vepPredParser = new VepPredictionParserFactory().header(header).get();
        final Map<GeneTranscript, List<VariantAndCsq>> gene2variants = new HashMap<>();
        final SequenceOntologyTree soTree = SequenceOntologyTree.getInstance();
        final Set<SequenceOntologyTree.Term> acn = new HashSet<>();
        /* mail solena  *SO en remplacement des SO actuels (VEP HIGH + MODERATE) - pas la peine de faire retourner les analyses mais servira pour les futures analyses burden */
        String[] acn_list = new String[] { "SO:0001893", "SO:0001574", "SO:0001575", "SO:0001587", "SO:0001589", "SO:0001578", "SO:0002012", "SO:0001889", "SO:0001821", "SO:0001822", "SO:0001583", "SO:0001818" /*
					"SO:0001589", "SO:0001587", "SO:0001582", "SO:0001583",
					"SO:0001575", "SO:0001578", "SO:0001574", "SO:0001889",
					"SO:0001821", "SO:0001822", "SO:0001893"*/
        };
        if (this.highdamage) {
            acn_list = new String[] { "SO:0001893", "SO:0001574", "SO:0001575", "SO:0001587", "SO:0001589", "SO:0001578", "SO:0002012", "SO:0001889" };
        }
        for (final String acns : acn_list) {
            final SequenceOntologyTree.Term tacn = soTree.getTermByAcn(acns);
            if (tacn == null) {
                in.close();
                throw new NullPointerException("tacn == null pour " + acns);
            }
            acn.addAll(tacn.getAllDescendants());
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
        for (; ; ) {
            VariantContext ctx1 = null;
            if (in.hasNext()) {
                ctx1 = progress.watch(in.next());
                if (ctx1.getAlternateAlleles().size() != 1) {
                    // info("count(ALT)!=1 in "+ctx1.getChr()+":"+ctx1.getStart());
                    continue;
                }
                if (ctx1.getAlternateAlleles().get(0).equals(Allele.SPAN_DEL)) {
                    continue;
                }
            }
            if (ctx1 == null || !ctx1.getContig().equals(prev_chrom)) {
                LOG.info("DUMP to zip n=" + gene2variants.size());
                final Set<String> geneNames = new HashSet<>();
                for (GeneTranscript gene_transcript : gene2variants.keySet()) {
                    geneNames.add(gene_transcript.geneName);
                    final Set<VariantAndCsq> uniq = new TreeSet<>(this.variantAndCsqComparator);
                    uniq.addAll(gene2variants.get(gene_transcript));
                    dumpVariants(zout, prev_chrom, gene_transcript.geneName + "_" + gene_transcript.transcriptName, samples, new ArrayList<VariantAndCsq>(uniq));
                    LOG.info("dumped" + gene_transcript.geneName);
                }
                LOG.info("loop over geneName");
                for (final String geneName : geneNames) {
                    final SortedSet<VariantAndCsq> lis_nm = new TreeSet<>(this.variantAndCsqComparator);
                    final SortedSet<VariantAndCsq> lis_all = new TreeSet<>(this.variantAndCsqComparator);
                    final SortedSet<VariantAndCsq> lis_refseq = new TreeSet<>(this.variantAndCsqComparator);
                    final SortedSet<VariantAndCsq> lis_enst = new TreeSet<>(this.variantAndCsqComparator);
                    LOG.info("loop over gene2variants");
                    for (final GeneTranscript gene_transcript : gene2variants.keySet()) {
                        if (!geneName.equals(gene_transcript.geneName))
                            continue;
                        lis_all.addAll(gene2variants.get(gene_transcript));
                        if (gene_transcript.transcriptName.startsWith("NM_")) {
                            lis_nm.addAll(gene2variants.get(gene_transcript));
                        }
                        if (!gene_transcript.transcriptName.startsWith("ENST")) {
                            lis_refseq.addAll(gene2variants.get(gene_transcript));
                        }
                        if (gene_transcript.transcriptName.startsWith("ENST")) {
                            lis_enst.addAll(gene2variants.get(gene_transcript));
                        }
                    }
                    LOG.info("dump_ALL_TRANSCRIPTS");
                    dumpVariants(zout, prev_chrom, geneName + "_ALL_TRANSCRIPTS", samples, new ArrayList<VariantAndCsq>(lis_all));
                    LOG.info("dump_ALL_NM");
                    dumpVariants(zout, prev_chrom, geneName + "_ALL_NM", samples, new ArrayList<VariantAndCsq>(lis_nm));
                    LOG.info("dump_ALL_REFSEQ");
                    dumpVariants(zout, prev_chrom, geneName + "_ALL_REFSEQ", samples, new ArrayList<VariantAndCsq>(lis_refseq));
                    LOG.info("dump_ALL_ENST");
                    dumpVariants(zout, prev_chrom, geneName + "_ALL_ENST", samples, new ArrayList<VariantAndCsq>(lis_enst));
                }
                if (ctx1 == null)
                    break;
                LOG.info("gene2variants");
                gene2variants.clear();
                LOG.info("System.gc();");
                System.gc();
                prev_chrom = ctx1.getContig();
                LOG.info("prev_chrom=" + prev_chrom);
            }
            final Set<GeneTranscript> seen_names = new HashSet<>();
            for (final VepPredictionParser.VepPrediction pred : vepPredParser.getPredictions(ctx1)) {
                String geneName = pred.getSymbol();
                if (geneName == null || geneName.trim().isEmpty())
                    continue;
                if (this._gene2seen != null) {
                    if (!this._gene2seen.containsKey(geneName))
                        continue;
                }
                final String transcriptName = pred.getFeature();
                if (transcriptName == null || transcriptName.trim().isEmpty()) {
                    LOG.info("No transcript in " + ctx1);
                    continue;
                }
                final GeneTranscript geneTranscript = new GeneTranscript(geneName, transcriptName);
                if (seen_names.contains(geneTranscript))
                    continue;
                boolean ok = false;
                for (SequenceOntologyTree.Term so : pred.getSOTerms()) {
                    if (acn.contains(so)) {
                        ok = true;
                    }
                }
                if (!printSOTerms && !ok)
                    continue;
                List<VariantAndCsq> L = gene2variants.get(geneTranscript);
                if (L == null) {
                    L = new ArrayList<>();
                    gene2variants.put(geneTranscript, L);
                }
                Float vqslod = null;
                if (this.printVQSLOD && ctx1.hasAttribute("VQSLOD")) {
                    vqslod = (float) ctx1.getAttributeAsDouble("VQSLOD", -9999999.0);
                }
                L.add(new VariantAndCsq(ctx1, pred.getSOTerms(), pred.getPositionInCDS(), vqslod));
                seen_names.add(geneTranscript);
                if (this._gene2seen != null) {
                    this._gene2seen.put(geneTranscript.geneName, Boolean.TRUE);
                }
            }
        }
        if (this._gene2seen != null) {
            final List<VariantAndCsq> emptylist = Collections.emptyList();
            for (final String gene : this._gene2seen.keySet()) {
                if (this._gene2seen.get(gene).equals(Boolean.TRUE))
                    continue;
                LOG.warning("Gene not found : " + gene);
                dumpVariants(zout, "UNDEFINED", gene + "_000000000000000.txt", samples, emptylist);
            }
        }
        progress.finish();
        zout.finish();
        fout.flush();
        zout.flush();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
        CloserUtil.close(zout);
        CloserUtil.close(fout);
    }
}
Also used : HashMap(java.util.HashMap) VariantContext(htsjdk.variant.variantcontext.VariantContext) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) List(java.util.List) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) IOException(java.io.IOException) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) ZipOutputStream(java.util.zip.ZipOutputStream) FileOutputStream(java.io.FileOutputStream) SequenceOntologyTree(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Aggregations

VepPredictionParserFactory (com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)12 VariantContext (htsjdk.variant.variantcontext.VariantContext)10 VepPredictionParser (com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser)9 VCFHeader (htsjdk.variant.vcf.VCFHeader)9 ArrayList (java.util.ArrayList)8 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)7 HashSet (java.util.HashSet)7 IOException (java.io.IOException)6 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)5 AnnPredictionParserFactory (com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory)5 PrintWriter (java.io.PrintWriter)5 List (java.util.List)5 Set (java.util.Set)5 Program (com.github.lindenb.jvarkit.util.jcommander.Program)4 Logger (com.github.lindenb.jvarkit.util.log.Logger)4 SnpEffPredictionParserFactory (com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParserFactory)4 VepPrediction (com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction)4 Genotype (htsjdk.variant.variantcontext.Genotype)4 File (java.io.File)4 Parameter (com.beust.jcommander.Parameter)3