Search in sources :

Example 56 with Allele

use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.

the class VcfStage method buildAlleleTable.

/**
 * build INFO table
 */
private TableView<Allele> buildAlleleTable() {
    final TableView<Allele> table = new TableView<>();
    table.getColumns().add(makeColumn("REF", A -> A.isReference() ? "*" : null));
    table.getColumns().add(makeColumn("Sym.", A -> A.isSymbolic() ? "*" : null));
    table.getColumns().add(makeColumn("Bases.", A -> allele2stringConverter.apply(A)));
    table.getColumns().add(makeColumn("Length.", A -> {
        if (A.isSymbolic())
            return (Integer) null;
        return A.length();
    }));
    table.setPlaceholder(new Label("No Allele."));
    return table;
}
Also used : Arrays(java.util.Arrays) VCFHeader(htsjdk.variant.vcf.VCFHeader) ChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.ChartFactory) VariantContextChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.VariantContextChartFactory) ScrollPane(javafx.scene.control.ScrollPane) TabPane(javafx.scene.control.TabPane) ReadOnlyObjectWrapper(javafx.beans.property.ReadOnlyObjectWrapper) VariantDepthChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.VariantDepthChartFactory) Map(java.util.Map) AlleleFrequencyChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.AlleleFrequencyChartFactory) CloserUtil(htsjdk.samtools.util.CloserUtil) Rectangle2D(javafx.geometry.Rectangle2D) SplitPane(javafx.scene.control.SplitPane) PropertyValueFactory(javafx.scene.control.cell.PropertyValueFactory) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) GraphicsContext(javafx.scene.canvas.GraphicsContext) AFByPopulationChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.AFByPopulationChartFactory) TiTvChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.TiTvChartFactory) Set(java.util.Set) Screen(javafx.stage.Screen) CellDataFeatures(javafx.scene.control.TableColumn.CellDataFeatures) ArcType(javafx.scene.shape.ArcType) Separator(javafx.scene.control.Separator) PieChart(javafx.scene.chart.PieChart) BooleanProperty(javafx.beans.property.BooleanProperty) FlowPane(javafx.scene.layout.FlowPane) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) CheckBoxTableCell(javafx.scene.control.cell.CheckBoxTableCell) ObservableList(javafx.collections.ObservableList) BorderPane(javafx.scene.layout.BorderPane) Genotype(htsjdk.variant.variantcontext.Genotype) CloseableIterator(htsjdk.samtools.util.CloseableIterator) OutputType(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder.OutputType) FXCollections(javafx.collections.FXCollections) TextFlow(javafx.scene.text.TextFlow) Supplier(java.util.function.Supplier) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) TabClosingPolicy(javafx.scene.control.TabPane.TabClosingPolicy) SequenceOntologyTree(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree) VCFConstants(htsjdk.variant.vcf.VCFConstants) Counter(com.github.lindenb.jvarkit.util.Counter) Color(javafx.scene.paint.Color) CheckBox(javafx.scene.control.CheckBox) IOException(java.io.IOException) AFBySexChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.AFBySexChartFactory) File(java.io.File) Menu(javafx.scene.control.Menu) FileChooser(javafx.stage.FileChooser) Tab(javafx.scene.control.Tab) CompiledScript(javax.script.CompiledScript) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) ObservableValue(javafx.beans.value.ObservableValue) VariantTypeChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.VariantTypeChartFactory) EventHandler(javafx.event.EventHandler) Button(javafx.scene.control.Button) Allele(htsjdk.variant.variantcontext.Allele) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VBox(javafx.scene.layout.VBox) AlertType(javafx.scene.control.Alert.AlertType) ContextMenu(javafx.scene.control.ContextMenu) WindowEvent(javafx.stage.WindowEvent) TableView(javafx.scene.control.TableView) Orientation(javafx.geometry.Orientation) Alert(javafx.scene.control.Alert) HBox(javafx.scene.layout.HBox) TextField(javafx.scene.control.TextField) PatternSyntaxException(java.util.regex.PatternSyntaxException) MenuItem(javafx.scene.control.MenuItem) Predicate(java.util.function.Predicate) VariantQualChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.VariantQualChartFactory) GenotypeType(htsjdk.variant.variantcontext.GenotypeType) Font(javafx.scene.text.Font) Collectors(java.util.stream.Collectors) SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) Text(javafx.scene.text.Text) List(java.util.List) Paint(javafx.scene.paint.Paint) Term(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree.Term) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Optional(java.util.Optional) VariantContext(htsjdk.variant.variantcontext.VariantContext) Pattern(java.util.regex.Pattern) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Scene(javafx.scene.Scene) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) ButtonType(javafx.scene.control.ButtonType) Function(java.util.function.Function) TableColumn(javafx.scene.control.TableColumn) Interval(htsjdk.samtools.util.Interval) Insets(javafx.geometry.Insets) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory) Callback(javafx.util.Callback) GenotypeTypeChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.GenotypeTypeChartFactory) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) Label(javafx.scene.control.Label) ActionEvent(javafx.event.ActionEvent) SimpleBooleanProperty(javafx.beans.property.SimpleBooleanProperty) SpinnerValueFactory(javafx.scene.control.SpinnerValueFactory) ExtensionFilter(javafx.stage.FileChooser.ExtensionFilter) Collections(java.util.Collections) Allele(htsjdk.variant.variantcontext.Allele) Label(javafx.scene.control.Label) TableView(javafx.scene.control.TableView)

Example 57 with Allele

use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.

the class AFBySexChartFactory method visit.

@Override
public void visit(final VariantContext ctx) {
    for (final Allele alt : ctx.getAlternateAlleles()) {
        final MafCalculator[] mafCalculators = new MafCalculator[PedFile.Sex.values().length];
        for (int i = 0; i < mafCalculators.length; ++i) {
            mafCalculators[i] = new MafCalculator(alt, ctx.getContig());
        }
        for (final Genotype gt : ctx.getGenotypes()) {
            final PedFile.Sample sample = getPedigree().get(gt.getSampleName());
            if (sample == null) {
                mafCalculators[PedFile.Sex.Unknown.ordinal()].add(gt, false);
            } else {
                mafCalculators[sample.getSex().ordinal()].add(gt, sample.isMale());
            }
        }
        for (int i = 0; i < mafCalculators.length; ++i) {
            if (mafCalculators[i].isEmpty())
                continue;
            final Total total = popcount.get(i);
            total.num_maf++;
            total.sum += mafCalculators[i].getMaf();
        }
    }
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) PedFile(com.github.lindenb.jvarkit.tools.vcfviewgui.PedFile) MafCalculator(com.github.lindenb.jvarkit.tools.burden.MafCalculator) Genotype(htsjdk.variant.variantcontext.Genotype)

Example 58 with Allele

use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.

the class VcfToSql method read.

private void read(File filename) throws IOException {
    /* insert ATGC */
    this.alleleTable.insert(outputWriter, null, "A");
    this.alleleTable.insert(outputWriter, null, "C");
    this.alleleTable.insert(outputWriter, null, "G");
    this.alleleTable.insert(outputWriter, null, "T");
    /* insert this sample */
    this.vcfFileTable.insert(outputWriter, null, filename);
    final SelectStmt vcffile_id = new SelectStmt(this.vcfFileTable);
    final Map<String, SelectStmt> sample2sampleid = new HashMap<String, SelectStmt>();
    final Map<String, SelectStmt> filter2filterid = new HashMap<String, SelectStmt>();
    final Map<String, SelectStmt> chrom2chromId = new HashMap<String, SelectStmt>();
    final VcfIterator r = VCFUtils.createVcfIteratorFromFile(filename);
    final VCFHeader header = r.getHeader();
    /* parse samples */
    for (final String sampleName : header.getSampleNamesInOrder()) {
        this.sampleTable.insert(outputWriter, null, sampleName);
        SelectStmt sample_id = new SelectStmt(this.sampleTable, "name", sampleName);
        sample2sampleid.put(sampleName, sample_id);
        this.sample2fileTable.insert(outputWriter, null, vcffile_id, sample_id);
    }
    /* parse filters */
    for (final VCFFilterHeaderLine filter : header.getFilterLines()) {
        this.filterTable.insert(outputWriter, null, vcffile_id, filter.getID(), filter.getValue());
        filter2filterid.put(filter.getID(), new SelectStmt(this.filterTable, "name", filter.getID()));
    }
    filter2filterid.put(VCFConstants.PASSES_FILTERS_v4, new SelectStmt(this.filterTable, "name", VCFConstants.PASSES_FILTERS_v4));
    final SAMSequenceDictionary dict = header.getSequenceDictionary();
    if (dict == null) {
        throw new RuntimeException("dictionary missing in VCF");
    }
    /* parse sequence dict */
    for (final SAMSequenceRecord ssr : dict.getSequences()) {
        this.chromosomeTable.insert(outputWriter, null, vcffile_id, ssr.getSequenceName(), ssr.getSequenceLength());
        chrom2chromId.put(ssr.getSequenceName(), new SelectStmt(this.chromosomeTable, "name", ssr.getSequenceName()));
    }
    VepPredictionParser vepPredictionParser = new VepPredictionParserFactory(header).get();
    SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
    int nVariants = 0;
    while (r.hasNext()) {
        if (this.outputWriter.checkError())
            break;
        VariantContext var = progress.watch(r.next());
        ++nVariants;
        /* insert ref allele */
        this.alleleTable.insert(outputWriter, null, var.getReference().getBaseString());
        /* insert variant */
        this.variantTable.insert(outputWriter, null, vcffile_id, nVariants, chrom2chromId.get(var.getContig()), var.getStart(), (var.hasID() ? var.getID() : null), new SelectStmt(this.alleleTable, "bases", var.getReference().getBaseString()), (var.hasLog10PError() ? var.getPhredScaledQual() : null));
        SelectStmt variant_id = new SelectStmt(variantTable);
        /* insert alternate alleles */
        for (Allele alt : var.getAlternateAlleles()) {
            /* insert alt allele */
            this.alleleTable.insert(outputWriter, null, alt.getBaseString());
            this.variant2altTable.insert(outputWriter, null, variant_id, new SelectStmt(this.alleleTable, "bases", alt.getBaseString()));
        }
        /* insert filters */
        for (final String filter : var.getFilters()) {
            if (filter2filterid.get(filter) == null) {
                throw new IOException("VCF Error: filter " + filter + " is not defined in the VCF header.");
            }
            this.variant2filters.insert(outputWriter, null, variant_id, filter2filterid.get(filter));
        }
        if (!this.ignore_info) {
            for (final VepPrediction pred : vepPredictionParser.getPredictions(var)) {
            /*
					vepPrediction.insert(
							outputWriter,
							null,
							variant_id,
							pred.getEnsemblGene(),
							pred.getEnsemblTranscript(),
							pred.getEnsemblProtein(),
							pred.getSymbol()
							);
					SelectStmt pred_id = new SelectStmt(vepPrediction);
			
					for(SequenceOntologyTree.Term t: pred.getSOTerms())
						{
						String term=t.getAcn().replace(':', '_');
						soTermTable.insert(
								outputWriter,
								null,
								term,
								t.getAcn()
								);//for bioportal compatibility
						SelectStmt term_id = new SelectStmt(soTermTable,"acn",term);
						
						vepPrediction2so.insert(
							outputWriter,
							null,
							pred_id,
							term_id
							);
						}
					*/
            }
        }
        /* insert genotypes */
        for (final String sampleName : sample2sampleid.keySet()) {
            final Genotype g = var.getGenotype(sampleName);
            if (!g.isAvailable() || g.isNoCall())
                continue;
            genotypeTable.insert(outputWriter, null, variant_id, sample2sampleid.get(sampleName), g.isCalled() ? new SelectStmt(this.alleleTable, "bases", g.getAllele(0).getBaseString()) : null, g.isCalled() ? new SelectStmt(this.alleleTable, "bases", g.getAllele(1).getBaseString()) : null, g.hasDP() ? g.getDP() : null, g.hasGQ() ? g.getGQ() : null);
        }
    }
    r.close();
}
Also used : VepPrediction(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Allele(htsjdk.variant.variantcontext.Allele) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Example 59 with Allele

use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.

the class VCFPredictions method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator r, VariantContextWriter w) {
    ReferenceContig genomicSequence = null;
    try {
        LOG.info("opening REF:" + this.referenceGenomeSource);
        this.referenceGenome = new ReferenceGenomeFactory().open(this.referenceGenomeSource);
        loadKnownGenesFromUri();
        final VCFHeader header = (VCFHeader) r.getHeader();
        final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(this.referenceGenome.getDictionary());
        contigNameConverter.setOnNotFound(OnNotFound.SKIP);
        final VCFHeader h2 = new VCFHeader(header);
        addMetaData(h2);
        switch(this.outputSyntax) {
            case Vep:
                {
                    h2.addMetaDataLine(new VCFInfoHeaderLine("CSQ", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Consequence type as predicted by VEP" + ". Format: Allele|Feature|Feature_type|Consequence|CDS_position|Protein_position|Amino_acids|Codons"));
                    break;
                }
            case SnpEff:
                {
                    h2.addMetaDataLine(new VCFInfoHeaderLine("ANN", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Functional annotations: 'Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO'"));
                    break;
                }
            default:
                {
                    final StringBuilder format = new StringBuilder();
                    for (FORMAT1 f : FORMAT1.values()) {
                        if (format.length() > 0)
                            format.append("|");
                        format.append(f.name());
                    }
                    h2.addMetaDataLine(new VCFInfoHeaderLine(TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Prediction from " + getClass().getSimpleName() + ". Format: " + format));
                    break;
                }
        }
        w.writeHeader(h2);
        final SequenceOntologyTree soTree = SequenceOntologyTree.getInstance();
        final SequenceOntologyTree.Term so_intron = soTree.getTermByAcn("SO:0001627");
        final SequenceOntologyTree.Term so_exon = soTree.getTermByAcn("SO:0001791");
        final SequenceOntologyTree.Term so_splice_donor = soTree.getTermByAcn("SO:0001575");
        final SequenceOntologyTree.Term so_splice_acceptor = soTree.getTermByAcn("SO:0001574");
        final SequenceOntologyTree.Term so_5_prime_UTR_variant = soTree.getTermByAcn("SO:0001623");
        final SequenceOntologyTree.Term so_3_prime_UTR_variant = soTree.getTermByAcn("SO:0001624");
        final SequenceOntologyTree.Term so_splicing_variant = soTree.getTermByAcn("SO:0001568");
        final SequenceOntologyTree.Term so_stop_lost = soTree.getTermByAcn("SO:0001578");
        final SequenceOntologyTree.Term so_stop_gained = soTree.getTermByAcn("SO:0001587");
        final SequenceOntologyTree.Term so_coding_synonymous = soTree.getTermByAcn("SO:0001819");
        final SequenceOntologyTree.Term so_coding_non_synonymous = soTree.getTermByAcn("SO:0001583");
        final SequenceOntologyTree.Term so_intergenic = soTree.getTermByAcn("SO:0001628");
        final SequenceOntologyTree.Term so_nc_transcript_variant = soTree.getTermByAcn("SO:0001619");
        final SequenceOntologyTree.Term so_non_coding_exon_variant = soTree.getTermByAcn("SO:0001792");
        final SequenceOntologyTree.Term _2KB_upstream_variant = soTree.getTermByAcn("SO:0001636");
        final SequenceOntologyTree.Term _5KB_upstream_variant = soTree.getTermByAcn("SO:0001635");
        final SequenceOntologyTree.Term _5KB_downstream_variant = soTree.getTermByAcn("SO:0001633");
        final SequenceOntologyTree.Term _500bp_downstream_variant = soTree.getTermByAcn("SO:0001634");
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        while (r.hasNext()) {
            final VariantContext ctx = progress.watch(r.next());
            final String normalizedContig = contigNameConverter.apply(ctx.getContig());
            final List<KnownGene> genes = new ArrayList<>();
            if (!StringUtil.isBlank(normalizedContig)) {
                for (final List<KnownGene> l2 : this.knownGenes.getOverlapping(new Interval(normalizedContig, ctx.getStart(), // 1-based
                ctx.getEnd()))) {
                    genes.addAll(l2);
                }
            }
            final List<Annotation> ctx_annotations = new ArrayList<Annotation>();
            if (genes == null || genes.isEmpty()) {
                // intergenic
                Annotation a = new Annotation();
                a.seqont.add(so_intergenic);
                ctx_annotations.add(a);
            } else {
                if (genomicSequence == null || !genomicSequence.hasName(normalizedContig)) {
                    LOG.info("getting genomic Sequence for " + normalizedContig);
                    genomicSequence = this.referenceGenome.getContig(normalizedContig);
                    if (genomicSequence == null)
                        throw new JvarkitException.ContigNotFoundInDictionary(normalizedContig, this.referenceGenome.getDictionary());
                }
                for (final KnownGene gene : genes) {
                    final GeneticCode geneticCode = GeneticCode.getStandard();
                    for (final Allele alt2 : ctx.getAlternateAlleles()) {
                        if (alt2.isNoCall())
                            continue;
                        if (alt2.isSymbolic()) {
                            LOG.warn("symbolic allele are not handled... " + alt2.getDisplayString());
                            continue;
                        }
                        if (alt2.isReference())
                            continue;
                        final Annotation annotations = new Annotation();
                        annotations.kg = gene;
                        annotations.alt2 = alt2;
                        if (gene.isNonCoding()) {
                            annotations.seqont.add(so_nc_transcript_variant);
                            continue;
                        }
                        ctx_annotations.add(annotations);
                        StringBuilder wildRNA = null;
                        ProteinCharSequence wildProt = null;
                        ProteinCharSequence mutProt = null;
                        MutedSequence mutRNA = null;
                        int position_in_cds = -1;
                        final int position = ctx.getStart() - 1;
                        if (!String.valueOf(genomicSequence.charAt(position)).equalsIgnoreCase(ctx.getReference().getBaseString())) {
                            if (isSimpleBase(ctx.getReference())) {
                                LOG.warn("Warning REF!=GENOMIC SEQ!!! at " + position + "/" + ctx.getReference());
                            }
                            continue;
                        }
                        if (gene.isPositiveStrand()) {
                            if (position < gene.getTxStart() - 2000) {
                                annotations.seqont.add(_5KB_upstream_variant);
                            } else if (position < gene.getTxStart()) {
                                annotations.seqont.add(_2KB_upstream_variant);
                            } else if (position >= gene.getTxEnd() + 500) {
                                annotations.seqont.add(_5KB_downstream_variant);
                            } else if (position >= gene.getTxEnd()) {
                                annotations.seqont.add(_500bp_downstream_variant);
                            } else if (position < gene.getCdsStart()) {
                                // UTR5
                                annotations.seqont.add(so_5_prime_UTR_variant);
                            } else if (gene.getCdsEnd() <= position) {
                                annotations.seqont.add(so_3_prime_UTR_variant);
                            } else {
                                int exon_index = 0;
                                while (exon_index < gene.getExonCount()) {
                                    final KnownGene.Exon exon = gene.getExon(exon_index);
                                    for (int i = exon.getStart(); i < exon.getEnd(); ++i) {
                                        if (i == position) {
                                            annotations.exon_name = exon.getName();
                                            if (exon.isNonCoding()) {
                                                annotations.seqont.add(so_non_coding_exon_variant);
                                            }
                                        }
                                        if (i < gene.getTxStart())
                                            continue;
                                        if (i < gene.getCdsStart())
                                            continue;
                                        if (i >= gene.getCdsEnd())
                                            break;
                                        if (wildRNA == null) {
                                            wildRNA = new StringBuilder();
                                            mutRNA = new MutedSequence(wildRNA);
                                        }
                                        if (i == position) {
                                            annotations.seqont.add(so_exon);
                                            annotations.exon_name = exon.getName();
                                            position_in_cds = wildRNA.length();
                                            annotations.position_cds = position_in_cds;
                                            // in splicing ?
                                            if (exon.isSplicing(position)) {
                                                if (exon.isSplicingAcceptor(position)) {
                                                    // SPLICING_ACCEPTOR
                                                    annotations.seqont.add(so_splice_acceptor);
                                                } else if (exon.isSplicingDonor(position)) {
                                                    // SPLICING_DONOR
                                                    annotations.seqont.add(so_splice_donor);
                                                } else // ??
                                                {
                                                    annotations.seqont.add(so_splicing_variant);
                                                }
                                            }
                                        }
                                        wildRNA.append(genomicSequence.charAt(i));
                                        if (i == position && isSimpleBase(alt2) && isSimpleBase(ctx.getReference())) {
                                            mutRNA.put(position_in_cds, alt2.getBaseString().charAt(0));
                                        }
                                        if (wildRNA.length() % 3 == 0 && wildRNA.length() > 0 && wildProt == null) {
                                            wildProt = new ProteinCharSequence(geneticCode, wildRNA);
                                            mutProt = new ProteinCharSequence(geneticCode, mutRNA);
                                        }
                                    }
                                    final KnownGene.Intron intron = exon.getNextIntron();
                                    if (intron != null && intron.contains(position)) {
                                        annotations.intron_name = intron.getName();
                                        annotations.seqont.add(so_intron);
                                        if (intron.isSplicing(position)) {
                                            if (intron.isSplicingAcceptor(position)) {
                                                annotations.seqont.add(so_splice_acceptor);
                                            } else if (intron.isSplicingDonor(position)) {
                                                annotations.seqont.add(so_splice_donor);
                                            } else // ???
                                            {
                                                annotations.seqont.add(so_splicing_variant);
                                            }
                                        }
                                    }
                                    ++exon_index;
                                }
                            }
                        } else // reverse orientation
                        {
                            if (position >= gene.getTxEnd() + 2000) {
                                annotations.seqont.add(_5KB_upstream_variant);
                            } else if (position >= gene.getTxEnd()) {
                                annotations.seqont.add(_2KB_upstream_variant);
                            } else if (position < gene.getTxStart() - 500) {
                                annotations.seqont.add(_5KB_downstream_variant);
                            } else if (position < gene.getTxStart()) {
                                annotations.seqont.add(_500bp_downstream_variant);
                            } else if (position < gene.getCdsStart()) {
                                annotations.seqont.add(so_3_prime_UTR_variant);
                            } else if (gene.getCdsEnd() <= position) {
                                annotations.seqont.add(so_5_prime_UTR_variant);
                            } else {
                                int exon_index = gene.getExonCount() - 1;
                                while (exon_index >= 0) {
                                    final KnownGene.Exon exon = gene.getExon(exon_index);
                                    for (int i = exon.getEnd() - 1; i >= exon.getStart(); --i) {
                                        if (i == position) {
                                            annotations.exon_name = exon.getName();
                                            if (exon.isNonCoding()) {
                                                annotations.seqont.add(so_non_coding_exon_variant);
                                            }
                                        }
                                        if (i >= gene.getCdsEnd())
                                            continue;
                                        if (i < gene.getCdsStart())
                                            break;
                                        if (wildRNA == null) {
                                            wildRNA = new StringBuilder();
                                            mutRNA = new MutedSequence(wildRNA);
                                        }
                                        if (i == position) {
                                            annotations.seqont.add(so_exon);
                                            position_in_cds = wildRNA.length();
                                            annotations.position_cds = position_in_cds;
                                            // in splicing ?
                                            if (exon.isSplicing(position)) {
                                                if (exon.isSplicingAcceptor(position)) {
                                                    annotations.seqont.add(so_splice_acceptor);
                                                } else if (exon.isSplicingDonor(position)) {
                                                    annotations.seqont.add(so_splice_donor);
                                                } else // ?
                                                {
                                                    annotations.seqont.add(so_splicing_variant);
                                                }
                                            }
                                            if (isSimpleBase(alt2) && isSimpleBase(ctx.getReference())) {
                                                mutRNA.put(position_in_cds, AcidNucleics.complement(alt2.getBaseString().charAt(0)));
                                            }
                                        }
                                        wildRNA.append(AcidNucleics.complement(genomicSequence.charAt(i)));
                                        if (wildRNA.length() % 3 == 0 && wildRNA.length() > 0 && wildProt == null) {
                                            wildProt = new ProteinCharSequence(geneticCode, wildRNA);
                                            mutProt = new ProteinCharSequence(geneticCode, mutRNA);
                                        }
                                    }
                                    final KnownGene.Intron intron = exon.getPrevIntron();
                                    if (intron != null && intron.contains(position)) {
                                        annotations.intron_name = intron.getName();
                                        annotations.seqont.add(so_intron);
                                        if (intron.isSplicing(position)) {
                                            if (intron.isSplicingAcceptor(position)) {
                                                annotations.seqont.add(so_splice_acceptor);
                                            } else if (intron.isSplicingDonor(position)) {
                                                annotations.seqont.add(so_splice_donor);
                                            } else // ?
                                            {
                                                annotations.seqont.add(so_splicing_variant);
                                            }
                                        }
                                    }
                                    --exon_index;
                                }
                            }
                        }
                        if (isSimpleBase(alt2) && isSimpleBase(ctx.getReference()) && wildProt != null && mutProt != null && position_in_cds >= 0) {
                            final int pos_aa = position_in_cds / 3;
                            final int mod = position_in_cds % 3;
                            annotations.wildCodon = ("" + wildRNA.charAt(position_in_cds - mod + 0) + wildRNA.charAt(position_in_cds - mod + 1) + wildRNA.charAt(position_in_cds - mod + 2));
                            annotations.mutCodon = ("" + mutRNA.charAt(position_in_cds - mod + 0) + mutRNA.charAt(position_in_cds - mod + 1) + mutRNA.charAt(position_in_cds - mod + 2));
                            annotations.position_protein = (pos_aa + 1);
                            annotations.wildAA = String.valueOf(wildProt.charAt(pos_aa));
                            annotations.mutAA = (String.valueOf(mutProt.charAt(pos_aa)));
                            annotations.seqont.remove(so_exon);
                            if (isStop(wildProt.charAt(pos_aa)) && !isStop(mutProt.charAt(pos_aa))) {
                                annotations.seqont.add(so_stop_lost);
                            } else if (!isStop(wildProt.charAt(pos_aa)) && isStop(mutProt.charAt(pos_aa))) {
                                annotations.seqont.add(so_stop_gained);
                            } else if (wildProt.charAt(pos_aa) == mutProt.charAt(pos_aa)) {
                                annotations.seqont.add(so_coding_synonymous);
                            } else {
                                annotations.seqont.add(so_coding_non_synonymous);
                            }
                        }
                    }
                }
            }
            final Set<String> info = new HashSet<String>(ctx_annotations.size());
            for (final Annotation a : ctx_annotations) {
                info.add(a.toString());
            }
            final VariantContextBuilder vb = new VariantContextBuilder(ctx);
            final String thetag;
            switch(this.outputSyntax) {
                case Vep:
                    thetag = "CSQ";
                    break;
                case SnpEff:
                    thetag = "ANN";
                    break;
                default:
                    thetag = TAG;
                    break;
            }
            vb.attribute(thetag, info.toArray());
            w.add(vb.make());
        }
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.referenceGenome);
    }
}
Also used : ReferenceContig(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceContig) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ReferenceGenomeFactory(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceGenomeFactory) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) IOException(java.io.IOException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SequenceOntologyTree(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) GeneticCode(com.github.lindenb.jvarkit.util.bio.GeneticCode) Interval(htsjdk.samtools.util.Interval)

Example 60 with Allele

use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.

the class VCFMerge method buildContextFromVariantContext.

private List<VariantContext> buildContextFromVariantContext(final VCFHeader header, final List<VariantContext> row) {
    final VariantContextBuilder vcb = new VariantContextBuilder();
    final Map<String, Object> atts = new HashMap<String, Object>();
    final VariantContext ctx0 = row.get(0);
    vcb.chr(ctx0.getContig());
    vcb.start(ctx0.getStart());
    vcb.stop(ctx0.getEnd());
    // fill genotypes
    final HashMap<String, Genotype> sample2genotype = new HashMap<String, Genotype>();
    for (final VariantContext ctx : row) {
        for (final String sample : ctx.getSampleNames()) {
            final Genotype g1 = ctx.getGenotype(sample);
            if (g1 == null || !g1.isCalled())
                continue;
            final Genotype g2 = sample2genotype.get(sample);
            if (g2 == null || this.genotypeComparator.compare(g1, g2) < 0) {
                sample2genotype.put(sample, g1);
            }
        }
    }
    // missing samples ?
    final Set<String> remainingSamples = new HashSet<String>(header.getSampleNamesInOrder());
    remainingSamples.removeAll(sample2genotype.keySet());
    for (final String sampleName : remainingSamples) {
        final Genotype missing = createMissingGenotype(sampleName, row.get(0).getReference());
        sample2genotype.put(sampleName, missing);
    }
    // collect alleles
    final List<Allele> alleleList = new ArrayList<>();
    alleleList.add(ctx0.getReference());
    for (final String sampleName : sample2genotype.keySet()) {
        final Genotype g1 = sample2genotype.get(sampleName);
        if (!g1.isCalled())
            continue;
        for (final Allele ga : g1.getAlleles()) {
            if (ga.isReference() || alleleList.contains(ga))
                continue;
            alleleList.add(ga);
        }
    }
    vcb.attributes(atts);
    vcb.alleles(alleleList);
    vcb.genotypes(sample2genotype.values());
    return Collections.singletonList(vcb.make());
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) HashSet(java.util.HashSet)

Aggregations

Allele (htsjdk.variant.variantcontext.Allele)157 VariantContext (htsjdk.variant.variantcontext.VariantContext)82 Genotype (htsjdk.variant.variantcontext.Genotype)72 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)66 ArrayList (java.util.ArrayList)50 Test (org.testng.annotations.Test)48 VCFHeader (htsjdk.variant.vcf.VCFHeader)42 GenotypeBuilder (htsjdk.variant.variantcontext.GenotypeBuilder)37 File (java.io.File)31 Collectors (java.util.stream.Collectors)31 HashSet (java.util.HashSet)30 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)29 IOException (java.io.IOException)28 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)26 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)25 VCFConstants (htsjdk.variant.vcf.VCFConstants)22 VCFFormatHeaderLine (htsjdk.variant.vcf.VCFFormatHeaderLine)22 List (java.util.List)22 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)21 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)20