Search in sources :

Example 11 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class SVPredictions method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VCFIterator r, VariantContextWriter w) {
    try {
        final VCFHeader header = r.getHeader();
        final SAMSequenceDictionary dict = header.getSequenceDictionary();
        try (final GtfReader gtfReader = new GtfReader(this.gtfPath)) {
            if (dict != null)
                gtfReader.setContigNameConverter(ContigNameConverter.fromOneDictionary(dict));
            gtfReader.getAllGenes().stream().forEach(G -> this.all_gene.put(new Interval(G), G));
        }
        final VCFHeader h2 = new VCFHeader(header);
        /* split 'limit' string */
        if (StringUtils.isBlank(this.whereStr)) {
            // add all
            Arrays.stream(WhereInGene.values()).forEach(C -> this.limitWhere.add(C));
        } else {
            for (final String ss : this.whereStr.split("[ \t,;]+")) {
                if (StringUtils.isBlank(ss))
                    continue;
                // gene and transcript expand to everything but intergenic
                if (ss.toLowerCase().equals("gene") || ss.toLowerCase().equals("transcript")) {
                    Arrays.stream(WhereInGene.values()).filter(C -> !C.equals(WhereInGene.intergenic)).forEach(C -> this.limitWhere.add(C));
                } else {
                    final WhereInGene g = Arrays.stream(WhereInGene.values()).filter(A -> A.name().equalsIgnoreCase(ss)).findFirst().orElseThrow(() -> new IllegalArgumentException("Bad identifier expected one of :" + Arrays.stream(WhereInGene.values()).map(X -> X.name()).collect(Collectors.joining(", "))));
                    this.limitWhere.add(g);
                }
            }
            if (this.limitWhere.isEmpty()) {
                LOG.error("--where option provided but no identifier was found.");
                return -1;
            }
        }
        final VCFFilterHeaderLine filterHeader;
        if (!StringUtils.isBlank(this.filterStr)) {
            filterHeader = new VCFFilterHeaderLine(this.filterStr, "variant failing locations: " + this.limitWhere.stream().map(V -> V.name()).collect(Collectors.joining(",")));
            h2.addMetaDataLine(filterHeader);
        } else {
            filterHeader = null;
        }
        h2.addMetaDataLine(new VCFInfoHeaderLine(this.info_tag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Structural variant consequence."));
        JVarkitVersion.getInstance().addMetaData(this, h2);
        w.writeHeader(h2);
        while (r.hasNext()) {
            final VariantContext ctx = r.next();
            final int ctx_bnd_end;
            if (this.ignore_bnd_end && ctx.hasAttribute(VCFConstants.SVTYPE) && ctx.getAttributeAsString(VCFConstants.SVTYPE, "").equals("BND")) {
                ctx_bnd_end = ctx.getStart();
            } else {
                ctx_bnd_end = ctx.getEnd();
            }
            final Collection<Gene> genes = this.all_gene.getOverlapping(new Interval(ctx.getContig(), Math.max(1, ctx.getStart() - this.upstream_size), ctx_bnd_end + this.upstream_size));
            if (// intergenic
            genes.isEmpty()) {
                // discard anyway
                if (!this.limitWhere.contains(WhereInGene.intergenic) && filterHeader == null)
                    continue;
                Gene leftGene = null;
                for (final Gene g : this.all_gene.getOverlapping(new Interval(ctx.getContig(), 1, ctx.getStart()))) {
                    if (leftGene == null || leftGene.getEnd() < g.getEnd())
                        leftGene = g;
                }
                final String leftId = (leftGene == null ? "" : leftGene.getId());
                final String leftName = (leftGene == null ? "" : leftGene.getGeneName());
                Gene rightGene = null;
                for (final Gene g : this.all_gene.getOverlapping(new Interval(ctx.getContig(), ctx.getStart(), Integer.MAX_VALUE))) {
                    if (rightGene == null || rightGene.getStart() > g.getStart())
                        rightGene = g;
                }
                final String rightId = (rightGene == null ? "" : rightGene.getId());
                final String rightName = (rightGene == null ? "" : rightGene.getGeneName());
                final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                // FILTER
                if (!this.limitWhere.contains(WhereInGene.intergenic) && filterHeader != null) {
                    vcb.filter(filterHeader.getID());
                }
                if (!(leftGene == null && rightGene == null)) {
                    vcb.attribute(this.info_tag, "intergenic|" + leftId + "-" + rightId + "|" + leftName + "-" + rightName);
                }
                w.add(vcb.make());
            } else {
                final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                final List<List<Annotation>> annotations = genes.stream().map(G -> annotGene(G, ctx)).collect(Collectors.toList());
                final boolean match_user_filter = annotations.stream().flatMap(L -> L.stream()).flatMap(A -> A.where.stream()).anyMatch(A -> this.limitWhere.contains(A));
                if (!match_user_filter) {
                    if (filterHeader == null)
                        continue;
                    vcb.filter(filterHeader.getID());
                }
                if (this.max_genes_count != -1 && genes.size() > this.max_genes_count) {
                    final String prefix = annotations.stream().flatMap(L -> L.stream()).flatMap(A -> A.where.stream()).map(A -> A.name()).collect(Collectors.toCollection(TreeSet::new)).stream().collect(Collectors.joining("&"));
                    vcb.attribute(this.info_tag, (StringUtils.isBlank(prefix) ? "." : prefix) + "|multiple_genes|" + genes.size());
                } else {
                    final List<String> annots = annotations.stream().flatMap(L -> L.stream()).map(A -> A.where.stream().map(X -> X.name()).collect(Collectors.toCollection(TreeSet::new)).stream().collect(Collectors.joining("&")) + A.label).collect(Collectors.toSet()).stream().collect(Collectors.toList());
                    if (!annots.isEmpty())
                        vcb.attribute(this.info_tag, annots);
                }
                w.add(vcb.make());
            }
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(w);
    }
}
Also used : Arrays(java.util.Arrays) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) VCFHeader(htsjdk.variant.vcf.VCFHeader) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFConstants(htsjdk.variant.vcf.VCFConstants) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Collection(java.util.Collection) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) TreeSet(java.util.TreeSet) List(java.util.List) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) Interval(htsjdk.samtools.util.Interval)

Example 12 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class VCFPredictions method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VCFIterator r, VariantContextWriter w) {
    try {
        this.referenceGenome = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.faidxPath);
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(this.referenceGenome);
        final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(dict);
        loadGtf(dict);
        final VCFHeader header = r.getHeader();
        final VCFHeader h2 = new VCFHeader(header);
        JVarkitVersion.getInstance().addMetaData(this, h2);
        switch(this.outputSyntax) {
            case Vep:
                {
                    h2.addMetaDataLine(new VCFInfoHeaderLine("CSQ", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Consequence type as predicted by VEP" + ". Format: Allele|Feature|Feature_type|Consequence|CDS_position|Protein_position|Amino_acids|Codons"));
                    break;
                }
            case SnpEff:
                {
                    h2.addMetaDataLine(new VCFInfoHeaderLine("ANN", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Functional annotations: 'Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO'"));
                    break;
                }
            default:
                {
                    final StringBuilder format = new StringBuilder();
                    for (FORMAT1 f : FORMAT1.values()) {
                        if (format.length() > 0)
                            format.append("|");
                        format.append(f.name());
                    }
                    h2.addMetaDataLine(new VCFInfoHeaderLine(TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Prediction from " + getClass().getSimpleName() + ". Format: " + format));
                    break;
                }
        }
        w.writeHeader(h2);
        final RNASequenceFactory rnaSeqFactory = new RNASequenceFactory();
        rnaSeqFactory.setContigToGenomicSequence(S -> getGenomicSequence(S));
        while (r.hasNext()) {
            final VariantContext ctx = r.next();
            final String normalizedContig = contigNameConverter.apply(ctx.getContig());
            if (StringUtil.isBlank(normalizedContig)) {
                w.add(ctx);
                continue;
            }
            final List<Transcript> transcripts = this.transcriptTreeMap.getOverlapping(new SimpleInterval(normalizedContig, ctx.getStart(), ctx.getEnd())).stream().flatMap(L -> L.stream()).collect(Collectors.toList());
            final List<Annotation> all_annotations = new ArrayList<>();
            final List<Allele> alternateAlleles;
            if (ctx.getNAlleles() <= 1) {
                // not a variant, just REF
                alternateAlleles = Arrays.asList(Allele.NO_CALL);
            } else {
                alternateAlleles = ctx.getAlternateAlleles();
            }
            for (final Allele altAllele : alternateAlleles) {
                if (altAllele.isReference() || altAllele.equals(Allele.SPAN_DEL) || altAllele.equals(Allele.NON_REF_ALLELE))
                    continue;
                /* intergenic ====================================================== */
                if (transcripts.isEmpty()) {
                    Transcript leftGene = null;
                    String leftId = "";
                    String leftName = "";
                    for (Iterator<Transcript> iter = this.transcriptTreeMap.getOverlapping(new SimpleInterval(normalizedContig, 1, ctx.getStart())).stream().flatMap(L -> L.stream()).iterator(); iter.hasNext(); ) {
                        final Transcript t = iter.next();
                        if (leftGene == null || leftGene.getEnd() < t.getEnd()) {
                            leftGene = t;
                            leftId = t.getGene().getId();
                            leftName = t.getGene().getGeneName();
                        }
                    }
                    Transcript rightGene = null;
                    String rightId = "";
                    String rightName = "";
                    for (Iterator<Transcript> iter = this.transcriptTreeMap.getOverlapping(new SimpleInterval(normalizedContig, ctx.getEnd(), dict.getSequence(normalizedContig).getSequenceLength())).stream().flatMap(L -> L.stream()).iterator(); iter.hasNext(); ) {
                        final Transcript t = iter.next();
                        if (rightGene == null || t.getStart() < rightGene.getStart()) {
                            rightGene = t;
                            rightId = t.getGene().getId();
                            rightName = t.getGene().getGeneName();
                        }
                    }
                    // intergenic
                    final Annotation annot = new Annotation(altAllele);
                    annot.seqont.add("intergenic");
                    annot.geneId = leftId + "-" + rightId;
                    annot.geneName = leftName + "-" + rightName;
                    all_annotations.add(annot);
                } else {
                    for (final Transcript transcript : transcripts) {
                        final Annotation annotation = new Annotation(altAllele, transcript);
                        all_annotations.add(annotation);
                        if (!transcript.overlaps(ctx)) {
                            if (((ctx.getEnd() < transcript.getStart() && transcript.isNegativeStrand()) || (ctx.getStart() > transcript.getEnd() && transcript.isPositiveStrand()))) {
                                if (ctx.withinDistanceOf(transcript, 500)) {
                                    annotation.seqont.add("500B_downstream_variant");
                                } else if (ctx.withinDistanceOf(transcript, 2_000)) {
                                    annotation.seqont.add("2KB_downstream_variant");
                                }
                            } else if (((ctx.getEnd() < transcript.getStart() && transcript.isPositiveStrand()) || (ctx.getStart() > transcript.getEnd() && transcript.isNegativeStrand()))) {
                                if (ctx.withinDistanceOf(transcript, 2_000)) {
                                    annotation.seqont.add("2KB_upstream_variant");
                                } else if (ctx.withinDistanceOf(transcript, 5_000)) {
                                    annotation.seqont.add("5KB_upstream_variant");
                                }
                            }
                            continue;
                        }
                        if (CoordMath.encloses(ctx.getStart(), ctx.getEnd(), transcript.getStart(), transcript.getEnd())) {
                            // TODO can be inversion ,etc...
                            annotation.seqont.add("transcript_ablation");
                            continue;
                        }
                        for (int side = 0; side < 2; ++side) {
                            final Optional<UTR> opt_utr = (side == 0 ? transcript.getTranscriptUTR5() : transcript.getTranscriptUTR3());
                            if (!opt_utr.isPresent())
                                continue;
                            final UTR utr = opt_utr.get();
                            if (CoordMath.overlaps(utr.getStart(), utr.getEnd(), ctx.getStart(), ctx.getEnd())) {
                                annotation.seqont.add(side == 0 ? "5_prime_UTR_variant" : "3_prime_UTR_variant");
                            }
                        }
                        for (int side = 0; side < 2; ++side) {
                            final Optional<? extends ExonOrIntron> opt_ex;
                            if (side == 0) {
                                opt_ex = transcript.getExons().stream().filter(E -> E.overlaps(ctx)).findFirst();
                            } else {
                                opt_ex = transcript.getIntrons().stream().filter(E -> E.overlaps(ctx)).findFirst();
                            }
                            if (!opt_ex.isPresent())
                                continue;
                            final ExonOrIntron ei = opt_ex.get();
                            if (side == 0) {
                                if (transcript.isNonCoding())
                                    annotation.seqont.add("non_coding_transcript_exon_variant");
                            } else {
                                if (transcript.isNonCoding())
                                    annotation.seqont.add("non_coding_transcript_intron_variant");
                                annotation.seqont.add("intron");
                            }
                            if (ctx.getStart() == ctx.getEnd() && ei.isSplicing(ctx.getStart())) {
                                if (ei.isSplicingAcceptor(ctx.getStart())) {
                                    // SPLICING_ACCEPTOR
                                    annotation.seqont.add("splice_acceptor");
                                } else if (ei.isSplicingDonor(ctx.getStart())) {
                                    // SPLICING_DONOR
                                    annotation.seqont.add("splice_donor");
                                } else // ??
                                {
                                    annotation.seqont.add("splicing_variant");
                                }
                            }
                        }
                        final StructuralVariantType svType = ctx.getStructuralVariantType();
                        if (svType != null) {
                            continue;
                        }
                        if (transcript.isNonCoding()) {
                            // TODO
                            annotation.seqont.add("non_coding_transcript_variant");
                            continue;
                        }
                        RNASequence cDNA = this.transcriptId2cdna.get(transcript.getId());
                        if (cDNA == null) {
                            cDNA = rnaSeqFactory.getCodingRNA(transcript);
                            this.transcriptId2cdna.put(transcript.getId(), cDNA);
                        }
                        final OptionalInt opt_pos_cdna0 = cDNA.convertGenomic0ToRnaIndex0(ctx.getStart() - 1);
                        if (!opt_pos_cdna0.isPresent())
                            continue;
                        final int pos_cdna0 = opt_pos_cdna0.getAsInt();
                        final int pos_aa = pos_cdna0 / 3;
                        final GeneticCode geneticCode = GeneticCode.getStandard();
                        if (AcidNucleics.isATGC(altAllele.getBaseString())) {
                            String bases = altAllele.getBaseString().toUpperCase();
                            if (transcript.isNegativeStrand()) {
                                bases = AcidNucleics.reverseComplement(bases);
                            }
                            final MutedSequence mutRNA = new MutedSequence(cDNA, pos_cdna0, ctx.getReference().length(), bases);
                            final PeptideSequence<CharSequence> wildProt = PeptideSequence.of(cDNA, geneticCode);
                            final PeptideSequence<CharSequence> mutProt = PeptideSequence.of(mutRNA, geneticCode);
                            final int mod = pos_cdna0 % 3;
                            annotation.wildCodon = ("" + cDNA.charAt(pos_cdna0 - mod + 0) + cDNA.charAt(pos_cdna0 - mod + 1) + cDNA.charAt(pos_cdna0 - mod + 2));
                            annotation.mutCodon = ("" + mutRNA.charAt(pos_cdna0 - mod + 0) + mutRNA.charAt(pos_cdna0 - mod + 1) + mutRNA.charAt(pos_cdna0 - mod + 2));
                            annotation.position_protein = (pos_aa + 1);
                            annotation.wildAA = String.valueOf(wildProt.charAt(pos_aa));
                            annotation.mutAA = (String.valueOf(mutProt.charAt(pos_aa)));
                            if (isStop(wildProt.charAt(pos_aa)) && !isStop(mutProt.charAt(pos_aa))) {
                                annotation.seqont.add("stop_lost");
                            } else if (!isStop(wildProt.charAt(pos_aa)) && isStop(mutProt.charAt(pos_aa))) {
                                annotation.seqont.add("stop_gained");
                            } else if (wildProt.charAt(pos_aa) == mutProt.charAt(pos_aa)) {
                                annotation.seqont.add("synonymous");
                            } else {
                                annotation.seqont.add("missense_variant");
                            }
                        }
                    }
                }
            }
            final Set<String> info = new HashSet<String>(all_annotations.size());
            for (final Annotation a : all_annotations) {
                info.add(a.toString());
            }
            final VariantContextBuilder vb = new VariantContextBuilder(ctx);
            final String thetag;
            switch(this.outputSyntax) {
                case Vep:
                    thetag = "CSQ";
                    break;
                case SnpEff:
                    thetag = "ANN";
                    break;
                default:
                    thetag = TAG;
                    break;
            }
            vb.attribute(thetag, info.toArray());
            w.add(vb.make());
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(w);
        CloserUtil.close(r);
        CloserUtil.close(this.referenceGenome);
    }
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) VCFHeader(htsjdk.variant.vcf.VCFHeader) ExonOrIntron(com.github.lindenb.jvarkit.util.bio.structure.ExonOrIntron) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) StringUtil(htsjdk.samtools.util.StringUtil) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) StructuralVariantType(htsjdk.variant.variantcontext.StructuralVariantType) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) CoordMath(htsjdk.samtools.util.CoordMath) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Optional(java.util.Optional) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) PeptideSequence(com.github.lindenb.jvarkit.util.bio.structure.PeptideSequence) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) AcidNucleics(com.github.lindenb.jvarkit.util.bio.AcidNucleics) OptionalInt(java.util.OptionalInt) RNASequenceFactory(com.github.lindenb.jvarkit.util.bio.structure.RNASequenceFactory) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) DelegateCharSequence(com.github.lindenb.jvarkit.lang.DelegateCharSequence) GeneticCode(com.github.lindenb.jvarkit.util.bio.GeneticCode) WeakHashMap(java.util.WeakHashMap) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) RNASequence(com.github.lindenb.jvarkit.util.bio.structure.RNASequence) Iterator(java.util.Iterator) UTR(com.github.lindenb.jvarkit.util.bio.structure.UTR) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) UTR(com.github.lindenb.jvarkit.util.bio.structure.UTR) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) VCFHeader(htsjdk.variant.vcf.VCFHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) DelegateCharSequence(com.github.lindenb.jvarkit.lang.DelegateCharSequence) RNASequence(com.github.lindenb.jvarkit.util.bio.structure.RNASequence) OptionalInt(java.util.OptionalInt) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) RNASequenceFactory(com.github.lindenb.jvarkit.util.bio.structure.RNASequenceFactory) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ExonOrIntron(com.github.lindenb.jvarkit.util.bio.structure.ExonOrIntron) GeneticCode(com.github.lindenb.jvarkit.util.bio.GeneticCode) StructuralVariantType(htsjdk.variant.variantcontext.StructuralVariantType)

Example 13 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class VCFBedSetFilter method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VCFIterator r, final VariantContextWriter w) {
    final Set<String> contigs_not_found = new HashSet<>();
    final VCFHeader h2 = new VCFHeader(r.getHeader());
    final VCFFilterHeaderLine filter;
    if (!StringUtil.isBlank(this.filterName)) {
        filter = new VCFFilterHeaderLine(this.filterName, "Variant " + (this.tabixBlackFile != null ? " overlapping any bed record of " : " that do not overlap any bed record of ") + tabixFile);
        h2.addMetaDataLine(filter);
    } else {
        filter = null;
    }
    JVarkitVersion.getInstance().addMetaData(this, h2);
    final ContigNameConverter ctgNameConverter;
    if (this.bedReader != null) {
        ctgNameConverter = ContigNameConverter.fromContigSet(this.bedReader.getContigs());
    } else {
        ctgNameConverter = ContigNameConverter.fromIntervalTreeMap(this.intervalTreeMap);
    }
    final SAMSequenceDictionary dict = r.getHeader().getSequenceDictionary();
    w.writeHeader(h2);
    while (r.hasNext()) {
        final VariantContext ctx = r.next();
        boolean set_filter = false;
        final String convert_contig = ctgNameConverter.apply(ctx.getContig());
        final int ctx_start = Math.max(1, ctx.getStart() - this.extend_bases);
        final int ctx_end;
        if (dict == null) {
            ctx_end = ctx.getEnd() + this.extend_bases;
        } else {
            final SAMSequenceRecord ssr = dict.getSequence(ctx.getContig());
            if (ssr == null)
                throw new JvarkitException.ContigNotFoundInDictionary(ctx.getContig(), dict);
            ctx_end = Math.min(ssr.getSequenceLength(), ctx.getEnd() + this.extend_bases);
        }
        if (StringUtil.isBlank(convert_contig)) {
            if (debug)
                LOG.warn("Cannot convert contig " + ctx.getContig());
            if (contigs_not_found.size() < 100) {
                if (contigs_not_found.add(ctx.getContig())) {
                    LOG.warn("Cannot convert variant contig " + ctx.getContig() + " to bed file. (Contig is not in BED file)");
                }
            }
            set_filter = false;
        } else if (this.intervalTreeMap != null) {
            if (this.intervalTreeMap.getOverlapping(new SimpleInterval(convert_contig, ctx_start, ctx_end)).stream().anyMatch(LOC -> testOverlap(ctx_start, ctx_end, LOC))) {
                if (debug)
                    LOG.warn("treemap.overlap=true set Filter=true for " + ctx.getContig() + ":" + ctx.getStart());
                set_filter = true;
            }
        } else {
            try (CloseableIterator<BedLine> iter = this.bedReader.iterator(convert_contig, ctx_start - 1, ctx_end + 1)) {
                while (iter.hasNext()) {
                    final BedLine bed = iter.next();
                    if (bed == null)
                        continue;
                    if (!testOverlap(ctx_start, ctx_end, bed))
                        continue;
                    if (debug)
                        LOG.warn("tabix=true set Filter=true for " + ctx.getContig() + ":" + ctx.getStart());
                    set_filter = true;
                    break;
                }
            } catch (IOException err) {
                throw new RuntimeIOException(err);
            }
        }
        /* variant is in whitelist, we want to keep it */
        if (this.tabixWhiteFile != null) {
            set_filter = !set_filter;
            if (debug)
                LOG.warn("inverse. Now Filter=" + set_filter + " for " + ctx.getContig() + ":" + ctx.getStart());
        }
        if (!set_filter) {
            if (debug)
                LOG.warn("no filter. Writing " + ctx.getContig() + ":" + ctx.getStart());
            w.add(ctx);
            continue;
        }
        if (filter != null) {
            if (debug)
                LOG.warn("adding filter. Writing " + ctx.getContig() + ":" + ctx.getStart());
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.filter(filter.getID());
            w.add(vcb.make());
        } else {
            if (debug)
                LOG.warn("Ignoring " + ctx.getContig() + ":" + ctx.getStart());
        }
    }
    if (!contigs_not_found.isEmpty()) {
        LOG.warn("The following contigs were not found: " + String.join(" ", contigs_not_found) + "...");
    }
    return 0;
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) StringUtil(htsjdk.samtools.util.StringUtil) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) CloserUtil(htsjdk.samtools.util.CloserUtil) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Locatable(htsjdk.samtools.util.Locatable) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) IndexedBedReader(com.github.lindenb.jvarkit.util.bio.bed.IndexedBedReader) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) CoordMath(htsjdk.samtools.util.CoordMath) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) BufferedReader(java.io.BufferedReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) CloseableIterator(htsjdk.samtools.util.CloseableIterator) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet)

Example 14 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class VcfGapFrequent method doWork.

@Override
public int doWork(final List<String> args) {
    if (af_treshold < 0 || this.af_treshold > 1) {
        LOG.error("bad treshold " + this.af_treshold);
        return -1;
    }
    VCFIterator in = null;
    try {
        this.freqentDBs.addAll(IOUtils.unrollFiles2018(this.databases).stream().map(F -> new FreqentDB(F)).collect(Collectors.toList()));
        if (this.freqentDBs.isEmpty()) {
            LOG.error("no database was defined");
            return -1;
        }
        if (this.freqentDBs.stream().flatMap(DB -> DB.afExtractors.stream()).count() == 0L) {
            LOG.error("No AF extractor is suitable for any databases.");
            return -1;
        }
        in = super.openVCFIterator(oneFileOrNull(args));
        final VCFHeader header = in.getHeader();
        this.dict = header.getSequenceDictionary();
        if (this.dict == null || this.dict.isEmpty()) {
            LOG.error(JvarkitException.VcfDictionaryMissing.getMessage("input"));
            return -1;
        }
        this.out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        final List<Handler> handlers = new ArrayList<>(header.getNGenotypeSamples() + 1);
        handlers.add(new Handler(null));
        header.getSampleNamesInOrder().stream().map(S -> new Handler(S)).forEach(H -> handlers.add(H));
        final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(header).validatingSortOrder(true).logger(LOG).build();
        while (in.hasNext()) {
            final VariantContext ctx = progress.apply(in.next());
            handlers.stream().forEach(H -> H.visit(ctx));
        }
        handlers.stream().forEach(H -> H.finish());
        progress.close();
        out.flush();
        out.close();
        out = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
        CloserUtil.close(out);
        CloserUtil.close(this.freqentDBs);
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) OptionalDouble(java.util.OptionalDouble) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StringUtil(htsjdk.samtools.util.StringUtil) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) VCFReader(htsjdk.variant.vcf.VCFReader) Set(java.util.Set) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) AFExtractorFactory(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory) List(java.util.List) Closeable(java.io.Closeable) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) AFExtractor(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory.AFExtractor) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFIterator(htsjdk.variant.vcf.VCFIterator)

Example 15 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class GridssPostProcessVcf method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VCFIterator iterin, VariantContextWriter out) {
    SortingCollection<VariantContext> sorter1 = null;
    SortingCollection<VariantContext> sorter2 = null;
    PrintWriter debug = null;
    try {
        debug = this.debugFile == null ? new PrintWriter(new NullOuputStream()) : new PrintWriter(Files.newBufferedWriter(this.debugFile));
        final VCFHeader header = iterin.getHeader();
        LOG.info("reading input.");
        final Comparator<VariantContext> comparator = (A, B) -> A.getAttributeAsString(EVENT_KEY, "").compareTo(B.getAttributeAsString(EVENT_KEY, ""));
        sorter1 = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(header, true), comparator, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        while (iterin.hasNext()) {
            final VariantContext ctx = iterin.next();
            if (!ctx.hasAttribute(EVENT_KEY)) {
                LOG.warn("skipping variant without INFO/" + EVENT_KEY + " at " + toString(ctx));
                continue;
            }
            sorter1.add(ctx);
        }
        sorter1.doneAdding();
        sorter1.setDestructiveIteration(true);
        LOG.info("done adding");
        CloseableIterator<VariantContext> iter2 = sorter1.iterator();
        @SuppressWarnings("resource") final EqualRangeIterator<VariantContext> equal_range = new EqualRangeIterator<>(iter2, comparator);
        sorter2 = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(header, true), header.getVCFRecordComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        while (equal_range.hasNext()) {
            final List<VariantContext> variants = equal_range.next();
            if (variants.size() == 1) {
                final VariantContext ctx = variants.get(0);
                final Allele alt = ctx.getAlleles().get(1);
                // INSERTION LIKE. chr22:1234 A/.AAAACAAGGAG
                if (isATGC(ctx.getReference()) && isATGC(alt) && length(ctx.getReference()) < length(alt)) {
                    final VariantContextBuilder vcb1 = new VariantContextBuilder(ctx);
                    vcb1.attribute(VCFConstants.SVTYPE, "INS");
                    vcb1.attribute("SVLEN", length(alt) - length(ctx.getReference()));
                    sorter2.add(vcb1.make());
                } else // STRANGE ? no ? change
                if (isATGC(ctx.getReference()) && isATGC(alt) && length(ctx.getReference()) == 1 && length(alt) == 1) {
                    sorter2.add(ctx);
                } else {
                    sorter2.add(ctx);
                    debug.println("ALONE " + toString(ctx));
                }
            } else if (variants.size() != 2) {
                for (final VariantContext ctx : variants) {
                    debug.println("SIZE>2 " + toString(ctx));
                    sorter2.add(ctx);
                }
            } else {
                final VariantContext ctx1 = variants.get(0);
                final VariantContext ctx2 = variants.get(1);
                final Breakend brk1 = Breakend.parse(ctx1.getAlleles().get(1)).orElse(null);
                final Breakend brk2 = Breakend.parse(ctx2.getAlleles().get(1)).orElse(null);
                if (brk1 == null || brk2 == null) {
                    debug.println("expected two bnd " + toString(ctx1) + " " + toString(ctx2));
                    sorter2.add(ctx1);
                    sorter2.add(ctx2);
                    return -1;
                // continue;
                }
                /* should be the same breakend */
                if (!ctx1.getContig().equals(brk2.getContig()) || !ctx2.getContig().equals(brk1.getContig()) || ctx1.getStart() != brk2.getStart() || ctx2.getStart() != brk1.getStart()) {
                    debug.println("expected concordant bnd " + toString(ctx1) + " " + toString(ctx2));
                    sorter2.add(ctx1);
                    sorter2.add(ctx2);
                    return -1;
                // continue;
                }
                final VariantContextBuilder vcb1 = new VariantContextBuilder(ctx1);
                final VariantContextBuilder vcb2 = new VariantContextBuilder(ctx2);
                if (!ctx1.contigsMatch(ctx2)) {
                    vcb1.attribute(VCFConstants.SVTYPE, "CTX");
                    vcb2.attribute(VCFConstants.SVTYPE, "CTX");
                    sorter2.add(vcb1.make());
                    sorter2.add(vcb2.make());
                    continue;
                }
                if (ctx1.getStart() > ctx2.getStart()) {
                    debug.println("CTX1>CTX2?" + toString(ctx1) + " " + toString(ctx2));
                    sorter2.add(vcb1.make());
                    sorter2.add(vcb2.make());
                    continue;
                }
                final BndType bndType1 = getBndType(brk1);
                final BndType bndType2 = getBndType(brk2);
                if (bndType1.equals(BndType.DNA_OPEN) && bndType2.equals(BndType.CLOSE_DNA)) {
                    final Allele ctx1_alt = ctx1.getAlleles().get(1);
                    final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<DEL>", false));
                    vcb1.attribute(VCFConstants.SVTYPE, "DEL");
                    vcb1.alleles(alleles);
                    vcb1.attribute("SVLEN", ctx1.getEnd() - ctx2.getStart() + 1);
                    vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
                    sorter2.add(vcb1.make());
                } else if (bndType1.equals(BndType.CLOSE_DNA) && bndType2.equals(BndType.DNA_OPEN)) {
                    final Allele ctx1_alt = ctx1.getAlleles().get(1);
                    final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<DUP>", false));
                    vcb1.attribute(VCFConstants.SVTYPE, "DUP");
                    vcb1.alleles(alleles);
                    vcb1.attribute("SVLEN", ctx2.getEnd() - ctx1.getStart() + 1);
                    vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
                    sorter2.add(vcb1.make());
                } else if (bndType1.equals(BndType.OPEN_DNA) && bndType2.equals(BndType.OPEN_DNA)) {
                    final Allele ctx1_alt = ctx1.getAlleles().get(1);
                    final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<INV>", false));
                    vcb1.attribute(VCFConstants.SVTYPE, "INV");
                    vcb1.alleles(alleles);
                    vcb1.attribute("SVLEN", ctx2.getEnd() - ctx1.getStart() + 1);
                    vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
                    sorter2.add(vcb1.make());
                } else if (bndType1.equals(BndType.DNA_CLOSE) && bndType2.equals(BndType.DNA_CLOSE)) {
                    final Allele ctx1_alt = ctx1.getAlleles().get(1);
                    final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<SV>", false));
                    vcb1.attribute(VCFConstants.SVTYPE, "SV");
                    vcb1.alleles(alleles);
                    vcb1.attribute("SVLEN", ctx2.getEnd() - ctx1.getStart() + 1);
                    vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
                    sorter2.add(vcb1.make());
                } else {
                    debug.println("How to handle " + toString(ctx1) + " " + toString(ctx2));
                    sorter2.add(ctx1);
                    sorter2.add(ctx2);
                }
            }
        }
        equal_range.close();
        iter2.close();
        sorter1.cleanup();
        sorter1 = null;
        sorter2.doneAdding();
        sorter2.setDestructiveIteration(true);
        iter2 = sorter2.iterator();
        final VCFHeader header2 = new VCFHeader(header);
        JVarkitVersion.getInstance().addMetaData(this, header2);
        final VCFInfoHeaderLine originalAlt = new VCFInfoHeaderLine("BND", 1, VCFHeaderLineType.String, "Original ALT allele.");
        header.addMetaDataLine(originalAlt);
        header.addMetaDataLine(new VCFInfoHeaderLine(originalAlt.getID(), 1, VCFHeaderLineType.Integer, "SV length"));
        header.addMetaDataLine(new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "SV length"));
        header.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY, true));
        out.writeHeader(header2);
        while (iter2.hasNext()) {
            out.add(iter2.next());
        }
        iter2.close();
        sorter2.cleanup();
        sorter2 = null;
        debug.flush();
        debug.close();
        debug = null;
        return 0;
    } catch (final Throwable err) {
        err.printStackTrace();
        LOG.error(err);
        return -1;
    } finally {
        if (sorter1 != null)
            sorter1.cleanup();
        if (sorter2 != null)
            sorter2.cleanup();
        if (debug != null)
            try {
                debug.close();
            } catch (final Throwable err2) {
            }
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) VCFRecordCodec(htsjdk.variant.vcf.VCFRecordCodec) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) ParametersDelegate(com.beust.jcommander.ParametersDelegate) Breakend(com.github.lindenb.jvarkit.variant.variantcontext.Breakend) Path(java.nio.file.Path) VCFConstants(htsjdk.variant.vcf.VCFConstants) PrintWriter(java.io.PrintWriter) SortingCollection(htsjdk.samtools.util.SortingCollection) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Files(java.nio.file.Files) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) List(java.util.List) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) Comparator(java.util.Comparator) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContext(htsjdk.variant.variantcontext.VariantContext) Breakend(com.github.lindenb.jvarkit.variant.variantcontext.Breakend) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VCFRecordCodec(htsjdk.variant.vcf.VCFRecordCodec) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) List(java.util.List) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter)

Aggregations

VCFIterator (htsjdk.variant.vcf.VCFIterator)98 VariantContext (htsjdk.variant.variantcontext.VariantContext)83 VCFHeader (htsjdk.variant.vcf.VCFHeader)76 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)62 Parameter (com.beust.jcommander.Parameter)56 Program (com.github.lindenb.jvarkit.util.jcommander.Program)56 Logger (com.github.lindenb.jvarkit.util.log.Logger)56 ArrayList (java.util.ArrayList)52 List (java.util.List)52 Set (java.util.Set)49 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)46 Collectors (java.util.stream.Collectors)45 HashSet (java.util.HashSet)42 Path (java.nio.file.Path)39 IOException (java.io.IOException)37 Allele (htsjdk.variant.variantcontext.Allele)36 CloserUtil (htsjdk.samtools.util.CloserUtil)35 Genotype (htsjdk.variant.variantcontext.Genotype)33 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)32 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)32