Search in sources :

Example 66 with Allele

use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.

the class VcfLiftOver method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    VariantContextWriter failed = null;
    GenomicSequence genomicSequence = null;
    try {
        final VCFHeader inputHeader = in.getHeader();
        final Set<VCFHeaderLine> headerLines = inputHeader.getMetaDataInInputOrder().stream().filter(V -> {
            if (!(V instanceof VCFInfoHeaderLine))
                return true;
            final VCFInfoHeaderLine vih = VCFInfoHeaderLine.class.cast(V);
            if (removeInfo.contains(vih.getID()))
                return false;
            return true;
        }).collect(Collectors.toSet());
        if (this.failedFile != null) {
            final VCFHeader header2 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
            header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
            header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
            header2.addMetaDataLine(new VCFInfoHeaderLine(this.failedinfoTag, 1, VCFHeaderLineType.String, "Why the liftOver failed."));
            failed = super.openVariantContextWriter(failedFile);
            failed.writeHeader(header2);
        }
        final VCFHeader header3 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
        header3.setSequenceDictionary(this.indexedFastaSequenceFile.getSequenceDictionary());
        header3.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header3.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header3.addMetaDataLine(new VCFInfoHeaderLine(this.infoTag, 1, VCFHeaderLineType.String, "Chromosome|Position before liftOver."));
        out.writeHeader(header3);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
        while (in.hasNext()) {
            VariantContext ctx = progress.watch(in.next());
            if (!this.removeInfo.isEmpty()) {
                VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                for (final String tag : this.removeInfo) vcb.rmAttribute(tag);
                ctx = vcb.make();
            }
            if (ctx.isIndel() && this.ignoreIndels) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "Indel").make());
                continue;
            }
            if (adaptivematch) {
                double minAlleleLength = Math.min(0, ctx.getAlleles().stream().mapToInt(A -> A.length()).min().orElse(0));
                double maxAlleleLength = Math.max(1, ctx.getAlleles().stream().mapToInt(A -> A.length()).max().orElse(1));
                this.liftOver.setLiftOverMinMatch(minAlleleLength / maxAlleleLength);
            }
            final Interval lifted = liftOver.liftOver(new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), // negative strand
            false, String.join("|", ctx.getContig(), String.valueOf(ctx.getStart()), ctx.getReference().toString())));
            if (lifted == null) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "LiftOverFailed").make());
            } else if (this.indexedFastaSequenceFile.getSequenceDictionary().getSequence(lifted.getContig()) == null) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "ContigMissingDictionary|" + lifted.getContig()).make());
            } else {
                boolean alleleAreValidatedVsRef = true;
                // part of the code was copied from picard/liftovervcf
                final Map<Allele, Allele> reverseComplementAlleleMap = new HashMap<>();
                final List<Allele> alleles = new ArrayList<Allele>();
                for (final Allele oldAllele : ctx.getAlleles()) {
                    final Allele fixedAllele;
                    if (oldAllele.isSymbolic() || oldAllele.isNoCall() || oldAllele.equals(Allele.SPAN_DEL)) {
                        alleles.add(oldAllele);
                        continue;
                    } else if (lifted.isPositiveStrand()) {
                        fixedAllele = oldAllele;
                        alleles.add(oldAllele);
                    } else {
                        fixedAllele = Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference());
                        alleles.add(fixedAllele);
                        reverseComplementAlleleMap.put(oldAllele, fixedAllele);
                    }
                    if (this.checkAlleleSequence) {
                        if (genomicSequence == null || !genomicSequence.getChrom().equals(lifted.getContig())) {
                            genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, lifted.getContig());
                        }
                        final String alleleStr = fixedAllele.getBaseString();
                        int x = 0;
                        while (x < alleleStr.length() && lifted.getStart() - 1 + x < genomicSequence.length()) {
                            final char refChar = genomicSequence.charAt(lifted.getStart() - 1 + x);
                            if (Character.toLowerCase(refChar) != Character.toLowerCase(alleleStr.charAt(x))) {
                                alleleAreValidatedVsRef = false;
                                break;
                            }
                            ++x;
                        }
                        if (x != alleleStr.length()) {
                            alleleAreValidatedVsRef = false;
                            break;
                        }
                    }
                }
                if (!alleleAreValidatedVsRef) {
                    if (failed != null)
                        failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleMismatchRef").make());
                    continue;
                }
                if (lifted.getEnd() - lifted.getStart() != ctx.getEnd() - ctx.getStart()) {
                    if (failed != null)
                        failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleBadLength|" + lifted.length()).make());
                    continue;
                }
                final VariantContextBuilder vcb = new VariantContextBuilder(ctx.getSource(), lifted.getContig(), lifted.getStart(), lifted.getEnd(), alleles);
                vcb.id(ctx.getID());
                vcb.attributes(ctx.getAttributes());
                vcb.attribute(this.infoTag, ctx.getContig() + "|" + ctx.getStart() + "|" + ctx.getReference().getDisplayString());
                vcb.filters(ctx.getFilters());
                vcb.log10PError(ctx.getLog10PError());
                final GenotypesContext genotypeContext = ctx.getGenotypes();
                final GenotypesContext fixedGenotypes = GenotypesContext.create(genotypeContext.size());
                for (final Genotype genotype : genotypeContext) {
                    final List<Allele> fixedAlleles = new ArrayList<Allele>();
                    for (final Allele allele : genotype.getAlleles()) {
                        final Allele fixedAllele = reverseComplementAlleleMap.containsKey(allele) ? reverseComplementAlleleMap.get(allele) : allele;
                        fixedAlleles.add(fixedAllele);
                    }
                    fixedGenotypes.add(new GenotypeBuilder(genotype).alleles(fixedAlleles).make());
                }
                vcb.genotypes(fixedGenotypes);
                out.add(vcb.make());
            }
        }
        if (failed != null) {
            failed.close();
            failed = null;
        }
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(failed);
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) LiftOver(htsjdk.samtools.liftover.LiftOver) Interval(htsjdk.samtools.util.Interval) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Set(java.util.Set) Collectors(java.util.stream.Collectors) File(java.io.File) GenotypesContext(htsjdk.variant.variantcontext.GenotypesContext) List(java.util.List) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) GenotypesContext(htsjdk.variant.variantcontext.GenotypesContext) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ArrayList(java.util.ArrayList) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashMap(java.util.HashMap) Map(java.util.Map) Interval(htsjdk.samtools.util.Interval)

Example 67 with Allele

use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.

the class AlleleFrequencyCalculator method doWork.

@Override
public int doWork(final List<String> args) {
    PrintStream out = null;
    VcfIterator in = null;
    try {
        in = super.openVcfIterator(oneAndOnlyOneFile(args));
        out = openFileOrStdoutAsPrintStream(outputFile);
        out.println("CHR\tPOS\tID\tREF\tALT\tTOTAL_CNT\tALT_CNT\tFRQ");
        while (in.hasNext() && !out.checkError()) {
            final VariantContext ctx = in.next();
            final Allele ref = ctx.getReference();
            if (ref == null)
                continue;
            if (ctx.getNSamples() == 0 || ctx.getAlternateAlleles().isEmpty())
                continue;
            final Allele alt = ctx.getAltAlleleWithHighestAlleleCount();
            if (alt == null)
                continue;
            final GenotypesContext genotypes = ctx.getGenotypes();
            if (genotypes == null)
                continue;
            int total_ctn = 0;
            int alt_ctn = 0;
            for (int i = 0; i < genotypes.size(); ++i) {
                final Genotype g = genotypes.get(i);
                for (final Allele allele : g.getAlleles()) {
                    if (allele.equals(ref)) {
                        total_ctn++;
                    } else if (allele.equals(alt)) {
                        total_ctn++;
                        alt_ctn++;
                    }
                }
            }
            out.print(ctx.getContig());
            out.print("\t");
            out.print(ctx.getStart());
            out.print("\t");
            out.print(ctx.hasID() ? ctx.getID() : ".");
            out.print("\t");
            out.print(ref.getBaseString());
            out.print("\t");
            out.print(alt.getBaseString());
            out.print("\t");
            out.print(total_ctn);
            out.print("\t");
            out.print(alt_ctn);
            out.print("\t");
            out.print(alt_ctn / (float) total_ctn);
            out.println();
        }
        out.flush();
        out.close();
        out = null;
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
        CloserUtil.close(in);
    }
}
Also used : PrintStream(java.io.PrintStream) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Allele(htsjdk.variant.variantcontext.Allele) GenotypesContext(htsjdk.variant.variantcontext.GenotypesContext) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype)

Example 68 with Allele

use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.

the class VCFUtils method copyAlternateAlleleAnnotations.

/**
 * copy InfoHeaderType.ALLELE from ctx from annotationVarian
 */
public static List<Object> copyAlternateAlleleAnnotations(final List<Allele> destAlternateAlleles, final VariantContext annotationVariant, final String infoAttributeName) {
    final List<Object> attributes = new ArrayList<>(destAlternateAlleles.size());
    final List<Allele> galts = annotationVariant.getAlternateAlleles();
    final List<Object> gatts = annotationVariant.getAttributeAsList(infoAttributeName);
    for (final Allele a : destAlternateAlleles) {
        Object found = null;
        // final int idx=gnomadCtx.getAlleleIndex(a);//non idx(REF)==0
        final int idx = galts.indexOf(a);
        if (idx >= 0) {
            if (idx < gatts.size() && gatts.get(idx) != null && !gatts.get(idx).equals(".")) {
                found = gatts.get(idx);
            }
        }
        attributes.add(found);
    }
    return attributes;
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) ArrayList(java.util.ArrayList)

Example 69 with Allele

use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.

the class VariantAttributesRecalculator method apply.

public VariantContext apply(final VariantContext ctx) {
    if (this.recalculation_is_disabled)
        return ctx;
    if (!ctx.hasGenotypes())
        return ctx;
    if (!header_was_set)
        throw new IllegalStateException("Vcf header was not set !");
    final Predicate<Genotype> gtFilter = G -> recalculation_ignore_gt_filtered || !G.isFiltered();
    final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
    if (this.DP_flag) {
        vcb.attribute(VCFConstants.DEPTH_KEY, ctx.getGenotypes().stream().filter(gtFilter).filter(G -> G.hasDP()).mapToInt(G -> G.getDP()).sum());
    }
    final List<Integer> acL = new ArrayList<>();
    for (final Allele alt : ctx.getAlternateAlleles()) {
        final int ac = (int) ctx.getGenotypes().stream().filter(gtFilter).mapToLong(G -> G.getAlleles().stream().filter(A -> A.equals(alt)).count()).sum();
        acL.add(ac);
    }
    final int AN = (int) ctx.getGenotypes().stream().filter(gtFilter).filter(G -> G.isCalled()).mapToLong(G -> G.getAlleles().stream().filter(A -> A.isCalled()).count()).sum();
    if (this.AC_flag) {
        vcb.rmAttribute(VCFConstants.ALLELE_COUNT_KEY);
        if (!acL.isEmpty())
            vcb.attribute(VCFConstants.ALLELE_COUNT_KEY, acL);
    }
    if (this.AN_flag) {
        vcb.rmAttribute(VCFConstants.ALLELE_NUMBER_KEY);
        vcb.attribute(VCFConstants.ALLELE_NUMBER_KEY, AN);
    }
    if (this.AF_flag) {
        vcb.rmAttribute(VCFConstants.ALLELE_FREQUENCY_KEY);
        if (AN > 0 && !acL.isEmpty()) {
            final List<Double> afL = new ArrayList<>(acL.size());
            for (int x = 0; x < acL.size(); ++x) {
                afL.add(acL.get(x) / (double) AN);
            }
            vcb.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, afL);
        }
    }
    return vcb.make();
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) Parameter(com.beust.jcommander.Parameter) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) Predicate(java.util.function.Predicate) VCFHeader(htsjdk.variant.vcf.VCFHeader) Set(java.util.Set) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) List(java.util.List) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFConstants(htsjdk.variant.vcf.VCFConstants) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ArrayList(java.util.ArrayList) Genotype(htsjdk.variant.variantcontext.Genotype)

Example 70 with Allele

use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.

the class GenotypeBinding method objectToEntry.

@Override
public void objectToEntry(Genotype g, TupleOutput out) {
    out.writeString(g.getSampleName());
    /* ============================================= */
    if (g.hasDP()) {
        out.writeBoolean(true);
        out.writeInt(g.getDP());
    } else {
        out.writeBoolean(false);
    }
    /* ============================================= */
    if (g.hasAD()) {
        out.writeBoolean(true);
        arrayOfIntToEntry(g.getAD(), out);
    } else {
        out.writeBoolean(false);
    }
    /* ============================================= */
    if (g.hasGQ()) {
        out.writeBoolean(true);
        out.writeInt(g.getGQ());
    } else {
        out.writeBoolean(false);
    }
    /* ============================================= */
    if (g.hasPL()) {
        out.writeBoolean(true);
        arrayOfIntToEntry(g.getPL(), out);
    } else {
        out.writeBoolean(false);
    }
    /* ALLELES ======================================== */
    List<Allele> alleles = g.getAlleles();
    out.writeInt(alleles.size());
    for (Allele a : alleles) {
        this.alleleBinding.objectToEntry(a, out);
    }
    /* ATTRIBUTES ===================================== */
    Map<String, Object> xAtts = g.getExtendedAttributes();
    out.writeInt(xAtts.size());
    for (String attKey : xAtts.keySet()) {
        out.writeString(attKey);
        super.writeAttribute(out, xAtts.get(attKey));
    }
}
Also used : Allele(htsjdk.variant.variantcontext.Allele)

Aggregations

Allele (htsjdk.variant.variantcontext.Allele)157 VariantContext (htsjdk.variant.variantcontext.VariantContext)82 Genotype (htsjdk.variant.variantcontext.Genotype)72 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)66 ArrayList (java.util.ArrayList)50 Test (org.testng.annotations.Test)48 VCFHeader (htsjdk.variant.vcf.VCFHeader)42 GenotypeBuilder (htsjdk.variant.variantcontext.GenotypeBuilder)37 File (java.io.File)31 Collectors (java.util.stream.Collectors)31 HashSet (java.util.HashSet)30 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)29 IOException (java.io.IOException)28 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)26 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)25 VCFConstants (htsjdk.variant.vcf.VCFConstants)22 VCFFormatHeaderLine (htsjdk.variant.vcf.VCFFormatHeaderLine)22 List (java.util.List)22 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)21 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)20