Search in sources :

Example 31 with GenotypeBuilder

use of htsjdk.variant.variantcontext.GenotypeBuilder in project gatk by broadinstitute.

the class DepthPerAlleleBySample method annotate.

@Override
public void annotate(final ReferenceContext ref, final VariantContext vc, final Genotype g, final GenotypeBuilder gb, final ReadLikelihoods<Allele> likelihoods) {
    Utils.nonNull(gb, "gb is null");
    Utils.nonNull(vc, "vc is null");
    if (g == null || !g.isCalled() || likelihoods == null) {
        return;
    }
    final Set<Allele> alleles = new LinkedHashSet<>(vc.getAlleles());
    // make sure that there's a meaningful relationship between the alleles in the likelihoods and our VariantContext
    Utils.validateArg(likelihoods.alleles().containsAll(alleles), () -> "VC alleles " + alleles + " not a  subset of ReadLikelihoods alleles " + likelihoods.alleles());
    final Map<Allele, Integer> alleleCounts = new LinkedHashMap<>();
    for (final Allele allele : vc.getAlleles()) {
        alleleCounts.put(allele, 0);
    }
    final Map<Allele, List<Allele>> alleleSubset = alleles.stream().collect(Collectors.toMap(a -> a, Arrays::asList));
    final ReadLikelihoods<Allele> subsettedLikelihoods = likelihoods.marginalize(alleleSubset);
    subsettedLikelihoods.bestAlleles(g.getSampleName()).stream().filter(ba -> ba.isInformative()).forEach(ba -> alleleCounts.compute(ba.allele, (allele, prevCount) -> prevCount + 1));
    final int[] counts = new int[alleleCounts.size()];
    //first one in AD is always ref
    counts[0] = alleleCounts.get(vc.getReference());
    for (int i = 0; i < vc.getAlternateAlleles().size(); i++) {
        counts[i + 1] = alleleCounts.get(vc.getAlternateAllele(i));
    }
    gb.AD(counts);
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) Allele(htsjdk.variant.variantcontext.Allele) ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods) java.util(java.util) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Utils(org.broadinstitute.hellbender.utils.Utils) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) Collectors(java.util.stream.Collectors) VCFConstants(htsjdk.variant.vcf.VCFConstants) Allele(htsjdk.variant.variantcontext.Allele)

Example 32 with GenotypeBuilder

use of htsjdk.variant.variantcontext.GenotypeBuilder in project gatk-protected by broadinstitute.

the class StrandArtifact method annotate.

@Override
public void annotate(final ReferenceContext ref, final VariantContext vc, final Genotype g, final GenotypeBuilder gb, final ReadLikelihoods<Allele> likelihoods) {
    Utils.nonNull(gb);
    Utils.nonNull(vc);
    Utils.nonNull(likelihoods);
    // do not annotate the genotype fields for normal
    if (g.isHomRef()) {
        return;
    }
    pi.put(NO_ARTIFACT, 0.95);
    pi.put(ART_FWD, 0.025);
    pi.put(ART_REV, 0.025);
    // We use the allele with highest LOD score
    final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY, () -> null, -1);
    final int indexOfMaxTumorLod = MathUtils.maxElementIndex(tumorLods);
    final Allele altAlelle = vc.getAlternateAllele(indexOfMaxTumorLod);
    final Collection<ReadLikelihoods<Allele>.BestAllele<Allele>> bestAlleles = likelihoods.bestAlleles(g.getSampleName());
    final int numFwdAltReads = (int) bestAlleles.stream().filter(ba -> !ba.read.isReverseStrand() && ba.isInformative() && ba.allele.equals(altAlelle)).count();
    final int numRevAltReads = (int) bestAlleles.stream().filter(ba -> ba.read.isReverseStrand() && ba.isInformative() && ba.allele.equals(altAlelle)).count();
    final int numFwdReads = (int) bestAlleles.stream().filter(ba -> !ba.read.isReverseStrand() && ba.isInformative()).count();
    final int numRevReads = (int) bestAlleles.stream().filter(ba -> ba.read.isReverseStrand() && ba.isInformative()).count();
    final int numAltReads = numFwdAltReads + numRevAltReads;
    final int numReads = numFwdReads + numRevReads;
    final EnumMap<StrandArtifactZ, Double> unnormalized_posterior_probabilities = new EnumMap<>(StrandArtifactZ.class);
    final EnumMap<StrandArtifactZ, Double> maximum_a_posteriori_allele_fraction_estimates = new EnumMap<>(StrandArtifactZ.class);
    /*** Compute the posterior probability of ARTIFACT_FWD and ARTIFACT_REV; it's a double integral over f and epsilon ***/
    // the integrand is a polynomial of degree n, where n is the number of reads at the locus
    // thus to integrate exactly with Gauss-Legendre we need (n/2)+1 points
    final int numIntegPointsForAlleleFraction = numReads / 2 + 1;
    final int numIntegPointsForEpsilon = (numReads + ALPHA + BETA - 2) / 2 + 1;
    final double likelihoodForArtifactFwd = IntegrationUtils.integrate2d((f, epsilon) -> getIntegrandGivenArtifact(f, epsilon, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads), 0.0, 1.0, numIntegPointsForAlleleFraction, 0.0, 1.0, numIntegPointsForEpsilon);
    final double likelihoodForArtifactRev = IntegrationUtils.integrate2d((f, epsilon) -> getIntegrandGivenArtifact(f, epsilon, numRevReads, numFwdReads, numRevAltReads, numFwdAltReads), 0.0, 1.0, numIntegPointsForAlleleFraction, 0.0, 1.0, numIntegPointsForEpsilon);
    unnormalized_posterior_probabilities.put(ART_FWD, pi.get(ART_FWD) * likelihoodForArtifactFwd);
    unnormalized_posterior_probabilities.put(ART_REV, pi.get(ART_REV) * likelihoodForArtifactRev);
    /*** Compute the posterior probability of NO_ARTIFACT; evaluate a single integral over the allele fraction ***/
    final double likelihoodForNoArtifact = IntegrationUtils.integrate(f -> getIntegrandGivenNoArtifact(f, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads), 0.0, 1.0, numIntegPointsForAlleleFraction);
    unnormalized_posterior_probabilities.put(NO_ARTIFACT, pi.get(NO_ARTIFACT) * likelihoodForNoArtifact);
    final double[] posterior_probabilities = MathUtils.normalizeFromRealSpace(unnormalized_posterior_probabilities.values().stream().mapToDouble(Double::doubleValue).toArray());
    /*** Compute the maximum a posteriori estimate for allele fraction given strand artifact ***/
    // For a fixed f, integrate the double integral over epsilons. This gives us the likelihood p(x^+, x^- | f, z) for a fixed f, which is proportional to
    // the posterior probability of p(f | x^+, x^-, z)
    final int numSamplePoints = 100;
    final double[] samplePoints = GATKProtectedMathUtils.createEvenlySpacedPoints(0.0, 1.0, numSamplePoints);
    double[] likelihoodsGivenForwardArtifact = new double[numSamplePoints];
    double[] likelihoodsGivenReverseArtifact = new double[numSamplePoints];
    for (int i = 0; i < samplePoints.length; i++) {
        final double f = samplePoints[i];
        likelihoodsGivenForwardArtifact[i] = IntegrationUtils.integrate(epsilon -> getIntegrandGivenArtifact(f, epsilon, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads), 0.0, 1.0, numIntegPointsForEpsilon);
        likelihoodsGivenReverseArtifact[i] = IntegrationUtils.integrate(epsilon -> getIntegrandGivenArtifact(f, epsilon, numRevReads, numFwdReads, numRevAltReads, numFwdAltReads), 0.0, 1.0, numIntegPointsForEpsilon);
    }
    final int maxAlleleFractionIndexFwd = MathUtils.maxElementIndex(likelihoodsGivenForwardArtifact);
    final int maxAlleleFractionIndexRev = MathUtils.maxElementIndex(likelihoodsGivenReverseArtifact);
    maximum_a_posteriori_allele_fraction_estimates.put(ART_FWD, samplePoints[maxAlleleFractionIndexFwd]);
    maximum_a_posteriori_allele_fraction_estimates.put(ART_REV, samplePoints[maxAlleleFractionIndexRev]);
    // In the absence of strand artifact, MAP estimate for f reduces to the sample alt allele fraction
    maximum_a_posteriori_allele_fraction_estimates.put(NO_ARTIFACT, (double) numAltReads / numReads);
    gb.attribute(POSTERIOR_PROBABILITIES_KEY, posterior_probabilities);
    gb.attribute(MAP_ALLELE_FRACTIONS_KEY, maximum_a_posteriori_allele_fraction_estimates.values().stream().mapToDouble(Double::doubleValue).toArray());
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) Allele(htsjdk.variant.variantcontext.Allele) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) java.util(java.util) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) GATKVCFConstants(org.broadinstitute.hellbender.utils.variant.GATKVCFConstants) BetaDistribution(org.apache.commons.math3.distribution.BetaDistribution) ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods) StrandArtifactZ(org.broadinstitute.hellbender.tools.walkers.annotator.StrandArtifact.StrandArtifactZ) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) org.broadinstitute.hellbender.utils(org.broadinstitute.hellbender.utils) Allele(htsjdk.variant.variantcontext.Allele) StrandArtifactZ(org.broadinstitute.hellbender.tools.walkers.annotator.StrandArtifact.StrandArtifactZ)

Example 33 with GenotypeBuilder

use of htsjdk.variant.variantcontext.GenotypeBuilder in project jvarkit by lindenb.

the class FastGenotypeGVCFs method doWork.

@Override
public int doWork(final List<String> args) {
    VariantContextWriter w = null;
    try {
        this.gvcfSources.addAll(IOUtil.unrollFiles(args.stream().map(S -> new File(S)).collect(Collectors.toSet()), ".g.vcf", ".g.vcf.gz").stream().map(F -> new Source(F)).collect(Collectors.toList()));
        if (args.isEmpty()) {
            LOG.error("No gvcf file was given");
            return -1;
        }
        this.gvcfSources.stream().forEach(S -> S.open());
        this.dictionary = this.gvcfSources.get(0).vcfFileReader.getFileHeader().getSequenceDictionary();
        if (this.dictionary == null) {
            LOG.error("Dict missing in " + this.gvcfSources.get(0).gvcfFile);
            return -1;
        }
        this.gvcfSources.stream().map(S -> S.vcfFileReader.getFileHeader().getSequenceDictionary()).forEach(D -> {
            if (D == null || !SequenceUtil.areSequenceDictionariesEqual(D, dictionary)) {
                throw new JvarkitException.UserError("dict missing or dict are not the same");
            }
        });
        if (gvcfSources.stream().map(S -> S.sampleName).collect(Collectors.toSet()).stream().count() != this.gvcfSources.size()) {
            LOG.error("Duplicate sample name. check input");
            return -1;
        }
        final Set<VCFHeaderLine> metaData = new HashSet<>();
        VCFStandardHeaderLines.addStandardFormatLines(metaData, true, VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.GENOTYPE_PL_KEY);
        metaData.addAll(gvcfSources.stream().flatMap(S -> S.vcfFileReader.getFileHeader().getFormatHeaderLines().stream()).collect(Collectors.toSet()));
        final VCFHeader header = new VCFHeader(metaData, gvcfSources.stream().map(S -> S.sampleName).collect(Collectors.toList()));
        w = super.openVariantContextWriter(outputFile);
        w.writeHeader(header);
        int contigTid = 0;
        while (contigTid < dictionary.size()) {
            final SAMSequenceRecord ssr = this.dictionary.getSequence(contigTid);
            int pos = 0;
            while (pos < ssr.getSequenceLength()) {
                // LOG.debug(ssr.getSequenceName()+" "+pos+" "+this.gvcfSources.size());
                GvcfVariant variantAtThisPos = null;
                int minEnd = ssr.getSequenceLength();
                // cleanup
                for (final Source src : this.gvcfSources) {
                    for (; ; ) {
                        final GvcfThing gvcfthing = src.get();
                        // LOG.debug(""+gvcfthing+" "+src.sampleName+" "+ssr.getSequenceName()+":"+pos);
                        if (gvcfthing == null) {
                            // no more variant avaialble
                            break;
                        } else if (contigTid > gvcfthing.getTid()) {
                            // observed contig is after gvcfthing.contig
                            src.current = null;
                            continue;
                        } else if (contigTid < gvcfthing.getTid()) {
                            // observed contig is before gvcfthing.contig
                            break;
                        } else if (gvcfthing.getEnd() < pos) {
                            // variant information is before observed pos
                            src.current = null;
                            continue;
                        } else if (gvcfthing.getStart() > pos) {
                            // variant information is after observed pos
                            minEnd = Math.min(minEnd, gvcfthing.getStart() - 1);
                            break;
                        } else if (gvcfthing.isVariant()) {
                            if (variantAtThisPos == null || variantContigPosRefComparator.compare(GvcfVariant.class.cast(gvcfthing).ctx, variantAtThisPos.ctx) < 0) {
                                variantAtThisPos = GvcfVariant.class.cast(gvcfthing);
                            }
                            break;
                        } else if (gvcfthing.isBlock()) {
                            minEnd = Math.min(minEnd, gvcfthing.getEnd());
                            break;
                        } else {
                            LOG.debug("??");
                        }
                    }
                }
                if (variantAtThisPos == null) {
                    pos = minEnd + 1;
                } else {
                    final VariantContext archetype = variantAtThisPos.ctx;
                    final List<VariantContext> allVariants = this.gvcfSources.stream().map(S -> S.get()).filter(G -> G != null && G.isVariant()).map(G -> GvcfVariant.class.cast(G).ctx).filter(V -> variantContigPosRefComparator.compare(V, archetype) == 0).collect(Collectors.toList());
                    ;
                    final Set<Allele> alleles = allVariants.stream().flatMap(V -> V.getGenotypes().stream()).flatMap(G -> G.getAlleles().stream()).filter(A -> !(A.equals(NON_REF) || A.isNoCall())).collect(Collectors.toSet());
                    alleles.add(archetype.getReference());
                    final VariantContextBuilder vcb = new VariantContextBuilder(getClass().getName(), archetype.getContig(), archetype.getStart(), archetype.getEnd(), alleles);
                    if (archetype.hasID()) {
                        vcb.id(archetype.getID());
                    }
                    final List<Genotype> genotypes = new ArrayList<>(allVariants.size());
                    for (VariantContext ctx : allVariants) {
                        Genotype genotype = ctx.getGenotype(0);
                        GenotypeBuilder gb = new GenotypeBuilder(genotype);
                        genotypes.add(gb.make());
                    }
                    vcb.genotypes(genotypes);
                    final VariantContext genotypedVariant;
                    try {
                        genotypedVariant = vcb.make();
                    } catch (Exception err2) {
                        LOG.debug(allVariants);
                        LOG.error(err2);
                        return -1;
                    }
                    w.add(genotypedVariant);
                    // reset source for the current variant
                    for (final Source src : this.gvcfSources) {
                        if (src.current != null && variantContigPosRefComparator.compare(src.current.ctx, archetype) == 0) {
                            src.current = null;
                        }
                    }
                }
            }
            ++contigTid;
        }
        w.close();
        w = null;
        this.gvcfSources.stream().forEach(S -> S.close());
        this.gvcfSources.clear();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        for (final Source src : this.gvcfSources) src.close();
        CloserUtil.close(w);
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) IOUtil(htsjdk.samtools.util.IOUtil) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) PeekableIterator(htsjdk.samtools.util.PeekableIterator) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFConstants(htsjdk.variant.vcf.VCFConstants) Locatable(htsjdk.samtools.util.Locatable) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Comparator(java.util.Comparator) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) HashSet(java.util.HashSet)

Example 34 with GenotypeBuilder

use of htsjdk.variant.variantcontext.GenotypeBuilder in project jvarkit by lindenb.

the class VcfMultiToOne method doWork.

@Override
public int doWork(final List<String> arguments) {
    VariantContextWriter out = null;
    Set<String> args = IOUtils.unrollFiles(arguments);
    List<VcfIterator> inputs = new ArrayList<>(args.size() + 1);
    List<String> inputFiles = new ArrayList<>(args.size() + 1);
    try {
        if (args.isEmpty() && arguments.isEmpty()) {
            inputs.add(VCFUtils.createVcfIteratorStdin());
            inputFiles.add("stdin");
        } else if (args.isEmpty()) {
            LOG.error("No vcf provided");
            return -1;
        } else {
            for (final String vcfFile : args) {
                inputs.add(VCFUtils.createVcfIterator(vcfFile));
                inputFiles.add(VCFUtils.escapeInfoField(vcfFile));
            }
        }
        SAMSequenceDictionary dict = null;
        final Set<String> sampleNames = new HashSet<String>();
        final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
        for (final VcfIterator in : inputs) {
            final VCFHeader header = in.getHeader();
            if (dict == null) {
                dict = header.getSequenceDictionary();
            } else if (header.getSequenceDictionary() == null) {
                LOG.error("No Dictionary in vcf");
                return -1;
            } else if (!SequenceUtil.areSequenceDictionariesEqual(dict, header.getSequenceDictionary())) {
                LOG.error("Not the same dictionary between vcfs");
                return -1;
            }
            metaData.addAll(in.getHeader().getMetaDataInInputOrder());
            sampleNames.addAll(in.getHeader().getSampleNamesInOrder());
        }
        final Comparator<VariantContext> comparator = (dict == null ? VCFUtils.createChromPosRefComparator() : VCFUtils.createTidPosRefComparator(dict));
        // addMetaData(metaData);
        metaData.add(new VCFInfoHeaderLine(DEFAULT_SAMPLE_TAGID, 1, VCFHeaderLineType.String, "Sample Name from multi-sample vcf"));
        metaData.add(new VCFInfoHeaderLine(DEFAULT_SAMPLE_FILETAGID, 1, VCFHeaderLineType.String, "Origin of sample"));
        for (final String sample : sampleNames) {
            metaData.add(new VCFHeaderLine(SAMPLE_HEADER_DECLARATION, sample));
        }
        final VCFHeader h2 = new VCFHeader(metaData, Collections.singleton(DEFAULT_VCF_SAMPLE_NAME));
        recalculator.setHeader(h2);
        out = super.openVariantContextWriter(this.outputFile);
        out.writeHeader(h2);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        for (; ; ) {
            if (out.checkError())
                break;
            /* get 'smallest' variant */
            VariantContext smallest = null;
            int idx = 0;
            int best_idx = -1;
            while (idx < inputs.size()) {
                final VcfIterator in = inputs.get(idx);
                if (!in.hasNext()) {
                    CloserUtil.close(in);
                    inputs.remove(idx);
                    inputFiles.remove(idx);
                } else {
                    final VariantContext ctx = in.peek();
                    if (smallest == null || comparator.compare(smallest, ctx) > 0) {
                        smallest = ctx;
                        best_idx = idx;
                    }
                    ++idx;
                }
            }
            if (smallest == null)
                break;
            final VariantContext ctx = progress.watch(inputs.get(best_idx).next());
            if (ctx.getNSamples() == 0) {
                if (!this.discard_no_call) {
                    final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                    vcb.attribute(DEFAULT_SAMPLE_FILETAGID, inputFiles.get(best_idx));
                    vcb.genotypes(GenotypeBuilder.createMissing(DEFAULT_VCF_SAMPLE_NAME, 2));
                    out.add(this.recalculator.apply(vcb.make()));
                }
                continue;
            }
            for (int i = 0; i < ctx.getNSamples(); ++i) {
                final Genotype g = ctx.getGenotype(i);
                final String sample = g.getSampleName();
                if (!g.isCalled() && this.discard_no_call)
                    continue;
                if (!g.isAvailable() && this.discard_non_available)
                    continue;
                if (g.isHomRef() && this.discard_hom_ref)
                    continue;
                final GenotypeBuilder gb = new GenotypeBuilder(g);
                gb.name(DEFAULT_VCF_SAMPLE_NAME);
                final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                vcb.attribute(DEFAULT_SAMPLE_TAGID, sample);
                vcb.attribute(DEFAULT_SAMPLE_FILETAGID, inputFiles.get(best_idx));
                vcb.genotypes(gb.make());
                out.add(this.recalculator.apply(vcb.make()));
            }
        }
        progress.finish();
        LOG.debug("done");
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(inputs);
        CloserUtil.close(out);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet)

Example 35 with GenotypeBuilder

use of htsjdk.variant.variantcontext.GenotypeBuilder in project jvarkit by lindenb.

the class SoftClipAnnotator method map.

public Integer map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
    if (tracker == null)
        return 0;
    final Collection<VariantContext> VCs = tracker.getValues(variantCollection.variants, context.getLocation());
    if (VCs.isEmpty()) {
        return 0;
    }
    final Collection<ReadFilter> readFilters = super.getToolkit().getFilters();
    for (final VariantContext ctx : VCs) {
        int observed_genotypes = 0;
        int num_some_clipped_genotypes = 0;
        final List<Genotype> genotypes = new ArrayList<>(ctx.getNSamples());
        for (int i = 0; i < ctx.getNSamples(); ++i) {
            final Genotype g = ctx.getGenotype(i);
            if (!g.isCalled() || g.isNoCall() || g.isHomRef()) {
                genotypes.add(g);
                continue;
            }
            final List<SamReader> bamReaderForSample = this.sample2bam.get(g.getSampleName());
            if (bamReaderForSample.isEmpty()) {
                genotypes.add(g);
                continue;
            }
            observed_genotypes++;
            int numberOfClipsOverlapping = 0;
            for (final SamReader samReader : bamReaderForSample) {
                final SAMRecordIterator iter = samReader.query(ctx.getContig(), Math.max(0, ctx.getStart() - extend4clip), ctx.getEnd() + extend4clip, false);
                while (iter.hasNext()) {
                    final SAMRecord samRecord = iter.next();
                    if (samRecord.getReadUnmappedFlag() || samRecord.getCigar() == null || !samRecord.getContig().equals(ctx.getContig()) || samRecord.getUnclippedEnd() < ctx.getStart() || samRecord.getUnclippedStart() > ctx.getEnd() || samRecord.getReadGroup() == null || !g.getSampleName().equals(samRecord.getReadGroup().getSample()))
                        continue;
                    boolean ok = true;
                    for (final ReadFilter readFilter : readFilters) {
                        // this one cannot't be correctly initialized...
                        if (readFilter instanceof MalformedReadFilter)
                            continue;
                        if (readFilter.filterOut(samRecord)) {
                            ok = false;
                            break;
                        }
                    }
                    if (!ok)
                        continue;
                    int refPos = samRecord.getUnclippedStart();
                    for (final CigarElement ce : samRecord.getCigar()) {
                        if (ce.getOperator().consumesReferenceBases() || ce.getOperator().isClipping()) {
                            final int refEnd = refPos + ce.getLength();
                            if (!(ctx.getEnd() < refPos || refEnd < ctx.getStart())) {
                                // System.err.println("IN "+ce+" "+ctx.getStart()+"-"+ctx.getEnd()+" : "+refPos+"-"+refPos+ce.getLength());
                                if (ce.getOperator().equals(CigarOperator.S)) {
                                    ++numberOfClipsOverlapping;
                                }
                            }
                            refPos += ce.getLength();
                        }
                        if (refPos > ctx.getEnd())
                            break;
                    }
                }
                iter.close();
            }
            if (numberOfClipsOverlapping == 0) {
                genotypes.add(g);
            } else {
                GenotypeBuilder gb = new GenotypeBuilder(g);
                gb.attribute(numClipInGenotypeFormatHeaderLine.getID(), numberOfClipsOverlapping);
                genotypes.add(gb.make());
                num_some_clipped_genotypes++;
            }
        }
        if (num_some_clipped_genotypes == 0) {
            vcfWriter.add(ctx);
        } else {
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.genotypes(genotypes);
            if (observed_genotypes == num_some_clipped_genotypes) {
                vcb.filter(this.filterHaveClipInVariant.getID());
            }
            vcfWriter.add(vcb.make());
        }
    }
    return VCs.size();
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) CigarElement(htsjdk.samtools.CigarElement) SamReader(htsjdk.samtools.SamReader) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) MalformedReadFilter(org.broadinstitute.gatk.engine.filters.MalformedReadFilter) SAMRecord(htsjdk.samtools.SAMRecord) MalformedReadFilter(org.broadinstitute.gatk.engine.filters.MalformedReadFilter) ReadFilter(org.broadinstitute.gatk.engine.filters.ReadFilter)

Aggregations

GenotypeBuilder (htsjdk.variant.variantcontext.GenotypeBuilder)51 Genotype (htsjdk.variant.variantcontext.Genotype)43 VariantContext (htsjdk.variant.variantcontext.VariantContext)37 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)37 Allele (htsjdk.variant.variantcontext.Allele)34 ArrayList (java.util.ArrayList)30 VCFHeader (htsjdk.variant.vcf.VCFHeader)27 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)23 HashSet (java.util.HashSet)22 VCFFormatHeaderLine (htsjdk.variant.vcf.VCFFormatHeaderLine)21 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)20 File (java.io.File)17 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)15 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)14 Collectors (java.util.stream.Collectors)14 HashMap (java.util.HashMap)13 List (java.util.List)13 Set (java.util.Set)13 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)12 VCFHeaderLineType (htsjdk.variant.vcf.VCFHeaderLineType)11