Search in sources :

Example 6 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.

the class AssemblyResultSetUnitTest method assemblyResults.

@DataProvider(name = "assemblyResults")
public Iterator<Object[]> assemblyResults() {
    final int size = THREE_KS_GRAPH_AND_HAPLOTYPES.length * (1 + TEN_KS_GRAPH_AND_HAPLOTYPES.length);
    final Object[][] result = new Object[size][];
    for (int i = 0; i < THREE_KS_GRAPH_AND_HAPLOTYPES.length; i++) {
        final ReadThreadingGraph rtg = new TestingReadThreadingGraph((String) THREE_KS_GRAPH_AND_HAPLOTYPES[i][0]);
        final AssemblyResult ar = new AssemblyResult(AssemblyResult.Status.ASSEMBLED_SOME_VARIATION, rtg.toSequenceGraph(), rtg);
        final Object[] haplotypeStrings = (Object[]) THREE_KS_GRAPH_AND_HAPLOTYPES[i][1];
        final Haplotype[] haplotypes = new Haplotype[haplotypeStrings.length];
        for (int j = 0; j < haplotypeStrings.length; j++) {
            haplotypes[j] = new Haplotype(((String) haplotypeStrings[j]).getBytes(), j == 0);
            haplotypes[j].setGenomeLocation(genomeLocParser.createGenomeLoc("1", 1, haplotypes[j].length() + 1));
        }
        result[i] = new Object[] { Collections.singletonList(ar), Arrays.asList(Arrays.asList(haplotypes)) };
        for (int j = 0; j < TEN_KS_GRAPH_AND_HAPLOTYPES.length; j++) {
            final ReadThreadingGraph rtg10 = new TestingReadThreadingGraph((String) TEN_KS_GRAPH_AND_HAPLOTYPES[j][0]);
            final AssemblyResult ar10 = new AssemblyResult(AssemblyResult.Status.ASSEMBLED_SOME_VARIATION, rtg10.toSequenceGraph(), rtg10);
            final Object[] haplotypeStrings10 = (Object[]) TEN_KS_GRAPH_AND_HAPLOTYPES[j][1];
            final Haplotype[] haplotype10 = new Haplotype[haplotypeStrings10.length];
            for (int k = 0; k < haplotypeStrings10.length; k++) {
                haplotype10[k] = new Haplotype(((String) haplotypeStrings10[k]).getBytes(), false);
                haplotype10[k].setGenomeLocation(genomeLocParser.createGenomeLoc("1", 1, haplotype10[k].length() + 1));
            }
            result[THREE_KS_GRAPH_AND_HAPLOTYPES.length + i * TEN_KS_GRAPH_AND_HAPLOTYPES.length + j] = new Object[] { Arrays.asList(ar, ar10), Arrays.asList(Arrays.asList(haplotypes), Arrays.asList(haplotype10)) };
        }
    }
    return Arrays.asList(result).iterator();
}
Also used : ReadThreadingGraph(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingGraph) TestingReadThreadingGraph(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.TestingReadThreadingGraph) TestingReadThreadingGraph(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.TestingReadThreadingGraph) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) DataProvider(org.testng.annotations.DataProvider)

Example 7 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.

the class AssemblyResultSetUnitTest method trimmingData.

@DataProvider(name = "trimmingData")
public Iterator<Object[]> trimmingData() {
    final AssemblyRegion activeRegion = new AssemblyRegion(new SimpleInterval("1", 1000, 1100), 25, header);
    final int length = activeRegion.getExtendedSpan().size();
    // keep it prepoducible by fixing the seed to lucky 13.
    final RandomDNA rnd = new RandomDNA(13);
    final AssemblyRegionTestDataSet actd = new AssemblyRegionTestDataSet(10, new String(rnd.nextBases(length)), new String[] { "Civar:*1T*" }, new String[0], new byte[0], new byte[0], new byte[0]);
    final List<Haplotype> haplotypes = actd.haplotypeList();
    for (final Haplotype h : haplotypes) h.setGenomeLocation(activeRegion.getExtendedSpan());
    final ReadThreadingGraph rtg = new ReadThreadingGraph(10);
    for (final Haplotype h : haplotypes) rtg.addSequence("seq-" + Math.abs(h.hashCode()), h.getBases(), h.isReference());
    final SeqGraph seqGraph = rtg.toSequenceGraph();
    final AssemblyResult ar = new AssemblyResult(AssemblyResult.Status.ASSEMBLED_SOME_VARIATION, seqGraph, rtg);
    final Map<Haplotype, AssemblyResult> result = new HashMap<>();
    for (final Haplotype h : haplotypes) result.put(h, ar);
    return Collections.singleton(new Object[] { result, activeRegion }).iterator();
}
Also used : ReadThreadingGraph(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingGraph) TestingReadThreadingGraph(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.TestingReadThreadingGraph) SeqGraph(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.SeqGraph) RandomDNA(org.broadinstitute.hellbender.utils.RandomDNA) AssemblyRegion(org.broadinstitute.hellbender.engine.AssemblyRegion) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) DataProvider(org.testng.annotations.DataProvider)

Example 8 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.

the class AssemblyBasedCallerGenotypingEngine method createAlleleMapper.

protected static Map<Allele, List<Haplotype>> createAlleleMapper(final List<VariantContext> eventsAtThisLoc, final VariantContext mergedVC, final int loc, final List<Haplotype> haplotypes) {
    Utils.validateArg(haplotypes.size() > eventsAtThisLoc.size(), "expected haplotypes.size() >= eventsAtThisLoc.size() + 1");
    final Map<Allele, List<Haplotype>> result = new LinkedHashMap<>();
    final Allele ref = mergedVC.getReference();
    result.put(ref, new ArrayList<>());
    //Note: we can't use the alleles implied by eventsAtThisLoc because they are not yet merged to a common reference
    //For example, a homopolymer AAAAA reference with a single and double deletion would yield (i) AA* A and (ii) AAA* A
    //in eventsAtThisLoc, when in mergedVC it would yield AAA* AA A
    mergedVC.getAlternateAlleles().stream().filter(a -> !a.isSymbolic()).forEach(a -> result.put(a, new ArrayList<>()));
    for (final Haplotype h : haplotypes) {
        final VariantContext haplotypeVC = h.getEventMap().get(loc);
        if (haplotypeVC == null) {
            //no events --> this haplotype supports the reference at this locus
            result.get(ref).add(h);
        } else {
            //TODO: that's not good enough
            for (int n = 0; n < eventsAtThisLoc.size(); n++) {
                if (haplotypeVC.hasSameAllelesAs(eventsAtThisLoc.get(n))) {
                    result.get(mergedVC.getAlternateAllele(n)).add(h);
                    break;
                }
            }
        }
    }
    return result;
}
Also used : java.util(java.util) GATKVCFConstants(org.broadinstitute.hellbender.utils.variant.GATKVCFConstants) Predicate(java.util.function.Predicate) SampleList(org.broadinstitute.hellbender.utils.genotyper.SampleList) htsjdk.variant.variantcontext(htsjdk.variant.variantcontext) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) EventMap(org.broadinstitute.hellbender.utils.haplotype.EventMap) ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods) Pair(org.apache.commons.lang3.tuple.Pair) org.broadinstitute.hellbender.tools.walkers.genotyper(org.broadinstitute.hellbender.tools.walkers.genotyper) AFCalculatorProvider(org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.AFCalculatorProvider) Utils(org.broadinstitute.hellbender.utils.Utils) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) SampleList(org.broadinstitute.hellbender.utils.genotyper.SampleList) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Example 9 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.

the class AssemblyBasedCallerUtils method realignReadsToTheirBestHaplotype.

/**
     * Returns a map with the original read as a key and the realigned read as the value.
     * <p>
     *     Missing keys or equivalent key and value pairs mean that the read was not realigned.
     * </p>
     * @return never {@code null}
     */
public static Map<GATKRead, GATKRead> realignReadsToTheirBestHaplotype(final ReadLikelihoods<Haplotype> originalReadLikelihoods, final Haplotype refHaplotype, final Locatable paddedReferenceLoc) {
    final Collection<ReadLikelihoods<Haplotype>.BestAllele<Haplotype>> bestAlleles = originalReadLikelihoods.bestAlleles();
    final Map<GATKRead, GATKRead> result = new HashMap<>(bestAlleles.size());
    for (final ReadLikelihoods<Haplotype>.BestAllele<Haplotype> bestAllele : bestAlleles) {
        final GATKRead originalRead = bestAllele.read;
        final Haplotype bestHaplotype = bestAllele.allele;
        final boolean isInformative = bestAllele.isInformative();
        final GATKRead realignedRead = AlignmentUtils.createReadAlignedToRef(originalRead, bestHaplotype, refHaplotype, paddedReferenceLoc.getStart(), isInformative);
        result.put(originalRead, realignedRead);
    }
    return result;
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods)

Example 10 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.

the class PairHMM method computeLog10Likelihoods.

/**
     *  Given a list of reads and haplotypes, for every read compute the total probability of said read arising from
     *  each haplotype given base substitution, insertion, and deletion probabilities.
     *
     * @param processedReads reads to analyze instead of the ones present in the destination read-likelihoods.
     * @param logLikelihoods where to store the log likelihoods where position [a][r] is reserved for the log likelihood of {@code reads[r]}
     *             conditional to {@code alleles[a]}.
     * @param gcp penalty for gap continuations base array map for processed reads.
     *
     */
public void computeLog10Likelihoods(final LikelihoodMatrix<Haplotype> logLikelihoods, final List<GATKRead> processedReads, final Map<GATKRead, byte[]> gcp) {
    if (processedReads.isEmpty()) {
        return;
    }
    if (doProfiling) {
        startTime = System.nanoTime();
    }
    // (re)initialize the pairHMM only if necessary
    final int readMaxLength = findMaxReadLength(processedReads);
    final int haplotypeMaxLength = findMaxAlleleLength(logLikelihoods.alleles());
    if (!initialized || readMaxLength > maxReadLength || haplotypeMaxLength > maxHaplotypeLength) {
        initialize(readMaxLength, haplotypeMaxLength);
    }
    final int readCount = processedReads.size();
    final List<Haplotype> alleles = logLikelihoods.alleles();
    final int alleleCount = alleles.size();
    mLogLikelihoodArray = new double[readCount * alleleCount];
    int idx = 0;
    int readIndex = 0;
    for (final GATKRead read : processedReads) {
        final byte[] readBases = read.getBases();
        final byte[] readQuals = read.getBaseQualities();
        final byte[] readInsQuals = ReadUtils.getBaseInsertionQualities(read);
        final byte[] readDelQuals = ReadUtils.getBaseDeletionQualities(read);
        final byte[] overallGCP = gcp.get(read);
        // peek at the next haplotype in the list (necessary to get nextHaplotypeBases, which is required for caching in the array implementation)
        final boolean isFirstHaplotype = true;
        for (int a = 0; a < alleleCount; a++) {
            final Allele allele = alleles.get(a);
            final byte[] alleleBases = allele.getBases();
            final byte[] nextAlleleBases = a == alleles.size() - 1 ? null : alleles.get(a + 1).getBases();
            final double lk = computeReadLikelihoodGivenHaplotypeLog10(alleleBases, readBases, readQuals, readInsQuals, readDelQuals, overallGCP, isFirstHaplotype, nextAlleleBases);
            logLikelihoods.set(a, readIndex, lk);
            mLogLikelihoodArray[idx++] = lk;
        }
        readIndex++;
    }
    if (doProfiling) {
        threadLocalPairHMMComputeTimeDiff = (System.nanoTime() - startTime);
        {
            pairHMMComputeTime += threadLocalPairHMMComputeTimeDiff;
        }
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Allele(htsjdk.variant.variantcontext.Allele) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Aggregations

Haplotype (org.broadinstitute.hellbender.utils.haplotype.Haplotype)88 Test (org.testng.annotations.Test)31 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)28 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)28 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)12 DataProvider (org.testng.annotations.DataProvider)10 VariantContext (htsjdk.variant.variantcontext.VariantContext)9 KBestHaplotype (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.KBestHaplotype)8 ArrayList (java.util.ArrayList)7 Cigar (htsjdk.samtools.Cigar)6 AssemblyRegion (org.broadinstitute.hellbender.engine.AssemblyRegion)6 HomogeneousPloidyModel (org.broadinstitute.hellbender.tools.walkers.genotyper.HomogeneousPloidyModel)6 IndependentSampleGenotypesModel (org.broadinstitute.hellbender.tools.walkers.genotyper.IndependentSampleGenotypesModel)6 PloidyModel (org.broadinstitute.hellbender.tools.walkers.genotyper.PloidyModel)6 ReadThreadingGraph (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingGraph)6 SampleList (org.broadinstitute.hellbender.utils.genotyper.SampleList)6 EventMap (org.broadinstitute.hellbender.utils.haplotype.EventMap)6 CigarElement (htsjdk.samtools.CigarElement)4 Allele (htsjdk.variant.variantcontext.Allele)4 File (java.io.File)4