Use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in the Broad Institute project gatk-protected: class AssemblyResultSetUnitTest, method assemblyResults.
@DataProvider(name = "assemblyResults")
public Iterator<Object[]> assemblyResults() {
    final int threeCount = THREE_KS_GRAPH_AND_HAPLOTYPES.length;
    final int tenCount = TEN_KS_GRAPH_AND_HAPLOTYPES.length;
    // One row per 3-k case, followed by one row per (3-k, 10-k) combination.
    final Object[][] rows = new Object[threeCount * (1 + tenCount)][];
    for (int three = 0; three < threeCount; three++) {
        final ReadThreadingGraph threeGraph = new TestingReadThreadingGraph((String) THREE_KS_GRAPH_AND_HAPLOTYPES[three][0]);
        final AssemblyResult threeResult = new AssemblyResult(AssemblyResult.Status.ASSEMBLED_SOME_VARIATION, threeGraph.toSequenceGraph(), threeGraph);
        // In the 3-k set the first haplotype plays the role of the reference.
        final Haplotype[] threeHaplotypes = composeHaplotypes((Object[]) THREE_KS_GRAPH_AND_HAPLOTYPES[three][1], true);
        rows[three] = new Object[] { Collections.singletonList(threeResult), Arrays.asList(Arrays.asList(threeHaplotypes)) };
        for (int ten = 0; ten < tenCount; ten++) {
            final ReadThreadingGraph tenGraph = new TestingReadThreadingGraph((String) TEN_KS_GRAPH_AND_HAPLOTYPES[ten][0]);
            final AssemblyResult tenResult = new AssemblyResult(AssemblyResult.Status.ASSEMBLED_SOME_VARIATION, tenGraph.toSequenceGraph(), tenGraph);
            // None of the 10-k haplotypes is marked as reference.
            final Haplotype[] tenHaplotypes = composeHaplotypes((Object[]) TEN_KS_GRAPH_AND_HAPLOTYPES[ten][1], false);
            rows[threeCount + three * tenCount + ten] = new Object[] {
                    Arrays.asList(threeResult, tenResult),
                    Arrays.asList(Arrays.asList(threeHaplotypes), Arrays.asList(tenHaplotypes)) };
        }
    }
    return Arrays.asList(rows).iterator();
}

// Turns an array of base-sequence strings into Haplotype instances; when firstIsReference is set,
// the first haplotype is flagged as the reference. Each haplotype is placed on contig "1"
// starting at position 1, mirroring the original provider's location assignment.
private Haplotype[] composeHaplotypes(final Object[] haplotypeStrings, final boolean firstIsReference) {
    final Haplotype[] haplotypes = new Haplotype[haplotypeStrings.length];
    for (int i = 0; i < haplotypeStrings.length; i++) {
        haplotypes[i] = new Haplotype(((String) haplotypeStrings[i]).getBytes(), firstIsReference && i == 0);
        haplotypes[i].setGenomeLocation(genomeLocParser.createGenomeLoc("1", 1, haplotypes[i].length() + 1));
    }
    return haplotypes;
}
Use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in the Broad Institute project gatk-protected: class AssemblyResultSetUnitTest, method trimmingData.
@DataProvider(name = "trimmingData")
public Iterator<Object[]> trimmingData() {
    final AssemblyRegion activeRegion = new AssemblyRegion(new SimpleInterval("1", 1000, 1100), 25, header);
    final int length = activeRegion.getExtendedSpan().size();
    // Fixed seed (lucky 13) keeps the generated reference bases reproducible across runs.
    final RandomDNA randomDna = new RandomDNA(13);
    final String referenceBases = new String(randomDna.nextBases(length));
    final AssemblyRegionTestDataSet dataSet = new AssemblyRegionTestDataSet(10, referenceBases, new String[] { "Civar:*1T*" }, new String[0], new byte[0], new byte[0], new byte[0]);
    final List<Haplotype> haplotypes = dataSet.haplotypeList();
    for (final Haplotype haplotype : haplotypes) {
        haplotype.setGenomeLocation(activeRegion.getExtendedSpan());
    }
    // Thread every haplotype into a k=10 read-threading graph and wrap it in an assembly result.
    final ReadThreadingGraph graph = new ReadThreadingGraph(10);
    for (final Haplotype haplotype : haplotypes) {
        graph.addSequence("seq-" + Math.abs(haplotype.hashCode()), haplotype.getBases(), haplotype.isReference());
    }
    final AssemblyResult assemblyResult = new AssemblyResult(AssemblyResult.Status.ASSEMBLED_SOME_VARIATION, graph.toSequenceGraph(), graph);
    // All haplotypes came out of the same assembly, so they all map to the same result.
    final Map<Haplotype, AssemblyResult> resultByHaplotype = new HashMap<>();
    for (final Haplotype haplotype : haplotypes) {
        resultByHaplotype.put(haplotype, assemblyResult);
    }
    return Collections.singleton(new Object[] { resultByHaplotype, activeRegion }).iterator();
}
Use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in the Broad Institute project gatk: class AssemblyBasedCallerGenotypingEngine, method createAlleleMapper.
/**
 * Groups the given haplotypes by the merged allele they support at {@code loc}.
 * A haplotype with no event at the locus supports the reference allele; otherwise it is
 * attached to the merged alternate allele whose source event it matches.
 */
protected static Map<Allele, List<Haplotype>> createAlleleMapper(final List<VariantContext> eventsAtThisLoc, final VariantContext mergedVC, final int loc, final List<Haplotype> haplotypes) {
    Utils.validateArg(haplotypes.size() > eventsAtThisLoc.size(), "expected haplotypes.size() >= eventsAtThisLoc.size() + 1");
    final Map<Allele, List<Haplotype>> alleleToHaplotypes = new LinkedHashMap<>();
    final Allele refAllele = mergedVC.getReference();
    alleleToHaplotypes.put(refAllele, new ArrayList<>());
    // The alleles implied by eventsAtThisLoc cannot be used directly because they have not been
    // merged to a common reference. For example, over a homopolymer AAAAA reference, a single and a
    // double deletion appear as (i) AA* A and (ii) AAA* A in eventsAtThisLoc, while mergedVC
    // represents them jointly as AAA* AA A.
    for (final Allele alt : mergedVC.getAlternateAlleles()) {
        if (!alt.isSymbolic()) {
            alleleToHaplotypes.put(alt, new ArrayList<>());
        }
    }
    for (final Haplotype haplotype : haplotypes) {
        final VariantContext eventOnHaplotype = haplotype.getEventMap().get(loc);
        if (eventOnHaplotype == null) {
            // No event here --> this haplotype supports the reference at this locus.
            alleleToHaplotypes.get(refAllele).add(haplotype);
            continue;
        }
        // TODO: matching purely on identical allele sets is not good enough.
        // The n-th event in eventsAtThisLoc corresponds to the n-th alternate allele of mergedVC.
        for (int n = 0; n < eventsAtThisLoc.size(); n++) {
            if (eventOnHaplotype.hasSameAllelesAs(eventsAtThisLoc.get(n))) {
                alleleToHaplotypes.get(mergedVC.getAlternateAllele(n)).add(haplotype);
                break;
            }
        }
    }
    return alleleToHaplotypes;
}
Use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in the Broad Institute project gatk: class AssemblyBasedCallerUtils, method realignReadsToTheirBestHaplotype.
/**
 * Realigns every read against the haplotype it supports best according to the given likelihoods.
 * <p>
 * Returns a map with the original read as a key and the realigned read as the value.
 * Missing keys, or equivalent key and value pairs, mean that the read was not realigned.
 * </p>
 * @param originalReadLikelihoods per-read haplotype likelihoods used to select each read's best haplotype.
 * @param refHaplotype the reference haplotype the new alignment is expressed against.
 * @param paddedReferenceLoc padded reference location; its start anchors the realigned coordinates.
 * @return never {@code null}
 */
public static Map<GATKRead, GATKRead> realignReadsToTheirBestHaplotype(final ReadLikelihoods<Haplotype> originalReadLikelihoods, final Haplotype refHaplotype, final Locatable paddedReferenceLoc) {
    final Collection<ReadLikelihoods<Haplotype>.BestAllele<Haplotype>> bestAlleles = originalReadLikelihoods.bestAlleles();
    final Map<GATKRead, GATKRead> realignedByOriginal = new HashMap<>(bestAlleles.size());
    for (final ReadLikelihoods<Haplotype>.BestAllele<Haplotype> best : bestAlleles) {
        final GATKRead realigned = AlignmentUtils.createReadAlignedToRef(
                best.read, best.allele, refHaplotype, paddedReferenceLoc.getStart(), best.isInformative());
        realignedByOriginal.put(best.read, realigned);
    }
    return realignedByOriginal;
}
Use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in the Broad Institute project gatk: class PairHMM, method computeLog10Likelihoods.
/**
 * Given a list of reads and haplotypes, for every read compute the total probability of said read arising from
 * each haplotype given base substitution, insertion, and deletion probabilities.
 *
 * @param processedReads reads to analyze instead of the ones present in the destination read-likelihoods.
 * @param logLikelihoods where to store the log likelihoods where position [a][r] is reserved for the log likelihood of {@code reads[r]}
 * conditional to {@code alleles[a]}.
 * @param gcp penalty for gap continuations base array map for processed reads.
 *
 */
public void computeLog10Likelihoods(final LikelihoodMatrix<Haplotype> logLikelihoods, final List<GATKRead> processedReads, final Map<GATKRead, byte[]> gcp) {
// Nothing to compute for an empty read list; logLikelihoods is left untouched.
if (processedReads.isEmpty()) {
return;
}
if (doProfiling) {
startTime = System.nanoTime();
}
// (re)initialize the pairHMM only if necessary: reinitialize when uninitialized or when the
// current read/haplotype lengths exceed the capacity of the previously allocated matrices.
final int readMaxLength = findMaxReadLength(processedReads);
final int haplotypeMaxLength = findMaxAlleleLength(logLikelihoods.alleles());
if (!initialized || readMaxLength > maxReadLength || haplotypeMaxLength > maxHaplotypeLength) {
initialize(readMaxLength, haplotypeMaxLength);
}
final int readCount = processedReads.size();
final List<Haplotype> alleles = logLikelihoods.alleles();
final int alleleCount = alleles.size();
// Flat row-major [read][allele] copy of all computed likelihoods, kept alongside logLikelihoods.
mLogLikelihoodArray = new double[readCount * alleleCount];
int idx = 0;
int readIndex = 0;
for (final GATKRead read : processedReads) {
final byte[] readBases = read.getBases();
final byte[] readQuals = read.getBaseQualities();
final byte[] readInsQuals = ReadUtils.getBaseInsertionQualities(read);
final byte[] readDelQuals = ReadUtils.getBaseDeletionQualities(read);
// Gap-continuation penalties for this read; no null-check, so every processed read must have an entry.
final byte[] overallGCP = gcp.get(read);
// peek at the next haplotype in the list (necessary to get nextHaplotypeBases, which is required for caching in the array implementation)
// NOTE(review): this flag is a constant `true` for every allele iteration, despite a name that
// suggests it should hold only for a == 0 — confirm against the caching contract of
// computeReadLikelihoodGivenHaplotypeLog10 in the concrete PairHMM implementations.
final boolean isFirstHaplotype = true;
for (int a = 0; a < alleleCount; a++) {
final Allele allele = alleles.get(a);
final byte[] alleleBases = allele.getBases();
// The next haplotype's bases (null on the last allele) enable look-ahead caching in array implementations.
final byte[] nextAlleleBases = a == alleles.size() - 1 ? null : alleles.get(a + 1).getBases();
final double lk = computeReadLikelihoodGivenHaplotypeLog10(alleleBases, readBases, readQuals, readInsQuals, readDelQuals, overallGCP, isFirstHaplotype, nextAlleleBases);
// Store the likelihood both in the destination matrix and in the flat backing array.
logLikelihoods.set(a, readIndex, lk);
mLogLikelihoodArray[idx++] = lk;
}
readIndex++;
}
if (doProfiling) {
threadLocalPairHMMComputeTimeDiff = (System.nanoTime() - startTime);
// NOTE(review): bare block — possibly a stripped `synchronized` region; as written, the
// accumulation into pairHMMComputeTime is not guarded — confirm intended thread-safety.
{
pairHMMComputeTime += threadLocalPairHMMComputeTimeDiff;
}
}
}
Aggregations