Search in sources :

Example 61 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.

the class AssemblyResultSetUnitTest method testAddReferenceHaplotype.

/**
 * Checks that a reference haplotype is accepted by an empty {@link AssemblyResultSet} the first
 * time it is added, rejected the second time, and afterwards reported as the set's reference
 * haplotype and its only member.
 */
@Test
public void testAddReferenceHaplotype() {
    final byte[] bases = "ACGT".getBytes();
    final Haplotype referenceHaplotype = new Haplotype(bases, true);
    final int stop = referenceHaplotype.length() + 1;
    referenceHaplotype.setGenomeLocation(genomeLocParser.createGenomeLoc("1", 1, stop));

    final AssemblyResultSet resultSet = new AssemblyResultSet();

    // First insertion must succeed; repeating it must be refused.
    final boolean addedFirstTime = resultSet.add(referenceHaplotype);
    Assert.assertTrue(addedFirstTime);
    final boolean addedSecondTime = resultSet.add(referenceHaplotype);
    Assert.assertFalse(addedSecondTime);

    // The set must now contain exactly that one haplotype and expose it as the reference.
    Assert.assertEquals(resultSet.getReferenceHaplotype(), referenceHaplotype);
    Assert.assertEquals(resultSet.getHaplotypeCount(), 1);
    Assert.assertEquals(resultSet.getHaplotypeList().size(), 1);
}
Also used : Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 62 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.

the class AssemblyResultSetUnitTest method testTrimTo.

/**
 * Verifies that {@code trimTo} yields a result set that reports itself as trimmed and whose
 * haplotypes all span the trimmed location, while the set it was called on still reports
 * itself as untrimmed.
 */
@Test(dataProvider = "trimmingData")
public void testTrimTo(final Map<Haplotype, AssemblyResult> haplotypesAndResultSets, final AssemblyRegion original) {
    final AssemblyResultSet subject = new AssemblyResultSet();
    for (final Map.Entry<Haplotype, AssemblyResult> pair : haplotypesAndResultSets.entrySet()) {
        final Haplotype haplotype = pair.getKey();
        final AssemblyResult result = pair.getValue();
        subject.add(haplotype, result);
    }
    subject.setRegionForGenotyping(original);

    // Shrink the extended span by half of its length at each end.
    final SimpleInterval originalLocation = original.getExtendedSpan();
    final int halfLength = originalLocation.size() / 2;
    final String contig = originalLocation.getContig();
    final int newStart = originalLocation.getStart() + halfLength;
    final int newEnd = originalLocation.getEnd() - halfLength;
    final SimpleInterval newLocation = new SimpleInterval(contig, newStart, newEnd);
    final AssemblyRegion newRegion = original.trim(newLocation);

    // Trim every input haplotype directly as well; the resulting map is not inspected below.
    final Map<Haplotype, Haplotype> originalHaplotypesByTrimmed = new HashMap<>(haplotypesAndResultSets.size());
    for (final Haplotype untrimmed : haplotypesAndResultSets.keySet()) {
        final Haplotype directlyTrimmed = untrimmed.trim(newRegion.getExtendedSpan());
        originalHaplotypesByTrimmed.put(directlyTrimmed, untrimmed);
    }

    final AssemblyResultSet trimmed = subject.trimTo(newRegion);
    Assert.assertFalse(subject.wasTrimmed());
    Assert.assertTrue(trimmed.wasTrimmed());

    for (final Haplotype trimmedHaplotype : trimmed.getHaplotypeList()) {
        Assert.assertEquals(trimmedHaplotype.getGenomeLocation(), newLocation);
        final int baseCount = trimmedHaplotype.getBases().length;
        Assert.assertEquals(baseCount, newLocation.size());
    }
}
Also used : AssemblyRegion(org.broadinstitute.hellbender.engine.AssemblyRegion) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 63 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.

the class AssemblyResultSetUnitTest method testAddManyHaplotypes.

/**
 * Adds every haplotype of every assembly result to a single {@link AssemblyResultSet} and checks
 * the set's bookkeeping after each batch.
 *
 * @param assemblyResults assembly results, one per entry of {@code haplotypes}
 * @param haplotypes      for each index {@code i}, the haplotypes added together with
 *                        {@code assemblyResults.get(i)}
 */
@Test(dataProvider = "assemblyResults")
public void testAddManyHaplotypes(final List<AssemblyResult> assemblyResults, final List<List<Haplotype>> haplotypes) {
    final AssemblyResultSet subject = new AssemblyResultSet();
    for (int i = 0; i < haplotypes.size(); i++) {
        final int haplotypeCountBefore = subject.getHaplotypeCount();
        final List<Haplotype> haplos = haplotypes.get(i);
        final AssemblyResult ar = assemblyResults.get(i);
        for (final Haplotype h : haplos) {
            // Each haplotype is accepted once; adding it again must be rejected.
            Assert.assertTrue(subject.add(h, ar));
            Assert.assertFalse(subject.add(h, ar));
            if (h.isReference()) {
                Assert.assertEquals(subject.getReferenceHaplotype(), h);
            }
        }
        final int haplotypeCountAfter = subject.getHaplotypeCount();
        // TestNG's assertEquals takes (actual, expected): the observed growth is the actual value,
        // so that a failure message labels the two numbers correctly.
        Assert.assertEquals(haplotypeCountAfter - haplotypeCountBefore, haplos.size());
        // The set's k-mer size range must cover the k-mer size of the result just added.
        Assert.assertTrue(subject.getMaximumKmerSize() >= ar.getKmerSize());
        Assert.assertTrue(subject.getMinimumKmerSize() <= ar.getKmerSize());
        Assert.assertEquals(subject.getUniqueReadThreadingGraph(ar.getKmerSize()), ar.getThreadingGraph());
    }
}
Also used : Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 64 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.

the class AssemblyRegionTestDataSet method expandAllHaplotypeCombinations.

/**
 * Builds one {@link Haplotype} for each {@link Civar} obtained by parsing {@code civarString} and
 * calling {@code optionalizeAll().unroll()} on it.
 *
 * <p>Each haplotype's bases are the result of applying the corresponding Civar to
 * {@code reference}; the boolean constructor flag is set when those bases equal
 * {@code reference}. Every haplotype is located on contig "1" from position 1 to
 * {@code reference.length()} and carries the cigar produced by that Civar.
 *
 * @param civarString textual Civar specification to expand
 * @param reference   reference sequence the Civars are applied to
 * @return the haplotypes, in the order the unrolled Civars were returned
 * @throws RuntimeException if building or setting the cigar fails; the message is prefixed with
 *                          the offending Civar and the original exception is kept as the cause
 */
private List<Haplotype> expandAllHaplotypeCombinations(final String civarString, final String reference) {
    final Civar civar = Civar.fromCharSequence(civarString);
    final List<Civar> unrolledCivars = civar.optionalizeAll().unroll();
    final List<Haplotype> result = new ArrayList<>(unrolledCivars.size());
    for (final Civar c : unrolledCivars) {
        final String baseString = c.applyTo(reference);
        final Haplotype haplotype = new Haplotype(baseString.getBytes(), baseString.equals(reference));
        haplotype.setGenomeLocation(genomeLocParser.createGenomeLoc("1", 1, reference.length()));
        try {
            haplotype.setCigar(c.toCigar(reference.length()));
        } catch (final RuntimeException ex) {
            // Rethrow immediately with the Civar in the message. Re-running the failing calls here
            // would raise the bare exception again and the contextual wrapper would never be thrown.
            throw new RuntimeException("" + c + " " + ex.getMessage(), ex);
        }
        result.add(haplotype);
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Example 65 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.

the class AssemblyBasedCallerUtils method assembleReads.

/**
 * High-level function that runs the assembler on the given region's reads,
 * returning a data structure with the resulting information needed
 * for further HC steps.
 *
 * <p>The region is first passed through {@code finalizeRegion}; the padded reference bases, padded
 * reference location and reference haplotype are then derived and handed to
 * {@code assemblyEngine.runLocalAssembly}. If assembly (or the subsequent debug dump) throws and
 * {@code argumentCollection.captureAssemblyFailureBAM} is set, the region's reads are written to
 * {@code assemblyFailure.bam} before the exception is rethrown.
 *
 * @param region             region whose reads are assembled
 * @param givenAlleles       alleles forwarded unchanged to the assembly engine
 * @param argumentCollection caller arguments controlling error correction, soft-clip handling,
 *                           debug logging and failure-BAM capture
 * @param header             SAM header used when finalizing the region, assembling, and writing
 *                           the failure BAM
 * @param sampleList         samples forwarded to {@code finalizeRegion}
 * @param logger             receives the debug message and the assembly debug dump
 * @param referenceReader    source of the reference bases
 * @param assemblyEngine     engine that performs the local assembly
 * @return the assembly result set produced by the engine
 */
public static AssemblyResultSet assembleReads(final AssemblyRegion region, final List<VariantContext> givenAlleles, final AssemblyBasedCallerArgumentCollection argumentCollection, final SAMFileHeader header, final SampleList sampleList, final Logger logger, final ReferenceSequenceFile referenceReader, final ReadThreadingAssembler assemblyEngine) {
    finalizeRegion(region, argumentCollection.errorCorrectReads, argumentCollection.dontUseSoftClippedBases, (byte) (argumentCollection.minBaseQualityScore - 1), header, sampleList);
    if (argumentCollection.debug) {
        logger.info("Assembling " + region.getSpan() + " with " + region.size() + " reads:    (with overlap region = " + region.getExtendedSpan() + ")");
    }
    final byte[] fullReferenceWithPadding = region.getAssemblyRegionReference(referenceReader, REFERENCE_PADDING_FOR_ASSEMBLY);
    final SimpleInterval paddedReferenceLoc = getPaddedReferenceLoc(region, REFERENCE_PADDING_FOR_ASSEMBLY, referenceReader);
    final Haplotype referenceHaplotype = createReferenceHaplotype(region, paddedReferenceLoc, referenceReader);
    // A read error corrector is only created when error correction was requested; otherwise null is passed on.
    final ReadErrorCorrector readErrorCorrector = argumentCollection.errorCorrectReads ? new ReadErrorCorrector(argumentCollection.assemblerArgs.kmerLengthForReadErrorCorrection, HaplotypeCallerEngine.MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION, argumentCollection.assemblerArgs.minObservationsForKmerToBeSolid, argumentCollection.debug, fullReferenceWithPadding) : null;
    try {
        final AssemblyResultSet assemblyResultSet = assemblyEngine.runLocalAssembly(region, referenceHaplotype, fullReferenceWithPadding, paddedReferenceLoc, givenAlleles, readErrorCorrector, header);
        assemblyResultSet.debugDump(logger);
        return assemblyResultSet;
    } catch (final Exception e) {
        // Capture any exception that might be thrown, and write out the assembly failure BAM if requested
        if (argumentCollection.captureAssemblyFailureBAM) {
            try (final SAMFileWriter writer = ReadUtils.createCommonSAMWriter(new File("assemblyFailure.bam"), null, header, false, false, false)) {
                for (final GATKRead read : region.getReads()) {
                    writer.addAlignment(read.convertToSAMRecord(header));
                }
            } catch (final RuntimeException dumpFailure) {
                // Writing the diagnostic BAM is secondary: keep the assembly failure as the
                // exception that propagates and attach the dump problem to it instead of
                // letting it replace the original cause.
                e.addSuppressed(dumpFailure);
            }
        }
        throw e;
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) CachingIndexedFastaSequenceFile(org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Aggregations

Haplotype (org.broadinstitute.hellbender.utils.haplotype.Haplotype) 88, Test (org.testng.annotations.Test) 31, GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead) 28, BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest) 28, SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval) 12, DataProvider (org.testng.annotations.DataProvider) 10, VariantContext (htsjdk.variant.variantcontext.VariantContext) 9, KBestHaplotype (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.KBestHaplotype) 8, ArrayList (java.util.ArrayList) 7, Cigar (htsjdk.samtools.Cigar) 6, AssemblyRegion (org.broadinstitute.hellbender.engine.AssemblyRegion) 6, HomogeneousPloidyModel (org.broadinstitute.hellbender.tools.walkers.genotyper.HomogeneousPloidyModel) 6, IndependentSampleGenotypesModel (org.broadinstitute.hellbender.tools.walkers.genotyper.IndependentSampleGenotypesModel) 6, PloidyModel (org.broadinstitute.hellbender.tools.walkers.genotyper.PloidyModel) 6, ReadThreadingGraph (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingGraph) 6, SampleList (org.broadinstitute.hellbender.utils.genotyper.SampleList) 6, EventMap (org.broadinstitute.hellbender.utils.haplotype.EventMap) 6, CigarElement (htsjdk.samtools.CigarElement) 4, Allele (htsjdk.variant.variantcontext.Allele) 4, File (java.io.File) 4