Search in sources :

Example 6 with AssemblyRegion

use of org.broadinstitute.hellbender.engine.AssemblyRegion in project gatk by broadinstitute.

the class ActivityProfile method popReadyAssemblyRegions.

// --------------------------------------------------------------------------------
//
// routines to get active regions from the profile
//
// --------------------------------------------------------------------------------
/**
     * Get the next completed assembly regions from this profile, and remove all states supporting them from this profile
     *
     * Takes the current profile and finds all of the active / inactive from the start of the profile that are
     * ready.  By ready we mean unable to have their probability modified any longer by future additions to the
     * profile.  The regions that are popped off the profile take their states with them, so the start of this
     * profile will always be after the end of the last region returned here.
     *
     * The regions are returned sorted by genomic position.
     *
     * This function may not return anything in the list, if no regions are ready
     *
     * No returned region will be larger than maxRegionSize.
     *
     * @param assemblyRegionExtension the extension value to provide to the constructed regions
     * @param minRegionSize the minimum region size, in the case where we have to cut up regions that are too large
     * @param maxRegionSize the maximize size of the returned region
     * @param forceConversion if true, we'll return a region whose end isn't sufficiently far from the end of the
     *                        stateList.  Used to close out the active region when we've hit some kind of end (such
     *                        as the end of the contig)
     * @return a non-null list of active regions
     */
public List<AssemblyRegion> popReadyAssemblyRegions(final int assemblyRegionExtension, final int minRegionSize, final int maxRegionSize, final boolean forceConversion) {
    Utils.validateArg(assemblyRegionExtension >= 0, () -> "assemblyRegionExtension must be >= 0 but got " + assemblyRegionExtension);
    Utils.validateArg(minRegionSize > 0, () -> "minRegionSize must be >= 1 but got " + minRegionSize);
    Utils.validateArg(maxRegionSize > 0, () -> "maxRegionSize must be >= 1 but got " + maxRegionSize);
    final List<AssemblyRegion> regions = new ArrayList<>();
    while (true) {
        final AssemblyRegion nextRegion = popNextReadyAssemblyRegion(assemblyRegionExtension, minRegionSize, maxRegionSize, forceConversion);
        if (nextRegion == null) {
            return regions;
        } else {
            if (restrictToIntervals == null) {
                regions.add(nextRegion);
            } else {
                regions.addAll(nextRegion.splitAndTrimToIntervals(restrictToIntervals));
            }
        }
    }
}
Also used : AssemblyRegion(org.broadinstitute.hellbender.engine.AssemblyRegion)

Example 7 with AssemblyRegion

use of org.broadinstitute.hellbender.engine.AssemblyRegion in project gatk by broadinstitute.

the class ActivityProfile method popNextReadyAssemblyRegion.

/**
     * Helper function for popReadyActiveRegions that pops the first ready region off the front of this profile
     *
     * If a region is returned, modifies the state of this profile so that states used to make the region are
     * no longer part of the profile.  Associated information (like the region start position) of this profile
     * are also updated.
     *
     * @param assemblyRegionExtension the extension value to provide to the constructed regions
     * @param minRegionSize the minimum region size, in the case where we have to cut up regions that are too large
     * @param maxRegionSize the maximize size of the returned region
     * @param forceConversion if true, we'll return a region whose end isn't sufficiently far from the end of the
     *                        stateList.  Used to close out the active region when we've hit some kind of end (such
     *                        as the end of the contig)
     * @return a fully formed assembly region, or null if none can be made
     */
private AssemblyRegion popNextReadyAssemblyRegion(final int assemblyRegionExtension, final int minRegionSize, final int maxRegionSize, final boolean forceConversion) {
    if (stateList.isEmpty()) {
        return null;
    }
    // If we are flushing the activity profile we need to trim off the excess states so that we don't create regions outside of our current processing interval
    if (forceConversion) {
        final List<ActivityProfileState> statesToTrimAway = new ArrayList<>(stateList.subList(getSpan().size(), stateList.size()));
        stateList.removeAll(statesToTrimAway);
    }
    final ActivityProfileState first = stateList.get(0);
    final boolean isActiveRegion = first.isActiveProb() > activeProbThreshold;
    final int offsetOfNextRegionEnd = findEndOfRegion(isActiveRegion, minRegionSize, maxRegionSize, forceConversion);
    if (offsetOfNextRegionEnd == -1) {
        // couldn't find a valid ending offset, so we return null
        return null;
    }
    // we need to create the active region, and clip out the states we're extracting from this profile
    final List<ActivityProfileState> sub = stateList.subList(0, offsetOfNextRegionEnd + 1);
    final List<ActivityProfileState> supportingStates = new ArrayList<>(sub);
    sub.clear();
    // update the start and stop locations as necessary
    if (stateList.isEmpty()) {
        regionStartLoc = regionStopLoc = null;
    } else {
        regionStartLoc = stateList.get(0).getLoc();
    }
    final SimpleInterval regionLoc = new SimpleInterval(first.getLoc().getContig(), first.getLoc().getStart(), first.getLoc().getStart() + offsetOfNextRegionEnd);
    return new AssemblyRegion(regionLoc, supportingStates, isActiveRegion, assemblyRegionExtension, samHeader);
}
Also used : AssemblyRegion(org.broadinstitute.hellbender.engine.AssemblyRegion) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 8 with AssemblyRegion

use of org.broadinstitute.hellbender.engine.AssemblyRegion in project gatk by broadinstitute.

the class ActivityProfileUnitTest method testRegionCreation.

@Test(dataProvider = "RegionCreationTests")
public void testRegionCreation(final int start, final List<Boolean> probs, int maxRegionSize, final int nParts, final boolean forceConversion, final boolean waitUntilEnd) {
    final ActivityProfile profile = new ActivityProfile(MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD, header);
    Assert.assertNotNull(profile.toString());
    final String contig = genomeLocParser.getSequenceDictionary().getSequences().get(0).getSequenceName();
    final List<Boolean> seenSites = new ArrayList<Boolean>(Collections.nCopies(probs.size(), false));
    AssemblyRegion lastRegion = null;
    for (int i = 0; i < probs.size(); i++) {
        final boolean isActive = probs.get(i);
        final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, i + start);
        final ActivityProfileState state = new ActivityProfileState(new SimpleInterval(loc), isActive ? 1.0 : 0.0);
        profile.add(state);
        Assert.assertNotNull(profile.toString());
        if (!waitUntilEnd) {
            final List<AssemblyRegion> regions = profile.popReadyAssemblyRegions(0, 1, maxRegionSize, false);
            lastRegion = assertGoodRegions(start, regions, maxRegionSize, lastRegion, probs, seenSites);
        }
    }
    if (waitUntilEnd || forceConversion) {
        final List<AssemblyRegion> regions = profile.popReadyAssemblyRegions(0, 1, maxRegionSize, forceConversion);
        lastRegion = assertGoodRegions(start, regions, maxRegionSize, lastRegion, probs, seenSites);
    }
    for (int i = 0; i < probs.size(); i++) {
        if (forceConversion || (i + maxRegionSize + profile.getMaxProbPropagationDistance() < probs.size()))
            // only require a site to be seen if we are forcing conversion or the site is more than maxRegionSize from the end
            Assert.assertTrue(seenSites.get(i), "Missed site " + i);
    }
    Assert.assertNotNull(profile.toString());
}
Also used : AssemblyRegion(org.broadinstitute.hellbender.engine.AssemblyRegion) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 9 with AssemblyRegion

use of org.broadinstitute.hellbender.engine.AssemblyRegion in project gatk-protected by broadinstitute.

the class AssemblyResultSetUnitTest method testTrimTo.

@Test(dataProvider = "trimmingData")
public void testTrimTo(final Map<Haplotype, AssemblyResult> haplotypesAndResultSets, final AssemblyRegion original) {
    final AssemblyResultSet subject = new AssemblyResultSet();
    for (final Map.Entry<Haplotype, AssemblyResult> entry : haplotypesAndResultSets.entrySet()) subject.add(entry.getKey(), entry.getValue());
    subject.setRegionForGenotyping(original);
    final SimpleInterval originalLocation = original.getExtendedSpan();
    final int length = originalLocation.size();
    final SimpleInterval newLocation = new SimpleInterval(originalLocation.getContig(), originalLocation.getStart() + length / 2, originalLocation.getEnd() - length / 2);
    final AssemblyRegion newRegion = original.trim(newLocation);
    final Map<Haplotype, Haplotype> originalHaplotypesByTrimmed = new HashMap<>(haplotypesAndResultSets.size());
    for (final Haplotype h : haplotypesAndResultSets.keySet()) originalHaplotypesByTrimmed.put(h.trim(newRegion.getExtendedSpan()), h);
    final AssemblyResultSet trimmed = subject.trimTo(newRegion);
    Assert.assertFalse(subject.wasTrimmed());
    Assert.assertTrue(trimmed.wasTrimmed());
    for (final Haplotype h : trimmed.getHaplotypeList()) {
        Assert.assertEquals(h.getGenomeLocation(), newLocation);
        Assert.assertEquals(h.getBases().length, newLocation.size());
    }
}
Also used : AssemblyRegion(org.broadinstitute.hellbender.engine.AssemblyRegion) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 10 with AssemblyRegion

use of org.broadinstitute.hellbender.engine.AssemblyRegion in project gatk by broadinstitute.

the class AssemblyBasedCallerUtils method assemblyRegionWithWellMappedReads.

// create the assembly using just high quality reads (eg Q20 or higher).  We may want to use lower
// quality reads in the PairHMM downstream, so we can't use a ReadFilter
public static AssemblyRegion assemblyRegionWithWellMappedReads(final AssemblyRegion originalAssemblyRegion, final int minMappingQuality, final SAMFileHeader readsHeader) {
    final AssemblyRegion result = new AssemblyRegion(originalAssemblyRegion.getSpan(), originalAssemblyRegion.getSupportingStates(), originalAssemblyRegion.isActive(), originalAssemblyRegion.getExtension(), readsHeader);
    originalAssemblyRegion.getReads().stream().filter(rec -> rec.getMappingQuality() >= minMappingQuality).forEach(result::add);
    return result;
}
Also used : java.util(java.util) AssemblyRegion(org.broadinstitute.hellbender.engine.AssemblyRegion) ReadClipper(org.broadinstitute.hellbender.utils.clipping.ReadClipper) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader) FragmentCollection(org.broadinstitute.hellbender.utils.fragments.FragmentCollection) HaplotypeBAMWriter(org.broadinstitute.hellbender.utils.haplotype.HaplotypeBAMWriter) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) ReadUtils(org.broadinstitute.hellbender.utils.read.ReadUtils) AlignmentUtils(org.broadinstitute.hellbender.utils.read.AlignmentUtils) Locatable(htsjdk.samtools.util.Locatable) GATKVariantContextUtils(org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils) CachingIndexedFastaSequenceFile(org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile) ReadCoordinateComparator(org.broadinstitute.hellbender.utils.read.ReadCoordinateComparator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) QualityUtils(org.broadinstitute.hellbender.utils.QualityUtils) SampleList(org.broadinstitute.hellbender.utils.genotyper.SampleList) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) FragmentUtils(org.broadinstitute.hellbender.utils.fragments.FragmentUtils) ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods) Logger(org.apache.logging.log4j.Logger) ReadThreadingAssembler(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler) UserException(org.broadinstitute.hellbender.exceptions.UserException) VariantContext(htsjdk.variant.variantcontext.VariantContext) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) AssemblyRegion(org.broadinstitute.hellbender.engine.AssemblyRegion)

Aggregations

AssemblyRegion (org.broadinstitute.hellbender.engine.AssemblyRegion)13 Haplotype (org.broadinstitute.hellbender.utils.haplotype.Haplotype)8 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)7 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)4 Test (org.testng.annotations.Test)4 Cigar (htsjdk.samtools.Cigar)2 CigarElement (htsjdk.samtools.CigarElement)2 SAMFileHeader (htsjdk.samtools.SAMFileHeader)2 SAMFileWriter (htsjdk.samtools.SAMFileWriter)2 ReferenceSequenceFile (htsjdk.samtools.reference.ReferenceSequenceFile)2 Locatable (htsjdk.samtools.util.Locatable)2 VariantContext (htsjdk.variant.variantcontext.VariantContext)2 File (java.io.File)2 FileNotFoundException (java.io.FileNotFoundException)2 java.util (java.util)2 Collectors (java.util.stream.Collectors)2 Logger (org.apache.logging.log4j.Logger)2 UserException (org.broadinstitute.hellbender.exceptions.UserException)2 AssemblyResultSet (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet)2 KBestHaplotype (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.KBestHaplotype)2