use of org.broadinstitute.hellbender.engine.AssemblyRegion in project gatk by broadinstitute.
the class ActivityProfile method popReadyAssemblyRegions.
// --------------------------------------------------------------------------------
//
// routines to get active regions from the profile
//
// --------------------------------------------------------------------------------
/**
* Get the next completed assembly regions from this profile, and remove all states supporting them from this profile
*
* Takes the current profile and finds all of the active / inactive from the start of the profile that are
* ready. By ready we mean unable to have their probability modified any longer by future additions to the
* profile. The regions that are popped off the profile take their states with them, so the start of this
* profile will always be after the end of the last region returned here.
*
* The regions are returned sorted by genomic position.
*
* This function may not return anything in the list, if no regions are ready
*
* No returned region will be larger than maxRegionSize.
*
* @param assemblyRegionExtension the extension value to provide to the constructed regions
* @param minRegionSize the minimum region size, in the case where we have to cut up regions that are too large
* @param maxRegionSize the maximize size of the returned region
* @param forceConversion if true, we'll return a region whose end isn't sufficiently far from the end of the
* stateList. Used to close out the active region when we've hit some kind of end (such
* as the end of the contig)
* @return a non-null list of active regions
*/
public List<AssemblyRegion> popReadyAssemblyRegions(final int assemblyRegionExtension, final int minRegionSize, final int maxRegionSize, final boolean forceConversion) {
Utils.validateArg(assemblyRegionExtension >= 0, () -> "assemblyRegionExtension must be >= 0 but got " + assemblyRegionExtension);
Utils.validateArg(minRegionSize > 0, () -> "minRegionSize must be >= 1 but got " + minRegionSize);
Utils.validateArg(maxRegionSize > 0, () -> "maxRegionSize must be >= 1 but got " + maxRegionSize);
final List<AssemblyRegion> regions = new ArrayList<>();
while (true) {
final AssemblyRegion nextRegion = popNextReadyAssemblyRegion(assemblyRegionExtension, minRegionSize, maxRegionSize, forceConversion);
if (nextRegion == null) {
return regions;
} else {
if (restrictToIntervals == null) {
regions.add(nextRegion);
} else {
regions.addAll(nextRegion.splitAndTrimToIntervals(restrictToIntervals));
}
}
}
}
use of org.broadinstitute.hellbender.engine.AssemblyRegion in project gatk by broadinstitute.
the class ActivityProfile method popNextReadyAssemblyRegion.
/**
* Helper function for popReadyActiveRegions that pops the first ready region off the front of this profile
*
* If a region is returned, modifies the state of this profile so that states used to make the region are
* no longer part of the profile. Associated information (like the region start position) of this profile
* are also updated.
*
* @param assemblyRegionExtension the extension value to provide to the constructed regions
* @param minRegionSize the minimum region size, in the case where we have to cut up regions that are too large
* @param maxRegionSize the maximize size of the returned region
* @param forceConversion if true, we'll return a region whose end isn't sufficiently far from the end of the
* stateList. Used to close out the active region when we've hit some kind of end (such
* as the end of the contig)
* @return a fully formed assembly region, or null if none can be made
*/
private AssemblyRegion popNextReadyAssemblyRegion(final int assemblyRegionExtension, final int minRegionSize, final int maxRegionSize, final boolean forceConversion) {
if (stateList.isEmpty()) {
return null;
}
// If we are flushing the activity profile we need to trim off the excess states so that we don't create regions outside of our current processing interval
if (forceConversion) {
final List<ActivityProfileState> statesToTrimAway = new ArrayList<>(stateList.subList(getSpan().size(), stateList.size()));
stateList.removeAll(statesToTrimAway);
}
final ActivityProfileState first = stateList.get(0);
final boolean isActiveRegion = first.isActiveProb() > activeProbThreshold;
final int offsetOfNextRegionEnd = findEndOfRegion(isActiveRegion, minRegionSize, maxRegionSize, forceConversion);
if (offsetOfNextRegionEnd == -1) {
// couldn't find a valid ending offset, so we return null
return null;
}
// we need to create the active region, and clip out the states we're extracting from this profile
final List<ActivityProfileState> sub = stateList.subList(0, offsetOfNextRegionEnd + 1);
final List<ActivityProfileState> supportingStates = new ArrayList<>(sub);
sub.clear();
// update the start and stop locations as necessary
if (stateList.isEmpty()) {
regionStartLoc = regionStopLoc = null;
} else {
regionStartLoc = stateList.get(0).getLoc();
}
final SimpleInterval regionLoc = new SimpleInterval(first.getLoc().getContig(), first.getLoc().getStart(), first.getLoc().getStart() + offsetOfNextRegionEnd);
return new AssemblyRegion(regionLoc, supportingStates, isActiveRegion, assemblyRegionExtension, samHeader);
}
use of org.broadinstitute.hellbender.engine.AssemblyRegion in project gatk by broadinstitute.
the class ActivityProfileUnitTest method testRegionCreation.
@Test(dataProvider = "RegionCreationTests")
public void testRegionCreation(final int start, final List<Boolean> probs, int maxRegionSize, final int nParts, final boolean forceConversion, final boolean waitUntilEnd) {
final ActivityProfile profile = new ActivityProfile(MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD, header);
Assert.assertNotNull(profile.toString());
final String contig = genomeLocParser.getSequenceDictionary().getSequences().get(0).getSequenceName();
final List<Boolean> seenSites = new ArrayList<Boolean>(Collections.nCopies(probs.size(), false));
AssemblyRegion lastRegion = null;
for (int i = 0; i < probs.size(); i++) {
final boolean isActive = probs.get(i);
final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, i + start);
final ActivityProfileState state = new ActivityProfileState(new SimpleInterval(loc), isActive ? 1.0 : 0.0);
profile.add(state);
Assert.assertNotNull(profile.toString());
if (!waitUntilEnd) {
final List<AssemblyRegion> regions = profile.popReadyAssemblyRegions(0, 1, maxRegionSize, false);
lastRegion = assertGoodRegions(start, regions, maxRegionSize, lastRegion, probs, seenSites);
}
}
if (waitUntilEnd || forceConversion) {
final List<AssemblyRegion> regions = profile.popReadyAssemblyRegions(0, 1, maxRegionSize, forceConversion);
lastRegion = assertGoodRegions(start, regions, maxRegionSize, lastRegion, probs, seenSites);
}
for (int i = 0; i < probs.size(); i++) {
if (forceConversion || (i + maxRegionSize + profile.getMaxProbPropagationDistance() < probs.size()))
// only require a site to be seen if we are forcing conversion or the site is more than maxRegionSize from the end
Assert.assertTrue(seenSites.get(i), "Missed site " + i);
}
Assert.assertNotNull(profile.toString());
}
use of org.broadinstitute.hellbender.engine.AssemblyRegion in project gatk-protected by broadinstitute.
the class AssemblyResultSetUnitTest method testTrimTo.
@Test(dataProvider = "trimmingData")
public void testTrimTo(final Map<Haplotype, AssemblyResult> haplotypesAndResultSets, final AssemblyRegion original) {
final AssemblyResultSet subject = new AssemblyResultSet();
for (final Map.Entry<Haplotype, AssemblyResult> entry : haplotypesAndResultSets.entrySet()) subject.add(entry.getKey(), entry.getValue());
subject.setRegionForGenotyping(original);
final SimpleInterval originalLocation = original.getExtendedSpan();
final int length = originalLocation.size();
final SimpleInterval newLocation = new SimpleInterval(originalLocation.getContig(), originalLocation.getStart() + length / 2, originalLocation.getEnd() - length / 2);
final AssemblyRegion newRegion = original.trim(newLocation);
final Map<Haplotype, Haplotype> originalHaplotypesByTrimmed = new HashMap<>(haplotypesAndResultSets.size());
for (final Haplotype h : haplotypesAndResultSets.keySet()) originalHaplotypesByTrimmed.put(h.trim(newRegion.getExtendedSpan()), h);
final AssemblyResultSet trimmed = subject.trimTo(newRegion);
Assert.assertFalse(subject.wasTrimmed());
Assert.assertTrue(trimmed.wasTrimmed());
for (final Haplotype h : trimmed.getHaplotypeList()) {
Assert.assertEquals(h.getGenomeLocation(), newLocation);
Assert.assertEquals(h.getBases().length, newLocation.size());
}
}
use of org.broadinstitute.hellbender.engine.AssemblyRegion in project gatk by broadinstitute.
the class AssemblyBasedCallerUtils method assemblyRegionWithWellMappedReads.
// create the assembly using just high quality reads (eg Q20 or higher). We may want to use lower
// quality reads in the PairHMM downstream, so we can't use a ReadFilter
public static AssemblyRegion assemblyRegionWithWellMappedReads(final AssemblyRegion originalAssemblyRegion, final int minMappingQuality, final SAMFileHeader readsHeader) {
final AssemblyRegion result = new AssemblyRegion(originalAssemblyRegion.getSpan(), originalAssemblyRegion.getSupportingStates(), originalAssemblyRegion.isActive(), originalAssemblyRegion.getExtension(), readsHeader);
originalAssemblyRegion.getReads().stream().filter(rec -> rec.getMappingQuality() >= minMappingQuality).forEach(result::add);
return result;
}
Aggregations