use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.
the class VariantWalkerSpark method getVariants.
/**
 * Builds a {@link JavaRDD} of {@link VariantWalkerContext} objects from the driving variant file.
 *
 * <p>The variants are read from {@code drivingVariantFile} using the value of {@code getIntervals()},
 * passed through the filter returned by {@code makeVariantFilter()}, and then grouped into unpadded
 * shards of {@code variantShardSize}. When no intervals were specified, the shard boundaries are
 * derived from every interval of the best available sequence dictionary instead.
 *
 * <p>The reference (if present) and the feature manager (if non-null) are broadcast and handed,
 * together with the sequence dictionary and {@code variantShardPadding}, to
 * {@code getVariantsFunction}, which produces the per-variant contexts.
 *
 * <p>For the current implementation the reads context will always be empty.
 *
 * @param ctx the Spark context used for sharding and for broadcasting the reference and features.
 * @return all variants as a {@link JavaRDD}, bounded by intervals if specified.
 */
public JavaRDD<VariantWalkerContext> getVariants(JavaSparkContext ctx) {
    final SAMSequenceDictionary dictionary = getBestAvailableSequenceDictionary();

    // Fall back to whole-reference intervals for sharding purposes when none were requested.
    final List<SimpleInterval> shardingIntervals;
    if (hasIntervals()) {
        shardingIntervals = getIntervals();
    } else {
        shardingIntervals = IntervalUtils.getAllIntervalsForReference(dictionary);
    }

    // use unpadded shards (padding is only needed for reference bases)
    final List<ShardBoundary> shardBoundaries = shardingIntervals.stream()
            .flatMap(interval -> Shard.divideIntervalIntoShards(interval, variantShardSize, 0, dictionary).stream())
            .collect(Collectors.toList());

    // The variant source is queried with the raw getIntervals() value, not the fallback list above.
    final JavaRDD<VariantContext> loadedVariants =
            variantsSource.getParallelVariantContexts(drivingVariantFile, getIntervals());
    final VariantFilter filter = makeVariantFilter();
    final JavaRDD<VariantContext> acceptedVariants = loadedVariants.filter(filter::test);

    final JavaRDD<Shard<VariantContext>> variantShards = SparkSharder.shard(
            ctx, acceptedVariants, VariantContext.class, dictionary, shardBoundaries, variantShardSize, shuffle);

    // Either broadcast may be null when the corresponding input is absent.
    final Broadcast<ReferenceMultiSource> referenceBroadcast =
            hasReference() ? ctx.broadcast(getReference()) : null;
    final Broadcast<FeatureManager> featuresBroadcast =
            features != null ? ctx.broadcast(features) : null;

    return variantShards.flatMap(
            getVariantsFunction(referenceBroadcast, featuresBroadcast, dictionary, variantShardPadding));
}
use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.
the class GenotypingGivenAllelesUtils method composeGivenAllelesVariantContextFromRod.
/**
 * Picks the variant context that supplies the given alleles at a location.
 *
 * <p>The records bound to {@code allelesBinding} at {@code loc} are scanned in the order the tracker
 * returns them. A record is usable when it is non-null, not filtered and, if {@code snpsOnly} is set,
 * a SNP. The first usable record is returned; every further usable record triggers a warning on
 * {@code logger} (when one is provided) and is otherwise ignored.
 *
 * @param tracker the meta data tracker; must not be {@code null}.
 * @param loc the query location; must not be {@code null}.
 * @param snpsOnly whether we only should consider SNP variation.
 * @param logger where to output warnings; may be {@code null}, in which case no warning is emitted.
 * @param allelesBinding the target variation context binding containing the given alleles; must not be {@code null}.
 * @return the first usable record, or {@code null} if no usable record is found at {@code loc}.
 */
public static VariantContext composeGivenAllelesVariantContextFromRod(final FeatureContext tracker, final Locatable loc, final boolean snpsOnly, final Logger logger, final FeatureInput<VariantContext> allelesBinding) {
    Utils.nonNull(tracker, "tracker may not be null");
    Utils.nonNull(loc, "location may not be null");
    Utils.nonNull(allelesBinding, "alleles binding may not be null");

    VariantContext firstUsable = null;
    for (final VariantContext candidate : tracker.getValues(allelesBinding, new SimpleInterval(loc))) {
        final boolean usable = candidate != null
                && !candidate.isFiltered()
                && (!snpsOnly || candidate.isSNP());
        if (!usable) {
            continue;
        }
        if (firstUsable == null) {
            // Remember the first usable record; it is the one that will be returned.
            firstUsable = candidate;
            continue;
        }
        // A usable record was already chosen: only report the duplicate.
        if (logger != null) {
            logger.warn("Multiple valid VCF records detected in the alleles input file at site " + loc + ", only considering the first record");
        }
    }
    return firstUsable;
}
use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.
the class AssemblyRegionTrimmer method nonVariantTargetRegions.
/**
 * Calculates the flanking regions of the target region that lie outside the variant span.
 *
 * @param targetRegion region for which to generate the flanking regions.
 * @param variantSpan the span of the core region containing relevant variation and required padding.
 * @return never {@code null}; a pair whose left element is the part of the target region that
 *         precedes {@code variantSpan} and whose right element is the part that follows it. Either
 *         element is {@code null} when the target region does not extend beyond the span on that side.
 */
private Pair<SimpleInterval, SimpleInterval> nonVariantTargetRegions(final AssemblyRegion targetRegion, final SimpleInterval variantSpan) {
    final SimpleInterval targetSpan = targetRegion.getSpan();
    final String contig = targetSpan.getContig();

    final int spanStart = variantSpan.getStart();
    final int spanEnd = variantSpan.getEnd();
    final int regionStart = targetSpan.getStart();
    final int regionEnd = targetSpan.getEnd();

    // Leading flank: exists only if the target region starts before the variant span.
    final SimpleInterval leadingFlank = regionStart < spanStart
            ? new SimpleInterval(contig, regionStart, spanStart - 1)
            : null;

    // Trailing flank: exists only if the target region ends after the variant span.
    final SimpleInterval trailingFlank = regionEnd > spanEnd
            ? new SimpleInterval(contig, spanEnd + 1, regionEnd)
            : null;

    return Pair.of(leadingFlank, trailingFlank);
}
use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.
the class Haplotype method trim.
/**
 * Create a new Haplotype derived from this one that exactly spans the provided location.
 *
 * <p>This haplotype must have a genome location and a cigar, and {@code loc} must be fully contained
 * within this haplotype's genome location. These preconditions are enforced through
 * {@code Utils.nonNull} and {@code Utils.validateArg}; the exception raised on violation is whatever
 * those helpers throw.
 *
 * <p>The returned haplotype carries the trimmed bases and cigar, {@code loc} as its genome location,
 * this haplotype's score and reference flag, and an alignment start shifted by the offset of
 * {@code loc} from the start of this haplotype's genome location.
 *
 * @param loc a location completely contained within this Haplotype's location
 * @return a new Haplotype with only the bases spanning the provided location, or {@code null} if the
 *         trimmed haplotype would be malformed (no bases cover the interval, or the trimmed cigar
 *         starts or ends with an insertion or deletion)
 */
public Haplotype trim(final Locatable loc) {
    Utils.nonNull(loc, "Loc cannot be null");
    Utils.nonNull(genomeLocation, "Cannot trim a Haplotype without containing GenomeLoc");
    Utils.validateArg(new SimpleInterval(genomeLocation).contains(loc), () -> "Can only trim a Haplotype to a containing span. My loc is " + genomeLocation + " but wanted trim to " + loc);
    Utils.nonNull(getCigar(), "Cannot trim haplotype without a cigar " + this);

    // Offsets of the requested span relative to the start of this haplotype's genome location.
    final int startOffset = loc.getStart() - this.genomeLocation.getStart();
    final int stopOffset = startOffset + loc.getEnd() - loc.getStart();

    final byte[] trimmedBases = AlignmentUtils.getBasesCoveringRefInterval(startOffset, stopOffset, getBases(), 0, getCigar());
    final Cigar trimmedCigar = AlignmentUtils.trimCigarByReference(getCigar(), startOffset, stopOffset);

    // we cannot meaningfully chop down the haplotype, so return null
    final boolean malformed = trimmedBases == null
            || AlignmentUtils.startsOrEndsWithInsertionOrDeletion(trimmedCigar);
    if (malformed) {
        return null;
    }

    final Haplotype trimmed = new Haplotype(trimmedBases, isReference());
    trimmed.setCigar(trimmedCigar);
    trimmed.setGenomeLocation(loc);
    trimmed.setScore(score);
    trimmed.setAlignmentStartHapwrtRef(getAlignmentStartHapwrtRef() + startOffset);
    return trimmed;
}
use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.
the class ShardedIntervalIterator method next.
/**
 * Returns the current sharded interval and advances the iterator to the following one.
 *
 * @return the interval that was current when this method was called.
 * @throws NoSuchElementException if there is no current sharded interval.
 */
@Override
public SimpleInterval next() {
    // Snapshot the current shard before advancing, since advancing replaces it.
    final SimpleInterval current = shardedInterval;
    if (current == null) {
        throw new NoSuchElementException();
    }

    // Advance the shard index and (if necessary) set it back to zero and get the next interval.
    advanceShardInInterval();
    return current;
}
Aggregations