use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.
the class BroadcastJoinReadsWithVariants method join.
public static JavaPairRDD<GATKRead, Iterable<GATKVariant>> join(final JavaRDD<GATKRead> reads, final JavaRDD<GATKVariant> variants) {
final JavaSparkContext ctx = new JavaSparkContext(reads.context());
final IntervalsSkipList<GATKVariant> variantSkipList = new IntervalsSkipList<>(variants.collect());
final Broadcast<IntervalsSkipList<GATKVariant>> variantsBroadcast = ctx.broadcast(variantSkipList);
return reads.mapToPair(r -> {
final IntervalsSkipList<GATKVariant> intervalsSkipList = variantsBroadcast.getValue();
if (SimpleInterval.isValid(r.getContig(), r.getStart(), r.getEnd())) {
return new Tuple2<>(r, intervalsSkipList.getOverlapping(new SimpleInterval(r)));
} else {
return new Tuple2<>(r, Collections.emptyList());
}
});
}
use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.
the class IntervalWalkerSpark method getIntervalsFunction.
private static org.apache.spark.api.java.function.Function<Shard<GATKRead>, IntervalWalkerContext> getIntervalsFunction(Broadcast<ReferenceMultiSource> bReferenceSource, Broadcast<FeatureManager> bFeatureManager, SAMSequenceDictionary sequenceDictionary, int intervalShardPadding) {
return (org.apache.spark.api.java.function.Function<Shard<GATKRead>, IntervalWalkerContext>) shard -> {
SimpleInterval interval = shard.getInterval();
SimpleInterval paddedInterval = shard.getInterval().expandWithinContig(intervalShardPadding, sequenceDictionary);
ReadsContext readsContext = new ReadsContext(new GATKDataSource<GATKRead>() {
@Override
public Iterator<GATKRead> iterator() {
return shard.iterator();
}
@Override
public Iterator<GATKRead> query(SimpleInterval interval) {
return StreamSupport.stream(shard.spliterator(), false).filter(r -> IntervalUtils.overlaps(r, interval)).iterator();
}
}, shard.getInterval());
ReferenceDataSource reference = bReferenceSource == null ? null : new ReferenceMemorySource(bReferenceSource.getValue().getReferenceBases(null, paddedInterval), sequenceDictionary);
FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();
return new IntervalWalkerContext(interval, readsContext, new ReferenceContext(reference, interval), new FeatureContext(features, interval));
};
}
use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.
the class LocusWalkerSpark method getAlignments.
/**
* Loads alignments and the corresponding reference and features into a {@link JavaRDD} for the intervals specified.
*
* If no intervals were specified, returns all the alignments.
*
* @return all alignments as a {@link JavaRDD}, bounded by intervals if specified.
*/
public JavaRDD<LocusWalkerContext> getAlignments(JavaSparkContext ctx) {
SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
List<SimpleInterval> intervals = hasIntervals() ? getIntervals() : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
final List<ShardBoundary> intervalShards = intervals.stream().flatMap(interval -> Shard.divideIntervalIntoShards(interval, readShardSize, readShardPadding, sequenceDictionary).stream()).collect(Collectors.toList());
int maxLocatableSize = Math.min(readShardSize, readShardPadding);
JavaRDD<Shard<GATKRead>> shardedReads = SparkSharder.shard(ctx, getReads(), GATKRead.class, sequenceDictionary, intervalShards, maxLocatableSize, shuffle);
Broadcast<ReferenceMultiSource> bReferenceSource = hasReference() ? ctx.broadcast(getReference()) : null;
Broadcast<FeatureManager> bFeatureManager = features == null ? null : ctx.broadcast(features);
return shardedReads.flatMap(getAlignmentsFunction(bReferenceSource, bFeatureManager, sequenceDictionary, getHeaderForReads(), getDownsamplingInfo()));
}
use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.
the class SparkSharder method addPartitionReadExtent.
private static void addPartitionReadExtent(List<PartitionLocatable<SimpleInterval>> extents, int partitionIndex, String contig, int start, int end) {
SimpleInterval extent = new SimpleInterval(contig, start, end);
extents.add(new PartitionLocatable<>(partitionIndex, extent));
}
use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.
the class VariantWalkerSpark method getVariantsFunction.
private static FlatMapFunction<Shard<VariantContext>, VariantWalkerContext> getVariantsFunction(final Broadcast<ReferenceMultiSource> bReferenceSource, final Broadcast<FeatureManager> bFeatureManager, final SAMSequenceDictionary sequenceDictionary, final int variantShardPadding) {
return (FlatMapFunction<Shard<VariantContext>, VariantWalkerContext>) shard -> {
SimpleInterval paddedInterval = shard.getInterval().expandWithinContig(variantShardPadding, sequenceDictionary);
ReferenceDataSource reference = bReferenceSource == null ? null : new ReferenceMemorySource(bReferenceSource.getValue().getReferenceBases(null, paddedInterval), sequenceDictionary);
FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();
return StreamSupport.stream(shard.spliterator(), false).filter(v -> v.getStart() >= shard.getStart() && v.getStart() <= shard.getEnd()).map(v -> {
final SimpleInterval variantInterval = new SimpleInterval(v);
return new VariantWalkerContext(v, new ReadsContext(), new ReferenceContext(reference, variantInterval), new FeatureContext(features, variantInterval));
}).iterator();
};
}
Aggregations