Search in sources :

Example 41 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class BroadcastJoinReadsWithVariants method join.

public static JavaPairRDD<GATKRead, Iterable<GATKVariant>> join(final JavaRDD<GATKRead> reads, final JavaRDD<GATKVariant> variants) {
    final JavaSparkContext ctx = new JavaSparkContext(reads.context());
    final IntervalsSkipList<GATKVariant> variantSkipList = new IntervalsSkipList<>(variants.collect());
    final Broadcast<IntervalsSkipList<GATKVariant>> variantsBroadcast = ctx.broadcast(variantSkipList);
    return reads.mapToPair(r -> {
        final IntervalsSkipList<GATKVariant> intervalsSkipList = variantsBroadcast.getValue();
        if (SimpleInterval.isValid(r.getContig(), r.getStart(), r.getEnd())) {
            return new Tuple2<>(r, intervalsSkipList.getOverlapping(new SimpleInterval(r)));
        } else {
            return new Tuple2<>(r, Collections.emptyList());
        }
    });
}
Also used : GATKVariant(org.broadinstitute.hellbender.utils.variant.GATKVariant) IntervalsSkipList(org.broadinstitute.hellbender.utils.collections.IntervalsSkipList) Tuple2(scala.Tuple2) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext)

Example 42 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class IntervalWalkerSpark method getIntervalsFunction.

private static org.apache.spark.api.java.function.Function<Shard<GATKRead>, IntervalWalkerContext> getIntervalsFunction(Broadcast<ReferenceMultiSource> bReferenceSource, Broadcast<FeatureManager> bFeatureManager, SAMSequenceDictionary sequenceDictionary, int intervalShardPadding) {
    return (org.apache.spark.api.java.function.Function<Shard<GATKRead>, IntervalWalkerContext>) shard -> {
        SimpleInterval interval = shard.getInterval();
        SimpleInterval paddedInterval = shard.getInterval().expandWithinContig(intervalShardPadding, sequenceDictionary);
        ReadsContext readsContext = new ReadsContext(new GATKDataSource<GATKRead>() {

            @Override
            public Iterator<GATKRead> iterator() {
                return shard.iterator();
            }

            @Override
            public Iterator<GATKRead> query(SimpleInterval interval) {
                return StreamSupport.stream(shard.spliterator(), false).filter(r -> IntervalUtils.overlaps(r, interval)).iterator();
            }
        }, shard.getInterval());
        ReferenceDataSource reference = bReferenceSource == null ? null : new ReferenceMemorySource(bReferenceSource.getValue().getReferenceBases(null, paddedInterval), sequenceDictionary);
        FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();
        return new IntervalWalkerContext(interval, readsContext, new ReferenceContext(reference, interval), new FeatureContext(features, interval));
    };
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Iterator(java.util.Iterator) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 43 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class LocusWalkerSpark method getAlignments.

/**
     * Loads alignments and the corresponding reference and features into a {@link JavaRDD} for the intervals specified.
     *
     * If no intervals were specified, returns all the alignments.
     *
     * @return all alignments as a {@link JavaRDD}, bounded by intervals if specified.
     */
public JavaRDD<LocusWalkerContext> getAlignments(JavaSparkContext ctx) {
    SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    List<SimpleInterval> intervals = hasIntervals() ? getIntervals() : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    final List<ShardBoundary> intervalShards = intervals.stream().flatMap(interval -> Shard.divideIntervalIntoShards(interval, readShardSize, readShardPadding, sequenceDictionary).stream()).collect(Collectors.toList());
    int maxLocatableSize = Math.min(readShardSize, readShardPadding);
    JavaRDD<Shard<GATKRead>> shardedReads = SparkSharder.shard(ctx, getReads(), GATKRead.class, sequenceDictionary, intervalShards, maxLocatableSize, shuffle);
    Broadcast<ReferenceMultiSource> bReferenceSource = hasReference() ? ctx.broadcast(getReference()) : null;
    Broadcast<FeatureManager> bFeatureManager = features == null ? null : ctx.broadcast(features);
    return shardedReads.flatMap(getAlignmentsFunction(bReferenceSource, bFeatureManager, sequenceDictionary, getHeaderForReads(), getDownsamplingInfo()));
}
Also used : Broadcast(org.apache.spark.broadcast.Broadcast) java.util(java.util) IntervalOverlappingIterator(org.broadinstitute.hellbender.utils.iterators.IntervalOverlappingIterator) ReferenceMultiSource(org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Argument(org.broadinstitute.barclay.argparser.Argument) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) LocusIteratorByState(org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) org.broadinstitute.hellbender.engine(org.broadinstitute.hellbender.engine) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) IntervalUtils(org.broadinstitute.hellbender.utils.IntervalUtils) ImmutableList(com.google.common.collect.ImmutableList) StreamSupport(java.util.stream.StreamSupport) LIBSDownsamplingInfo(org.broadinstitute.hellbender.utils.locusiterator.LIBSDownsamplingInfo) JavaRDD(org.apache.spark.api.java.JavaRDD) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) CommandLineException(org.broadinstitute.barclay.argparser.CommandLineException) ReferenceMultiSource(org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 44 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class SparkSharder method addPartitionReadExtent.

private static void addPartitionReadExtent(List<PartitionLocatable<SimpleInterval>> extents, int partitionIndex, String contig, int start, int end) {
    SimpleInterval extent = new SimpleInterval(contig, start, end);
    extents.add(new PartitionLocatable<>(partitionIndex, extent));
}
Also used : SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 45 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class VariantWalkerSpark method getVariantsFunction.

private static FlatMapFunction<Shard<VariantContext>, VariantWalkerContext> getVariantsFunction(final Broadcast<ReferenceMultiSource> bReferenceSource, final Broadcast<FeatureManager> bFeatureManager, final SAMSequenceDictionary sequenceDictionary, final int variantShardPadding) {
    return (FlatMapFunction<Shard<VariantContext>, VariantWalkerContext>) shard -> {
        SimpleInterval paddedInterval = shard.getInterval().expandWithinContig(variantShardPadding, sequenceDictionary);
        ReferenceDataSource reference = bReferenceSource == null ? null : new ReferenceMemorySource(bReferenceSource.getValue().getReferenceBases(null, paddedInterval), sequenceDictionary);
        FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();
        return StreamSupport.stream(shard.spliterator(), false).filter(v -> v.getStart() >= shard.getStart() && v.getStart() <= shard.getEnd()).map(v -> {
            final SimpleInterval variantInterval = new SimpleInterval(v);
            return new VariantWalkerContext(v, new ReadsContext(), new ReferenceContext(reference, variantInterval), new FeatureContext(features, variantInterval));
        }).iterator();
    };
}
Also used : Broadcast(org.apache.spark.broadcast.Broadcast) VCFHeader(htsjdk.variant.vcf.VCFHeader) ReferenceMultiSource(org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Argument(org.broadinstitute.barclay.argparser.Argument) IndexUtils(org.broadinstitute.hellbender.utils.IndexUtils) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) VariantFilterLibrary(org.broadinstitute.hellbender.engine.filters.VariantFilterLibrary) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) VariantFilter(org.broadinstitute.hellbender.engine.filters.VariantFilter) org.broadinstitute.hellbender.engine(org.broadinstitute.hellbender.engine) List(java.util.List) IntervalUtils(org.broadinstitute.hellbender.utils.IntervalUtils) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantsSparkSource(org.broadinstitute.hellbender.engine.spark.datasources.VariantsSparkSource) StreamSupport(java.util.stream.StreamSupport) JavaRDD(org.apache.spark.api.java.JavaRDD) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) VariantContext(htsjdk.variant.variantcontext.VariantContext) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Aggregations

SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)545 Test (org.testng.annotations.Test)287 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)202 File (java.io.File)102 ArrayList (java.util.ArrayList)66 DataProvider (org.testng.annotations.DataProvider)64 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)60 Collectors (java.util.stream.Collectors)53 java.util (java.util)41 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)40 AllelicCount (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)40 UserException (org.broadinstitute.hellbender.exceptions.UserException)39 VariantContext (htsjdk.variant.variantcontext.VariantContext)36 IntStream (java.util.stream.IntStream)34 Target (org.broadinstitute.hellbender.tools.exome.Target)34 IOException (java.io.IOException)32 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)28 Assert (org.testng.Assert)27 Locatable (htsjdk.samtools.util.Locatable)26 List (java.util.List)26