Use of org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource in the gatk project by broadinstitute.
From class PathSeqFilterSpark, method doHostBWA.
/**
 * Aligns the given reads with a {@link BwaSparkEngine} and, as a side effect, replaces the
 * sequence dictionary of {@code readsHeader} with the one obtained from the host reference.
 *
 * @param ctx         Spark context handed to the BWA engine
 * @param readsHeader header whose sequence dictionary is overwritten with the host reference dictionary
 * @param reads       reads to align
 * @return the result of aligning {@code reads} with the engine
 */
private JavaRDD<GATKRead> doHostBWA(final JavaSparkContext ctx, final SAMFileHeader readsHeader, final JavaRDD<GATKRead> reads) {
    // The aligner is constructed first, before readsHeader is modified below.
    final BwaSparkEngine bwaEngine = new BwaSparkEngine(ctx, indexImageFile, getHeaderForReads(), getReferenceSequenceDictionary());

    // The GCS options are null if we have no api key.
    final ReferenceMultiSource hostRef = new ReferenceMultiSource(getAuthenticatedGCSOptions(), HOST_REF_PATH, getReferenceWindowFunction());

    // NOTE(review): `header` is not a parameter of this method -- presumably a field of the
    // enclosing class; confirm that it (rather than readsHeader) is the intended source here.
    readsHeader.setSequenceDictionary(hostRef.getReferenceSequenceDictionary(header.getSequenceDictionary()));

    return bwaEngine.align(reads);
}
Use of org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource in the gatk-protected project by broadinstitute.
From class HaplotypeCallerSparkIntegrationTest, method testReferenceAdapterIsSerializable.
/**
 * Round-trips a {@link ReferenceMultiSource}, and then a
 * {@link HaplotypeCallerSpark.ReferenceMultiSourceAdapter} wrapping it, through
 * {@code SparkTestUtils.roundTripInKryo} using the test Spark context's configuration.
 *
 * There are no explicit assertions in this method; any checking is presumably done inside
 * {@code roundTripInKryo} (not shown here).
 */
@Test
public void testReferenceAdapterIsSerializable() throws IOException {
    final AuthHolder authHolder = new AuthHolder("name", "somestring");

    // First the bare reference source...
    final ReferenceMultiSource source = new ReferenceMultiSource(
            authHolder, b37_2bit_reference_20_21, ReferenceWindowFunctions.IDENTITY_FUNCTION);
    SparkTestUtils.roundTripInKryo(
            source,
            ReferenceMultiSource.class,
            SparkContextFactory.getTestSparkContext().getConf());

    // ...then the adapter built on top of it.
    final HaplotypeCallerSpark.ReferenceMultiSourceAdapter wrapped =
            new HaplotypeCallerSpark.ReferenceMultiSourceAdapter(source, authHolder);
    SparkTestUtils.roundTripInKryo(
            wrapped,
            HaplotypeCallerSpark.ReferenceMultiSourceAdapter.class,
            SparkContextFactory.getTestSparkContext().getConf());
}
Use of org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource in the gatk project by broadinstitute.
From class GATKSparkTool, method initializeReference.
/**
 * Sets up {@code referenceSource} and {@code referenceDictionary} from the reference file named
 * in {@code referenceArguments}. Both are left untouched when no reference file name is set.
 *
 * @throws UserException.MissingReferenceDictFile if no sequence dictionary is obtained for the reference
 */
private void initializeReference() {
    // null if we have no api key; fetched up front, even when no reference was given
    final GCSOptions gcsOptions = getAuthenticatedGCSOptions();

    final String referencePath = referenceArguments.getReferenceFileName();
    if (referencePath == null) {
        return;
    }

    referenceSource = new ReferenceMultiSource(gcsOptions, referencePath, getReferenceWindowFunction());

    // The reads' dictionary is passed along when a reads header is available, otherwise null.
    referenceDictionary = referenceSource.getReferenceSequenceDictionary(
            readsHeader == null ? null : readsHeader.getSequenceDictionary());

    if (referenceDictionary == null) {
        throw new UserException.MissingReferenceDictFile(referencePath);
    }
}
Use of org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource in the gatk project by broadinstitute.
From class LocusWalkerSpark, method getAlignments.
/**
 * Builds a {@link JavaRDD} of {@link LocusWalkerContext} from the tool's reads, together with the
 * reference and features when they are available.
 *
 * The intervals used are the ones specified for the tool; if none were specified, all intervals
 * of the best available sequence dictionary are used instead. The intervals are divided into
 * shards, the reads are sharded accordingly, and each shard of reads is expanded via
 * {@code getAlignmentsFunction}.
 *
 * @param ctx Spark context used for sharding the reads and broadcasting the reference and features
 * @return the alignments as a {@link JavaRDD}, bounded by intervals if specified.
 */
public JavaRDD<LocusWalkerContext> getAlignments(JavaSparkContext ctx) {
    final SAMSequenceDictionary dictionary = getBestAvailableSequenceDictionary();

    // Fall back to the whole reference when the user gave no intervals.
    final List<SimpleInterval> targetIntervals;
    if (hasIntervals()) {
        targetIntervals = getIntervals();
    } else {
        targetIntervals = IntervalUtils.getAllIntervalsForReference(dictionary);
    }

    final List<ShardBoundary> shardBoundaries = targetIntervals.stream()
            .flatMap(interval -> Shard.divideIntervalIntoShards(interval, readShardSize, readShardPadding, dictionary).stream())
            .collect(Collectors.toList());

    final int locatableSizeLimit = Math.min(readShardSize, readShardPadding);
    final JavaRDD<Shard<GATKRead>> readShards =
            SparkSharder.shard(ctx, getReads(), GATKRead.class, dictionary, shardBoundaries, locatableSizeLimit, shuffle);

    // Reference and features are optional; a null broadcast stands for "not available".
    Broadcast<ReferenceMultiSource> referenceBroadcast = null;
    if (hasReference()) {
        referenceBroadcast = ctx.broadcast(getReference());
    }
    Broadcast<FeatureManager> featuresBroadcast = null;
    if (features != null) {
        featuresBroadcast = ctx.broadcast(features);
    }

    return readShards.flatMap(
            getAlignmentsFunction(referenceBroadcast, featuresBroadcast, dictionary, getHeaderForReads(), getDownsamplingInfo()));
}
Use of org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource in the gatk project by broadinstitute.
From class VariantWalkerSpark, method getVariantsFunction.
/**
 * Creates the function that turns one shard of variants into the {@link VariantWalkerContext}s
 * for that shard.
 *
 * For each shard, the shard interval is expanded by {@code variantShardPadding} (within its contig)
 * and, when a reference broadcast is given, the reference bases for that padded interval are loaded
 * into a {@link ReferenceMemorySource}. Only variants whose start lies between the shard's start and
 * end (inclusive) are emitted; each is paired with an empty {@link ReadsContext} and with reference
 * and feature contexts over the variant's own interval.
 *
 * @param bReferenceSource    broadcast reference source, or null if there is no reference
 * @param bFeatureManager     broadcast feature manager, or null if there are no features
 * @param sequenceDictionary  dictionary used to bound the padded interval and to build the in-memory reference
 * @param variantShardPadding number of bases by which each shard interval is expanded
 * @return a function mapping a shard of variants to an iterator over its walker contexts
 */
private static FlatMapFunction<Shard<VariantContext>, VariantWalkerContext> getVariantsFunction(final Broadcast<ReferenceMultiSource> bReferenceSource, final Broadcast<FeatureManager> bFeatureManager, final SAMSequenceDictionary sequenceDictionary, final int variantShardPadding) {
    return shard -> {
        final SimpleInterval padded = shard.getInterval().expandWithinContig(variantShardPadding, sequenceDictionary);

        final ReferenceDataSource shardReference;
        if (bReferenceSource != null) {
            shardReference = new ReferenceMemorySource(bReferenceSource.getValue().getReferenceBases(null, padded), sequenceDictionary);
        } else {
            shardReference = null;
        }

        final FeatureManager featureManager;
        if (bFeatureManager != null) {
            featureManager = bFeatureManager.getValue();
        } else {
            featureManager = null;
        }

        return StreamSupport.stream(shard.spliterator(), false)
                // keep only variants that start inside the shard itself
                .filter(variant -> shard.getStart() <= variant.getStart() && variant.getStart() <= shard.getEnd())
                .map(variant -> {
                    final SimpleInterval locus = new SimpleInterval(variant);
                    return new VariantWalkerContext(
                            variant,
                            new ReadsContext(),
                            new ReferenceContext(shardReference, locus),
                            new FeatureContext(featureManager, locus));
                })
                .iterator();
    };
}
Aggregations