use of com.google.cloud.dataflow.sdk.options.PipelineOptions in project gatk by broadinstitute.
the class PathSeqKmerSpark method runTool.
/**
 * Get the list of distinct kmers in the reference, and write them to a file as a HopScotchSet.
 *
 * <p>The kmers are collected by {@code findKmers} using {@code KMER_SIZE}, wrapped in a
 * {@link HopscotchSet}, and serialized with Kryo to the location named by {@code OUTPUT_FILE}.
 * The reads header is optional: when {@code getHeaderForReads()} returns {@code null}, a
 * {@code null} sequence dictionary is passed to {@code findKmers}.
 *
 * @param ctx the Spark context handed to {@code findKmers}
 */
@Override
protected void runTool(final JavaSparkContext ctx) {
    final SAMFileHeader hdr = getHeaderForReads();
    // No reads header means there is no sequence dictionary to pass along.
    final SAMSequenceDictionary dict = (hdr != null) ? hdr.getSequenceDictionary() : null;

    final PipelineOptions options = getAuthenticatedGCSOptions();
    final ReferenceMultiSource referenceMultiSource = getReference();
    final List<SVKmer> kmerList = findKmers(ctx, KMER_SIZE, referenceMultiSource, options, dict);
    final HopscotchSet<SVKmer> kmerSet = new HopscotchSet<>(kmerList);

    // Build the serializer before opening the output so nothing can fail between
    // creating the stream and entering the try block that guarantees it is closed.
    final Kryo kryo = new Kryo();
    kryo.setReferences(false);

    // try-with-resources closes the output stream even when serialization throws;
    // previously a failure in writeClassAndObject left the stream open.
    try (final Output output = new Output(BucketUtils.createFile(OUTPUT_FILE))) {
        kryo.writeClassAndObject(output, kmerSet);
    }
}
use of com.google.cloud.dataflow.sdk.options.PipelineOptions in project gatk by broadinstitute.
the class ReferenceAPISource method fromReferenceSetAssemblyID.
/**
 * Creates this ReferenceAPISource from an assembly ID by querying in the cloud APIs.
 *
 * <p>The assembly ID is first resolved to reference set IDs. Exactly one distinct reference
 * set ID must match; the references of that set are then fetched and keyed by reference name.
 *
 * @param pipelineOptions options used to create the Genomics service client; must not be null
 * @param referenceSetAssemblyID the assembly ID to search for; must not be null
 * @return a ReferenceAPISource built from the matching reference set's references
 * @throws UserException.UnknownReferenceSet if no reference set matches the assembly ID
 * @throws UserException.MultipleReferenceSets if more than one reference set ID matches
 * @throws UserException if the lookup fails with an {@link IOException}
 */
public static ReferenceAPISource fromReferenceSetAssemblyID(final PipelineOptions pipelineOptions, final String referenceSetAssemblyID) {
    Utils.nonNull(pipelineOptions);
    Utils.nonNull(referenceSetAssemblyID);
    final SearchReferenceSetsRequest content = new SearchReferenceSetsRequest();
    content.setAssemblyId(referenceSetAssemblyID);
    try {
        final Genomics genomicsService = createGenomicsService(pipelineOptions);
        final SearchReferenceSetsResponse found = genomicsService.referencesets().search(content).execute();

        // The generated API model may return null (rather than an empty list) when the
        // response carries no reference sets; treat that the same as "nothing matched"
        // instead of failing with a NullPointerException.
        if (found.getReferenceSets() == null) {
            throw new UserException.UnknownReferenceSet(referenceSetAssemblyID);
        }

        final Set<String> referenceSetIds = found.getReferenceSets().stream()
                .map(rs -> rs.getId())
                .collect(Collectors.toSet());
        if (referenceSetIds.isEmpty()) {
            throw new UserException.UnknownReferenceSet(referenceSetAssemblyID);
        }
        if (referenceSetIds.size() > 1) {
            throw new UserException.MultipleReferenceSets(referenceSetAssemblyID, referenceSetIds);
        }

        // Exactly one ID remains at this point, so this loop runs once.
        // NOTE(review): only the first page of each search response is read here — confirm
        // whether paging via a next-page token is needed for large reference sets.
        final Map<String, Reference> ret = new LinkedHashMap<>();
        for (final String rId : referenceSetIds) {
            final SearchReferencesRequest query = new SearchReferencesRequest().setReferenceSetId(rId);
            ret.putAll(genomicsService.references().search(query).execute().getReferences().stream()
                    .collect(Collectors.toMap(Reference::getName, r -> r)));
        }
        return new ReferenceAPISource(pipelineOptions, ret);
    } catch (final IOException e) {
        throw new UserException("Error while looking up reference set " + referenceSetAssemblyID, e);
    }
}
Aggregations