Usage example of com.google.cloud.genomics.dataflow.utils.GCSOptions in project gatk by broadinstitute, from the class ReferenceAPISource, method getReferenceBases:
/**
 * Query the Google Genomics API for reference bases spanning the specified interval from the specified
 * reference name.
 *
 * @param pipelineOptions -- are used to get the credentials necessary to call the Genomics API;
 *                        if null, falls back on the saved apiKey (Spark path).
 * @param interval - the range of bases to retrieve.
 * @param pageSize - maximum number of bases the service may return per API call; responses
 *                 shorter than the interval are paginated and reassembled.
 * @return the reference bases specified by interval and apiData (using the Google Genomics API).
 * @throws UserException if the contig is unknown, the service returns no bases, the total byte
 *                       count does not match the interval size, or the query fails with an IOException.
 */
public ReferenceBases getReferenceBases(final PipelineOptions pipelineOptions, final SimpleInterval interval, int pageSize) {
Utils.nonNull(interval);
// Lazily initialize the Genomics service on first use.
if (genomicsService == null) {
if (pipelineOptions == null) {
// Fall back on the saved apiKey for Spark.
GCSOptions options = PipelineOptionsFactory.as(GCSOptions.class);
options.setApiKey(apiKey);
genomicsService = createGenomicsService(options);
} else {
genomicsService = createGenomicsService(pipelineOptions);
}
}
if (!referenceNameToIdTable.containsKey(interval.getContig())) {
throw new UserException("Contig " + interval.getContig() + " not in our set of reference names for this reference source");
}
try {
final Genomics.References.Bases.List listRequest = genomicsService.references().bases().list(referenceNameToIdTable.get(interval.getContig())).setPageSize(pageSize);
listRequest.setStart(interval.getGA4GHStart());
listRequest.setEnd(interval.getGA4GHEnd());
ListBasesResponse result = listRequest.execute();
if (result.getSequence() == null) {
throw new UserException("No reference bases returned in query for interval " + interval + ". Is the interval valid for this reference?");
}
// NOTE(review): getBytes() uses the platform default charset; reference bases are ASCII so
// this is fine in practice, but an explicit charset would be safer — confirm before changing.
byte[] received = result.getSequence().getBytes();
byte[] bases = received;
// A short first page means the response was paginated; fetch and concatenate remaining pages.
if (received.length < interval.size()) {
final List<byte[]> blobs = new ArrayList<>();
blobs.add(received);
while (result.getNextPageToken() != null && !result.getNextPageToken().isEmpty()) {
listRequest.setPageToken(result.getNextPageToken());
result = listRequest.execute();
// Guard each page as well: a null sequence here would otherwise surface as an NPE
// instead of a meaningful UserException.
if (result.getSequence() == null) {
throw new UserException("No reference bases returned in paginated query for interval " + interval + ". Is the interval valid for this reference?");
}
blobs.add(result.getSequence().getBytes());
}
final byte[][] resultsArray = blobs.toArray(new byte[blobs.size()][]);
bases = Bytes.concat(resultsArray);
}
// The service must hand back exactly as many bases as the interval spans.
if (bases.length != interval.size()) {
throw new UserException.ReferenceAPIReturnedUnexpectedNumberOfBytes(interval, bases);
}
return new ReferenceBases(bases, interval);
} catch (IOException e) {
throw new UserException("Query to genomics service failed for reference interval " + interval, e);
}
}
Usage example of com.google.cloud.genomics.dataflow.utils.GCSOptions in project gatk by broadinstitute, from the class SparkCommandLineProgram, method getAuthenticatedGCSOptions:
/**
 * Builds a GCSOptions object authenticated with this program's apiKey, suitable for
 * accessing files in GCS.
 *
 * @return an authenticated GCSOptions, or null if no apiKey is present.
 */
protected GCSOptions getAuthenticatedGCSOptions() {
if (apiKey == null) {
return null;
}
final GCSOptions gcsOptions = PipelineOptionsFactory.as(GCSOptions.class);
gcsOptions.setApiKey(apiKey);
return gcsOptions;
}
Usage example of com.google.cloud.genomics.dataflow.utils.GCSOptions in project gatk by broadinstitute, from the class BaseTest, method getAuthenticatedPipelineOptions:
/**
 * Builds a PipelineOptions object carrying our API key, as specified in the
 * HELLBENDER_TEST_APIKEY environment variable. Useful for tests that need to access
 * data in a GCS bucket via the methods in the
 * {@link org.broadinstitute.hellbender.utils.gcs.BucketUtils} class, but don't need
 * to run an actual dataflow pipeline.
 *
 * @return a PipelineOptions object containing our API key
 */
public static PipelineOptions getAuthenticatedPipelineOptions() {
final GCSOptions authenticatedOptions = PipelineOptionsFactory.as(GCSOptions.class);
authenticatedOptions.setApiKey(getGCPTestApiKey());
return authenticatedOptions;
}
End of aggregated usage examples.