Use of com.google.api.services.genomics.Genomics in project gatk by broadinstitute.
Class ReferenceAPISource, method getReferenceBases.
/**
 * Query the Google Genomics API for reference bases spanning the specified interval from the
 * reference this source was created for.
 *
 * If the first response holds fewer bases than the interval spans, further pages are requested
 * (following the next-page token) and the pieces are concatenated in the order received.
 *
 * @param pipelineOptions used to create the Genomics service on first use; when null, the service
 *                        is created from the API key saved on this object instead (Spark case).
 * @param interval the range of bases to retrieve; must not be null.
 * @param pageSize the page size set on the list request sent to the Genomics API.
 * @return the reference bases covering {@code interval}.
 * @throws UserException if the interval's contig is not known to this source, if the first
 *                       response carries no sequence, if the total number of bytes received
 *                       differs from the interval size, or if the query itself fails.
 */
public ReferenceBases getReferenceBases(final PipelineOptions pipelineOptions, final SimpleInterval interval, int pageSize) {
    Utils.nonNull(interval);
    if (genomicsService == null) {
        if (pipelineOptions == null) {
            // Fall back on the saved apiKey for Spark.
            GCSOptions options = PipelineOptionsFactory.as(GCSOptions.class);
            options.setApiKey(apiKey);
            genomicsService = createGenomicsService(options);
        } else {
            genomicsService = createGenomicsService(pipelineOptions);
        }
    }
    if (!referenceNameToIdTable.containsKey(interval.getContig())) {
        throw new UserException("Contig " + interval.getContig() + " not in our set of reference names for this reference source");
    }
    try {
        final Genomics.References.Bases.List listRequest = genomicsService.references().bases().list(referenceNameToIdTable.get(interval.getContig())).setPageSize(pageSize);
        listRequest.setStart(interval.getGA4GHStart());
        listRequest.setEnd(interval.getGA4GHEnd());
        ListBasesResponse result = listRequest.execute();
        if (result.getSequence() == null) {
            throw new UserException("No reference bases returned in query for interval " + interval + ". Is the interval valid for this reference?");
        }
        // Use an explicit charset so the byte count does not depend on the platform default.
        final byte[] received = result.getSequence().getBytes(java.nio.charset.StandardCharsets.UTF_8);
        byte[] bases = received;
        if (received.length < interval.size()) {
            // The answer was split across pages: collect every page, then stitch them together.
            final List<byte[]> blobs = new ArrayList<>();
            blobs.add(received);
            while (result.getNextPageToken() != null && !result.getNextPageToken().isEmpty()) {
                listRequest.setPageToken(result.getNextPageToken());
                result = listRequest.execute();
                final String pageSequence = result.getSequence();
                // A page without a sequence contributes no bytes; the length check below
                // then reports the shortfall instead of a NullPointerException.
                if (pageSequence != null) {
                    blobs.add(pageSequence.getBytes(java.nio.charset.StandardCharsets.UTF_8));
                }
            }
            final byte[][] resultsArray = blobs.toArray(new byte[blobs.size()][]);
            bases = Bytes.concat(resultsArray);
        }
        if (bases.length != interval.size()) {
            throw new UserException.ReferenceAPIReturnedUnexpectedNumberOfBytes(interval, bases);
        }
        return new ReferenceBases(bases, interval);
    } catch (IOException e) {
        throw new UserException("Query to genomics service failed for reference interval " + interval, e);
    }
}
Use of com.google.api.services.genomics.Genomics in project gatk by broadinstitute.
Class ReferenceAPISource, method fromReferenceSetAssemblyID.
/**
 * Creates this ReferenceAPISource from an assembly ID by querying in the cloud APIs.
 *
 * Searches for reference sets with the given assembly ID, requires exactly one distinct
 * reference set ID among the results, then looks up that set's references and indexes them by name.
 *
 * @param pipelineOptions used to create the Genomics service; must not be null.
 * @param referenceSetAssemblyID the assembly ID to search for; must not be null.
 * @return a ReferenceAPISource built from {@code pipelineOptions} and the name-to-reference map.
 * @throws UserException.UnknownReferenceSet if the search yields no reference set.
 * @throws UserException.MultipleReferenceSets if the search yields more than one reference set ID.
 * @throws UserException if a query to the Genomics service fails.
 */
public static ReferenceAPISource fromReferenceSetAssemblyID(final PipelineOptions pipelineOptions, final String referenceSetAssemblyID) {
    Utils.nonNull(pipelineOptions);
    Utils.nonNull(referenceSetAssemblyID);
    final SearchReferenceSetsRequest content = new SearchReferenceSetsRequest();
    content.setAssemblyId(referenceSetAssemblyID);
    try {
        final Genomics genomicsService = createGenomicsService(pipelineOptions);
        final SearchReferenceSetsResponse found = genomicsService.referencesets().search(content).execute();
        // The response model may leave the list unset (null) when nothing matched;
        // report that as an unknown reference set rather than failing on a null dereference.
        if (found.getReferenceSets() == null) {
            throw new UserException.UnknownReferenceSet(referenceSetAssemblyID);
        }
        final Set<String> referenceSetIds = found.getReferenceSets().stream().map(rs -> rs.getId()).collect(Collectors.toSet());
        if (referenceSetIds.isEmpty()) {
            throw new UserException.UnknownReferenceSet(referenceSetAssemblyID);
        }
        if (referenceSetIds.size() > 1) {
            throw new UserException.MultipleReferenceSets(referenceSetAssemblyID, referenceSetIds);
        }
        final Map<String, Reference> ret = new LinkedHashMap<>();
        // Exactly one ID remains at this point, so this loop body runs once.
        for (final String rId : referenceSetIds) {
            final SearchReferencesRequest query = new SearchReferencesRequest().setReferenceSetId(rId);
            final List<Reference> references = genomicsService.references().search(query).execute().getReferences();
            // A null list is treated as "no references" for this set.
            if (references != null) {
                ret.putAll(references.stream().collect(Collectors.toMap(r -> r.getName(), r -> r)));
            }
        }
        return new ReferenceAPISource(pipelineOptions, ret);
    } catch (final IOException e) {
        throw new UserException("Error while looking up reference set " + referenceSetAssemblyID, e);
    }
}
Aggregations