Search in sources :

Example 1 with Genomics

use of com.google.api.services.genomics.Genomics in project gatk by broadinstitute.

The class ReferenceAPISource, method getReferenceBases.

/**
 * Query the Google Genomics API for reference bases spanning the specified interval from the specified
 * reference name.
 *
 * <p>The Genomics client is created on first use: from {@code pipelineOptions} when they are supplied,
 * otherwise from the saved API key. If the first response holds fewer bases than the interval spans,
 * further pages are requested while the service keeps returning a non-empty next-page token, and the
 * pages are concatenated in the order received.
 *
 * @param pipelineOptions -- are used to get the credentials necessary to call the Genomics API; may be
 *                        null, in which case the saved apiKey is used instead
 * @param interval - the range of bases to retrieve; must not be null.
 * @param pageSize - the page size set on the bases list request.
 * @return the reference bases specified by interval (using the Google Genomics API).
 * @throws UserException if the contig is not a known reference name, if any page comes back without a
 *                       sequence, if the total number of bytes differs from the interval size, or if
 *                       the call to the service fails with an IOException
 */
public ReferenceBases getReferenceBases(final PipelineOptions pipelineOptions, final SimpleInterval interval, int pageSize) {
    Utils.nonNull(interval);
    if (genomicsService == null) {
        if (pipelineOptions == null) {
            // Fall back on the saved apiKey for Spark.
            GCSOptions options = PipelineOptionsFactory.as(GCSOptions.class);
            options.setApiKey(apiKey);
            genomicsService = createGenomicsService(options);
        } else {
            genomicsService = createGenomicsService(pipelineOptions);
        }
    }
    if (!referenceNameToIdTable.containsKey(interval.getContig())) {
        throw new UserException("Contig " + interval.getContig() + " not in our set of reference names for this reference source");
    }
    try {
        final Genomics.References.Bases.List listRequest = genomicsService.references().bases().list(referenceNameToIdTable.get(interval.getContig())).setPageSize(pageSize);
        listRequest.setStart(interval.getGA4GHStart());
        listRequest.setEnd(interval.getGA4GHEnd());
        ListBasesResponse result = listRequest.execute();
        if (result.getSequence() == null) {
            throw new UserException("No reference bases returned in query for interval " + interval + ". Is the interval valid for this reference?");
        }
        // Decode with an explicit charset so the byte count does not depend on the platform default.
        byte[] received = result.getSequence().getBytes(java.nio.charset.StandardCharsets.UTF_8);
        byte[] bases = received;
        if (received.length < interval.size()) {
            // The first page was short: keep fetching pages until the service stops handing out tokens.
            final List<byte[]> blobs = new ArrayList<>();
            blobs.add(received);
            while (result.getNextPageToken() != null && !result.getNextPageToken().isEmpty()) {
                listRequest.setPageToken(result.getNextPageToken());
                result = listRequest.execute();
                final String pageSequence = result.getSequence();
                if (pageSequence == null) {
                    // Report a continuation page without bases the same way as an empty first page,
                    // rather than failing with a NullPointerException.
                    throw new UserException("No reference bases returned in a continuation page of the query for interval " + interval);
                }
                blobs.add(pageSequence.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            }
            final byte[][] resultsArray = blobs.toArray(new byte[blobs.size()][]);
            bases = Bytes.concat(resultsArray);
        }
        if (bases.length != interval.size()) {
            throw new UserException.ReferenceAPIReturnedUnexpectedNumberOfBytes(interval, bases);
        }
        return new ReferenceBases(bases, interval);
    } catch (IOException e) {
        throw new UserException("Query to genomics service failed for reference interval " + interval, e);
    }
}
Also used : Genomics(com.google.api.services.genomics.Genomics) IOException(java.io.IOException) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) GATKGCSOptions(org.broadinstitute.hellbender.utils.gcs.GATKGCSOptions) GCSOptions(com.google.cloud.genomics.dataflow.utils.GCSOptions) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 2 with Genomics

use of com.google.api.services.genomics.Genomics in project gatk by broadinstitute.

The class ReferenceAPISource, method fromReferenceSetAssemblyID.

/**
 * Creates this ReferenceAPISource from an assembly ID by querying in the cloud APIs.
 *
 * <p>Searches for reference sets with the given assembly ID, requires exactly one distinct
 * reference set ID among the results, then searches for that set's references and builds a
 * name-to-reference table from them.
 *
 * @param pipelineOptions options used to create the Genomics service; must not be null
 * @param referenceSetAssemblyID the assembly ID to search for; must not be null
 * @return a ReferenceAPISource built from the pipeline options and the name-to-reference table
 * @throws UserException.UnknownReferenceSet if the search yields no reference set
 * @throws UserException.MultipleReferenceSets if the search yields more than one reference set ID
 * @throws UserException if a query to the service fails with an IOException
 */
public static ReferenceAPISource fromReferenceSetAssemblyID(final PipelineOptions pipelineOptions, final String referenceSetAssemblyID) {
    Utils.nonNull(pipelineOptions);
    Utils.nonNull(referenceSetAssemblyID);
    final SearchReferenceSetsRequest content = new SearchReferenceSetsRequest();
    content.setAssemblyId(referenceSetAssemblyID);
    try {
        final Genomics genomicsService = createGenomicsService(pipelineOptions);
        final SearchReferenceSetsResponse found = genomicsService.referencesets().search(content).execute();
        // Guard against a response whose reference-set list is unset: treat it like an empty result
        // so the caller gets UnknownReferenceSet instead of a NullPointerException.
        if (found.getReferenceSets() == null) {
            throw new UserException.UnknownReferenceSet(referenceSetAssemblyID);
        }
        final Set<String> referenceSetIds = found.getReferenceSets().stream().map(rs -> rs.getId()).collect(Collectors.toSet());
        if (referenceSetIds.isEmpty()) {
            throw new UserException.UnknownReferenceSet(referenceSetAssemblyID);
        }
        if (referenceSetIds.size() > 1) {
            throw new UserException.MultipleReferenceSets(referenceSetAssemblyID, referenceSetIds);
        }
        // Exactly one ID remains at this point; the loop body therefore runs once.
        final Map<String, Reference> ret = new LinkedHashMap<>();
        for (final String rId : referenceSetIds) {
            final SearchReferencesRequest query = new SearchReferencesRequest().setReferenceSetId(rId);
            ret.putAll(genomicsService.references().search(query).execute().getReferences().stream().collect(Collectors.toMap(r -> r.getName(), r -> r)));
        }
        return new ReferenceAPISource(pipelineOptions, ret);
    } catch (final IOException e) {
        throw new UserException("Error while looking up reference set " + referenceSetAssemblyID, e);
    }
}
Also used : java.util(java.util) GATKGCSOptions(org.broadinstitute.hellbender.utils.gcs.GATKGCSOptions) ObjectInputStream(java.io.ObjectInputStream) GCSOptions(com.google.cloud.genomics.dataflow.utils.GCSOptions) Genomics(com.google.api.services.genomics.Genomics) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) GeneralSecurityException(java.security.GeneralSecurityException) ObjectOutputStream(java.io.ObjectOutputStream) GenomicsFactory(com.google.cloud.genomics.utils.GenomicsFactory) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) Bytes(com.google.common.primitives.Bytes) Serializable(java.io.Serializable) PipelineOptionsFactory(com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory) Logger(org.apache.logging.log4j.Logger) UserException(org.broadinstitute.hellbender.exceptions.UserException) JsonFactory(com.google.api.client.json.JsonFactory) PipelineOptions(com.google.cloud.dataflow.sdk.options.PipelineOptions) Utils(org.broadinstitute.hellbender.utils.Utils) VisibleForTesting(com.google.common.annotations.VisibleForTesting) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) com.google.api.services.genomics.model(com.google.api.services.genomics.model) LogManager(org.apache.logging.log4j.LogManager) Genomics(com.google.api.services.genomics.Genomics) IOException(java.io.IOException) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Aggregations

Genomics (com.google.api.services.genomics.Genomics)2 GCSOptions (com.google.cloud.genomics.dataflow.utils.GCSOptions)2 IOException (java.io.IOException)2 UserException (org.broadinstitute.hellbender.exceptions.UserException)2 GATKGCSOptions (org.broadinstitute.hellbender.utils.gcs.GATKGCSOptions)2 ReferenceBases (org.broadinstitute.hellbender.utils.reference.ReferenceBases)2 JsonFactory (com.google.api.client.json.JsonFactory)1 com.google.api.services.genomics.model (com.google.api.services.genomics.model)1 PipelineOptions (com.google.cloud.dataflow.sdk.options.PipelineOptions)1 PipelineOptionsFactory (com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory)1 GenomicsFactory (com.google.cloud.genomics.utils.GenomicsFactory)1 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Bytes (com.google.common.primitives.Bytes)1 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)1 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)1 ObjectInputStream (java.io.ObjectInputStream)1 ObjectOutputStream (java.io.ObjectOutputStream)1 Serializable (java.io.Serializable)1 GeneralSecurityException (java.security.GeneralSecurityException)1 java.util (java.util)1