Search in sources :

Example 6 with GCSOptions

use of com.google.cloud.genomics.dataflow.utils.GCSOptions in project gatk by broadinstitute.

the class ReferenceAPISource method getReferenceBases.

/**
 * Query the Google Genomics API for reference bases spanning the specified interval from the specified
 * reference name.
 *
 * The genomics service client is created on first use and then kept in {@code genomicsService};
 * {@code pipelineOptions} is only consulted when that client has not been created yet.
 * If the first response holds fewer bases than the interval spans, further pages are fetched
 * by following the next-page token and the pieces are concatenated in order.
 *
 * @param pipelineOptions -- are used to get the credentials necessary to call the Genomics API;
 *                        may be null, in which case the saved apiKey is used instead (Spark fallback).
 * @param interval - the range of bases to retrieve; must not be null.
 * @param pageSize - the page size set on the list-bases request.
 * @return the reference bases specified by interval (using the Google Genomics API).
 * @throws UserException if the interval's contig is not a known reference name, if the first response
 *                       contains no sequence, if the total number of bytes received differs from the
 *                       interval size, or if the request fails with an IOException.
 */
public ReferenceBases getReferenceBases(final PipelineOptions pipelineOptions, final SimpleInterval interval, int pageSize) {
    Utils.nonNull(interval);
    if (genomicsService == null) {
        if (pipelineOptions == null) {
            // Fall back on the saved apiKey for Spark.
            GCSOptions options = PipelineOptionsFactory.as(GCSOptions.class);
            options.setApiKey(apiKey);
            genomicsService = createGenomicsService(options);
        } else {
            genomicsService = createGenomicsService(pipelineOptions);
        }
    }
    if (!referenceNameToIdTable.containsKey(interval.getContig())) {
        throw new UserException("Contig " + interval.getContig() + " not in our set of reference names for this reference source");
    }
    try {
        final Genomics.References.Bases.List listRequest = genomicsService.references().bases().list(referenceNameToIdTable.get(interval.getContig())).setPageSize(pageSize);
        listRequest.setStart(interval.getGA4GHStart());
        listRequest.setEnd(interval.getGA4GHEnd());
        ListBasesResponse result = listRequest.execute();
        if (result.getSequence() == null) {
            throw new UserException("No reference bases returned in query for interval " + interval + ". Is the interval valid for this reference?");
        }
        // NOTE(review): getBytes() encodes with the platform default charset; presumably the
        // sequence is plain ASCII base codes so this is harmless -- confirm.
        final byte[] received = result.getSequence().getBytes();
        byte[] bases = received;
        if (received.length < interval.size()) {
            // The first page did not cover the whole interval: keep following the page token.
            final List<byte[]> blobs = new ArrayList<>();
            blobs.add(received);
            while (result.getNextPageToken() != null && !result.getNextPageToken().isEmpty()) {
                listRequest.setPageToken(result.getNextPageToken());
                result = listRequest.execute();
                // A follow-up page without a sequence contributes no bases; skip it instead of
                // failing with a NullPointerException. Any resulting shortfall is reported by the
                // length check below.
                if (result.getSequence() != null) {
                    blobs.add(result.getSequence().getBytes());
                }
            }
            final byte[][] resultsArray = blobs.toArray(new byte[blobs.size()][]);
            bases = Bytes.concat(resultsArray);
        }
        if (bases.length != interval.size()) {
            throw new UserException.ReferenceAPIReturnedUnexpectedNumberOfBytes(interval, bases);
        }
        return new ReferenceBases(bases, interval);
    } catch (IOException e) {
        throw new UserException("Query to genomics service failed for reference interval " + interval, e);
    }
}
Also used : Genomics(com.google.api.services.genomics.Genomics) IOException(java.io.IOException) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) GATKGCSOptions(org.broadinstitute.hellbender.utils.gcs.GATKGCSOptions) GCSOptions(com.google.cloud.genomics.dataflow.utils.GCSOptions) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 7 with GCSOptions

use of com.google.cloud.genomics.dataflow.utils.GCSOptions in project gatk by broadinstitute.

the class SparkCommandLineProgram method getAuthenticatedGCSOptions.

/**
 * Builds a GCSOptions object carrying this program's apiKey, suitable for accessing files in GCS.
 *
 * @return a GCSOptions object with the apiKey set, or null if no apiKey is present.
 */
protected GCSOptions getAuthenticatedGCSOptions() {
    GCSOptions authenticated = null;
    if (apiKey != null) {
        // Only create the options object when there is a key to put in it.
        authenticated = PipelineOptionsFactory.as(GCSOptions.class);
        authenticated.setApiKey(apiKey);
    }
    return authenticated;
}
Also used : GCSOptions(com.google.cloud.genomics.dataflow.utils.GCSOptions)

Example 8 with GCSOptions

use of com.google.cloud.genomics.dataflow.utils.GCSOptions in project gatk by broadinstitute.

the class BaseTest method getAuthenticatedPipelineOptions.

/**
 * Creates a PipelineOptions object that carries our API key, as specified in the
 * HELLBENDER_TEST_APIKEY environment variable.
 *
 * Intended for tests that need to access data in a GCS bucket via the methods in the
 * {@link org.broadinstitute.hellbender.utils.gcs.BucketUtils} class, but that do not need
 * to run an actual dataflow pipeline.
 *
 * @return a PipelineOptions object containing our API key
 */
public static PipelineOptions getAuthenticatedPipelineOptions() {
    final GCSOptions authenticatedOptions = PipelineOptionsFactory.as(GCSOptions.class);
    // The key is looked up only after the options object exists, as in the original ordering.
    authenticatedOptions.setApiKey(getGCPTestApiKey());
    return authenticatedOptions;
}
Also used : GCSOptions(com.google.cloud.genomics.dataflow.utils.GCSOptions)

Aggregations

GCSOptions (com.google.cloud.genomics.dataflow.utils.GCSOptions) 8, ReferenceMultiSource (org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource) 2, Genomics (com.google.api.services.genomics.Genomics) 1, SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary) 1, IOException (java.io.IOException) 1, UserException (org.broadinstitute.hellbender.exceptions.UserException) 1, BwaSparkEngine (org.broadinstitute.hellbender.tools.spark.bwa.BwaSparkEngine) 1, GATKGCSOptions (org.broadinstitute.hellbender.utils.gcs.GATKGCSOptions) 1, GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead) 1, RecalibrationReport (org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport) 1, ReferenceBases (org.broadinstitute.hellbender.utils.reference.ReferenceBases) 1