Search in sources :

Example 6 with Context

Use of org.apache.beam.sdk.io.gcp.bigquery.RetryManager.Operation.Context in the Apache Beam project.

From the class StorageApiFinalizeWritesDoFn, method process.

/**
 * Finalizes the write stream named by the element and, once the finalize call succeeds, records
 * the stream id under its table id in {@code commitStreams}.
 *
 * <p>The finalize call is submitted through a {@code RetryManager}. Each failure is logged,
 * counted in {@code finalizeOperationsFailed}, and answered with
 * {@code RetryType.RETRY_ALL_OPERATIONS}.
 *
 * @param pipelineOptions options handed to {@code getDatasetService} to obtain the service
 * @param element pair whose key is used as the table id and whose value is used as the stream id
 * @throws Exception as declared by the signature; presumably raised by the service lookup or by
 *     the retry run (confirm against {@code RetryManager})
 */
@ProcessElement
@SuppressWarnings({ "nullness" })
public void process(PipelineOptions pipelineOptions, @Element KV<String, String> element) throws Exception {
    final String table = element.getKey();
    final String stream = element.getValue();
    final DatasetService service = getDatasetService(pipelineOptions);

    final Duration oneSecond = Duration.standardSeconds(1);
    final Duration oneMinute = Duration.standardMinutes(1);
    RetryManager<FinalizeWriteStreamResponse, Context<FinalizeWriteStreamResponse>> finalizeRetries =
        new RetryManager<>(oneSecond, oneMinute, 3);

    finalizeRetries.addOperation(
        // Operation to run: count the attempt, then issue the finalize request.
        ctx -> {
            finalizeOperationsSent.inc();
            return service.finalizeWriteStream(stream);
        },
        // Failure handler: report the first context's error and ask for a retry.
        failed -> {
            Throwable cause = Iterables.getFirst(failed, null).getError();
            LOG.error("Finalize of stream " + stream + " failed with " + cause);
            finalizeOperationsFailed.inc();
            return RetryType.RETRY_ALL_OPERATIONS;
        },
        // Success handler: log the result and remember the stream for its table.
        done -> {
            LOG.info("Finalize of stream " + stream + " finished with " + done.getResult());
            finalizeOperationsSucceeded.inc();
            commitStreams.computeIfAbsent(table, unused -> Lists.newArrayList()).add(stream);
        },
        new Context<>());
    finalizeRetries.run(true);
}
Also used : Context(org.apache.beam.sdk.io.gcp.bigquery.RetryManager.Operation.Context) DoFn(org.apache.beam.sdk.transforms.DoFn) KV(org.apache.beam.sdk.values.KV) Logger(org.slf4j.Logger) Collection(java.util.Collection) Counter(org.apache.beam.sdk.metrics.Counter) Duration(org.joda.time.Duration) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) Lists(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists) BatchCommitWriteStreamsResponse(com.google.cloud.bigquery.storage.v1.BatchCommitWriteStreamsResponse) IOException(java.io.IOException) RetryType(org.apache.beam.sdk.io.gcp.bigquery.RetryManager.RetryType) Metrics(org.apache.beam.sdk.metrics.Metrics) FinalizeWriteStreamResponse(com.google.cloud.bigquery.storage.v1.FinalizeWriteStreamResponse) DatasetService(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService) Sets(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets) Context(org.apache.beam.sdk.io.gcp.bigquery.RetryManager.Operation.Context) StorageError(com.google.cloud.bigquery.storage.v1.StorageError) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) Maps(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps) StorageErrorCode(com.google.cloud.bigquery.storage.v1.StorageError.StorageErrorCode) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Nullable(javax.annotation.Nullable) FinalizeWriteStreamResponse(com.google.cloud.bigquery.storage.v1.FinalizeWriteStreamResponse) DatasetService(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService)

Example 7 with Context

Use of org.apache.beam.sdk.io.gcp.bigquery.RetryManager.Operation.Context in the Apache Beam project.

From the class StorageApiFlushAndFinalizeDoFn, method process.

/**
 * Flushes and/or finalizes the stream named by the element's key, as directed by the
 * {@code Operation} carried in the element's value.
 *
 * <p>When {@code operation.flushOffset} is non-negative, a flush to that offset is run through a
 * {@code RetryManager} and the elapsed wall-clock time, in milliseconds, is recorded in
 * {@code flushLatencyDistribution}. A flush failure whose {@code ApiException} status code is
 * {@code ALREADY_EXISTS} or {@code INVALID_ARGUMENT} is not retried; any other failure requests
 * a retry of all operations.
 *
 * <p>When {@code operation.finalizeStream} is set, the stream is then finalized through a second
 * {@code RetryManager}, and every failure requests a retry.
 *
 * @param pipelineOptions options handed to {@code getDatasetService} to obtain the service
 * @param element pair whose key is used as the stream id and whose value is the operation
 * @throws Exception as declared by the signature; presumably raised by the service lookup or by
 *     a retry run (confirm against {@code RetryManager})
 */
@SuppressWarnings({ "nullness" })
@ProcessElement
public void process(PipelineOptions pipelineOptions, @Element KV<String, Operation> element) throws Exception {
    final String stream = element.getKey();
    final Operation requested = element.getValue();
    final DatasetService service = getDatasetService(pipelineOptions);

    // A negative flush offset means there is nothing to flush; only finalization may be needed.
    final long targetOffset = requested.flushOffset;
    if (targetOffset >= 0) {
        final Instant flushStart = Instant.now();
        RetryManager<FlushRowsResponse, Context<FlushRowsResponse>> flushRetries =
            new RetryManager<>(Duration.standardSeconds(1), Duration.standardMinutes(1), 3);
        flushRetries.addOperation(
            // Operation to run: count the attempt and issue the flush.
            ctx -> {
                try {
                    flushOperationsSent.inc();
                    return service.flush(stream, targetOffset);
                } catch (Exception flushFailure) {
                    throw new RuntimeException(flushFailure);
                }
            },
            // Failure handler: decide from the status code whether a retry makes sense.
            failed -> {
                Throwable cause = Iterables.getFirst(failed, null).getError();
                LOG.warn("Flush of stream " + stream + " to offset " + targetOffset + " failed with " + cause);
                flushOperationsFailed.inc();
                if (!(cause instanceof ApiException)) {
                    return RetryType.RETRY_ALL_OPERATIONS;
                }
                Code status = ((ApiException) cause).getStatusCode().getCode();
                if (status.equals(Code.ALREADY_EXISTS)) {
                    // The stream has already been flushed up to this point, so retrying is pointless.
                    flushOperationsAlreadyExists.inc();
                    return RetryType.DONT_RETRY;
                }
                if (status.equals(Code.INVALID_ARGUMENT)) {
                    // TODO: Storage API should provide a more-specific way of identifying this failure.
                    flushOperationsInvalidArgument.inc();
                    return RetryType.DONT_RETRY;
                }
                return RetryType.RETRY_ALL_OPERATIONS;
            },
            // Success handler: just count it.
            flushed -> {
                flushOperationsSucceeded.inc();
            },
            new Context<>());
        flushRetries.run(true);
        // java.time.Duration is spelled out because the unqualified Duration is used for the
        // RetryManager arguments above.
        flushLatencyDistribution.update(java.time.Duration.between(flushStart, Instant.now()).toMillis());
    }

    // Finalize the stream when the operation asks for it.
    // NOTE(review): the original rationale comment was cut off in this copy (only the fragment
    // "or we would end up with duplicates." survived); restore the full text from upstream.
    if (requested.finalizeStream) {
        RetryManager<FinalizeWriteStreamResponse, Context<FinalizeWriteStreamResponse>> finalizeRetries =
            new RetryManager<>(Duration.standardSeconds(1), Duration.standardMinutes(1), 3);
        finalizeRetries.addOperation(
            ctx -> {
                finalizeOperationsSent.inc();
                return service.finalizeWriteStream(stream);
            },
            failed -> {
                Throwable cause = Iterables.getFirst(failed, null).getError();
                LOG.warn("Finalize of stream " + stream + " failed with " + cause);
                finalizeOperationsFailed.inc();
                return RetryType.RETRY_ALL_OPERATIONS;
            },
            finalized -> {
                finalizeOperationsSucceeded.inc();
            },
            new Context<>());
        finalizeRetries.run(true);
    }
}
Also used : Context(org.apache.beam.sdk.io.gcp.bigquery.RetryManager.Operation.Context) FinalizeWriteStreamResponse(com.google.cloud.bigquery.storage.v1.FinalizeWriteStreamResponse) Instant(java.time.Instant) DatasetService(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService) Operation(org.apache.beam.sdk.io.gcp.bigquery.StorageApiFlushAndFinalizeDoFn.Operation) Code(com.google.api.gax.rpc.StatusCode.Code) IOException(java.io.IOException) ApiException(com.google.api.gax.rpc.ApiException) FlushRowsResponse(com.google.cloud.bigquery.storage.v1.FlushRowsResponse) ApiException(com.google.api.gax.rpc.ApiException)

Aggregations

Context (org.apache.beam.sdk.io.gcp.bigquery.RetryManager.Operation.Context)7 Test (org.junit.Test)4 DatasetService (org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService)3 BatchCommitWriteStreamsResponse (com.google.cloud.bigquery.storage.v1.BatchCommitWriteStreamsResponse)2 FinalizeWriteStreamResponse (com.google.cloud.bigquery.storage.v1.FinalizeWriteStreamResponse)2 StorageError (com.google.cloud.bigquery.storage.v1.StorageError)2 IOException (java.io.IOException)2 Collection (java.util.Collection)2 Map (java.util.Map)2 ApiException (com.google.api.gax.rpc.ApiException)1 Code (com.google.api.gax.rpc.StatusCode.Code)1 FlushRowsResponse (com.google.cloud.bigquery.storage.v1.FlushRowsResponse)1 StorageErrorCode (com.google.cloud.bigquery.storage.v1.StorageError.StorageErrorCode)1 Instant (java.time.Instant)1 Set (java.util.Set)1 Nullable (javax.annotation.Nullable)1 RetryType (org.apache.beam.sdk.io.gcp.bigquery.RetryManager.RetryType)1 Operation (org.apache.beam.sdk.io.gcp.bigquery.StorageApiFlushAndFinalizeDoFn.Operation)1 Counter (org.apache.beam.sdk.metrics.Counter)1 Metrics (org.apache.beam.sdk.metrics.Metrics)1