Search in sources:

Example 1 with FlushRowsResponse

Use of com.google.cloud.bigquery.storage.v1.FlushRowsResponse in project beam by apache.

From the class FakeDatasetService, method flush:

@Override
public ApiFuture<FlushRowsResponse> flush(String streamName, long offset) {
    synchronized (tables) {
        // Look up the fake in-memory write stream registered under this name.
        Stream stream = writeStreams.get(streamName);
        if (stream == null) {
            throw new RuntimeException("No such stream: " + streamName);
        }
        // Mark rows appended to the stream up to the given offset as flushed.
        stream.flush(offset);
    }
    // The fake returns an empty response; the real service also reports the offset that was flushed.
    return ApiFutures.immediateFuture(FlushRowsResponse.newBuilder().build());
}
Also used: WriteStream (com.google.cloud.bigquery.storage.v1.WriteStream)
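
For context, a minimal standalone sketch of the call that FakeDatasetService.flush stands in for, using BigQueryWriteClient from the same com.google.cloud.bigquery.storage.v1 package. The project, dataset, table, and stream names and the offset below are placeholder values, not taken from the Beam code above.

import com.google.cloud.bigquery.storage.v1.BigQueryWriteClient;
import com.google.cloud.bigquery.storage.v1.FlushRowsRequest;
import com.google.cloud.bigquery.storage.v1.FlushRowsResponse;
import com.google.protobuf.Int64Value;

public class FlushRowsSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder stream path; assumes a BUFFERED write stream created beforehand.
        String streamName =
            "projects/my-project/datasets/my_dataset/tables/my_table/streams/my-stream-id";
        long offset = 100L;
        try (BigQueryWriteClient client = BigQueryWriteClient.create()) {
            FlushRowsRequest request =
                FlushRowsRequest.newBuilder()
                    .setWriteStream(streamName)
                    .setOffset(Int64Value.of(offset))
                    .build();
            // Rows appended to the BUFFERED stream up to and including this offset become visible.
            FlushRowsResponse response = client.flushRows(request);
            System.out.println("Flushed up to offset " + response.getOffset());
        }
    }
}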

Example 2 with FlushRowsResponse

Use of com.google.cloud.bigquery.storage.v1.FlushRowsResponse in project beam by apache.

From the class StorageApiFlushAndFinalizeDoFn, method process:

@SuppressWarnings({ "nullness" })
@ProcessElement
public void process(PipelineOptions pipelineOptions, @Element KV<String, Operation> element) throws Exception {
    final String streamId = element.getKey();
    final Operation operation = element.getValue();
    final DatasetService datasetService = getDatasetService(pipelineOptions);
    // Flush the stream. If the flush offset < 0, that means we only need to finalize.
    long offset = operation.flushOffset;
    if (offset >= 0) {
        Instant now = Instant.now();
        RetryManager<FlushRowsResponse, Context<FlushRowsResponse>> retryManager = new RetryManager<>(Duration.standardSeconds(1), Duration.standardMinutes(1), 3);
        retryManager.addOperation(// runOperation
        c -> {
            try {
                flushOperationsSent.inc();
                return datasetService.flush(streamId, offset);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }, // onError
        contexts -> {
            Throwable error = Iterables.getFirst(contexts, null).getError();
            LOG.warn("Flush of stream " + streamId + " to offset " + offset + " failed with " + error);
            flushOperationsFailed.inc();
            if (error instanceof ApiException) {
                Code statusCode = ((ApiException) error).getStatusCode().getCode();
                if (statusCode.equals(Code.ALREADY_EXISTS)) {
                    flushOperationsAlreadyExists.inc();
                    // Implies that we have already flushed up to this point, so don't retry.
                    return RetryType.DONT_RETRY;
                }
                if (statusCode.equals(Code.INVALID_ARGUMENT)) {
                    flushOperationsInvalidArgument.inc();
                    // TODO: Storage API should provide a more-specific way of identifying this failure.
                    return RetryType.DONT_RETRY;
                }
            }
            return RetryType.RETRY_ALL_OPERATIONS;
        }, // onSuccess
        c -> {
            flushOperationsSucceeded.inc();
        }, new Context<>());
        retryManager.run(true);
        java.time.Duration timeElapsed = java.time.Duration.between(now, Instant.now());
        flushLatencyDistribution.update(timeElapsed.toMillis());
    }
    // Finalize the stream if requested. Only rows up to the recorded offset were flushed above;
    // rows beyond it are deliberately left unflushed, since they may be appended again on retry
    // and we would end up with duplicates.
    if (operation.finalizeStream) {
        RetryManager<FinalizeWriteStreamResponse, Context<FinalizeWriteStreamResponse>> retryManager = new RetryManager<>(Duration.standardSeconds(1), Duration.standardMinutes(1), 3);
        retryManager.addOperation(c -> {
            finalizeOperationsSent.inc();
            return datasetService.finalizeWriteStream(streamId);
        }, contexts -> {
            LOG.warn("Finalize of stream " + streamId + " failed with " + Iterables.getFirst(contexts, null).getError());
            finalizeOperationsFailed.inc();
            return RetryType.RETRY_ALL_OPERATIONS;
        }, r -> {
            finalizeOperationsSucceeded.inc();
        }, new Context<>());
        retryManager.run(true);
    }
}
Also used: Context (org.apache.beam.sdk.io.gcp.bigquery.RetryManager.Operation.Context), FinalizeWriteStreamResponse (com.google.cloud.bigquery.storage.v1.FinalizeWriteStreamResponse), Instant (java.time.Instant), DatasetService (org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService), Operation (org.apache.beam.sdk.io.gcp.bigquery.StorageApiFlushAndFinalizeDoFn.Operation), Code (com.google.api.gax.rpc.StatusCode.Code), IOException (java.io.IOException), ApiException (com.google.api.gax.rpc.ApiException), FlushRowsResponse (com.google.cloud.bigquery.storage.v1.FlushRowsResponse)
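
Stripped of Beam's RetryManager, metrics, and error classification, the flush-then-finalize sequence that this DoFn delegates to DatasetService corresponds roughly to the following direct calls against the Storage Write API client. This is a sketch under assumed placeholder values for the stream id, flush offset, and finalize flag; the retry handling for ALREADY_EXISTS and INVALID_ARGUMENT shown above is omitted.

import com.google.cloud.bigquery.storage.v1.BigQueryWriteClient;
import com.google.cloud.bigquery.storage.v1.FinalizeWriteStreamResponse;
import com.google.cloud.bigquery.storage.v1.FlushRowsRequest;
import com.google.cloud.bigquery.storage.v1.FlushRowsResponse;
import com.google.protobuf.Int64Value;

public class FlushAndFinalizeSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder values; the DoFn receives these per element as KV<streamId, Operation>.
        String streamId =
            "projects/my-project/datasets/my_dataset/tables/my_table/streams/my-stream-id";
        long flushOffset = 42L;
        boolean finalizeStream = true;

        try (BigQueryWriteClient client = BigQueryWriteClient.create()) {
            // A negative offset means there is nothing to flush, matching the DoFn's check.
            if (flushOffset >= 0) {
                FlushRowsResponse flushed =
                    client.flushRows(
                        FlushRowsRequest.newBuilder()
                            .setWriteStream(streamId)
                            .setOffset(Int64Value.of(flushOffset))
                            .build());
                System.out.println("Flushed through offset " + flushed.getOffset());
            }
            // Finalizing prevents any further appends to the stream.
            if (finalizeStream) {
                FinalizeWriteStreamResponse finalized = client.finalizeWriteStream(streamId);
                System.out.println("Finalized stream; row count " + finalized.getRowCount());
            }
        }
    }
}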

Aggregations

ApiException (com.google.api.gax.rpc.ApiException): 1
Code (com.google.api.gax.rpc.StatusCode.Code): 1
FinalizeWriteStreamResponse (com.google.cloud.bigquery.storage.v1.FinalizeWriteStreamResponse): 1
FlushRowsResponse (com.google.cloud.bigquery.storage.v1.FlushRowsResponse): 1
WriteStream (com.google.cloud.bigquery.storage.v1.WriteStream): 1
IOException (java.io.IOException): 1
Instant (java.time.Instant): 1
DatasetService (org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService): 1
Context (org.apache.beam.sdk.io.gcp.bigquery.RetryManager.Operation.Context): 1
Operation (org.apache.beam.sdk.io.gcp.bigquery.StorageApiFlushAndFinalizeDoFn.Operation): 1