Use of org.apache.beam.sdk.io.gcp.bigquery.StorageApiFlushAndFinalizeDoFn.Operation in project beam by apache.
In class StorageApiWritesShardedRecords, method expand:
@Override
public PCollection<Void> expand(
    PCollection<KV<ShardedKey<DestinationT>, Iterable<byte[]>>> input) {
  String operationName = input.getName() + "/" + getName();
  // Append records to the Storage API streams.
  PCollection<KV<String, Operation>> written =
      input.apply(
          "Write Records",
          ParDo.of(new WriteRecordsDoFn(operationName))
              .withSideInputs(dynamicDestinations.getSideInputs()));
  SchemaCoder<Operation> operationCoder;
  try {
    SchemaRegistry schemaRegistry = input.getPipeline().getSchemaRegistry();
    operationCoder =
        SchemaCoder.of(
            schemaRegistry.getSchema(Operation.class),
            TypeDescriptor.of(Operation.class),
            schemaRegistry.getToRowFunction(Operation.class),
            schemaRegistry.getFromRowFunction(Operation.class));
  } catch (NoSuchSchemaException e) {
    throw new RuntimeException(e);
  }
  // Send all successful writes to be flushed.
  return written
      .setCoder(KvCoder.of(StringUtf8Coder.of(), operationCoder))
      .apply(
          Window.<KV<String, Operation>>configure()
              .triggering(
                  Repeatedly.forever(
                      AfterProcessingTime.pastFirstElementInPane()
                          .plusDelayOf(Duration.standardSeconds(1))))
              .discardingFiredPanes())
      .apply("maxFlushPosition", Combine.perKey(Max.naturalOrder(new Operation(-1, false))))
      .apply(
          "Flush and finalize writes",
          ParDo.of(new StorageApiFlushAndFinalizeDoFn(bqServices)));
}
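The per-key Combine above relies on Operation being naturally ordered, with new Operation(-1, false) as the identity (a negative flush offset means "nothing to flush"), so Max.naturalOrder keeps only the furthest flush position per stream. A minimal sketch of what such a value class could look like, assuming ordering by flushOffset with finalizeStream as a tiebreaker; the schema annotations and tiebreak rule are assumptions for illustration, not the actual Beam source:

// Sketch of the Operation value class used above. The @DefaultSchema and
// @SchemaCreate annotations are assumptions to make
// SchemaRegistry.getSchema(Operation.class) succeed for a field-based schema.
@DefaultSchema(JavaFieldSchema.class)
static class Operation implements Comparable<Operation> {
  final long flushOffset;       // Offset to flush up to; < 0 means "flush nothing".
  final boolean finalizeStream; // Whether to finalize the stream after flushing.

  @SchemaCreate
  Operation(long flushOffset, boolean finalizeStream) {
    this.flushOffset = flushOffset;
    this.finalizeStream = finalizeStream;
  }

  @Override
  public int compareTo(Operation other) {
    int cmp = Long.compare(this.flushOffset, other.flushOffset);
    // Assumed tiebreak: an element requesting finalization wins.
    return (cmp != 0) ? cmp : Boolean.compare(this.finalizeStream, other.finalizeStream);
  }
}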
Use of org.apache.beam.sdk.io.gcp.bigquery.StorageApiFlushAndFinalizeDoFn.Operation in project beam by apache.
In class StorageApiFlushAndFinalizeDoFn, method process:
@SuppressWarnings({"nullness"})
@ProcessElement
public void process(PipelineOptions pipelineOptions, @Element KV<String, Operation> element)
    throws Exception {
  final String streamId = element.getKey();
  final Operation operation = element.getValue();
  final DatasetService datasetService = getDatasetService(pipelineOptions);
  // Flush the stream. If the flush offset is < 0, that means we only need to finalize.
  long offset = operation.flushOffset;
  if (offset >= 0) {
    Instant now = Instant.now();
    RetryManager<FlushRowsResponse, Context<FlushRowsResponse>> retryManager =
        new RetryManager<>(Duration.standardSeconds(1), Duration.standardMinutes(1), 3);
    retryManager.addOperation(
        // runOperation
        c -> {
          try {
            flushOperationsSent.inc();
            return datasetService.flush(streamId, offset);
          } catch (Exception e) {
            throw new RuntimeException(e);
          }
        },
        // onError
        contexts -> {
          Throwable error = Iterables.getFirst(contexts, null).getError();
          LOG.warn(
              "Flush of stream " + streamId + " to offset " + offset + " failed with " + error);
          flushOperationsFailed.inc();
          if (error instanceof ApiException) {
            Code statusCode = ((ApiException) error).getStatusCode().getCode();
            if (statusCode.equals(Code.ALREADY_EXISTS)) {
              flushOperationsAlreadyExists.inc();
              // Implies that we have already flushed up to this point, so don't retry.
              return RetryType.DONT_RETRY;
            }
            if (statusCode.equals(Code.INVALID_ARGUMENT)) {
              flushOperationsInvalidArgument.inc();
              // TODO: Storage API should provide a more-specific way of identifying this failure.
              return RetryType.DONT_RETRY;
            }
          }
          return RetryType.RETRY_ALL_OPERATIONS;
        },
        // onSuccess
        c -> {
          flushOperationsSucceeded.inc();
        },
        new Context<>());
    retryManager.run(true);
    java.time.Duration timeElapsed = java.time.Duration.between(now, Instant.now());
    flushLatencyDistribution.update(timeElapsed.toMillis());
  }
  // Finalize the stream if requested. When errors occurred upstream, an unflushed tail is
  // intentionally left in the stream: those records were not processed, so we don't want to
  // flush them, or we would end up with duplicates.
  if (operation.finalizeStream) {
    RetryManager<FinalizeWriteStreamResponse, Context<FinalizeWriteStreamResponse>> retryManager =
        new RetryManager<>(Duration.standardSeconds(1), Duration.standardMinutes(1), 3);
    retryManager.addOperation(
        c -> {
          finalizeOperationsSent.inc();
          return datasetService.finalizeWriteStream(streamId);
        },
        contexts -> {
          LOG.warn(
              "Finalize of stream "
                  + streamId
                  + " failed with "
                  + Iterables.getFirst(contexts, null).getError());
          finalizeOperationsFailed.inc();
          return RetryType.RETRY_ALL_OPERATIONS;
        },
        r -> {
          finalizeOperationsSucceeded.inc();
        },
        new Context<>());
    retryManager.run(true);
  }
}
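Both branches build the RetryManager the same way: a one-second initial backoff capped at one minute, three attempts, and an onError callback that decides between DONT_RETRY (terminal but benign, as with ALREADY_EXISTS) and RETRY_ALL_OPERATIONS. As a rough standalone illustration of that contract (a hypothetical helper, not Beam's internal RetryManager, whose exact backoff policy is not shown here):

import java.util.concurrent.Callable;
import java.util.function.Function;

// Hypothetical stand-in sketching the retry contract used above: bounded
// exponential backoff with an error classifier that can short-circuit retries.
final class BoundedRetry {
  enum RetryType { DONT_RETRY, RETRY_ALL_OPERATIONS }

  static <T> T run(
      Callable<T> operation,
      Function<Throwable, RetryType> onError,
      long initialDelayMillis,
      long maxDelayMillis,
      int maxAttempts)
      throws Exception {
    long delay = initialDelayMillis;
    Exception last = null;
    for (int attempt = 1; attempt <= maxAttempts; attempt++) {
      try {
        return operation.call(); // Success: hand the response back to the caller.
      } catch (Exception e) {
        last = e;
        if (onError.apply(e) == RetryType.DONT_RETRY) {
          return null; // Terminal but non-fatal, like ALREADY_EXISTS above.
        }
        Thread.sleep(delay);
        delay = Math.min(delay * 2, maxDelayMillis); // Exponential backoff, capped.
      }
    }
    throw last; // All attempts exhausted.
  }
}

A caller would invoke it as, say, BoundedRetry.run(() -> datasetService.flush(streamId, offset), e -> classify(e), 1_000, 60_000, 3), where classify mirrors the status-code checks in the onError lambda above.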