Use of org.apache.beam.sdk.fn.IdGenerator in project beam by apache.
The class ArtifactStagingService, method reverseArtifactRetrievalService.
@Override
public StreamObserver<ArtifactApi.ArtifactResponseWrapper> reverseArtifactRetrievalService(StreamObserver<ArtifactApi.ArtifactRequestWrapper> responseObserver) {
return new StreamObserver<ArtifactApi.ArtifactResponseWrapper>() {
/**
* The maximum number of parallel threads to use to stage.
*/
public static final int THREAD_POOL_SIZE = 10;
/**
* The maximum number of bytes to buffer across all writes before throttling.
*/
// 100 MB
public static final int MAX_PENDING_BYTES = 100 << 20;
IdGenerator idGenerator = IdGenerators.incrementingLongs();
String stagingToken;
Map<String, List<RunnerApi.ArtifactInformation>> toResolve;
Map<String, List<Future<RunnerApi.ArtifactInformation>>> stagedFutures;
ExecutorService stagingExecutor;
OverflowingSemaphore totalPendingBytes;
State state = State.START;
Queue<String> pendingResolves;
String currentEnvironment;
Queue<RunnerApi.ArtifactInformation> pendingGets;
BlockingQueue<ByteString> currentOutput;
@Override
@SuppressFBWarnings(value = "SF_SWITCH_FALLTHROUGH", justification = "fallthrough intended")
public synchronized void onNext(ArtifactApi.ArtifactResponseWrapper responseWrapper) {
switch(state) {
case START:
stagingToken = responseWrapper.getStagingToken();
LOG.info("Staging artifacts for {}.", stagingToken);
toResolve = toStage.get(stagingToken);
if (toResolve == null) {
responseObserver.onError(new StatusException(Status.INVALID_ARGUMENT.withDescription("Unknown staging token " + stagingToken)));
return;
}
stagedFutures = new ConcurrentHashMap<>();
pendingResolves = new ArrayDeque<>();
pendingResolves.addAll(toResolve.keySet());
stagingExecutor = Executors.newFixedThreadPool(THREAD_POOL_SIZE);
totalPendingBytes = new OverflowingSemaphore(MAX_PENDING_BYTES);
resolveNextEnvironment(responseObserver);
break;
case RESOLVE:
{
currentEnvironment = pendingResolves.remove();
stagedFutures.put(currentEnvironment, new ArrayList<>());
pendingGets = new ArrayDeque<>();
for (RunnerApi.ArtifactInformation artifact : responseWrapper.getResolveArtifactResponse().getReplacementsList()) {
Optional<RunnerApi.ArtifactInformation> fetched = getLocal();
if (fetched.isPresent()) {
stagedFutures.get(currentEnvironment).add(CompletableFuture.completedFuture(fetched.get()));
} else {
pendingGets.add(artifact);
responseObserver.onNext(ArtifactApi.ArtifactRequestWrapper.newBuilder().setGetArtifact(ArtifactApi.GetArtifactRequest.newBuilder().setArtifact(artifact)).build());
}
}
LOG.info("Getting {} artifacts for {}.{}.", pendingGets.size(), stagingToken, pendingResolves.peek());
if (pendingGets.isEmpty()) {
resolveNextEnvironment(responseObserver);
} else {
state = State.GET;
}
break;
}
case GET:
RunnerApi.ArtifactInformation currentArtifact = pendingGets.remove();
String name = createFilename(currentEnvironment, currentArtifact);
try {
LOG.debug("Storing artifacts for {} as {}", stagingToken, name);
currentOutput = new ArrayBlockingQueue<ByteString>(100);
stagedFutures.get(currentEnvironment).add(stagingExecutor.submit(new StoreArtifact(stagingToken, name, currentArtifact, currentOutput, totalPendingBytes)));
} catch (Exception exn) {
LOG.error("Error submitting.", exn);
responseObserver.onError(exn);
}
state = State.GETCHUNK;
// Intentional fallthrough: the GetArtifact response handled here also carries the first chunk.
case GETCHUNK:
try {
ByteString chunk = responseWrapper.getGetArtifactResponse().getData();
if (chunk.size() > 0) {
// Make sure we don't accidentally send the EOF value.
totalPendingBytes.aquire(chunk.size());
currentOutput.put(chunk);
}
if (responseWrapper.getIsLast()) {
// The EOF value.
currentOutput.put(ByteString.EMPTY);
if (pendingGets.isEmpty()) {
resolveNextEnvironment(responseObserver);
} else {
state = State.GET;
LOG.debug("Waiting for {}", pendingGets.peek());
}
}
} catch (Exception exn) {
LOG.error("Error submitting.", exn);
onError(exn);
}
break;
default:
responseObserver.onError(new StatusException(Status.INVALID_ARGUMENT.withDescription("Illegal state " + state)));
}
}
private void resolveNextEnvironment(StreamObserver<ArtifactApi.ArtifactRequestWrapper> responseObserver) {
if (pendingResolves.isEmpty()) {
finishStaging(responseObserver);
} else {
state = State.RESOLVE;
LOG.info("Resolving artifacts for {}.{}.", stagingToken, pendingResolves.peek());
responseObserver.onNext(ArtifactApi.ArtifactRequestWrapper.newBuilder().setResolveArtifact(ArtifactApi.ResolveArtifactsRequest.newBuilder().addAllArtifacts(toResolve.get(pendingResolves.peek()))).build());
}
}
private void finishStaging(StreamObserver<ArtifactApi.ArtifactRequestWrapper> responseObserver) {
LOG.debug("Finishing staging for {}.", stagingToken);
Map<String, List<RunnerApi.ArtifactInformation>> staged = new HashMap<>();
try {
for (Map.Entry<String, List<Future<RunnerApi.ArtifactInformation>>> entry : stagedFutures.entrySet()) {
List<RunnerApi.ArtifactInformation> envStaged = new ArrayList<>();
for (Future<RunnerApi.ArtifactInformation> future : entry.getValue()) {
envStaged.add(future.get());
}
staged.put(entry.getKey(), envStaged);
}
ArtifactStagingService.this.staged.put(stagingToken, staged);
stagingExecutor.shutdown();
state = State.DONE;
LOG.info("Artifacts fully staged for {}.", stagingToken);
responseObserver.onCompleted();
} catch (Exception exn) {
LOG.error("Error staging artifacts", exn);
responseObserver.onError(exn);
state = State.ERROR;
return;
}
}
/**
* Return an alternative artifact if we do not need to get this over the artifact API, or
* possibly at all.
*/
private Optional<RunnerApi.ArtifactInformation> getLocal() {
return Optional.empty();
}
/**
 * Attempts to provide a reasonable, unique filename for the artifact. Uniqueness is provided
 * by a monotonically increasing id prefix.
 *
 * @param environment the environment id
 * @param artifact the artifact itself
 */
private String createFilename(String environment, RunnerApi.ArtifactInformation artifact) {
String path;
try {
if (artifact.getRoleUrn().equals(ArtifactRetrievalService.STAGING_TO_ARTIFACT_URN)) {
path = RunnerApi.ArtifactStagingToRolePayload.parseFrom(artifact.getRolePayload()).getStagedName();
} else if (artifact.getTypeUrn().equals(ArtifactRetrievalService.FILE_ARTIFACT_URN)) {
path = RunnerApi.ArtifactFilePayload.parseFrom(artifact.getTypePayload()).getPath();
} else if (artifact.getTypeUrn().equals(ArtifactRetrievalService.URL_ARTIFACT_URN)) {
path = RunnerApi.ArtifactUrlPayload.parseFrom(artifact.getTypePayload()).getUrl();
} else {
path = "artifact";
}
} catch (InvalidProtocolBufferException exn) {
throw new RuntimeException(exn);
}
// Limit to the last contiguous alpha-numeric sequence. In particular, this will exclude
// all path separators.
List<String> components = Splitter.onPattern("[^A-Za-z0-9-_.]").splitToList(path);
String base = components.get(components.size() - 1);
return clip(String.format("%s-%s-%s", idGenerator.getId(), clip(environment, 25), base), 100);
}
private String clip(String s, int maxLength) {
return s.length() < maxLength ? s : s.substring(0, maxLength);
}
@Override
public void onError(Throwable throwable) {
stagingExecutor.shutdownNow();
LOG.error("Error staging artifacts", throwable);
state = State.ERROR;
}
@Override
public void onCompleted() {
Preconditions.checkArgument(state == State.DONE);
}
};
}
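The observer above throttles pending writes with an OverflowingSemaphore, whose definition is outside this excerpt. A minimal sketch of such a throttle, assuming a counter guarded by the object monitor (the aquire spelling follows the call site above; this is illustrative, not necessarily Beam's actual implementation):

/** A semaphore-like throttle whose capacity a single large acquisition may overdraw. */
private static class OverflowingSemaphore {
  private int value;

  OverflowingSemaphore(int maxValue) {
    this.value = maxValue;
  }

  /** Blocks until some capacity is free, then takes n, possibly driving the count negative. */
  synchronized void aquire(int n) throws InterruptedException {
    while (value <= 0) {
      wait();
    }
    value -= n;
  }

  /** Returns n units of capacity and wakes any blocked acquirers. */
  synchronized void release(int n) {
    value += n;
    notifyAll();
  }
}

Under this scheme the StoreArtifact task would call release(chunk.size()) as it drains currentOutput, so the 100 MB bound caps buffered bytes across all concurrent stores.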
Use of org.apache.beam.sdk.fn.IdGenerator in project beam by apache.
The class RegisterAndProcessBundleOperationTest, method testProcessingBundleBlocksOnFinishWithError.
@Test
public void testProcessingBundleBlocksOnFinishWithError() throws Exception {
List<BeamFnApi.InstructionRequest> requests = new ArrayList<>();
IdGenerator idGenerator = makeIdGeneratorStartingFrom(777L);
ExecutorService executorService = Executors.newCachedThreadPool();
RegisterAndProcessBundleOperation operation = new RegisterAndProcessBundleOperation(idGenerator, new TestInstructionRequestHandler() {
@Override
public CompletionStage<InstructionResponse> handle(InstructionRequest request) {
requests.add(request);
switch(request.getRequestCase()) {
case REGISTER:
return CompletableFuture.completedFuture(responseFor(request).build());
case PROCESS_BUNDLE:
CompletableFuture<InstructionResponse> responseFuture = new CompletableFuture<>();
executorService.submit(() -> {
// Purposefully sleep to simulate the SDK harness doing work.
Thread.sleep(100);
responseFuture.complete(responseFor(request).setError("error").build());
return null;
});
return responseFuture;
default:
// Anything else hangs; nothing else should be blocking
return new CompletableFuture<>();
}
}
}, mockBeamFnStateDelegator, REGISTER_REQUEST, ImmutableMap.of(), ImmutableMap.of(), ImmutableMap.of(), ImmutableTable.of(), ImmutableMap.of(), mockContext);
operation.start();
// This method blocks till the requests are completed
operation.finish();
assertEquals(true, operation.hasFailed());
// Ensure that the messages were received
assertEquals(requests.get(0), BeamFnApi.InstructionRequest.newBuilder().setInstructionId("777").setRegister(REGISTER_REQUEST).build());
assertEquals(requests.get(1), BeamFnApi.InstructionRequest.newBuilder().setInstructionId("778").setProcessBundle(BeamFnApi.ProcessBundleRequest.newBuilder().setProcessBundleDescriptorId("555")).build());
}
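The tests build their IdGenerator with a makeIdGeneratorStartingFrom helper that is not shown. Since IdGenerator is a single-method interface producing String ids, a plausible sketch, assuming java.util.concurrent.atomic.AtomicLong is imported (the behavior is inferred from the asserted ids "777" and "778" above):

private static IdGenerator makeIdGeneratorStartingFrom(long initialValue) {
  AtomicLong counter = new AtomicLong(initialValue);
  // Hands out the current value and advances, yielding "777", "778", ...
  return () -> Long.toString(counter.getAndIncrement());
}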
Use of org.apache.beam.sdk.fn.IdGenerator in project beam by apache.
The class RegisterAndProcessBundleOperationTest, method testProcessingBundleHandlesUserStateRequests.
@Test
public void testProcessingBundleHandlesUserStateRequests() throws Exception {
IdGenerator idGenerator = makeIdGeneratorStartingFrom(777L);
InMemoryStateInternals<ByteString> stateInternals = InMemoryStateInternals.forKey(ByteString.EMPTY);
DataflowStepContext mockStepContext = mock(DataflowStepContext.class);
DataflowStepContext mockUserStepContext = mock(DataflowStepContext.class);
when(mockStepContext.namespacedToUser()).thenReturn(mockUserStepContext);
when(mockUserStepContext.stateInternals()).thenReturn(stateInternals);
InstructionRequestHandler instructionRequestHandler = new TestInstructionRequestHandler() {
@Override
public CompletionStage<InstructionResponse> handle(InstructionRequest request) {
switch(request.getRequestCase()) {
case REGISTER:
return CompletableFuture.completedFuture(responseFor(request).build());
case PROCESS_BUNDLE:
return MoreFutures.supplyAsync(() -> {
StateRequest partialRequest = StateRequest.newBuilder().setStateKey(StateKey.newBuilder().setBagUserState(StateKey.BagUserState.newBuilder().setTransformId("testPTransformId").setWindow(ByteString.EMPTY).setUserStateId("testUserStateId"))).buildPartial();
StateRequest get = partialRequest.toBuilder().setGet(StateGetRequest.getDefaultInstance()).build();
StateRequest clear = partialRequest.toBuilder().setClear(StateClearRequest.getDefaultInstance()).build();
StateRequest append = partialRequest.toBuilder().setAppend(StateAppendRequest.newBuilder().setData(ByteString.copyFromUtf8("ABC"))).build();
StateRequestHandler stateHandler = stateHandlerCaptor.getValue();
StateResponse.Builder getWhenEmptyResponse = MoreFutures.get(stateHandler.handle(get));
assertEquals(ByteString.EMPTY, getWhenEmptyResponse.getGet().getData());
StateResponse.Builder appendWhenEmptyResponse = MoreFutures.get(stateHandler.handle(append));
assertNotNull(appendWhenEmptyResponse);
StateResponse.Builder appendWhenEmptyResponse2 = MoreFutures.get(stateHandler.handle(append));
assertNotNull(appendWhenEmptyResponse2);
StateResponse.Builder getWhenHasValueResponse = MoreFutures.get(stateHandler.handle(get));
assertEquals(ByteString.copyFromUtf8("ABC").concat(ByteString.copyFromUtf8("ABC")), getWhenHasValueResponse.getGet().getData());
StateResponse.Builder clearResponse = MoreFutures.get(stateHandler.handle(clear));
assertNotNull(clearResponse);
return responseFor(request).build();
});
default:
// block forever
return new CompletableFuture<>();
}
}
};
RegisterAndProcessBundleOperation operation = new RegisterAndProcessBundleOperation(idGenerator, instructionRequestHandler, mockBeamFnStateDelegator, REGISTER_REQUEST, ImmutableMap.of(), ImmutableMap.of("testPTransformId", mockStepContext), ImmutableMap.of(), ImmutableTable.of(), ImmutableMap.of(), mockContext);
operation.start();
verify(mockBeamFnStateDelegator).registerForProcessBundleInstructionId(eq("778"), stateHandlerCaptor.capture());
// This method blocks till the requests are completed
operation.finish();
// Ensure that the number of registrations matches the number of deregistrations
assertEquals(stateServiceRegisterCounter.get(), stateServiceDeregisterCounter.get());
assertEquals(0, stateServiceAbortCounter.get());
}
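responseFor comes from the TestInstructionRequestHandler base class and is not reproduced here; presumably it echoes the instruction id back so the operation can correlate replies with requests, along the lines of (an assumption based on the id assertions in these tests):

private static BeamFnApi.InstructionResponse.Builder responseFor(
    BeamFnApi.InstructionRequest request) {
  // Copy the request's instruction id so the caller can match the reply to the request.
  return BeamFnApi.InstructionResponse.newBuilder()
      .setInstructionId(request.getInstructionId());
}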
Use of org.apache.beam.sdk.fn.IdGenerator in project beam by apache.
The class RegisterAndProcessBundleOperationTest, method testProcessingBundleHandlesMultimapSideInputRequests.
@Test
public void testProcessingBundleHandlesMultimapSideInputRequests() throws Exception {
IdGenerator idGenerator = makeIdGeneratorStartingFrom(777L);
DataflowStepContext mockStepContext = mock(DataflowStepContext.class);
DataflowStepContext mockUserStepContext = mock(DataflowStepContext.class);
when(mockStepContext.namespacedToUser()).thenReturn(mockUserStepContext);
CountDownLatch waitForStateHandler = new CountDownLatch(1);
// Issues state calls to the Runner after a process bundle request is sent.
InstructionRequestHandler fakeClient = new TestInstructionRequestHandler() {
@Override
public CompletionStage<InstructionResponse> handle(InstructionRequest request) {
switch(request.getRequestCase()) {
case REGISTER:
return CompletableFuture.completedFuture(responseFor(request).build());
case PROCESS_BUNDLE:
return MoreFutures.supplyAsync(() -> {
StateKey getKey = StateKey.newBuilder().setMultimapSideInput(StateKey.MultimapSideInput.newBuilder().setTransformId("testPTransformId").setSideInputId("testSideInputId").setWindow(ByteString.copyFrom(CoderUtils.encodeToByteArray(GlobalWindow.Coder.INSTANCE, GlobalWindow.INSTANCE))).setKey(ByteString.copyFrom(CoderUtils.encodeToByteArray(ByteArrayCoder.of(), "ABC".getBytes(StandardCharsets.UTF_8), Coder.Context.NESTED)))).build();
StateRequest getRequest = StateRequest.newBuilder().setStateKey(getKey).setGet(StateGetRequest.getDefaultInstance()).build();
waitForStateHandler.await();
StateRequestHandler stateHandler = stateHandlerCaptor.getValue();
StateResponse.Builder getResponse = MoreFutures.get(stateHandler.handle(getRequest));
assertEquals(encodeAndConcat(Arrays.asList("X", "Y", "Z"), StringUtf8Coder.of()), getResponse.getGet().getData());
return responseFor(request).build();
});
default:
// block forever on other request types
return new CompletableFuture<>();
}
}
};
SideInputReader fakeSideInputReader = new SideInputReader() {
@Override
@Nullable
public <T> T get(PCollectionView<T> view, BoundedWindow window) {
assertEquals(GlobalWindow.INSTANCE, window);
assertEquals("testSideInputId", view.getTagInternal().getId());
return (T) InMemoryMultimapSideInputView.fromIterable(ByteArrayCoder.of(), ImmutableList.of(KV.of("ABC".getBytes(StandardCharsets.UTF_8), "X"), KV.of("ABC".getBytes(StandardCharsets.UTF_8), "Y"), KV.of("ABC".getBytes(StandardCharsets.UTF_8), "Z")));
}
@Override
public <T> boolean contains(PCollectionView<T> view) {
return "testSideInputId".equals(view.getTagInternal().getId());
}
@Override
public boolean isEmpty() {
return false;
}
};
RegisterAndProcessBundleOperation operation = new RegisterAndProcessBundleOperation(idGenerator, fakeClient, mockBeamFnStateDelegator, REGISTER_REQUEST, ImmutableMap.of(), ImmutableMap.of("testPTransformId", mockStepContext), ImmutableMap.of("testPTransformId", fakeSideInputReader), ImmutableTable.of("testPTransformId", "testSideInputId", DataflowPortabilityPCollectionView.with(new TupleTag<>("testSideInputId"), FullWindowedValueCoder.of(KvCoder.of(ByteArrayCoder.of(), StringUtf8Coder.of()), GlobalWindow.Coder.INSTANCE))), ImmutableMap.of(), mockContext);
operation.start();
verify(mockBeamFnStateDelegator).registerForProcessBundleInstructionId(eq("778"), stateHandlerCaptor.capture());
waitForStateHandler.countDown();
// This method blocks till the requests are completed
operation.finish();
// Ensure that the number of registrations matches the number of deregistrations
assertEquals(stateServiceRegisterCounter.get(), stateServiceDeregisterCounter.get());
assertEquals(0, stateServiceAbortCounter.get());
}
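The expected value above comes from an encodeAndConcat helper that this excerpt omits. A reasonable sketch, assuming it nested-encodes each element with the given coder and concatenates the bytes (NESTED framing is what keeps the concatenation decodable element by element):

private static <T> ByteString encodeAndConcat(Iterable<T> values, Coder<T> coder)
    throws IOException {
  ByteString.Output out = ByteString.newOutput();
  for (T value : values) {
    // Each element is delimited by the NESTED context so the stream can be split later.
    coder.encode(value, out, Coder.Context.NESTED);
  }
  return out.toByteString();
}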
Use of org.apache.beam.sdk.fn.IdGenerator in project beam by apache.
The class StreamingDataflowWorker, method process.
private void process(final SdkWorkerHarness worker, final ComputationState computationState, final Instant inputDataWatermark, @Nullable final Instant outputDataWatermark, @Nullable final Instant synchronizedProcessingTime, final Work work) {
final Windmill.WorkItem workItem = work.getWorkItem();
final String computationId = computationState.getComputationId();
final ByteString key = workItem.getKey();
work.setState(State.PROCESSING);
{
StringBuilder workIdBuilder = new StringBuilder(33);
workIdBuilder.append(Long.toHexString(workItem.getShardingKey()));
workIdBuilder.append('-');
workIdBuilder.append(Long.toHexString(workItem.getWorkToken()));
DataflowWorkerLoggingMDC.setWorkId(workIdBuilder.toString());
}
DataflowWorkerLoggingMDC.setStageName(computationId);
LOG.debug("Starting processing for {}:\n{}", computationId, work);
Windmill.WorkItemCommitRequest.Builder outputBuilder = initializeOutputBuilder(key, workItem);
// Before any processing starts, call any pending OnCommit callbacks. Nothing that requires
// cleanup should be done before this, since we might exit early here.
callFinalizeCallbacks(workItem);
if (workItem.getSourceState().getOnlyFinalize()) {
outputBuilder.setSourceStateUpdates(Windmill.SourceState.newBuilder().setOnlyFinalize(true));
work.setState(State.COMMIT_QUEUED);
commitQueue.put(new Commit(outputBuilder.build(), computationState, work));
return;
}
long processingStartTimeNanos = System.nanoTime();
final MapTask mapTask = computationState.getMapTask();
StageInfo stageInfo = stageInfoMap.computeIfAbsent(mapTask.getStageName(), s -> new StageInfo(s, mapTask.getSystemName(), this));
ExecutionState executionState = null;
try {
executionState = computationState.getExecutionStateQueue(worker).poll();
if (executionState == null) {
MutableNetwork<Node, Edge> mapTaskNetwork = mapTaskToNetwork.apply(mapTask);
if (LOG.isDebugEnabled()) {
LOG.debug("Network as Graphviz .dot: {}", Networks.toDot(mapTaskNetwork));
}
ParallelInstructionNode readNode = (ParallelInstructionNode) Iterables.find(mapTaskNetwork.nodes(), node -> node instanceof ParallelInstructionNode && ((ParallelInstructionNode) node).getParallelInstruction().getRead() != null);
InstructionOutputNode readOutputNode = (InstructionOutputNode) Iterables.getOnlyElement(mapTaskNetwork.successors(readNode));
DataflowExecutionContext.DataflowExecutionStateTracker executionStateTracker = new DataflowExecutionContext.DataflowExecutionStateTracker(ExecutionStateSampler.instance(), stageInfo.executionStateRegistry.getState(NameContext.forStage(mapTask.getStageName()), "other", null, ScopedProfiler.INSTANCE.emptyScope()), stageInfo.deltaCounters, options, computationId);
StreamingModeExecutionContext context = new StreamingModeExecutionContext(pendingDeltaCounters, computationId, readerCache, !computationState.getTransformUserNameToStateFamily().isEmpty() ? computationState.getTransformUserNameToStateFamily() : stateNameMap, stateCache.forComputation(computationId), stageInfo.metricsContainerRegistry, executionStateTracker, stageInfo.executionStateRegistry, maxSinkBytes);
DataflowMapTaskExecutor mapTaskExecutor = mapTaskExecutorFactory.create(worker.getControlClientHandler(), worker.getGrpcDataFnServer(), sdkHarnessRegistry.beamFnDataApiServiceDescriptor(), worker.getGrpcStateFnServer(), mapTaskNetwork, options, mapTask.getStageName(), readerRegistry, sinkRegistry, context, pendingDeltaCounters, idGenerator);
ReadOperation readOperation = mapTaskExecutor.getReadOperation();
// Disable progress updates, since their results are unused for streaming
// and updating involves starting a thread.
readOperation.setProgressUpdatePeriodMs(ReadOperation.DONT_UPDATE_PERIODICALLY);
Preconditions.checkState(mapTaskExecutor.supportsRestart(), "Streaming runner requires all operations support restart.");
Coder<?> readCoder;
readCoder = CloudObjects.coderFromCloudObject(CloudObject.fromSpec(readOutputNode.getInstructionOutput().getCodec()));
Coder<?> keyCoder = extractKeyCoder(readCoder);
// If using a custom source, count bytes read for autoscaling.
if (CustomSources.class.getName().equals(readNode.getParallelInstruction().getRead().getSource().getSpec().get("@type"))) {
NameContext nameContext = NameContext.create(mapTask.getStageName(), readNode.getParallelInstruction().getOriginalName(), readNode.getParallelInstruction().getSystemName(), readNode.getParallelInstruction().getName());
readOperation.receivers[0].addOutputCounter(new OutputObjectAndByteCounter(new IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder<>(readCoder), mapTaskExecutor.getOutputCounters(), nameContext).setSamplingPeriod(100).countBytes("dataflow_input_size-" + mapTask.getSystemName()));
}
executionState = new ExecutionState(mapTaskExecutor, context, keyCoder, executionStateTracker);
}
WindmillStateReader stateReader = new WindmillStateReader(metricTrackingWindmillServer, computationId, key, workItem.getShardingKey(), workItem.getWorkToken());
StateFetcher localStateFetcher = stateFetcher.byteTrackingView();
// If the read outputs KVs, then we can decode Windmill's byte key into a userland
// key object and provide it to the execution context for use with per-key state.
// Otherwise, we pass null.
//
// The coder type that will be present is:
// WindowedValueCoder(TimerOrElementCoder(KvCoder))
@Nullable Coder<?> keyCoder = executionState.getKeyCoder();
@Nullable Object executionKey = keyCoder == null ? null : keyCoder.decode(key.newInput(), Coder.Context.OUTER);
if (workItem.hasHotKeyInfo()) {
Windmill.HotKeyInfo hotKeyInfo = workItem.getHotKeyInfo();
Duration hotKeyAge = Duration.millis(hotKeyInfo.getHotKeyAgeUsec() / 1000);
// The MapTask instructions are ordered by dependencies, such that the first element is
// always going to be the shuffle task.
String stepName = computationState.getMapTask().getInstructions().get(0).getName();
if (options.isHotKeyLoggingEnabled() && keyCoder != null) {
hotKeyLogger.logHotKeyDetection(stepName, hotKeyAge, executionKey);
} else {
hotKeyLogger.logHotKeyDetection(stepName, hotKeyAge);
}
}
executionState.getContext().start(executionKey, workItem, inputDataWatermark, outputDataWatermark, synchronizedProcessingTime, stateReader, localStateFetcher, outputBuilder);
// Blocks while executing work.
executionState.getWorkExecutor().execute();
Iterables.addAll(this.pendingMonitoringInfos, executionState.getWorkExecutor().extractMetricUpdates());
commitCallbacks.putAll(executionState.getContext().flushState());
// Release the execution state for another thread to use.
computationState.getExecutionStateQueue(worker).offer(executionState);
executionState = null;
// Add the output to the commit queue.
work.setState(State.COMMIT_QUEUED);
WorkItemCommitRequest commitRequest = outputBuilder.build();
int byteLimit = maxWorkItemCommitBytes;
int commitSize = commitRequest.getSerializedSize();
int estimatedCommitSize = commitSize < 0 ? Integer.MAX_VALUE : commitSize;
windmillMaxObservedWorkItemCommitBytes.addValue(estimatedCommitSize);
// Detect overflow of the integer serialized size or that the byte limit was exceeded.
if (commitSize < 0 || commitSize > byteLimit) {
KeyCommitTooLargeException e = KeyCommitTooLargeException.causedBy(computationId, byteLimit, commitRequest);
reportFailure(computationId, workItem, e);
LOG.error(e.toString());
// Drop the current request in favor of a new, minimal one requesting truncation.
// Messages, timers, counters, and other commit content will not be used by the service,
// so we're purposefully dropping them here.
commitRequest = buildWorkItemTruncationRequest(key, workItem, estimatedCommitSize);
}
commitQueue.put(new Commit(commitRequest, computationState, work));
// Compute shuffle and state byte statistics; these will be flushed asynchronously.
long stateBytesWritten = outputBuilder.clearOutputMessages().build().getSerializedSize();
long shuffleBytesRead = 0;
for (Windmill.InputMessageBundle bundle : workItem.getMessageBundlesList()) {
for (Windmill.Message message : bundle.getMessagesList()) {
shuffleBytesRead += message.getSerializedSize();
}
}
long stateBytesRead = stateReader.getBytesRead() + localStateFetcher.getBytesRead();
windmillShuffleBytesRead.addValue(shuffleBytesRead);
windmillStateBytesRead.addValue(stateBytesRead);
windmillStateBytesWritten.addValue(stateBytesWritten);
LOG.debug("Processing done for work token: {}", workItem.getWorkToken());
} catch (Throwable t) {
if (executionState != null) {
try {
executionState.getContext().invalidateCache();
executionState.getWorkExecutor().close();
} catch (Exception e) {
LOG.warn("Failed to close map task executor: ", e);
} finally {
// Release references to potentially large objects early.
executionState = null;
}
}
t = t instanceof UserCodeException ? t.getCause() : t;
boolean retryLocally = false;
if (KeyTokenInvalidException.isKeyTokenInvalidException(t)) {
LOG.debug("Execution of work for computation '{}' on key '{}' failed due to token expiration. " + "Work will not be retried locally.", computationId, key.toStringUtf8());
} else {
LastExceptionDataProvider.reportException(t);
LOG.debug("Failed work: {}", work);
Duration elapsedTimeSinceStart = new Duration(work.getStartTime(), Instant.now());
if (!reportFailure(computationId, workItem, t)) {
LOG.error("Execution of work for computation '{}' on key '{}' failed with uncaught exception, " + "and Windmill indicated not to retry locally.", computationId, key.toStringUtf8(), t);
} else if (isOutOfMemoryError(t)) {
File heapDump = memoryMonitor.tryToDumpHeap();
LOG.error("Execution of work for computation '{}' for key '{}' failed with out-of-memory. " + "Work will not be retried locally. Heap dump {}.", computationId, key.toStringUtf8(), heapDump == null ? "not written" : ("written to '" + heapDump + "'"), t);
} else if (elapsedTimeSinceStart.isLongerThan(MAX_LOCAL_PROCESSING_RETRY_DURATION)) {
LOG.error("Execution of work for computation '{}' for key '{}' failed with uncaught exception, " + "and it will not be retried locally because the elapsed time since start {} " + "exceeds {}.", computationId, key.toStringUtf8(), elapsedTimeSinceStart, MAX_LOCAL_PROCESSING_RETRY_DURATION, t);
} else {
LOG.error("Execution of work for computation '{}' on key '{}' failed with uncaught exception. " + "Work will be retried locally.", computationId, key.toStringUtf8(), t);
retryLocally = true;
}
}
if (retryLocally) {
// Try again after some delay and at the end of the queue to avoid a tight loop.
sleep(retryLocallyDelayMs);
workUnitExecutor.forceExecute(work, work.getWorkItem().getSerializedSize());
} else {
// Consider the item invalid. It will eventually be retried by Windmill if it still needs to
// be processed.
computationState.completeWork(ShardedKey.create(key, workItem.getShardingKey()), workItem.getWorkToken());
}
} finally {
// Update total processing time counters. Updating in finally clause ensures that
// work items causing exceptions are also accounted in time spent.
long processingTimeMsecs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - processingStartTimeNanos);
stageInfo.totalProcessingMsecs.addValue(processingTimeMsecs);
// Attribute processing time to timer handling when the work item contains timers.
if (work.getWorkItem().hasTimers()) {
stageInfo.timerProcessingMsecs.addValue(processingTimeMsecs);
}
DataflowWorkerLoggingMDC.setWorkId(null);
DataflowWorkerLoggingMDC.setStageName(null);
}
}
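process() also leans on two helpers defined elsewhere in StreamingDataflowWorker, initializeOutputBuilder and buildWorkItemTruncationRequest. A sketch of the shape they likely have, inferred from the call sites above (the Windmill proto field names for the truncation flags are assumptions):

private static Windmill.WorkItemCommitRequest.Builder initializeOutputBuilder(
    ByteString key, Windmill.WorkItem workItem) {
  // Every commit must identify the key, sharding key, and work token it belongs to.
  return Windmill.WorkItemCommitRequest.newBuilder()
      .setKey(key)
      .setShardingKey(workItem.getShardingKey())
      .setWorkToken(workItem.getWorkToken());
}

private static Windmill.WorkItemCommitRequest buildWorkItemTruncationRequest(
    ByteString key, Windmill.WorkItem workItem, int estimatedCommitSize) {
  // A minimal commit that reports only that the real one exceeded the byte limit.
  return initializeOutputBuilder(key, workItem)
      .setExceedsMaxWorkItemCommitBytes(true)
      .setEstimatedWorkItemCommitBytes(estimatedCommitSize)
      .build();
}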