Search in sources :

Example 6 with WorkItemStatus

use of com.google.api.services.dataflow.model.WorkItemStatus in project beam by apache.

the class StreamingDataflowWorkerTest method testExceptions.

/**
 * Runs a pipeline whose DoFn is a {@code TestExceptionFn} and checks that the failure is
 * reported in exactly one work item status update, and that stats for the work item are
 * received by the fake Windmill server.
 */
@Test(timeout = 30000)
public void testExceptions() throws Exception {
    if (streamingEngine) {
        // TODO: This test needs to be adapted to work with streamingEngine=true.
        return;
    }

    // Pipeline under test: source -> failing DoFn -> sink.
    List<ParallelInstruction> instructions = Arrays.asList(
            makeSourceInstruction(StringUtf8Coder.of()),
            makeDoFnInstruction(new TestExceptionFn(), 0, StringUtf8Coder.of()),
            makeSinkInstruction(StringUtf8Coder.of(), 1));

    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    server.setExpectedExceptionCount(1);

    // Offer a single work item carrying one message.
    String keyString = keyStringForIndex(0);
    String workItemText = "work {"
            + "  computation_id: \"" + DEFAULT_COMPUTATION_ID + "\""
            + "  input_data_watermark: 0"
            + "  work {"
            + "    key: \"" + keyString + "\""
            + "    sharding_key: 1"
            + "    work_token: 0"
            + "    cache_token: 1"
            + "    message_bundles {"
            + "      source_computation_id: \"" + DEFAULT_SOURCE_COMPUTATION_ID + "\""
            + "      messages {"
            + "        timestamp: 0"
            + "        data: \"0\""
            + "      }"
            + "    }"
            + "  }"
            + "}";
    byte[] encodedWindows = CoderUtils.encodeToByteArray(
            CollectionCoder.of(IntervalWindow.getCoder()), Arrays.asList(DEFAULT_WINDOW));
    server.addWorkToOffer(buildInput(workItemText, encodedWindows));

    StreamingDataflowWorker worker = makeWorker(instructions, createTestingPipelineOptions(server), true);
    worker.start();
    server.waitForEmptyWorkQueue();

    // Wait until the worker has given up: poll up to ten times, one second apart.
    for (int attempt = 0; attempt < 10 && !worker.workExecutorIsEmpty(); attempt++) {
        Uninterruptibles.sleepUninterruptibly(1000, TimeUnit.MILLISECONDS);
    }
    assertTrue(worker.workExecutorIsEmpty());

    // Spam worker updates a few times.
    for (int round = 0; round < 10; round++) {
        worker.reportPeriodicWorkerUpdates();
        Uninterruptibles.sleepUninterruptibly(1000, TimeUnit.MILLISECONDS);
    }

    // We should see our update only one time with the exceptions we are expecting.
    ArgumentCaptor<WorkItemStatus> workItemStatusCaptor = ArgumentCaptor.forClass(WorkItemStatus.class);
    verify(mockWorkUnitClient, atLeast(1)).reportWorkItemStatus(workItemStatusCaptor.capture());

    boolean errorsSeen = false;
    // Tallies of captured updates without and with errors; compared after the loop.
    int cleanUpdateCount = 0;
    int errorUpdateCount = 0;
    for (WorkItemStatus reported : workItemStatusCaptor.getAllValues()) {
        if (!reported.getErrors().isEmpty()) {
            errorUpdateCount++;
            // Only a single update may carry errors.
            assertFalse(errorsSeen);
            errorsSeen = true;
            String stacktrace = reported.getErrors().get(0).getMessage();
            assertThat(stacktrace, Matchers.containsString("Exception!"));
            assertThat(stacktrace, Matchers.containsString("Another exception!"));
            assertThat(stacktrace, Matchers.containsString("processElement"));
        } else {
            cleanUpdateCount++;
        }
    }
    assertTrue(errorsSeen);
    // Error-free updates must outnumber the error-carrying ones; the intent is to show the
    // work item was retried and subsequently reported without errors.
    assertTrue(cleanUpdateCount > errorUpdateCount);

    // Confirm we've received the expected stats. There is no guarantee stats will only be
    // reported once.
    assertThat(server.getStatsReceived().size(), Matchers.greaterThanOrEqualTo(1));
    Windmill.ReportStatsRequest firstStats = server.getStatsReceived().get(0);
    assertEquals(DEFAULT_COMPUTATION_ID, firstStats.getComputationId());
    assertEquals(keyString, firstStats.getKey().toStringUtf8());
    assertEquals(0, firstStats.getWorkToken());
    assertEquals(1, firstStats.getShardingKey());
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Structs.addString(org.apache.beam.runners.dataflow.util.Structs.addString) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) WorkItemStatus(com.google.api.services.dataflow.model.WorkItemStatus) Windmill(org.apache.beam.runners.dataflow.worker.windmill.Windmill) Test(org.junit.Test)

Example 7 with WorkItemStatus

use of com.google.api.services.dataflow.model.WorkItemStatus in project beam by apache.

the class WorkItemStatusClient method createStatusUpdate.

/**
 * Builds a fresh {@link WorkItemStatus} for the current work item.
 *
 * <p>The status carries the work item id, the completion flag, the next report index and the
 * total throttler wait time. Metric and counter updates are included only once a worker has
 * been set.
 *
 * @param isFinal value stored as the status's "completed" flag
 * @return the newly populated status
 */
private synchronized WorkItemStatus createStatusUpdate(boolean isFinal) {
    WorkItemStatus update = new WorkItemStatus();
    update.setWorkItemId(Long.toString(workItem.getId()));
    update.setCompleted(isFinal);
    update.setReportIndex(
            checkNotNull(nextReportIndex, "nextReportIndex should be non-null when sending an update"));

    // Without a worker there is nothing to extract metrics or counters from.
    if (worker != null) {
        populateMetricUpdates(update);
        populateCounterUpdates(update);
    }

    update.setTotalThrottlerWaitTimeSeconds(extractThrottleTime());
    return update;
}
Also used : WorkItemStatus(com.google.api.services.dataflow.model.WorkItemStatus)

Example 8 with WorkItemStatus

use of com.google.api.services.dataflow.model.WorkItemStatus in project beam by apache.

the class WorkItemStatusClient method reportError.

/**
 * Sends a completed status carrying a single error for the work item and returns the resulting
 * {@link WorkItemServiceState}.
 *
 * <p>A {@link UserCodeException} is unwrapped to its cause before being logged and formatted.
 *
 * @param e the failure to report
 * @return the service state produced by sending the status
 * @throws IOException declared by this method; presumably raised while sending the status
 */
public synchronized WorkItemServiceState reportError(Throwable e) throws IOException {
    checkState(!finalStateSent, "cannot reportUpdates after sending a final state");
    WorkItemStatus status = createStatusUpdate(true);

    // TODO: Provide more structure representation of error, e.g., the serialized exception object.
    // TODO: Look into moving the stack trace thinning into the client.
    Throwable failure;
    if (e instanceof UserCodeException) {
        failure = e.getCause();
    } else {
        failure = e;
    }

    Status error = new Status();
    // Code.UNKNOWN.  TODO: Replace with a generated definition.
    error.setCode(2);

    // TODO: Attach the stack trace as exception details, not to the message.
    String logPrefix = String.format("Failure processing work item %s", uniqueWorkId());
    String errorText;
    if (!isOutOfMemoryError(failure)) {
        LOG.error("{}: Uncaught exception occurred during work unit execution. This will be retried.", logPrefix, failure);
        errorText = DataflowWorkerLoggingHandler.formatException(failure);
    } else {
        // Out-of-memory failures get an extra hint prepended to the formatted exception.
        String message = "An OutOfMemoryException occurred. Consider specifying higher memory " + "instances in PipelineOptions.\n";
        LOG.error("{}: {}", logPrefix, message);
        errorText = message + DataflowWorkerLoggingHandler.formatException(failure);
    }
    error.setMessage(errorText);

    status.setErrors(ImmutableList.of(error));
    return execute(status);
}
Also used : Status(com.google.api.services.dataflow.model.Status) WorkItemStatus(com.google.api.services.dataflow.model.WorkItemStatus) UserCodeException(org.apache.beam.sdk.util.UserCodeException)

Example 9 with WorkItemStatus

use of com.google.api.services.dataflow.model.WorkItemStatus in project beam by apache.

the class WorkItemStatusClient method reportUpdate.

/**
 * Sends a non-final progress update for the work item and returns the resulting
 * {@link WorkItemServiceState}.
 *
 * <p>Requires that a worker has been set and that no final state has been sent yet.
 *
 * @param dynamicSplitResult split result to attach to the update; may be {@code null}
 * @param requestedLeaseDuration lease duration to request; must be non-null
 * @return the service state produced by sending the update
 */
public synchronized WorkItemServiceState reportUpdate(@Nullable DynamicSplitResult dynamicSplitResult, Duration requestedLeaseDuration) throws Exception {
    // Preconditions, checked in this order.
    checkState(worker != null, "setWorker should be called before reportUpdate");
    checkState(!finalStateSent, "cannot reportUpdates after sending a final state");
    checkArgument(requestedLeaseDuration != null, "requestLeaseDuration must be non-null");

    WorkItemStatus progressUpdate = createStatusUpdate(false);
    progressUpdate.setRequestedLeaseDuration(
            TimeUtil.toCloudDuration(requestedLeaseDuration));

    populateProgress(progressUpdate);
    populateSplitResult(progressUpdate, dynamicSplitResult);

    return execute(progressUpdate);
}
Also used : WorkItemStatus(com.google.api.services.dataflow.model.WorkItemStatus)

Example 10 with WorkItemStatus

use of com.google.api.services.dataflow.model.WorkItemStatus in project beam by apache.

the class WorkItemStatusClientTest method populateCounterUpdatesWithMsecCounter.

/**
 * Checks that {@code populateCounterUpdates} copies a msec counter supplied by the execution
 * context into the status's counter updates.
 */
@Test
public void populateCounterUpdatesWithMsecCounter() throws Exception {
    // Expected counter: SYSTEM "start-msecs" for step "step", cumulative SUM of 42.
    CounterStructuredName counterName = new CounterStructuredName()
            .setOrigin("SYSTEM")
            .setName("start-msecs")
            .setOriginalStepName("step");
    CounterMetadata counterMetadata = new CounterMetadata().setKind(Kind.SUM.toString());
    CounterStructuredNameAndMetadata nameAndMetadata = new CounterStructuredNameAndMetadata()
            .setName(counterName)
            .setMetadata(counterMetadata);
    final CounterUpdate expectedMsec = new CounterUpdate()
            .setStructuredNameAndMetadata(nameAndMetadata)
            .setCumulative(true)
            .setInteger(DataflowCounterUpdateExtractor.longToSplitInt(42));

    // The mocked context yields no metric updates and exactly the one msec counter.
    BatchModeExecutionContext context = mock(BatchModeExecutionContext.class);
    when(context.extractMetricUpdates(anyBoolean())).thenReturn(ImmutableList.of());
    when(context.extractMsecCounters(anyBoolean())).thenReturn(ImmutableList.of(expectedMsec));

    WorkItemStatus populated = new WorkItemStatus();
    when(worker.extractMetricUpdates()).thenReturn(Collections.emptyList());
    statusClient.setWorker(worker, context);

    statusClient.populateCounterUpdates(populated);

    assertThat(populated.getCounterUpdates(), containsInAnyOrder(expectedMsec));
}
Also used : CounterMetadata(com.google.api.services.dataflow.model.CounterMetadata) WorkItemStatus(com.google.api.services.dataflow.model.WorkItemStatus) CounterStructuredName(com.google.api.services.dataflow.model.CounterStructuredName) CounterStructuredNameAndMetadata(com.google.api.services.dataflow.model.CounterStructuredNameAndMetadata) CounterUpdate(com.google.api.services.dataflow.model.CounterUpdate) Test(org.junit.Test)

Aggregations

WorkItemStatus (com.google.api.services.dataflow.model.WorkItemStatus): 32; Test (org.junit.Test): 24; CounterUpdate (com.google.api.services.dataflow.model.CounterUpdate): 9; Status (com.google.api.services.dataflow.model.Status): 5; CounterStructuredName (com.google.api.services.dataflow.model.CounterStructuredName): 3; ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction): 3; ReportWorkItemStatusRequest (com.google.api.services.dataflow.model.ReportWorkItemStatusRequest): 3; ReportWorkItemStatusResponse (com.google.api.services.dataflow.model.ReportWorkItemStatusResponse): 3; ArrayList (java.util.ArrayList): 3; ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 3; CounterMetadata (com.google.api.services.dataflow.model.CounterMetadata): 2; CounterStructuredNameAndMetadata (com.google.api.services.dataflow.model.CounterStructuredNameAndMetadata): 2; MapTask (com.google.api.services.dataflow.model.MapTask): 2; NameAndKind (com.google.api.services.dataflow.model.NameAndKind): 2; WorkItem (com.google.api.services.dataflow.model.WorkItem): 2; Map (java.util.Map): 2; AtomicLong (java.util.concurrent.atomic.AtomicLong): 2; CounterSet (org.apache.beam.runners.dataflow.worker.counters.CounterSet): 2; WorkItemCommitRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest): 2; MetricShortId (com.google.api.services.dataflow.model.MetricShortId): 1