Search in sources:

Example 1 with WorkItem

use of com.google.api.services.dataflow.model.WorkItem in project beam by apache.

the class BatchDataflowWorkerTest method testWhenProcessingWorkUnitFailsWeReportStatus.

/**
 * Checks that handing the worker a work item that carries no task makes it report an error
 * through the {@link WorkItemStatusClient}, with a message naming the offending work item.
 */
@Test
public void testWhenProcessingWorkUnitFailsWeReportStatus() throws Exception {
    // The first constructor argument (the pipeline) is intentionally null.
    BatchDataflowWorker worker = new BatchDataflowWorker(null, SdkHarnessRegistries.emptySdkHarnessRegistry(), mockWorkUnitClient, IntrinsicMapTaskExecutorFactory.defaultFactory(), options);

    // In practice the initial report index is always 1; a different value is sent for testing.
    WorkItem unknownKindItem = new WorkItem();
    unknownKindItem.setId(1L);
    unknownKindItem.setJobId("Expected to fail the job");
    unknownKindItem.setInitialReportIndex(4L);

    WorkItemStatusClient statusClient = mock(WorkItemStatusClient.class);
    worker.doWork(unknownKindItem, statusClient);

    // Exactly one error must have been reported; capture it for inspection.
    ArgumentCaptor<Throwable> reported = ArgumentCaptor.forClass(Throwable.class);
    verify(statusClient).reportError(reported.capture());

    Throwable failure = reported.getValue();
    assertThat(failure, notNullValue());
    String expectedMessage = "Unknown kind of work item: " + unknownKindItem;
    assertThat(failure.getMessage(), equalTo(expectedMessage));
}
Also used : WorkItem(com.google.api.services.dataflow.model.WorkItem) Test(org.junit.Test)

Example 2 with WorkItem

use of com.google.api.services.dataflow.model.WorkItem in project beam by apache.

the class BatchDataflowWorkerTest method testWhenNoWorkIsReturnedThatWeImmediatelyRetry.

/**
 * Checks that when the work unit client first returns no work and then returns a work item, a
 * single call to {@code getAndPerformWork} succeeds and a completed status carrying that work
 * item's id is reported.
 */
@Test
public void testWhenNoWorkIsReturnedThatWeImmediatelyRetry() throws Exception {
    final String workItemId = "14";
    BatchDataflowWorker worker = new BatchDataflowWorker(null, /* pipeline */
    SdkHarnessRegistries.emptySdkHarnessRegistry(), mockWorkUnitClient, IntrinsicMapTaskExecutorFactory.defaultFactory(), options);
    WorkItem workItem = new WorkItem();
    workItem.setId(Long.parseLong(workItemId));
    workItem.setJobId("SuccessfulEmptyMapTask");
    workItem.setInitialReportIndex(12L);
    // A map task with no instructions.
    workItem.setMapTask(new MapTask().setInstructions(new ArrayList<ParallelInstruction>()).setStageName("testStage"));
    workItem.setLeaseExpireTime(TimeUtil.toCloudTime(Instant.now()));
    workItem.setReportStatusInterval(TimeUtil.toCloudDuration(Duration.standardMinutes(1)));
    // First poll yields nothing, the second yields the work item above.
    when(mockWorkUnitClient.getWorkItem()).thenReturn(Optional.<WorkItem>absent()).thenReturn(Optional.of(workItem));
    assertTrue(worker.getAndPerformWork());
    verify(mockWorkUnitClient).reportWorkItemStatus(MockitoHamcrest.argThat(new TypeSafeMatcher<WorkItemStatus>() {

        @Override
        public void describeTo(Description description) {
            // Gives the verification failure a readable expectation instead of an empty one.
            description.appendText("a completed WorkItemStatus for work item ").appendValue(workItemId);
        }

        @Override
        protected boolean matchesSafely(WorkItemStatus item) {
            // Report a mismatch by returning false rather than throwing from inside the matcher;
            // Boolean.TRUE.equals also tolerates a null completed flag.
            return Boolean.TRUE.equals(item.getCompleted()) && workItemId.equals(item.getWorkItemId());
        }
    }));
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) TypeSafeMatcher(org.hamcrest.TypeSafeMatcher) Description(org.hamcrest.Description) WorkItemStatus(com.google.api.services.dataflow.model.WorkItemStatus) MapTask(com.google.api.services.dataflow.model.MapTask) WorkItem(com.google.api.services.dataflow.model.WorkItem) Test(org.junit.Test)

Example 3 with WorkItem

use of com.google.api.services.dataflow.model.WorkItem in project beam by apache.

the class DataflowWorkUnitClient method getWorkItem.

/**
 * Requests a {@link WorkItem} from the Dataflow service.
 *
 * <p>Returns {@link Optional#absent()} when no work was found, or when the returned item has no
 * id (such an item is logged at debug level and discarded). For a usable item, the stage name and
 * work id are recorded in {@link DataflowWorkerLoggingMDC} and the stage start time is set to the
 * current time before the item is returned.
 *
 * <p>If work is returned, the calling thread should call reportWorkItemStatus after completing it
 * and before requesting another work item.
 */
@Override
public Optional<WorkItem> getWorkItem() throws IOException {
    List<String> requestedTypes = ImmutableList.of(WORK_ITEM_TYPE_MAP_TASK, WORK_ITEM_TYPE_SEQ_MAP_TASK, WORK_ITEM_TYPE_REMOTE_SOURCE_TASK);

    // All remote sources require the "remote_source" capability. Dataflow's
    // custom sources are further tagged with the format "custom_source".
    List<String> workerCapabilities = new ArrayList<String>();
    workerCapabilities.add(options.getWorkerId());
    workerCapabilities.add(CAPABILITY_REMOTE_SOURCE);
    workerCapabilities.add(PropertyNames.CUSTOM_SOURCE_FORMAT);
    String workerPool = options.getWorkerPool();
    if (workerPool != null) {
        workerCapabilities.add(workerPool);
    }

    Optional<WorkItem> fetched = getWorkItemInternal(requestedTypes, workerCapabilities);
    if (!fetched.isPresent()) {
        // Nothing was returned by the service at this time.
        return Optional.absent();
    }

    WorkItem work = fetched.get();
    if (work.getId() == null) {
        logger.debug("Discarding invalid work item {}", work);
        return Optional.absent();
    }

    // Pick the stage name from whichever task the item carries; it stays null if none is set.
    String stage = null;
    if (work.getMapTask() != null) {
        stage = work.getMapTask().getStageName();
        logger.info("Starting MapTask stage {}", stage);
    } else if (work.getSeqMapTask() != null) {
        stage = work.getSeqMapTask().getStageName();
        logger.info("Starting SeqMapTask stage {}", stage);
    } else if (work.getSourceOperationTask() != null) {
        stage = work.getSourceOperationTask().getStageName();
        logger.info("Starting SourceOperationTask stage {}", stage);
    }

    DataflowWorkerLoggingMDC.setStageName(stage);
    stageStartTime.set(DateTime.now());
    DataflowWorkerLoggingMDC.setWorkId(Long.toString(work.getId()));
    return fetched;
}
Also used : ArrayList(java.util.ArrayList) WorkItem(com.google.api.services.dataflow.model.WorkItem)

Example 4 with WorkItem

use of com.google.api.services.dataflow.model.WorkItem in project beam by apache.

the class StreamingDataflowWorkerTest method testBasic.

/**
 * Starts a streaming worker against a {@link FakeWindmillServer}, offers it a batch of inputs and
 * checks that every input produced the expected commit and that the hot key logger was invoked
 * at least once.
 */
@Test
public void testBasic() throws Exception {
    List<ParallelInstruction> instructions = Arrays.asList(makeSourceInstruction(StringUtf8Coder.of()), makeSinkInstruction(StringUtf8Coder.of(), 0));

    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    server.setIsReady(false);

    // Global streaming config that the mocked work unit client hands back.
    StreamingConfigTask configTask = new StreamingConfigTask();
    configTask.setStreamingComputationConfigs(ImmutableList.of(makeDefaultStreamingComputationConfig(instructions)));
    configTask.setWindmillServiceEndpoint("foo");
    WorkItem configWorkItem = new WorkItem();
    configWorkItem.setStreamingConfigTask(configTask);
    when(mockWorkUnitClient.getGlobalStreamingConfigWorkItem()).thenReturn(Optional.of(configWorkItem));

    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
    StreamingDataflowWorker worker = makeWorker(instructions, options, true);
    worker.start();

    final int numIters = 2000;
    for (int i = 0; i < numIters; i++) {
        long timestampMicros = TimeUnit.MILLISECONDS.toMicros(i);
        server.addWorkToOffer(makeInput(i, timestampMicros));
    }

    Map<Long, Windmill.WorkItemCommitRequest> commits = server.waitForAndGetCommits(numIters);
    worker.stop();

    // Each offered input must have a commit equal to the expected output for the same index.
    for (int i = 0; i < numIters; i++) {
        long key = i;
        assertTrue(commits.containsKey(key));
        assertEquals(makeExpectedOutput(i, TimeUnit.MILLISECONDS.toMicros(i)).build(), commits.get(key));
    }

    verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any());
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) WorkItemCommitRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest) AtomicLong(java.util.concurrent.atomic.AtomicLong) DataflowCounterUpdateExtractor.splitIntToLong(org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong) UnsignedLong(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong) StreamingConfigTask(com.google.api.services.dataflow.model.StreamingConfigTask) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Structs.addString(org.apache.beam.runners.dataflow.util.Structs.addString) WorkItem(com.google.api.services.dataflow.model.WorkItem) StreamingDataflowWorkerOptions(org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions) Test(org.junit.Test)

Example 5 with WorkItem

use of com.google.api.services.dataflow.model.WorkItem in project beam by apache.

the class StreamingDataflowWorkerTest method testHotKeyLogging.

/**
 * Checks that, with {@code --hotKeyLoggingEnabled=true}, the worker passes the key of the
 * offered work ("key") to the hot key logger at least once.
 */
@Test
public void testHotKeyLogging() throws Exception {
    // Both the source and the sink use a KV coder of UTF-8 strings.
    List<ParallelInstruction> instructions = Arrays.asList(makeSourceInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())), makeSinkInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()), 0));

    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    server.setIsReady(false);

    // Global streaming config that the mocked work unit client hands back.
    StreamingConfigTask configTask = new StreamingConfigTask();
    configTask.setStreamingComputationConfigs(ImmutableList.of(makeDefaultStreamingComputationConfig(instructions)));
    configTask.setWindmillServiceEndpoint("foo");
    WorkItem configWorkItem = new WorkItem();
    configWorkItem.setStreamingConfigTask(configTask);
    when(mockWorkUnitClient.getGlobalStreamingConfigWorkItem()).thenReturn(Optional.of(configWorkItem));

    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server, "--hotKeyLoggingEnabled=true");
    StreamingDataflowWorker worker = makeWorker(instructions, options, true);
    worker.start();

    final int numIters = 2000;
    for (int i = 0; i < numIters; i++) {
        long timestampMicros = TimeUnit.MILLISECONDS.toMicros(i);
        server.addWorkToOffer(makeInput(i, timestampMicros, "key", DEFAULT_SHARDING_KEY));
    }

    // Wait for all commits before stopping; the commits themselves are not inspected here.
    server.waitForAndGetCommits(numIters);
    worker.stop();

    verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any(), eq("key"));
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) StreamingConfigTask(com.google.api.services.dataflow.model.StreamingConfigTask) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Structs.addString(org.apache.beam.runners.dataflow.util.Structs.addString) WorkItem(com.google.api.services.dataflow.model.WorkItem) StreamingDataflowWorkerOptions(org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions) Test(org.junit.Test)

Aggregations

WorkItem (com.google.api.services.dataflow.model.WorkItem)19 Test (org.junit.Test)11 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)7 StreamingConfigTask (com.google.api.services.dataflow.model.StreamingConfigTask)6 MapTask (com.google.api.services.dataflow.model.MapTask)5 LeaseWorkItemRequest (com.google.api.services.dataflow.model.LeaseWorkItemRequest)4 ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction)4 Instant (org.joda.time.Instant)4 StreamingComputationConfig (com.google.api.services.dataflow.model.StreamingComputationConfig)3 WorkItemStatus (com.google.api.services.dataflow.model.WorkItemStatus)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 HashSet (java.util.HashSet)3 Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString)3 StreamingDataflowWorkerOptions (org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions)3 GetWorkStream (org.apache.beam.runners.dataflow.worker.windmill.WindmillServerStub.GetWorkStream)3 CounterStructuredName (com.google.api.services.dataflow.model.CounterStructuredName)2 CounterUpdate (com.google.api.services.dataflow.model.CounterUpdate)2 Status (com.google.api.services.dataflow.model.Status)2 AutoValue (com.google.auto.value.AutoValue)2