Use of com.google.api.services.dataflow.model.WorkItem in project beam by apache.
In class BatchDataflowWorkerTest, the method testWhenProcessingWorkUnitFailsWeReportStatus:
@Test
public void testWhenProcessingWorkUnitFailsWeReportStatus() throws Exception {
  BatchDataflowWorker worker =
      new BatchDataflowWorker(
          null /* pipeline */,
          SdkHarnessRegistries.emptySdkHarnessRegistry(),
          mockWorkUnitClient,
          IntrinsicMapTaskExecutorFactory.defaultFactory(),
          options);
  // In practice this value is always 1, but for the sake of testing send a different value.
  long initialReportIndex = 4L;
  WorkItem workItem =
      new WorkItem()
          .setId(1L)
          .setJobId("Expected to fail the job")
          .setInitialReportIndex(initialReportIndex);
  WorkItemStatusClient workItemStatusClient = mock(WorkItemStatusClient.class);

  worker.doWork(workItem, workItemStatusClient);

  ArgumentCaptor<Throwable> errorCaptor = ArgumentCaptor.forClass(Throwable.class);
  verify(workItemStatusClient).reportError(errorCaptor.capture());

  Throwable error = errorCaptor.getValue();
  assertThat(error, notNullValue());
  assertThat(error.getMessage(), equalTo("Unknown kind of work item: " + workItem.toString()));
}
Use of com.google.api.services.dataflow.model.WorkItem in project beam by apache.
In class BatchDataflowWorkerTest, the method testWhenNoWorkIsReturnedThatWeImmediatelyRetry:
@Test
public void testWhenNoWorkIsReturnedThatWeImmediatelyRetry() throws Exception {
  final String workItemId = "14";
  BatchDataflowWorker worker =
      new BatchDataflowWorker(
          null /* pipeline */,
          SdkHarnessRegistries.emptySdkHarnessRegistry(),
          mockWorkUnitClient,
          IntrinsicMapTaskExecutorFactory.defaultFactory(),
          options);
  WorkItem workItem = new WorkItem();
  workItem.setId(Long.parseLong(workItemId));
  workItem.setJobId("SuccessfulEmptyMapTask");
  workItem.setInitialReportIndex(12L);
  workItem.setMapTask(
      new MapTask()
          .setInstructions(new ArrayList<ParallelInstruction>())
          .setStageName("testStage"));
  workItem.setLeaseExpireTime(TimeUtil.toCloudTime(Instant.now()));
  workItem.setReportStatusInterval(TimeUtil.toCloudDuration(Duration.standardMinutes(1)));

  when(mockWorkUnitClient.getWorkItem())
      .thenReturn(Optional.<WorkItem>absent())
      .thenReturn(Optional.of(workItem));

  assertTrue(worker.getAndPerformWork());
  verify(mockWorkUnitClient)
      .reportWorkItemStatus(
          MockitoHamcrest.argThat(
              new TypeSafeMatcher<WorkItemStatus>() {
                @Override
                public void describeTo(Description description) {}

                @Override
                protected boolean matchesSafely(WorkItemStatus item) {
                  assertTrue(item.getCompleted());
                  assertEquals(workItemId, item.getWorkItemId());
                  return true;
                }
              }));
}
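The anonymous matcher above leaves describeTo empty and asserts inside matchesSafely, so a mismatch surfaces as a bare assertion failure rather than a Hamcrest mismatch description. Below is a sketch of an equivalent matcher that reports through Hamcrest's own machinery; the helper name isCompletedStatusFor is hypothetical and not part of the Beam test:

// Sketch only: returns false instead of throwing, so Hamcrest can print the
// expected/actual mismatch. `isCompletedStatusFor` is a hypothetical helper.
private static TypeSafeMatcher<WorkItemStatus> isCompletedStatusFor(final String workItemId) {
  return new TypeSafeMatcher<WorkItemStatus>() {
    @Override
    public void describeTo(Description description) {
      description.appendText("a completed WorkItemStatus for work item ").appendValue(workItemId);
    }

    @Override
    protected boolean matchesSafely(WorkItemStatus item) {
      return Boolean.TRUE.equals(item.getCompleted()) && workItemId.equals(item.getWorkItemId());
    }
  };
}

The verification then reads verify(mockWorkUnitClient).reportWorkItemStatus(MockitoHamcrest.argThat(isCompletedStatusFor(workItemId))).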
Use of com.google.api.services.dataflow.model.WorkItem in project beam by apache.
In class DataflowWorkUnitClient, the method getWorkItem:
/**
* Gets a {@link WorkItem} from the Dataflow service, or returns {@link Optional#absent()} if no
* work was found.
*
* <p>If work is returned, the calling thread should call reportWorkItemStatus after completing it
* and before requesting another work item.
*/
@Override
public Optional<WorkItem> getWorkItem() throws IOException {
  List<String> workItemTypes =
      ImmutableList.of(
          WORK_ITEM_TYPE_MAP_TASK, WORK_ITEM_TYPE_SEQ_MAP_TASK, WORK_ITEM_TYPE_REMOTE_SOURCE_TASK);
  // All remote sources require the "remote_source" capability. Dataflow's
  // custom sources are further tagged with the format "custom_source".
  List<String> capabilities =
      new ArrayList<String>(
          Arrays.asList(
              options.getWorkerId(), CAPABILITY_REMOTE_SOURCE, PropertyNames.CUSTOM_SOURCE_FORMAT));
  if (options.getWorkerPool() != null) {
    capabilities.add(options.getWorkerPool());
  }

  Optional<WorkItem> workItem = getWorkItemInternal(workItemTypes, capabilities);
  if (!workItem.isPresent()) {
    // Normal case: the response contained no work, i.e. no work is available at this time.
    return Optional.absent();
  }
  if (workItem.get().getId() == null) {
    logger.debug("Discarding invalid work item {}", workItem.orNull());
    return Optional.absent();
  }

  WorkItem work = workItem.get();

  final String stage;
  if (work.getMapTask() != null) {
    stage = work.getMapTask().getStageName();
    logger.info("Starting MapTask stage {}", stage);
  } else if (work.getSeqMapTask() != null) {
    stage = work.getSeqMapTask().getStageName();
    logger.info("Starting SeqMapTask stage {}", stage);
  } else if (work.getSourceOperationTask() != null) {
    stage = work.getSourceOperationTask().getStageName();
    logger.info("Starting SourceOperationTask stage {}", stage);
  } else {
    stage = null;
  }
  DataflowWorkerLoggingMDC.setStageName(stage);

  stageStartTime.set(DateTime.now());
  DataflowWorkerLoggingMDC.setWorkId(Long.toString(work.getId()));

  return workItem;
}
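The javadoc contract above implies a simple cycle: take a work item, complete it, report status, then ask again. A minimal sketch of such a caller follows, assuming a WorkUnitClient like the implementation shown here; process and its WorkItemStatus result are hypothetical stand-ins for real work execution:

// Minimal sketch of the get/report cycle described in the javadoc above.
// `process` is a hypothetical handler, not part of the real API.
void workLoop(WorkUnitClient client) throws IOException {
  while (true) {
    Optional<WorkItem> workItem = client.getWorkItem();
    if (!workItem.isPresent()) {
      // No work available right now; a real caller would back off or retry.
      continue;
    }
    WorkItemStatus status = process(workItem.get()); // hypothetical work execution
    // Report completion before requesting another work item, per the contract.
    client.reportWorkItemStatus(status);
  }
}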
Use of com.google.api.services.dataflow.model.WorkItem in project beam by apache.
In class StreamingDataflowWorkerTest, the method testBasic:
@Test
public void testBasic() throws Exception {
  List<ParallelInstruction> instructions =
      Arrays.asList(
          makeSourceInstruction(StringUtf8Coder.of()),
          makeSinkInstruction(StringUtf8Coder.of(), 0));

  FakeWindmillServer server = new FakeWindmillServer(errorCollector);
  server.setIsReady(false);

  StreamingConfigTask streamingConfig = new StreamingConfigTask();
  streamingConfig.setStreamingComputationConfigs(
      ImmutableList.of(makeDefaultStreamingComputationConfig(instructions)));
  streamingConfig.setWindmillServiceEndpoint("foo");
  WorkItem workItem = new WorkItem();
  workItem.setStreamingConfigTask(streamingConfig);
  when(mockWorkUnitClient.getGlobalStreamingConfigWorkItem()).thenReturn(Optional.of(workItem));

  StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
  StreamingDataflowWorker worker = makeWorker(instructions, options, true);
  worker.start();

  final int numIters = 2000;
  for (int i = 0; i < numIters; ++i) {
    server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i)));
  }

  Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(numIters);
  worker.stop();

  for (int i = 0; i < numIters; ++i) {
    assertTrue(result.containsKey((long) i));
    assertEquals(
        makeExpectedOutput(i, TimeUnit.MILLISECONDS.toMicros(i)).build(), result.get((long) i));
  }

  verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any());
}
Use of com.google.api.services.dataflow.model.WorkItem in project beam by apache.
In class StreamingDataflowWorkerTest, the method testHotKeyLogging:
@Test
public void testHotKeyLogging() throws Exception {
  // This is to test that the worker can correctly log the key from a hot key.
  List<ParallelInstruction> instructions =
      Arrays.asList(
          makeSourceInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())),
          makeSinkInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()), 0));

  FakeWindmillServer server = new FakeWindmillServer(errorCollector);
  server.setIsReady(false);

  StreamingConfigTask streamingConfig = new StreamingConfigTask();
  streamingConfig.setStreamingComputationConfigs(
      ImmutableList.of(makeDefaultStreamingComputationConfig(instructions)));
  streamingConfig.setWindmillServiceEndpoint("foo");
  WorkItem workItem = new WorkItem();
  workItem.setStreamingConfigTask(streamingConfig);
  when(mockWorkUnitClient.getGlobalStreamingConfigWorkItem()).thenReturn(Optional.of(workItem));

  StreamingDataflowWorkerOptions options =
      createTestingPipelineOptions(server, "--hotKeyLoggingEnabled=true");
  StreamingDataflowWorker worker = makeWorker(instructions, options, true);
  worker.start();

  final int numIters = 2000;
  for (int i = 0; i < numIters; ++i) {
    server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i), "key", DEFAULT_SHARDING_KEY));
  }

  server.waitForAndGetCommits(numIters);
  worker.stop();

  verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any(), eq("key"));
}