Use of com.google.api.services.dataflow.model.MapTask in the Apache Beam project.
Class BatchDataflowWorkerTest, method testWhenNoWorkIsReturnedThatWeImmediatelyRetry.
/**
 * Stubs the work unit client to return no work on the first poll and a work item with an empty
 * map task on the second, then checks that a single {@code getAndPerformWork()} call reports
 * success and that a completed status for that work item is sent back.
 */
@Test
public void testWhenNoWorkIsReturnedThatWeImmediatelyRetry() throws Exception {
  final String workItemId = "14";
  BatchDataflowWorker worker =
      new BatchDataflowWorker(
          null /* pipeline */,
          SdkHarnessRegistries.emptySdkHarnessRegistry(),
          mockWorkUnitClient,
          IntrinsicMapTaskExecutorFactory.defaultFactory(),
          options);

  WorkItem workItem = new WorkItem();
  workItem.setId(Long.parseLong(workItemId));
  workItem.setJobId("SuccessfulEmptyMapTask");
  workItem.setInitialReportIndex(12L);
  workItem.setMapTask(
      new MapTask()
          .setInstructions(new ArrayList<ParallelInstruction>())
          .setStageName("testStage"));
  workItem.setLeaseExpireTime(TimeUtil.toCloudTime(Instant.now()));
  workItem.setReportStatusInterval(TimeUtil.toCloudDuration(Duration.standardMinutes(1)));

  // First poll yields nothing; the second one hands out the work item.
  when(mockWorkUnitClient.getWorkItem())
      .thenReturn(Optional.<WorkItem>absent())
      .thenReturn(Optional.of(workItem));

  assertTrue(worker.getAndPerformWork());

  verify(mockWorkUnitClient)
      .reportWorkItemStatus(
          MockitoHamcrest.argThat(
              new TypeSafeMatcher<WorkItemStatus>() {
                @Override
                public void describeTo(Description description) {
                  description
                      .appendText("a completed WorkItemStatus for work item ")
                      .appendValue(workItemId);
                }

                @Override
                protected boolean matchesSafely(WorkItemStatus item) {
                  // Report a mismatch by returning false rather than asserting, so that a
                  // failure surfaces as a Mockito argument mismatch that includes the
                  // description above. Boolean.TRUE.equals also tolerates a null flag.
                  return Boolean.TRUE.equals(item.getCompleted())
                      && workItemId.equals(item.getWorkItemId());
                }
              }));
}
Use of com.google.api.services.dataflow.model.MapTask in the Apache Beam project.
Class IntrinsicMapTaskExecutorFactoryTest, method testCreateMapTaskExecutor.
/**
 * Creates an executor for a map task made of a read, two ParDos, a flatten and a write
 * instruction, then checks the created operations, how their receivers are connected, and which
 * counter updates are extracted afterwards.
 */
@Test
public void testCreateMapTaskExecutor() throws Exception {
  List<ParallelInstruction> pipelineInstructions =
      Arrays.asList(
          createReadInstruction("Read"),
          createParDoInstruction(0, 0, "DoFn1"),
          createParDoInstruction(0, 0, "DoFnWithContext"),
          createFlattenInstruction(1, 0, 2, 0, "Flatten"),
          createWriteInstruction(3, 0, "Write"));

  MapTask task = new MapTask();
  task.setStageName(STAGE);
  task.setSystemName("systemName");
  task.setInstructions(pipelineInstructions);
  task.setFactory(Transport.getJsonFactory());

  try (DataflowMapTaskExecutor executor =
      mapTaskExecutorFactory.create(
          null /* beamFnControlClientHandler */,
          null /* GrpcFnServer<GrpcDataService> */,
          null /* ApiServiceDescriptor */,
          null /* GrpcFnServer<GrpcStateService> */,
          mapTaskToNetwork.apply(task),
          options,
          STAGE,
          readerRegistry,
          sinkRegistry,
          BatchModeExecutionContext.forTesting(options, counterSet, "testStage"),
          counterSet,
          idGenerator)) {
    // Safe covariant cast not expressible without rawtypes.
    // TODO(https://issues.apache.org/jira/browse/BEAM-10556)
    @SuppressWarnings({"rawtypes", "unchecked"})
    List<Object> createdOperations = (List) executor.operations;

    assertThat(
        createdOperations,
        hasItems(
            instanceOf(ReadOperation.class),
            instanceOf(ParDoOperation.class),
            instanceOf(ParDoOperation.class),
            instanceOf(FlattenOperation.class),
            instanceOf(WriteOperation.class)));

    // Verify that the inputs are attached: the read has two receivers on its first output.
    ReadOperation read =
        Iterables.getOnlyElement(Iterables.filter(createdOperations, ReadOperation.class));
    assertEquals(2, read.receivers[0].getReceiverCount());

    // Every ParDo sends its first output to the single flatten.
    FlattenOperation flatten =
        Iterables.getOnlyElement(Iterables.filter(createdOperations, FlattenOperation.class));
    Iterable<ParDoOperation> parDos = Iterables.filter(createdOperations, ParDoOperation.class);
    for (ParDoOperation parDo : parDos) {
      assertSame(flatten, parDo.receivers[0].getOnlyReceiver());
    }

    // The flatten sends its first output to the single write.
    WriteOperation write =
        Iterables.getOnlyElement(Iterables.filter(createdOperations, WriteOperation.class));
    assertSame(write, flatten.receivers[0].getOnlyReceiver());
  }

  // "other" state only got created upon MapTaskExecutor.execute().
  assertNull(counterSet.getExistingCounter("test-other-msecs"));

  counterSet.extractUpdates(false, updateExtractor);
  verifyOutputCounters(
      updateExtractor,
      "read_output_name",
      "DoFn1_output",
      "DoFnWithContext_output",
      "flatten_output_name");
  verify(updateExtractor).longSum(eq(named("Read-ByteCount")), anyBoolean(), anyLong());
  verify(updateExtractor).longSum(eq(named("Write-ByteCount")), anyBoolean(), anyLong());
  verifyNoMoreInteractions(updateExtractor);
}
Use of com.google.api.services.dataflow.model.MapTask in the Apache Beam project.
Class StreamingDataflowWorker, method getConfigFromDataflowService.
/**
 * Requests a streaming configuration work item from Dataflow and applies its contents.
 *
 * <p>A non-null {@code computation} requests the configuration of that computation; a null value
 * requests the global configuration. If no work item comes back, nothing is changed. Otherwise
 * the state-family name map is merged, every listed computation is registered, and (when an
 * endpoint is configured) the Windmill service endpoints are replaced. The maximum work item
 * commit size is only updated for global requests.
 *
 * @param computation the computation to fetch configuration for, or null for global configuration
 * @throws IOException if the RPC fails.
 */
private void getConfigFromDataflowService(@Nullable String computation) throws IOException {
  Optional<WorkItem> fetched =
      computation == null
          ? workUnitClient.getGlobalStreamingConfigWorkItem()
          : workUnitClient.getStreamingConfigWorkItem(computation);
  if (fetched == null || !fetched.isPresent() || fetched.get() == null) {
    return;
  }

  StreamingConfigTask config = fetched.get().getStreamingConfigTask();
  Preconditions.checkState(config != null);

  if (config.getUserStepToStateFamilyNameMap() != null) {
    stateNameMap.putAll(config.getUserStepToStateFamilyNameMap());
  }

  if (computation == null) {
    // Only accept a configured limit that is positive and fits in an int;
    // otherwise fall back to 180 << 20 bytes (180 MiB).
    boolean limitIsUsable =
        config.getMaxWorkItemCommitBytes() != null
            && config.getMaxWorkItemCommitBytes() > 0
            && config.getMaxWorkItemCommitBytes() <= Integer.MAX_VALUE;
    setMaxWorkItemCommitBytes(
        limitIsUsable ? config.getMaxWorkItemCommitBytes().intValue() : 180 << 20);
  }

  List<StreamingComputationConfig> computationConfigs = config.getStreamingComputationConfigs();
  if (computationConfigs != null) {
    for (StreamingComputationConfig entry : computationConfigs) {
      MapTask task = new MapTask();
      task.setSystemName(entry.getSystemName());
      task.setStageName(entry.getStageName());
      task.setInstructions(entry.getInstructions());
      addComputation(
          entry.getComputationId(), task, entry.getTransformUserNameToStateFamily());
    }
  }

  String endpointList = config.getWindmillServiceEndpoint();
  if (endpointList == null || endpointList.isEmpty()) {
    return;
  }

  // Endpoints without an explicit port use the configured port, or 443 when none is set.
  boolean portIsConfigured =
      config.getWindmillServicePort() != null && config.getWindmillServicePort() != 0;
  int fallbackPort = portIsConfigured ? config.getWindmillServicePort().intValue() : 443;

  HashSet<HostAndPort> parsedEndpoints = new HashSet<>();
  for (String address : Splitter.on(',').split(endpointList)) {
    parsedEndpoints.add(HostAndPort.fromString(address).withDefaultPort(fallbackPort));
  }
  windmillServer.setWindmillServiceEndpoints(parsedEndpoints);
}
Use of com.google.api.services.dataflow.model.MapTask in the Apache Beam project.
Class FixMultiOutputInfosOnParDoInstructionsTest, method testDefaultOutputHasDifferentIdsForEachMapTask.
/**
 * Applies one function instance, backed by a decrementing id generator, to two fresh
 * single-output map tasks and expects the tags "-1" and then "-2" on the results.
 */
@Test
public void testDefaultOutputHasDifferentIdsForEachMapTask() {
  FixMultiOutputInfosOnParDoInstructions fixer =
      new FixMultiOutputInfosOnParDoInstructions(IdGenerators.decrementingLongs());

  MapTask firstResult = fixer.apply(createMapTaskWithParDo(1));
  assertEquals(createMapTaskWithParDo(1, "-1"), firstResult);

  MapTask secondResult = fixer.apply(createMapTaskWithParDo(1));
  assertEquals(createMapTaskWithParDo(1, "-2"), secondResult);
}
Use of com.google.api.services.dataflow.model.MapTask in the Apache Beam project.
Class FixMultiOutputInfosOnParDoInstructionsTest, method createMapTaskWithParDo.
/**
 * Builds a map task holding a single ParDo instruction.
 *
 * @param numOutputs the output count set on the ParDo instruction
 * @param tags one {@code MultiOutputInfo} is created per tag, in the given order; may be empty
 * @return a new map task whose only instruction wraps the ParDo
 */
private static MapTask createMapTaskWithParDo(int numOutputs, String... tags) {
  List<MultiOutputInfo> outputInfos = new ArrayList<>(tags.length);
  for (int i = 0; i < tags.length; i++) {
    MultiOutputInfo info = new MultiOutputInfo();
    info.setTag(tags[i]);
    outputInfos.add(info);
  }

  ParDoInstruction parDo = new ParDoInstruction();
  parDo.setNumOutputs(numOutputs);
  parDo.setMultiOutputInfos(outputInfos);

  ParallelInstruction wrapper = new ParallelInstruction();
  wrapper.setParDo(parDo);

  return new MapTask().setInstructions(ImmutableList.of(wrapper));
}
Aggregations