Search in sources :

Example 6 with WorkItem

use of com.google.api.services.dataflow.model.WorkItem in project beam by apache.

the class StreamingDataflowWorkerTest method testHotKeyLoggingNotEnabled.

@Test
public void testHotKeyLoggingNotEnabled() throws Exception {
    // Runs a source -> sink pipeline over many inputs that all share the key "key" and checks
    // that hot key detection is reported through the two-argument logHotKeyDetection call.
    // NOTE(review): presumably the two-argument overload is the one that omits the key itself,
    // matching the "NotEnabled" test name -- confirm against HotKeyLogger.
    ParallelInstruction source = makeSourceInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()));
    ParallelInstruction sink = makeSinkInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()), 0);
    List<ParallelInstruction> instructions = Arrays.asList(source, sink);

    // The fake Windmill server starts out marked as not ready.
    FakeWindmillServer windmill = new FakeWindmillServer(errorCollector);
    windmill.setIsReady(false);

    // Stub the global streaming config so it carries the computation and a service endpoint.
    StreamingConfigTask configTask = new StreamingConfigTask();
    configTask.setStreamingComputationConfigs(ImmutableList.of(makeDefaultStreamingComputationConfig(instructions)));
    configTask.setWindmillServiceEndpoint("foo");
    WorkItem configWorkItem = new WorkItem();
    configWorkItem.setStreamingConfigTask(configTask);
    when(mockWorkUnitClient.getGlobalStreamingConfigWorkItem()).thenReturn(Optional.of(configWorkItem));

    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(windmill);
    StreamingDataflowWorker worker = makeWorker(instructions, options, true);
    worker.start();

    // Offer numIters work items, every one of them for the same key.
    final int numIters = 2000;
    for (int n = 0; n < numIters; ++n) {
        long micros = TimeUnit.MILLISECONDS.toMicros(n);
        windmill.addWorkToOffer(makeInput(n, micros, "key", DEFAULT_SHARDING_KEY));
    }

    // Block until all offered work has been committed before shutting the worker down.
    windmill.waitForAndGetCommits(numIters);
    worker.stop();

    verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any());
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) StreamingConfigTask(com.google.api.services.dataflow.model.StreamingConfigTask) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Structs.addString(org.apache.beam.runners.dataflow.util.Structs.addString) WorkItem(com.google.api.services.dataflow.model.WorkItem) StreamingDataflowWorkerOptions(org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions) Test(org.junit.Test)

Example 7 with WorkItem

use of com.google.api.services.dataflow.model.WorkItem in project beam by apache.

the class DataflowWorkUnitClientTest method testCloudServiceCallNoWorkId.

@Test
public void testCloudServiceCallNoWorkId() throws Exception {
    // The mocked service replies with a work item whose id is null; the client is expected to
    // report that as "no work" (an absent Optional).
    WorkItem itemWithoutId = createWorkItem(PROJECT_ID, JOB_ID);
    itemWithoutId.setId(null);
    when(request.execute()).thenReturn(generateMockResponse(itemWithoutId));

    WorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG);
    assertEquals(Optional.absent(), client.getWorkItem());

    // Parse the request body that was sent and check the fields the client filled in.
    String sentJson = request.getContentAsString();
    LeaseWorkItemRequest sentRequest = Transport.getJsonFactory().fromString(sentJson, LeaseWorkItemRequest.class);
    ImmutableList<String> expectedCapabilities = ImmutableList.of(WORKER_ID, "remote_source", "custom_source");
    ImmutableList<String> expectedWorkItemTypes = ImmutableList.of("map_task", "seq_map_task", "remote_source_task");

    assertEquals(WORKER_ID, sentRequest.getWorkerId());
    assertEquals(expectedCapabilities, sentRequest.getWorkerCapabilities());
    assertEquals(expectedWorkItemTypes, sentRequest.getWorkItemTypes());
}
Also used : LeaseWorkItemRequest(com.google.api.services.dataflow.model.LeaseWorkItemRequest) WorkItem(com.google.api.services.dataflow.model.WorkItem) Test(org.junit.Test)

Example 8 with WorkItem

use of com.google.api.services.dataflow.model.WorkItem in project beam by apache.

the class DataflowWorkUnitClientTest method testCloudServiceCallSeqMapTaskStagePropagation.

@Test
public void testCloudServiceCallSeqMapTaskStagePropagation() throws Exception {
    WorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG);

    // Have the mocked service hand out a work item that carries a seq map task with a known
    // stage name.
    final String expectedStage = "test_stage_name";
    SeqMapTask task = new SeqMapTask();
    task.setStageName(expectedStage);
    WorkItem offered = createWorkItem(PROJECT_ID, JOB_ID);
    offered.setSeqMapTask(task);
    when(request.execute()).thenReturn(generateMockResponse(offered));

    // Leasing the item must return it and must also publish its stage name to the logging MDC.
    assertEquals(Optional.of(offered), client.getWorkItem());
    assertEquals(expectedStage, DataflowWorkerLoggingMDC.getStageName());
}
Also used : ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) WorkItem(com.google.api.services.dataflow.model.WorkItem) SeqMapTask(com.google.api.services.dataflow.model.SeqMapTask) Test(org.junit.Test)

Example 9 with WorkItem

use of com.google.api.services.dataflow.model.WorkItem in project beam by apache.

the class DataflowWorkUnitClientTest method testCloudServiceCallMultipleWorkItems.

@Test
public void testCloudServiceCallMultipleWorkItems() throws Exception {
    // Getting more than one work item back from the service must surface as an IOException
    // with this message.
    expectedException.expect(IOException.class);
    expectedException.expectMessage("This version of the SDK expects no more than one work item from the service");

    // The mocked response contains two work items.
    WorkItem first = createWorkItem(PROJECT_ID, JOB_ID);
    WorkItem second = createWorkItem(PROJECT_ID, JOB_ID);
    when(request.execute()).thenReturn(generateMockResponse(first, second));

    WorkUnitClient client = new DataflowWorkUnitClient(pipelineOptions, LOG);
    client.getWorkItem();
}
Also used : WorkItem(com.google.api.services.dataflow.model.WorkItem) Test(org.junit.Test)

Example 10 with WorkItem

use of com.google.api.services.dataflow.model.WorkItem in project beam by apache.

the class StreamingDataflowWorker method getConfigFromDataflowService.

/**
 * Requests streaming configuration from Dataflow and applies whatever comes back.
 *
 * <p>When the work unit client yields no work item, this method returns without changing
 * anything. Otherwise it applies, in this order: the user-step to state-family name map, the
 * maximum work item commit size (global requests only), the per-computation map tasks, and the
 * Windmill service endpoints.
 *
 * @param computation the computation to fetch configuration for, or {@code null} to fetch the
 *     global configuration.
 * @throws IOException if the RPC fails.
 */
private void getConfigFromDataflowService(@Nullable String computation) throws IOException {
    final boolean globalRequest = (computation == null);
    final Optional<WorkItem> fetched = globalRequest
            ? workUnitClient.getGlobalStreamingConfigWorkItem()
            : workUnitClient.getStreamingConfigWorkItem(computation);
    if (fetched == null || !fetched.isPresent() || fetched.get() == null) {
        // Nothing was returned, so there is nothing to apply.
        return;
    }

    final StreamingConfigTask config = fetched.get().getStreamingConfigTask();
    Preconditions.checkState(config != null);

    if (config.getUserStepToStateFamilyNameMap() != null) {
        stateNameMap.putAll(config.getUserStepToStateFamilyNameMap());
    }

    if (globalRequest) {
        // Take the configured limit only if it is positive and fits in an int; otherwise use
        // 180 << 20 bytes (180 MiB).
        final boolean limitUsable = config.getMaxWorkItemCommitBytes() != null
                && config.getMaxWorkItemCommitBytes() > 0
                && config.getMaxWorkItemCommitBytes() <= Integer.MAX_VALUE;
        setMaxWorkItemCommitBytes(limitUsable ? config.getMaxWorkItemCommitBytes().intValue() : (180 << 20));
    }

    // Register each computation in the config as a MapTask built from its names and instructions.
    final List<StreamingComputationConfig> computationConfigs = config.getStreamingComputationConfigs();
    if (computationConfigs != null) {
        for (StreamingComputationConfig entry : computationConfigs) {
            MapTask task = new MapTask();
            task.setSystemName(entry.getSystemName());
            task.setStageName(entry.getStageName());
            task.setInstructions(entry.getInstructions());
            addComputation(entry.getComputationId(), task, entry.getTransformUserNameToStateFamily());
        }
    }

    final String endpointSpec = config.getWindmillServiceEndpoint();
    if (endpointSpec == null || endpointSpec.isEmpty()) {
        // No endpoint supplied; the Windmill server's endpoints are left as they are.
        return;
    }

    // Port used for endpoints that do not name one: the configured port when it is set and
    // non-zero, else 443.
    final int defaultPort = (config.getWindmillServicePort() != null && config.getWindmillServicePort() != 0)
            ? config.getWindmillServicePort().intValue()
            : 443;

    // The endpoint setting is a comma-separated list of host[:port] entries.
    final HashSet<HostAndPort> resolved = new HashSet<>();
    for (String hostPort : Splitter.on(',').split(endpointSpec)) {
        resolved.add(HostAndPort.fromString(hostPort).withDefaultPort(defaultPort));
    }
    windmillServer.setWindmillServiceEndpoints(resolved);
}
Also used : HostAndPort(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.net.HostAndPort) MapTask(com.google.api.services.dataflow.model.MapTask) StreamingComputationConfig(com.google.api.services.dataflow.model.StreamingComputationConfig) StreamingConfigTask(com.google.api.services.dataflow.model.StreamingConfigTask) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) WorkItem(com.google.api.services.dataflow.model.WorkItem) HashSet(java.util.HashSet)

Aggregations

WorkItem (com.google.api.services.dataflow.model.WorkItem): 19, Test (org.junit.Test): 11, ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 7, StreamingConfigTask (com.google.api.services.dataflow.model.StreamingConfigTask): 6, MapTask (com.google.api.services.dataflow.model.MapTask): 5, LeaseWorkItemRequest (com.google.api.services.dataflow.model.LeaseWorkItemRequest): 4, ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction): 4, Instant (org.joda.time.Instant): 4, StreamingComputationConfig (com.google.api.services.dataflow.model.StreamingComputationConfig): 3, WorkItemStatus (com.google.api.services.dataflow.model.WorkItemStatus): 3, IOException (java.io.IOException): 3, ArrayList (java.util.ArrayList): 3, HashSet (java.util.HashSet): 3, Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString): 3, StreamingDataflowWorkerOptions (org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions): 3, GetWorkStream (org.apache.beam.runners.dataflow.worker.windmill.WindmillServerStub.GetWorkStream): 3, CounterStructuredName (com.google.api.services.dataflow.model.CounterStructuredName): 2, CounterUpdate (com.google.api.services.dataflow.model.CounterUpdate): 2, Status (com.google.api.services.dataflow.model.Status): 2, AutoValue (com.google.auto.value.AutoValue): 2