
Example 1 with StreamingConfigTask

Use of com.google.api.services.dataflow.model.StreamingConfigTask in project beam by apache.

In class StreamingDataflowWorkerTest, method testBasic:

@Test
public void testBasic() throws Exception {
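    // Pump a batch of work items through a trivial source -> sink pipeline and verify
    // that every work item is committed with the expected output.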
    List<ParallelInstruction> instructions = Arrays.asList(makeSourceInstruction(StringUtf8Coder.of()), makeSinkInstruction(StringUtf8Coder.of(), 0));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    server.setIsReady(false);
    StreamingConfigTask streamingConfig = new StreamingConfigTask();
    streamingConfig.setStreamingComputationConfigs(ImmutableList.of(makeDefaultStreamingComputationConfig(instructions)));
    streamingConfig.setWindmillServiceEndpoint("foo");
    WorkItem workItem = new WorkItem();
    workItem.setStreamingConfigTask(streamingConfig);
    when(mockWorkUnitClient.getGlobalStreamingConfigWorkItem()).thenReturn(Optional.of(workItem));
    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
    StreamingDataflowWorker worker = makeWorker(instructions, options, true);
    worker.start();
    final int numIters = 2000;
    for (int i = 0; i < numIters; ++i) {
        server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i)));
    }
    Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(numIters);
    worker.stop();
    for (int i = 0; i < numIters; ++i) {
        assertTrue(result.containsKey((long) i));
        assertEquals(makeExpectedOutput(i, TimeUnit.MILLISECONDS.toMicros(i)).build(), result.get((long) i));
    }
    verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any());
}
Also used: ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction), WorkItemCommitRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest), AtomicLong (java.util.concurrent.atomic.AtomicLong), DataflowCounterUpdateExtractor.splitIntToLong (org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong), UnsignedLong (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong), StreamingConfigTask (com.google.api.services.dataflow.model.StreamingConfigTask), ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString), Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString), WorkItem (com.google.api.services.dataflow.model.WorkItem), StreamingDataflowWorkerOptions (org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions), Test (org.junit.Test)
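The helper makeDefaultStreamingComputationConfig used in this test is not shown in the excerpt. Below is a minimal sketch of what such a helper might build, using the same StreamingComputationConfig model that getConfigFromDataflowService (Example 4) reads back; the field values and class name are illustrative assumptions, not the values used by the real test helper.

import com.google.api.services.dataflow.model.ParallelInstruction;
import com.google.api.services.dataflow.model.StreamingComputationConfig;
import java.util.List;

public class StreamingComputationConfigSketch {
    // Builds a single computation config; the worker later turns this into a MapTask
    // keyed by the computation id (see getConfigFromDataflowService in Example 4).
    public static StreamingComputationConfig makeComputationConfig(List<ParallelInstruction> instructions) {
        StreamingComputationConfig config = new StreamingComputationConfig();
        config.setComputationId("computation");
        config.setSystemName("system");
        config.setStageName("stage");
        config.setInstructions(instructions);
        return config;
    }
}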

Example 2 with StreamingConfigTask

Use of com.google.api.services.dataflow.model.StreamingConfigTask in project beam by apache.

In class StreamingDataflowWorkerTest, method testHotKeyLogging:

@Test
public void testHotKeyLogging() throws Exception {
    // This is to test that the worker can correctly log the key from a hot key.
    List<ParallelInstruction> instructions = Arrays.asList(makeSourceInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())), makeSinkInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()), 0));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    server.setIsReady(false);
    StreamingConfigTask streamingConfig = new StreamingConfigTask();
    streamingConfig.setStreamingComputationConfigs(ImmutableList.of(makeDefaultStreamingComputationConfig(instructions)));
    streamingConfig.setWindmillServiceEndpoint("foo");
    WorkItem workItem = new WorkItem();
    workItem.setStreamingConfigTask(streamingConfig);
    when(mockWorkUnitClient.getGlobalStreamingConfigWorkItem()).thenReturn(Optional.of(workItem));
    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server, "--hotKeyLoggingEnabled=true");
    StreamingDataflowWorker worker = makeWorker(instructions, options, true);
    worker.start();
    final int numIters = 2000;
    for (int i = 0; i < numIters; ++i) {
        server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i), "key", DEFAULT_SHARDING_KEY));
    }
    server.waitForAndGetCommits(numIters);
    worker.stop();
    verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any(), eq("key"));
}
Also used: ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction), StreamingConfigTask (com.google.api.services.dataflow.model.StreamingConfigTask), ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString), Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString), WorkItem (com.google.api.services.dataflow.model.WorkItem), StreamingDataflowWorkerOptions (org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions), Test (org.junit.Test)
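createTestingPipelineOptions is another test helper not shown here; besides wiring the FakeWindmillServer into the options, it has to turn flag strings such as "--hotKeyLoggingEnabled=true" into a StreamingDataflowWorkerOptions instance. The sketch below covers only that flag-parsing half, assuming the standard PipelineOptionsFactory path; the class name and exact behaviour of the real helper are assumptions.

import org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class WorkerOptionsSketch {
    // Parses "--name=value" flags and views the result as StreamingDataflowWorkerOptions.
    // Strict parsing requires each flag to correspond to a registered pipeline option.
    public static StreamingDataflowWorkerOptions fromFlags(String... args) {
        return PipelineOptionsFactory.fromArgs(args).as(StreamingDataflowWorkerOptions.class);
    }
}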

Example 3 with StreamingConfigTask

Use of com.google.api.services.dataflow.model.StreamingConfigTask in project beam by apache.

In class StreamingDataflowWorkerTest, method testHotKeyLoggingNotEnabled:

@Test
public void testHotKeyLoggingNotEnabled() throws Exception {
    // This is to test that the worker does not log the key from a hot key when hot key
    // logging is not enabled; the detection is still reported, but without the key.
    List<ParallelInstruction> instructions = Arrays.asList(makeSourceInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())), makeSinkInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()), 0));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    server.setIsReady(false);
    StreamingConfigTask streamingConfig = new StreamingConfigTask();
    streamingConfig.setStreamingComputationConfigs(ImmutableList.of(makeDefaultStreamingComputationConfig(instructions)));
    streamingConfig.setWindmillServiceEndpoint("foo");
    WorkItem workItem = new WorkItem();
    workItem.setStreamingConfigTask(streamingConfig);
    when(mockWorkUnitClient.getGlobalStreamingConfigWorkItem()).thenReturn(Optional.of(workItem));
    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
    StreamingDataflowWorker worker = makeWorker(instructions, options, true);
    worker.start();
    final int numIters = 2000;
    for (int i = 0; i < numIters; ++i) {
        server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i), "key", DEFAULT_SHARDING_KEY));
    }
    server.waitForAndGetCommits(numIters);
    worker.stop();
    verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any());
}
Also used: ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction), StreamingConfigTask (com.google.api.services.dataflow.model.StreamingConfigTask), ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString), Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString), WorkItem (com.google.api.services.dataflow.model.WorkItem), StreamingDataflowWorkerOptions (org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions), Test (org.junit.Test)

Example 4 with StreamingConfigTask

Use of com.google.api.services.dataflow.model.StreamingConfigTask in project beam by apache.

In class StreamingDataflowWorker, method getConfigFromDataflowService:

/**
 * Sends a request to get configuration from Dataflow, either for a specific computation (if
 * computation is not null) or global configuration (if computation is null).
 *
 * @throws IOException if the RPC fails.
 */
private void getConfigFromDataflowService(@Nullable String computation) throws IOException {
    Optional<WorkItem> workItem;
    if (computation != null) {
        workItem = workUnitClient.getStreamingConfigWorkItem(computation);
    } else {
        workItem = workUnitClient.getGlobalStreamingConfigWorkItem();
    }
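    // Nothing to do if the service did not return a config work item.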
    if (workItem == null || !workItem.isPresent() || workItem.get() == null) {
        return;
    }
    StreamingConfigTask config = workItem.get().getStreamingConfigTask();
    Preconditions.checkState(config != null);
    if (config.getUserStepToStateFamilyNameMap() != null) {
        stateNameMap.putAll(config.getUserStepToStateFamilyNameMap());
    }
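    // The work item commit size limit is only applied when fetching the global config.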
    if (computation == null) {
        if (config.getMaxWorkItemCommitBytes() != null && config.getMaxWorkItemCommitBytes() > 0 && config.getMaxWorkItemCommitBytes() <= Integer.MAX_VALUE) {
            setMaxWorkItemCommitBytes(config.getMaxWorkItemCommitBytes().intValue());
        } else {
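            // Fall back to a 180 MB (180 << 20 bytes) commit limit when the service value
            // is missing or out of range.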
            setMaxWorkItemCommitBytes(180 << 20);
        }
    }
    List<StreamingComputationConfig> configs = config.getStreamingComputationConfigs();
    if (configs != null) {
        for (StreamingComputationConfig computationConfig : configs) {
            MapTask mapTask = new MapTask();
            mapTask.setSystemName(computationConfig.getSystemName());
            mapTask.setStageName(computationConfig.getStageName());
            mapTask.setInstructions(computationConfig.getInstructions());
            addComputation(computationConfig.getComputationId(), mapTask, computationConfig.getTransformUserNameToStateFamily());
        }
    }
    if (config.getWindmillServiceEndpoint() != null && !config.getWindmillServiceEndpoint().isEmpty()) {
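        // Default to port 443 unless the config specifies a non-zero Windmill service port.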
        int port = 443;
        if (config.getWindmillServicePort() != null && config.getWindmillServicePort() != 0) {
            port = config.getWindmillServicePort().intValue();
        }
        HashSet<HostAndPort> endpoints = new HashSet<>();
        for (String endpoint : Splitter.on(',').split(config.getWindmillServiceEndpoint())) {
            endpoints.add(HostAndPort.fromString(endpoint).withDefaultPort(port));
        }
        windmillServer.setWindmillServiceEndpoints(endpoints);
    }
}
Also used: HostAndPort (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.net.HostAndPort), MapTask (com.google.api.services.dataflow.model.MapTask), StreamingComputationConfig (com.google.api.services.dataflow.model.StreamingComputationConfig), StreamingConfigTask (com.google.api.services.dataflow.model.StreamingConfigTask), ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString), WorkItem (com.google.api.services.dataflow.model.WorkItem), HashSet (java.util.HashSet)
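The endpoint handling at the end of getConfigFromDataflowService splits a comma-separated endpoint string and applies a default port to any entry that does not carry one. Here is a standalone sketch of that logic, assuming the same vendored Guava Splitter and HostAndPort classes the worker uses; the class and method names are illustrative.

import java.util.HashSet;
import java.util.Set;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Splitter;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.net.HostAndPort;

public class EndpointParsingSketch {
    // "host1:1234,host2" with defaultPort 443 yields host1:1234 and host2:443.
    public static Set<HostAndPort> parseEndpoints(String endpointList, int defaultPort) {
        Set<HostAndPort> endpoints = new HashSet<>();
        for (String endpoint : Splitter.on(',').split(endpointList)) {
            endpoints.add(HostAndPort.fromString(endpoint).withDefaultPort(defaultPort));
        }
        return endpoints;
    }
}

Using withDefaultPort rather than hard-coding the port lets an individual endpoint override the service-wide default.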

Aggregations

StreamingConfigTask (com.google.api.services.dataflow.model.StreamingConfigTask): 4 usages
WorkItem (com.google.api.services.dataflow.model.WorkItem): 4 usages
ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 4 usages
ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction): 3 usages
Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString): 3 usages
StreamingDataflowWorkerOptions (org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions): 3 usages
Test (org.junit.Test): 3 usages
MapTask (com.google.api.services.dataflow.model.MapTask): 1 usage
StreamingComputationConfig (com.google.api.services.dataflow.model.StreamingComputationConfig): 1 usage
HashSet (java.util.HashSet): 1 usage
AtomicLong (java.util.concurrent.atomic.AtomicLong): 1 usage
DataflowCounterUpdateExtractor.splitIntToLong (org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong): 1 usage
WorkItemCommitRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest): 1 usage
HostAndPort (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.net.HostAndPort): 1 usage
UnsignedLong (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong): 1 usage