
Example 6 with StreamingDataflowWorkerOptions

Use of org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions in project beam by apache.

From the class StreamingDataflowWorkerTest, method testStuckCommit:

@Test
public void testStuckCommit() throws Exception {
    if (!streamingEngine) {
        // Stuck-commit handling is not implemented for the non-streaming-engine path.
        return;
    }
    List<ParallelInstruction> instructions = Arrays.asList(makeSourceInstruction(StringUtf8Coder.of()), makeSinkInstruction(StringUtf8Coder.of(), 0));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
    options.setStuckCommitDurationMillis(2000);
    StreamingDataflowWorker worker = makeWorker(instructions, options, true);
    worker.start();
    // Prevent commit callbacks from being called to simulate a stuck commit.
    server.setDropStreamingCommits(true);
    // Add work items for sharding keys 1 and 5; their commits will be dropped.
    server.addWorkToOffer(makeInput(10, TimeUnit.MILLISECONDS.toMicros(2), DEFAULT_KEY_STRING, 1));
    server.addWorkToOffer(makeInput(15, TimeUnit.MILLISECONDS.toMicros(3), DEFAULT_KEY_STRING, 5));
    ConcurrentHashMap<Long, Consumer<CommitStatus>> droppedCommits = server.waitForDroppedCommits(2);
    server.setDropStreamingCommits(false);
    // Enqueue another work item for sharding key 1.
    server.addWorkToOffer(makeInput(1, TimeUnit.MILLISECONDS.toMicros(1), DEFAULT_KEY_STRING, 1));
    // Ensure that this work item processes.
    Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(1);
    // Now ensure that nothing happens if a dropped commit actually completes.
    droppedCommits.values().iterator().next().accept(CommitStatus.OK);
    worker.stop();
    assertTrue(result.containsKey(1L));
    assertEquals(makeExpectedOutput(1, TimeUnit.MILLISECONDS.toMicros(1), DEFAULT_KEY_STRING, 1, DEFAULT_KEY_STRING).build(), result.get(1L));
}
Also used: ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction), Consumer (java.util.function.Consumer), WorkItemCommitRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest), AtomicLong (java.util.concurrent.atomic.AtomicLong), DataflowCounterUpdateExtractor.splitIntToLong (org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong), UnsignedLong (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong), StreamingDataflowWorkerOptions (org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions), Test (org.junit.Test)
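
For context, StreamingDataflowWorkerOptions is an ordinary PipelineOptions subinterface, so a test can also build it directly from the options factory rather than through the createTestingPipelineOptions helper used above (that helper is not shown in this excerpt). A minimal sketch of constructing the options and setting the stuck-commit threshold the way testStuckCommit does:

import org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

class StuckCommitOptionsSketch {
    static StreamingDataflowWorkerOptions buildOptions() {
        // Convert a fresh PipelineOptions instance to the worker-specific view.
        StreamingDataflowWorkerOptions options =
                PipelineOptionsFactory.create().as(StreamingDataflowWorkerOptions.class);
        // Commits pending longer than 2 seconds are treated as stuck.
        options.setStuckCommitDurationMillis(2000);
        return options;
    }
}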

Example 7 with StreamingDataflowWorkerOptions

Use of org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions in project beam by apache.

From the class StreamingDataflowWorkerTest, method testHotKeyLogging:

@Test
public void testHotKeyLogging() throws Exception {
    // This is to test that the worker can correctly log the key from a hot key.
    List<ParallelInstruction> instructions = Arrays.asList(makeSourceInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())), makeSinkInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()), 0));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    server.setIsReady(false);
    StreamingConfigTask streamingConfig = new StreamingConfigTask();
    streamingConfig.setStreamingComputationConfigs(ImmutableList.of(makeDefaultStreamingComputationConfig(instructions)));
    streamingConfig.setWindmillServiceEndpoint("foo");
    WorkItem workItem = new WorkItem();
    workItem.setStreamingConfigTask(streamingConfig);
    when(mockWorkUnitClient.getGlobalStreamingConfigWorkItem()).thenReturn(Optional.of(workItem));
    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server, "--hotKeyLoggingEnabled=true");
    StreamingDataflowWorker worker = makeWorker(instructions, options, true);
    worker.start();
    final int numIters = 2000;
    for (int i = 0; i < numIters; ++i) {
        server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i), "key", DEFAULT_SHARDING_KEY));
    }
    server.waitForAndGetCommits(numIters);
    worker.stop();
    verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any(), eq("key"));
}
Also used: ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction), StreamingConfigTask (com.google.api.services.dataflow.model.StreamingConfigTask), ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString), Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString), WorkItem (com.google.api.services.dataflow.model.WorkItem), StreamingDataflowWorkerOptions (org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions), Test (org.junit.Test)
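
The --hotKeyLoggingEnabled=true flag passed to createTestingPipelineOptions above is a regular pipeline option, so it can also be parsed straight from command-line style arguments. A minimal sketch, assuming the property is reachable from StreamingDataflowWorkerOptions as the test implies:

import org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

class HotKeyOptionsSketch {
    static StreamingDataflowWorkerOptions buildOptions() {
        // Parse the same flag the test passes through its options helper.
        return PipelineOptionsFactory.fromArgs("--hotKeyLoggingEnabled=true")
                .as(StreamingDataflowWorkerOptions.class);
    }
}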

Example 8 with StreamingDataflowWorkerOptions

Use of org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions in project beam by apache.

From the class StreamingDataflowWorkerTest, method testHotKeyLoggingNotEnabled:

@Test
public void testHotKeyLoggingNotEnabled() throws Exception {
    // This is to test that the worker does not log the key itself when hot key logging is not enabled.
    List<ParallelInstruction> instructions = Arrays.asList(makeSourceInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())), makeSinkInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()), 0));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    server.setIsReady(false);
    StreamingConfigTask streamingConfig = new StreamingConfigTask();
    streamingConfig.setStreamingComputationConfigs(ImmutableList.of(makeDefaultStreamingComputationConfig(instructions)));
    streamingConfig.setWindmillServiceEndpoint("foo");
    WorkItem workItem = new WorkItem();
    workItem.setStreamingConfigTask(streamingConfig);
    when(mockWorkUnitClient.getGlobalStreamingConfigWorkItem()).thenReturn(Optional.of(workItem));
    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
    StreamingDataflowWorker worker = makeWorker(instructions, options, true);
    worker.start();
    final int numIters = 2000;
    for (int i = 0; i < numIters; ++i) {
        server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i), "key", DEFAULT_SHARDING_KEY));
    }
    server.waitForAndGetCommits(numIters);
    worker.stop();
    verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any());
}
Also used: ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction), StreamingConfigTask (com.google.api.services.dataflow.model.StreamingConfigTask), ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString), Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString), WorkItem (com.google.api.services.dataflow.model.WorkItem), StreamingDataflowWorkerOptions (org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions), Test (org.junit.Test)
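
The only behavioral difference from Example 7 is that no flag is set and the verification omits the eq("key") argument: the detection is still logged, just without the user key. The Mockito matchers suggest two logger overloads roughly like the following sketch (the parameter types are assumptions inferred from the test, not the actual HotKeyLogger signatures):

import org.joda.time.Duration;

// Hypothetical shape of the logger under test, inferred from the verify(...) calls.
interface HotKeyLoggerShape {
    // Hot key logging disabled (the default): the key itself is not included.
    void logHotKeyDetection(String userStepName, Duration hotKeyAge);
    // Hot key logging enabled: the offending key is logged as well.
    void logHotKeyDetection(String userStepName, Duration hotKeyAge, Object hotKey);
}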

Example 9 with StreamingDataflowWorkerOptions

Use of org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions in project beam by apache.

From the class StreamingDataflowWorkerTest, method testUnboundedSourceWorkRetry:

// Regression test to ensure that a reader is not used from the cache
// on work item retry.
@Test
public void testUnboundedSourceWorkRetry() throws Exception {
    List<Integer> finalizeTracker = Lists.newArrayList();
    TestCountingSource.setFinalizeTracker(finalizeTracker);
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
    // Disable state cache so it doesn't detect retry.
    options.setWorkerCacheMb(0);
    StreamingDataflowWorker worker = makeWorker(makeUnboundedSourcePipeline(), options, false);
    worker.start();
    // Test new key.
    Windmill.GetWorkResponse work = buildInput("work {" + "  computation_id: \"computation\"" + "  input_data_watermark: 0" + "  work {" + "    key: \"0000000000000001\"" + "    sharding_key: 1" + "    work_token: 1" + "    cache_token: 1" + "  }" + "}", null);
    server.addWorkToOffer(work);
    Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(1);
    Iterable<CounterUpdate> counters = worker.buildCounters();
    Windmill.WorkItemCommitRequest commit = result.get(1L);
    UnsignedLong finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
    Windmill.WorkItemCommitRequest expectedCommit = setMessagesMetadata(PaneInfo.NO_FIRING, CoderUtils.encodeToByteArray(CollectionCoder.of(GlobalWindow.Coder.INSTANCE), Arrays.asList(GlobalWindow.INSTANCE)), parseCommitRequest("key: \"0000000000000001\" " + "sharding_key: 1 " + "work_token: 1 " + "cache_token: 1 " + "source_backlog_bytes: 7 " + "output_messages {" + "  destination_stream_id: \"out\"" + "  bundles {" + "    key: \"0000000000000001\"" + "    messages {" + "      timestamp: 0" + "      data: \"0:0\"" + "    }" + "    messages_ids: \"\"" + "  }" + "} " + "source_state_updates {" + "  state: \"\000\"" + "  finalize_ids: " + finalizeId + "} " + "source_watermark: 1000")).build();
    assertThat(commit, equalTo(expectedCommit));
    assertEquals(18L, splitIntToLong(getCounter(counters, "dataflow_input_size-computation").getInteger()));
    // Test retry of work item, it should return the same result and not start the reader from the
    // position it was left at.
    server.clearCommitsReceived();
    server.addWorkToOffer(work);
    result = server.waitForAndGetCommits(1);
    commit = result.get(1L);
    finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
    Windmill.WorkItemCommitRequest.Builder commitBuilder = expectedCommit.toBuilder();
    commitBuilder.getSourceStateUpdatesBuilder().setFinalizeIds(0, commit.getSourceStateUpdates().getFinalizeIds(0));
    expectedCommit = commitBuilder.build();
    assertThat(commit, equalTo(expectedCommit));
    // Continue with processing.
    server.addWorkToOffer(buildInput("work {" + "  computation_id: \"computation\"" + "  input_data_watermark: 0" + "  work {" + "    key: \"0000000000000001\"" + "    sharding_key: 1" + "    work_token: 2" + "    cache_token: 1" + "    source_state {" + "      state: \"\001\"" + "      finalize_ids: " + finalizeId + "    } " + "  }" + "}", null));
    result = server.waitForAndGetCommits(1);
    commit = result.get(2L);
    finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
    assertThat(commit, equalTo(parseCommitRequest("key: \"0000000000000001\" " + "sharding_key: 1 " + "work_token: 2 " + "cache_token: 1 " + "source_backlog_bytes: 7 " + "source_state_updates {" + "  state: \"\000\"" + "  finalize_ids: " + finalizeId + "} " + "source_watermark: 1000").build()));
    assertThat(finalizeTracker, contains(0));
}
Also used: UnsignedLong (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong), WorkItemCommitRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest), StreamingDataflowWorkerOptions (org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions), CounterUpdate (com.google.api.services.dataflow.model.CounterUpdate), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), GetWorkResponse (org.apache.beam.runners.dataflow.worker.windmill.Windmill.GetWorkResponse), AtomicLong (java.util.concurrent.atomic.AtomicLong), DataflowCounterUpdateExtractor.splitIntToLong (org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong), Windmill (org.apache.beam.runners.dataflow.worker.windmill.Windmill), Test (org.junit.Test)
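
The buildInput and parseCommitRequest helpers used above are not shown in this excerpt. A plausible sketch is that they parse the text-format protos with the vendored protobuf TextFormat; the helper name and error handling below are assumptions:

import org.apache.beam.runners.dataflow.worker.windmill.Windmill;
import org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.TextFormat;

class WindmillTextProtoSketch {
    // Parse a text-format GetWorkResponse, as buildInput plausibly does.
    static Windmill.GetWorkResponse parseGetWork(String textProto) throws TextFormat.ParseException {
        Windmill.GetWorkResponse.Builder builder = Windmill.GetWorkResponse.newBuilder();
        TextFormat.merge(textProto, builder);
        return builder.build();
    }
}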

Example 10 with StreamingDataflowWorkerOptions

Use of org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions in project beam by apache.

From the class StreamingDataflowWorkerTest, method testNumberOfWorkerHarnessThreadsIsHonored:

@Test(timeout = 10000)
public void testNumberOfWorkerHarnessThreadsIsHonored() throws Exception {
    int expectedNumberOfThreads = 5;
    List<ParallelInstruction> instructions = Arrays.asList(makeSourceInstruction(StringUtf8Coder.of()), makeDoFnInstruction(blockingFn, 0, StringUtf8Coder.of()), makeSinkInstruction(StringUtf8Coder.of(), 0));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
    options.setNumberOfWorkerHarnessThreads(expectedNumberOfThreads);
    StreamingDataflowWorker worker = makeWorker(instructions, options, true);
    worker.start();
    for (int i = 0; i < expectedNumberOfThreads * 2; ++i) {
        server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i)));
    }
    // This will fail to complete if the number of threads is less than the amount of work,
    // forcing this test to time out.
    BlockingFn.counter.acquire(expectedNumberOfThreads);
    // Attempt to acquire an additional permit; if that succeeds, too many items
    // were being processed concurrently.
    if (BlockingFn.counter.tryAcquire(500, TimeUnit.MILLISECONDS)) {
        fail("Expected number of threads " + expectedNumberOfThreads + " does not match actual " + "number of work items processed concurrently " + BlockingFn.callCounter.get() + ".");
    }
    BlockingFn.blocker.countDown();
}
Also used: ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction), StreamingDataflowWorkerOptions (org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions), Test (org.junit.Test)
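
BlockingFn itself is not shown in this excerpt. A plausible reconstruction, inferred from how the test uses its static blocker, counter, and callCounter fields (the actual implementation in the test class may differ):

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Semaphore;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.beam.sdk.transforms.DoFn;

// Hypothetical sketch: each element releases one permit on `counter`, then
// blocks until the test counts down `blocker`, so the number of available
// permits tracks how many elements are being processed concurrently.
class BlockingFn extends DoFn<String, String> {
    static final CountDownLatch blocker = new CountDownLatch(1);
    static final Semaphore counter = new Semaphore(0);
    static final AtomicInteger callCounter = new AtomicInteger(0);

    @ProcessElement
    public void processElement(ProcessContext c) throws InterruptedException {
        callCounter.incrementAndGet();
        counter.release();   // one permit per in-flight element
        blocker.await();     // hold this worker thread until the test releases it
        c.output(c.element());
    }
}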

Aggregations

StreamingDataflowWorkerOptions (org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions): 12 uses
Test (org.junit.Test): 10 uses
ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction): 9 uses
AtomicLong (java.util.concurrent.atomic.AtomicLong): 5 uses
Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString): 5 uses
DataflowCounterUpdateExtractor.splitIntToLong (org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong): 5 uses
WorkItemCommitRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest): 5 uses
ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 5 uses
UnsignedLong (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong): 5 uses
StreamingConfigTask (com.google.api.services.dataflow.model.StreamingConfigTask): 3 uses
WorkItem (com.google.api.services.dataflow.model.WorkItem): 3 uses
CounterUpdate (com.google.api.services.dataflow.model.CounterUpdate): 1 use
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1 use
Consumer (java.util.function.Consumer): 1 use
Windmill (org.apache.beam.runners.dataflow.worker.windmill.Windmill): 1 use
GetWorkResponse (org.apache.beam.runners.dataflow.worker.windmill.Windmill.GetWorkResponse): 1 use