Use of org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions in project beam by apache.
The class StreamingDataflowWorkerTest, method testStuckCommit.
@Test
public void testStuckCommit() throws Exception {
  if (!streamingEngine) {
    // Not implemented for the non-streaming engine.
    return;
  }
  List<ParallelInstruction> instructions =
      Arrays.asList(
          makeSourceInstruction(StringUtf8Coder.of()),
          makeSinkInstruction(StringUtf8Coder.of(), 0));
  FakeWindmillServer server = new FakeWindmillServer(errorCollector);
  StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
  options.setStuckCommitDurationMillis(2000);
  StreamingDataflowWorker worker = makeWorker(instructions, options, true);
  worker.start();
  // Prevent commit callbacks from being called to simulate a stuck commit.
  server.setDropStreamingCommits(true);
  // Add some work for key 1.
  server.addWorkToOffer(makeInput(10, TimeUnit.MILLISECONDS.toMicros(2), DEFAULT_KEY_STRING, 1));
  server.addWorkToOffer(makeInput(15, TimeUnit.MILLISECONDS.toMicros(3), DEFAULT_KEY_STRING, 5));
  ConcurrentHashMap<Long, Consumer<CommitStatus>> droppedCommits = server.waitForDroppedCommits(2);
  server.setDropStreamingCommits(false);
  // Enqueue another work item for key 1.
  server.addWorkToOffer(makeInput(1, TimeUnit.MILLISECONDS.toMicros(1), DEFAULT_KEY_STRING, 1));
  // Ensure that this work item processes.
  Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(1);
  // Now ensure that nothing happens if a dropped commit actually completes.
  droppedCommits.values().iterator().next().accept(CommitStatus.OK);
  worker.stop();
  assertTrue(result.containsKey(1L));
  assertEquals(
      makeExpectedOutput(1, TimeUnit.MILLISECONDS.toMicros(1), DEFAULT_KEY_STRING, 1, DEFAULT_KEY_STRING)
          .build(),
      result.get(1L));
}
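The option exercised here is the stuck-commit timeout. Below is a minimal sketch of setting it up outside the FakeWindmillServer test harness; the class name StuckCommitOptionsSketch is an illustrative assumption, and it presumes the Dataflow worker and SDK jars are on the classpath.

import org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class StuckCommitOptionsSketch {
  public static void main(String[] args) {
    // Create empty worker options, then apply the same timeout the test sets.
    StreamingDataflowWorkerOptions options =
        PipelineOptionsFactory.create().as(StreamingDataflowWorkerOptions.class);
    // Commits outstanding longer than this many milliseconds are treated as stuck.
    options.setStuckCommitDurationMillis(2000);
  }
}

The test itself reaches the same configuration through createTestingPipelineOptions(server) followed by the explicit setter.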
Use of org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions in project beam by apache.
The class StreamingDataflowWorkerTest, method testHotKeyLogging.
@Test
public void testHotKeyLogging() throws Exception {
  // This is to test that the worker can correctly log the key from a hot key.
  List<ParallelInstruction> instructions =
      Arrays.asList(
          makeSourceInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())),
          makeSinkInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()), 0));
  FakeWindmillServer server = new FakeWindmillServer(errorCollector);
  server.setIsReady(false);
  StreamingConfigTask streamingConfig = new StreamingConfigTask();
  streamingConfig.setStreamingComputationConfigs(
      ImmutableList.of(makeDefaultStreamingComputationConfig(instructions)));
  streamingConfig.setWindmillServiceEndpoint("foo");
  WorkItem workItem = new WorkItem();
  workItem.setStreamingConfigTask(streamingConfig);
  when(mockWorkUnitClient.getGlobalStreamingConfigWorkItem()).thenReturn(Optional.of(workItem));
  StreamingDataflowWorkerOptions options =
      createTestingPipelineOptions(server, "--hotKeyLoggingEnabled=true");
  StreamingDataflowWorker worker = makeWorker(instructions, options, true);
  worker.start();
  final int numIters = 2000;
  for (int i = 0; i < numIters; ++i) {
    server.addWorkToOffer(
        makeInput(i, TimeUnit.MILLISECONDS.toMicros(i), "key", DEFAULT_SHARDING_KEY));
  }
  server.waitForAndGetCommits(numIters);
  worker.stop();
  verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any(), eq("key"));
}
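In this test the option arrives as a pipeline flag rather than through a setter. Below is a minimal sketch of parsing the same flag directly with PipelineOptionsFactory; the class name HotKeyLoggingOptionsSketch is an illustrative assumption, and it presumes the property behind --hotKeyLoggingEnabled is visible through the registered Dataflow options interfaces.

import org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class HotKeyLoggingOptionsSketch {
  public static void main(String[] args) {
    // Make the worker options interface known to the factory before parsing flags.
    PipelineOptionsFactory.register(StreamingDataflowWorkerOptions.class);
    // The flag string is taken verbatim from the test's createTestingPipelineOptions call.
    StreamingDataflowWorkerOptions options =
        PipelineOptionsFactory.fromArgs("--hotKeyLoggingEnabled=true")
            .as(StreamingDataflowWorkerOptions.class);
    System.out.println(options);
  }
}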
Use of org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions in project beam by apache.
The class StreamingDataflowWorkerTest, method testHotKeyLoggingNotEnabled.
@Test
public void testHotKeyLoggingNotEnabled() throws Exception {
  // This is to test that the worker detects a hot key but does not log the key itself
  // when hot key logging is not enabled.
  List<ParallelInstruction> instructions =
      Arrays.asList(
          makeSourceInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())),
          makeSinkInstruction(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()), 0));
  FakeWindmillServer server = new FakeWindmillServer(errorCollector);
  server.setIsReady(false);
  StreamingConfigTask streamingConfig = new StreamingConfigTask();
  streamingConfig.setStreamingComputationConfigs(
      ImmutableList.of(makeDefaultStreamingComputationConfig(instructions)));
  streamingConfig.setWindmillServiceEndpoint("foo");
  WorkItem workItem = new WorkItem();
  workItem.setStreamingConfigTask(streamingConfig);
  when(mockWorkUnitClient.getGlobalStreamingConfigWorkItem()).thenReturn(Optional.of(workItem));
  StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
  StreamingDataflowWorker worker = makeWorker(instructions, options, true);
  worker.start();
  final int numIters = 2000;
  for (int i = 0; i < numIters; ++i) {
    server.addWorkToOffer(
        makeInput(i, TimeUnit.MILLISECONDS.toMicros(i), "key", DEFAULT_SHARDING_KEY));
  }
  server.waitForAndGetCommits(numIters);
  worker.stop();
  verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any());
}
Use of org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions in project beam by apache.
The class StreamingDataflowWorkerTest, method testUnboundedSourceWorkRetry.
// Regression test to ensure that a reader is not used from the cache
// on work item retry.
@Test
public void testUnboundedSourceWorkRetry() throws Exception {
  List<Integer> finalizeTracker = Lists.newArrayList();
  TestCountingSource.setFinalizeTracker(finalizeTracker);
  FakeWindmillServer server = new FakeWindmillServer(errorCollector);
  StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
  // Disable state cache so it doesn't detect retry.
  options.setWorkerCacheMb(0);
  StreamingDataflowWorker worker = makeWorker(makeUnboundedSourcePipeline(), options, false);
  worker.start();
  // Test new key.
  Windmill.GetWorkResponse work =
      buildInput(
          "work {"
              + " computation_id: \"computation\" input_data_watermark: 0"
              + " work {"
              + " key: \"0000000000000001\" sharding_key: 1 work_token: 1 cache_token: 1"
              + " }"
              + "}",
          null);
  server.addWorkToOffer(work);
  Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(1);
  Iterable<CounterUpdate> counters = worker.buildCounters();
  Windmill.WorkItemCommitRequest commit = result.get(1L);
  UnsignedLong finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
  Windmill.WorkItemCommitRequest expectedCommit =
      setMessagesMetadata(
              PaneInfo.NO_FIRING,
              CoderUtils.encodeToByteArray(
                  CollectionCoder.of(GlobalWindow.Coder.INSTANCE),
                  Arrays.asList(GlobalWindow.INSTANCE)),
              parseCommitRequest(
                  "key: \"0000000000000001\" sharding_key: 1 work_token: 1 cache_token: 1 "
                      + "source_backlog_bytes: 7 "
                      + "output_messages {"
                      + " destination_stream_id: \"out\""
                      + " bundles { key: \"0000000000000001\""
                      + " messages { timestamp: 0 data: \"0:0\" }"
                      + " messages_ids: \"\" }"
                      + "} "
                      + "source_state_updates { state: \"\000\" finalize_ids: "
                      + finalizeId
                      + "} "
                      + "source_watermark: 1000"))
          .build();
  assertThat(commit, equalTo(expectedCommit));
  assertEquals(18L, splitIntToLong(getCounter(counters, "dataflow_input_size-computation").getInteger()));
  // Test retry of the work item; it should return the same result and not start the reader
  // from the position it was left at.
  server.clearCommitsReceived();
  server.addWorkToOffer(work);
  result = server.waitForAndGetCommits(1);
  commit = result.get(1L);
  finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
  Windmill.WorkItemCommitRequest.Builder commitBuilder = expectedCommit.toBuilder();
  commitBuilder.getSourceStateUpdatesBuilder().setFinalizeIds(0, commit.getSourceStateUpdates().getFinalizeIds(0));
  expectedCommit = commitBuilder.build();
  assertThat(commit, equalTo(expectedCommit));
  // Continue with processing.
  server.addWorkToOffer(
      buildInput(
          "work {"
              + " computation_id: \"computation\" input_data_watermark: 0"
              + " work {"
              + " key: \"0000000000000001\" sharding_key: 1 work_token: 2 cache_token: 1"
              + " source_state { state: \"\001\" finalize_ids: "
              + finalizeId
              + " } "
              + " }"
              + "}",
          null));
  result = server.waitForAndGetCommits(1);
  commit = result.get(2L);
  finalizeId = UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0));
  assertThat(
      commit,
      equalTo(
          parseCommitRequest(
                  "key: \"0000000000000001\" sharding_key: 1 work_token: 2 cache_token: 1 "
                      + "source_backlog_bytes: 7 "
                      + "source_state_updates { state: \"\000\" finalize_ids: "
                      + finalizeId
                      + "} "
                      + "source_watermark: 1000")
              .build()));
  assertThat(finalizeTracker, contains(0));
}
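The StreamingDataflowWorkerOptions detail that makes this regression test work is the zero-sized worker cache. A minimal sketch of the same configuration, with the class name NoStateCacheOptionsSketch as an illustrative assumption:

import org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class NoStateCacheOptionsSketch {
  public static void main(String[] args) {
    StreamingDataflowWorkerOptions options =
        PipelineOptionsFactory.create().as(StreamingDataflowWorkerOptions.class);
    // A 0 MB cache disables state caching, so the worker cannot recognize the retried
    // work item from cached state; this mirrors options.setWorkerCacheMb(0) in the test.
    options.setWorkerCacheMb(0);
  }
}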
Use of org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions in project beam by apache.
The class StreamingDataflowWorkerTest, method testNumberOfWorkerHarnessThreadsIsHonored.
@Test(timeout = 10000)
public void testNumberOfWorkerHarnessThreadsIsHonored() throws Exception {
  int expectedNumberOfThreads = 5;
  List<ParallelInstruction> instructions =
      Arrays.asList(
          makeSourceInstruction(StringUtf8Coder.of()),
          makeDoFnInstruction(blockingFn, 0, StringUtf8Coder.of()),
          makeSinkInstruction(StringUtf8Coder.of(), 0));
  FakeWindmillServer server = new FakeWindmillServer(errorCollector);
  StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
  options.setNumberOfWorkerHarnessThreads(expectedNumberOfThreads);
  StreamingDataflowWorker worker = makeWorker(instructions, options, true);
  worker.start();
  for (int i = 0; i < expectedNumberOfThreads * 2; ++i) {
    server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i)));
  }
  // This will fail to complete if the number of threads is less than the amount of work,
  // forcing this test to time out.
  BlockingFn.counter.acquire(expectedNumberOfThreads);
  // If an additional permit can be acquired, too many items were being processed concurrently.
  if (BlockingFn.counter.tryAcquire(500, TimeUnit.MILLISECONDS)) {
    fail(
        "Expected number of threads "
            + expectedNumberOfThreads
            + " does not match actual number of work items processed concurrently "
            + BlockingFn.callCounter.get()
            + ".");
  }
  BlockingFn.blocker.countDown();
}
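Here the options cap the harness thread pool. A minimal sketch of applying the same limit, with the class name HarnessThreadsOptionsSketch as an illustrative assumption:

import org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class HarnessThreadsOptionsSketch {
  public static void main(String[] args) {
    StreamingDataflowWorkerOptions options =
        PipelineOptionsFactory.create().as(StreamingDataflowWorkerOptions.class);
    // Upper bound on concurrently processed work items; the test's BlockingFn semaphore
    // verifies that no more than this many elements are in flight at once.
    options.setNumberOfWorkerHarnessThreads(5);
  }
}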