Search in sources :

Example 1 with StreamingDataflowWorkerOptions

use of org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions in project beam by apache.

the class DebugCaptureTest method buildDataflowWorkerOptions.

/**
 * Builds the {@link StreamingDataflowWorkerOptions} used by this test class.
 *
 * <p>The options are created through {@code PipelineOptionsFactory.as(...)} and then populated
 * with the project, region, job id and worker id constants of the enclosing class, plus the
 * Dataflow client returned by {@code buildMockDataflow()}.
 *
 * @return the populated options instance
 * @throws Exception if any of the calls made while populating the options throws
 */
private StreamingDataflowWorkerOptions buildDataflowWorkerOptions() throws Exception {
    final StreamingDataflowWorkerOptions workerOptions =
            PipelineOptionsFactory.as(StreamingDataflowWorkerOptions.class);

    // Where the job lives.
    workerOptions.setProject(PROJECT_ID);
    workerOptions.setRegion(REGION);

    // Which job and which worker.
    workerOptions.setJobId(JOB_ID);
    workerOptions.setWorkerId(WORKER_ID);

    // Service client handed to the code under test.
    workerOptions.setDataflowClient(buildMockDataflow());

    return workerOptions;
}
Also used : StreamingDataflowWorkerOptions(org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions)

Example 2 with StreamingDataflowWorkerOptions

use of org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions in project beam by apache.

the class StreamingDataflowWorkerTest method testIgnoreRetriedKeys.

/**
 * Exercises how the worker treats work that is re-offered while earlier work is still blocked.
 *
 * <p>Sequence, as written below:
 * <ol>
 *   <li>Offer {@code numIters} work items (tokens {@code i}) plus a second item per index with
 *       token {@code i + 1000} and sharding key {@code DEFAULT_SHARDING_KEY + 1}; wait until
 *       {@code numIters * 2} permits are acquired from {@code BlockingFn.counter}.</li>
 *   <li>Offer the same tokens again, then offer new tokens ({@code i + numIters}) for the same
 *       key strings, draining the server's work queue after each round.</li>
 *   <li>Count down {@code BlockingFn.blocker} and expect exactly {@code numIters * 3} commits:
 *       tokens {@code i}, {@code i + 1000} and {@code i + numIters}.</li>
 *   <li>Offer a final round (tokens {@code i + numIters * 2}) and expect {@code numIters} more
 *       commits.</li>
 * </ol>
 *
 * @throws Exception if any helper, the fake server, or the worker throws
 */
@Test
public void testIgnoreRetriedKeys() throws Exception {
    final int numIters = 4;
    // Pipeline under test: source -> DoFn (blockingFn) -> sink.
    List<ParallelInstruction> instructions = Arrays.asList(makeSourceInstruction(StringUtf8Coder.of()), makeDoFnInstruction(blockingFn, 0, StringUtf8Coder.of()), makeSinkInstruction(StringUtf8Coder.of(), 0));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
    StreamingDataflowWorker worker = makeWorker(instructions, options, true);
    worker.start();
    for (int i = 0; i < numIters; ++i) {
        server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY));
        // Also add work for a different shard of the same key.
        server.addWorkToOffer(makeInput(i + 1000, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY + 1));
    }
    // Wait for keys to schedule.  They will be blocked.
    // Two items were offered per index, hence numIters * 2 permits.
    BlockingFn.counter.acquire(numIters * 2);
    // Re-add the work, it should be ignored due to the keys being active.
    for (int i = 0; i < numIters; ++i) {
        // Same work token.
        // NOTE(review): this uses the two-argument makeInput overload, whereas the first offer
        // above passes keyStringForIndex(i) and DEFAULT_SHARDING_KEY explicitly. Presumably the
        // overload resolves to the same key and sharding key -- confirm against its definition.
        server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i)));
        server.addWorkToOffer(makeInput(i + 1000, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY + 1));
    }
    // Give all added calls a chance to run.
    server.waitForEmptyWorkQueue();
    for (int i = 0; i < numIters; ++i) {
        // Different work token same keys.
        server.addWorkToOffer(makeInput(i + numIters, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY));
    }
    // Give all added calls a chance to run.
    server.waitForEmptyWorkQueue();
    // Release the blocked calls.
    BlockingFn.blocker.countDown();
    // Verify the output
    // Three commits per index are expected: tokens i, i + 1000 and i + numIters.
    Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(numIters * 3);
    for (int i = 0; i < numIters; ++i) {
        assertTrue(result.containsKey((long) i));
        assertEquals(makeExpectedOutput(i, TimeUnit.MILLISECONDS.toMicros(i)).build(), result.get((long) i));
        assertTrue(result.containsKey((long) i + 1000));
        assertEquals(makeExpectedOutput(i + 1000, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY + 1, keyStringForIndex(i)).build(), result.get((long) i + 1000));
        assertTrue(result.containsKey((long) i + numIters));
        assertEquals(makeExpectedOutput(i + numIters, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY, keyStringForIndex(i)).build(), result.get((long) i + numIters));
    }
    // Re-add the work, it should process due to the keys no longer being active.
    for (int i = 0; i < numIters; ++i) {
        server.addWorkToOffer(makeInput(i + numIters * 2, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY));
    }
    result = server.waitForAndGetCommits(numIters);
    // The worker is stopped before the final round of assertions runs.
    worker.stop();
    for (int i = 0; i < numIters; ++i) {
        assertTrue(result.containsKey((long) i + numIters * 2));
        assertEquals(makeExpectedOutput(i + numIters * 2, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY, keyStringForIndex(i)).build(), result.get((long) i + numIters * 2));
    }
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) WorkItemCommitRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest) AtomicLong(java.util.concurrent.atomic.AtomicLong) DataflowCounterUpdateExtractor.splitIntToLong(org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong) UnsignedLong(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong) StreamingDataflowWorkerOptions(org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions) Test(org.junit.Test)

Example 3 with StreamingDataflowWorkerOptions

use of org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions in project beam by apache.

the class StreamingDataflowWorkerTest method testBasicHarness.

/**
 * Runs a source-to-sink pipeline through the worker and checks every commit.
 *
 * <p>Offers 2000 work items to the fake Windmill server, waits for the same number of commits,
 * stops the worker, and then asserts that each work token has a commit equal to the expected
 * output. Finally verifies that {@code hotKeyLogger.logHotKeyDetection} was invoked at least
 * once.
 *
 * @throws Exception if any helper, the fake server, or the worker throws
 */
@Test
public void testBasicHarness() throws Exception {
    final int workItemCount = 2000;

    List<ParallelInstruction> pipeline = Arrays.asList(
            makeSourceInstruction(StringUtf8Coder.of()),
            makeSinkInstruction(StringUtf8Coder.of(), 0));
    FakeWindmillServer windmill = new FakeWindmillServer(errorCollector);
    StreamingDataflowWorkerOptions workerOptions = createTestingPipelineOptions(windmill);
    StreamingDataflowWorker harness = makeWorker(pipeline, workerOptions, true);
    harness.start();

    // Feed the work items; item idx carries a timestamp of idx milliseconds, in microseconds.
    for (int idx = 0; idx < workItemCount; idx++) {
        long timestampMicros = TimeUnit.MILLISECONDS.toMicros(idx);
        windmill.addWorkToOffer(makeInput(idx, timestampMicros));
    }

    Map<Long, Windmill.WorkItemCommitRequest> commits = windmill.waitForAndGetCommits(workItemCount);
    harness.stop();

    // Every offered token must have produced the expected commit.
    for (int idx = 0; idx < workItemCount; idx++) {
        Long token = (long) idx;
        long timestampMicros = TimeUnit.MILLISECONDS.toMicros(idx);
        assertTrue(commits.containsKey(token));
        assertEquals(makeExpectedOutput(idx, timestampMicros).build(), commits.get(token));
    }

    verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any());
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) WorkItemCommitRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest) AtomicLong(java.util.concurrent.atomic.AtomicLong) DataflowCounterUpdateExtractor.splitIntToLong(org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong) UnsignedLong(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Structs.addString(org.apache.beam.runners.dataflow.util.Structs.addString) StreamingDataflowWorkerOptions(org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions) Test(org.junit.Test)

Example 4 with StreamingDataflowWorkerOptions

use of org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions in project beam by apache.

the class StreamingDataflowWorkerTest method createTestingPipelineOptions.

/**
 * Creates the pipeline options used by the streaming worker tests.
 *
 * <p>The supplied arguments are parsed with {@code PipelineOptionsFactory.fromArgs}; when the
 * {@code streamingEngine} field is set, {@code --experiments=enable_streaming_engine} is
 * appended first. The resulting options then get a fixed app name and job id, streaming set to
 * {@code true}, the given server installed as the Windmill server stub, and the active work
 * refresh period set to {@code 0}.
 *
 * @param server the fake Windmill server to install as the server stub
 * @param args additional command-line style pipeline arguments
 * @return the configured options
 */
private StreamingDataflowWorkerOptions createTestingPipelineOptions(FakeWindmillServer server, String... args) {
    List<String> pipelineArgs = Lists.newArrayList(args);
    if (streamingEngine) {
        pipelineArgs.add("--experiments=enable_streaming_engine");
    }

    String[] argv = pipelineArgs.toArray(new String[0]);
    StreamingDataflowWorkerOptions workerOptions =
            PipelineOptionsFactory.fromArgs(argv).as(StreamingDataflowWorkerOptions.class);

    workerOptions.setAppName("StreamingWorkerHarnessTest");
    workerOptions.setJobId("test_job_id");
    workerOptions.setStreaming(true);
    workerOptions.setWindmillServerStub(server);
    workerOptions.setActiveWorkRefreshPeriodMillis(0);
    return workerOptions;
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Structs.addString(org.apache.beam.runners.dataflow.util.Structs.addString) StreamingDataflowWorkerOptions(org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions)

Example 5 with StreamingDataflowWorkerOptions

use of org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions in project beam by apache.

the class StreamingDataflowWorkerTest method testBasic.

/**
 * Runs a source-to-sink pipeline with the fake server initially marked not ready.
 *
 * <p>Before the worker starts, the fake server's ready flag is set to {@code false} and
 * {@code mockWorkUnitClient.getGlobalStreamingConfigWorkItem()} is stubbed to return a work item
 * carrying a streaming config (the default computation config for the pipeline and the Windmill
 * service endpoint {@code "foo"}). The test then offers 2000 work items, waits for as many
 * commits, stops the worker, checks each commit against the expected output, and verifies that
 * {@code hotKeyLogger.logHotKeyDetection} was invoked at least once.
 *
 * @throws Exception if any helper, the fake server, or the worker throws
 */
@Test
public void testBasic() throws Exception {
    final int workItemCount = 2000;

    List<ParallelInstruction> pipeline = Arrays.asList(
            makeSourceInstruction(StringUtf8Coder.of()),
            makeSinkInstruction(StringUtf8Coder.of(), 0));
    FakeWindmillServer windmill = new FakeWindmillServer(errorCollector);
    windmill.setIsReady(false);

    // Global streaming config handed out by the stubbed work-unit client.
    StreamingConfigTask configTask = new StreamingConfigTask();
    configTask.setStreamingComputationConfigs(
            ImmutableList.of(makeDefaultStreamingComputationConfig(pipeline)));
    configTask.setWindmillServiceEndpoint("foo");
    WorkItem configWorkItem = new WorkItem();
    configWorkItem.setStreamingConfigTask(configTask);
    when(mockWorkUnitClient.getGlobalStreamingConfigWorkItem()).thenReturn(Optional.of(configWorkItem));

    StreamingDataflowWorkerOptions workerOptions = createTestingPipelineOptions(windmill);
    StreamingDataflowWorker harness = makeWorker(pipeline, workerOptions, true);
    harness.start();

    // Feed the work items; item idx carries a timestamp of idx milliseconds, in microseconds.
    for (int idx = 0; idx < workItemCount; idx++) {
        long timestampMicros = TimeUnit.MILLISECONDS.toMicros(idx);
        windmill.addWorkToOffer(makeInput(idx, timestampMicros));
    }

    Map<Long, Windmill.WorkItemCommitRequest> commits = windmill.waitForAndGetCommits(workItemCount);
    harness.stop();

    // Every offered token must have produced the expected commit.
    for (int idx = 0; idx < workItemCount; idx++) {
        Long token = (long) idx;
        long timestampMicros = TimeUnit.MILLISECONDS.toMicros(idx);
        assertTrue(commits.containsKey(token));
        assertEquals(makeExpectedOutput(idx, timestampMicros).build(), commits.get(token));
    }

    verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any());
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) WorkItemCommitRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest) AtomicLong(java.util.concurrent.atomic.AtomicLong) DataflowCounterUpdateExtractor.splitIntToLong(org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong) UnsignedLong(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong) StreamingConfigTask(com.google.api.services.dataflow.model.StreamingConfigTask) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Structs.addString(org.apache.beam.runners.dataflow.util.Structs.addString) WorkItem(com.google.api.services.dataflow.model.WorkItem) StreamingDataflowWorkerOptions(org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions) Test(org.junit.Test)

Aggregations

StreamingDataflowWorkerOptions (org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions): 12; Test (org.junit.Test): 10; ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction): 9; AtomicLong (java.util.concurrent.atomic.AtomicLong): 5; Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString): 5; DataflowCounterUpdateExtractor.splitIntToLong (org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong): 5; WorkItemCommitRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest): 5; ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 5; UnsignedLong (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong): 5; StreamingConfigTask (com.google.api.services.dataflow.model.StreamingConfigTask): 3; WorkItem (com.google.api.services.dataflow.model.WorkItem): 3; CounterUpdate (com.google.api.services.dataflow.model.CounterUpdate): 1; AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1; Consumer (java.util.function.Consumer): 1; Windmill (org.apache.beam.runners.dataflow.worker.windmill.Windmill): 1; GetWorkResponse (org.apache.beam.runners.dataflow.worker.windmill.Windmill.GetWorkResponse): 1