Example 11 with ParallelInstruction

Use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

From class RemoveFlattenInstructionsFunctionTest, method testRemoveFlattenOnMultiOutputInstruction.

@Test
public void testRemoveFlattenOnMultiOutputInstruction() {
    Node a = ParallelInstructionNode.create(new ParallelInstruction().setName("A"), Nodes.ExecutionLocation.UNKNOWN);
    Node aOut1PCollection = InstructionOutputNode.create(new InstructionOutput().setName("A.out1"), PCOLLECTION_ID);
    Node aOut2PCollection = InstructionOutputNode.create(new InstructionOutput().setName("A.out2"), PCOLLECTION_ID);
    Node aOut3PCollection = InstructionOutputNode.create(new InstructionOutput().setName("A.out3"), PCOLLECTION_ID);
    Edge aOut1 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out1"));
    Edge aOut2 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out2"));
    Edge aOut3 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out3"));
    Edge aOut1PCollectionEdge = DefaultEdge.create();
    Node b = ParallelInstructionNode.create(new ParallelInstruction().setName("B"), Nodes.ExecutionLocation.UNKNOWN);
    Node bOut1PCollection = InstructionOutputNode.create(new InstructionOutput().setName("B.out1"), PCOLLECTION_ID);
    Node bOut2PCollection = InstructionOutputNode.create(new InstructionOutput().setName("B.out2"), PCOLLECTION_ID);
    Edge bOut1 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out1"));
    Edge bOut2 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out2"));
    Edge bOut1PCollectionEdge = DefaultEdge.create();
    Node flatten = ParallelInstructionNode.create(new ParallelInstruction().setName("Flatten").setFlatten(new FlattenInstruction()), Nodes.ExecutionLocation.UNKNOWN);
    Node flattenPCollection = InstructionOutputNode.create(new InstructionOutput().setName("Flatten.out"), PCOLLECTION_ID);
    Node c = ParallelInstructionNode.create(new ParallelInstruction().setName("C"), Nodes.ExecutionLocation.UNKNOWN);
    Edge cOutput = DefaultEdge.create();
    Node cPCollection = InstructionOutputNode.create(new InstructionOutput().setName("C.out"), PCOLLECTION_ID);
    Node d = ParallelInstructionNode.create(new ParallelInstruction().setName("D"), Nodes.ExecutionLocation.UNKNOWN);
    Edge dOutput = DefaultEdge.create();
    Node dPCollection = InstructionOutputNode.create(new InstructionOutput().setName("D.out"), PCOLLECTION_ID);
    Node e = ParallelInstructionNode.create(new ParallelInstruction().setName("E"), Nodes.ExecutionLocation.UNKNOWN);
    Edge eOutput = DefaultEdge.create();
    Node ePCollection = InstructionOutputNode.create(new InstructionOutput().setName("E.out"), PCOLLECTION_ID);
    //   /-out1-> C
    // A -out2-\
    //   \-out3--> Flatten --> D
    // B -out2-/
    //   \-out1-> E
    MutableNetwork<Node, Edge> network = createEmptyNetwork();
    network.addNode(a);
    network.addNode(aOut1PCollection);
    network.addNode(aOut2PCollection);
    network.addNode(aOut3PCollection);
    network.addNode(b);
    network.addNode(bOut1PCollection);
    network.addNode(bOut2PCollection);
    network.addNode(flatten);
    network.addNode(flattenPCollection);
    network.addNode(c);
    network.addNode(cPCollection);
    network.addNode(d);
    network.addNode(dPCollection);
    network.addNode(e);
    network.addNode(ePCollection);
    network.addEdge(a, aOut1PCollection, aOut1);
    network.addEdge(a, aOut2PCollection, aOut2);
    network.addEdge(a, aOut3PCollection, aOut3);
    network.addEdge(aOut1PCollection, c, aOut1PCollectionEdge);
    network.addEdge(aOut2PCollection, flatten, DefaultEdge.create());
    network.addEdge(aOut3PCollection, flatten, DefaultEdge.create());
    network.addEdge(b, bOut1PCollection, bOut1);
    network.addEdge(b, bOut2PCollection, bOut2);
    network.addEdge(bOut1PCollection, e, bOut1PCollectionEdge);
    network.addEdge(bOut2PCollection, flatten, DefaultEdge.create());
    network.addEdge(flatten, flattenPCollection, DefaultEdge.create());
    network.addEdge(flattenPCollection, d, DefaultEdge.create());
    network.addEdge(c, cPCollection, cOutput);
    network.addEdge(d, dPCollection, dOutput);
    network.addEdge(e, ePCollection, eOutput);
    //   /-out1-> C
    // A -out2-\
    //   \-out3--> D
    // B -out2-/
    //   \-out1-> E
    assertThatFlattenIsProperlyRemoved(network);
}
Also used: ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction), MultiOutputInfo (com.google.api.services.dataflow.model.MultiOutputInfo), Node (org.apache.beam.runners.dataflow.worker.graph.Nodes.Node), InstructionOutputNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode), ParallelInstructionNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode), InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput), Edge (org.apache.beam.runners.dataflow.worker.graph.Edges.Edge), MultiOutputInfoEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge), DefaultEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge), FlattenInstruction (com.google.api.services.dataflow.model.FlattenInstruction), Test (org.junit.Test)
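
For orientation, here is a minimal sketch of what a helper like assertThatFlattenIsProperlyRemoved could verify. The real helper is defined elsewhere in the Beam test class, so the apply(...) call and the assertions below are assumptions based on RemoveFlattenInstructionsFunction's role of splicing Flatten nodes out of the graph:

private static void assertThatFlattenIsProperlyRemoved(MutableNetwork<Node, Edge> network) {
    // Apply the function under test; it is expected to rewire each Flatten's
    // predecessors directly to its successors and drop the Flatten node itself.
    MutableNetwork<Node, Edge> result = new RemoveFlattenInstructionsFunction().apply(network);
    for (Node node : result.nodes()) {
        if (node instanceof ParallelInstructionNode) {
            // No surviving instruction should carry a FlattenInstruction payload.
            assertNull(((ParallelInstructionNode) node).getParallelInstruction().getFlatten());
        }
    }
}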

Example 12 with ParallelInstruction

Use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

From class StreamingDataflowWorkerTest, method testIgnoreRetriedKeys.

@Test
public void testIgnoreRetriedKeys() throws Exception {
    final int numIters = 4;
    List<ParallelInstruction> instructions = Arrays.asList(makeSourceInstruction(StringUtf8Coder.of()), makeDoFnInstruction(blockingFn, 0, StringUtf8Coder.of()), makeSinkInstruction(StringUtf8Coder.of(), 0));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
    StreamingDataflowWorker worker = makeWorker(instructions, options, true);
    worker.start();
    for (int i = 0; i < numIters; ++i) {
        server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY));
        // Also add work for a different shard of the same key.
        server.addWorkToOffer(makeInput(i + 1000, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY + 1));
    }
    // Wait for keys to schedule.  They will be blocked.
    BlockingFn.counter.acquire(numIters * 2);
    // Re-add the work; it should be ignored because the keys are still active.
    for (int i = 0; i < numIters; ++i) {
        // Same work token.
        server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i)));
        server.addWorkToOffer(makeInput(i + 1000, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY + 1));
    }
    // Give all added calls a chance to run.
    server.waitForEmptyWorkQueue();
    for (int i = 0; i < numIters; ++i) {
        // Different work token, same keys.
        server.addWorkToOffer(makeInput(i + numIters, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY));
    }
    // Give all added calls a chance to run.
    server.waitForEmptyWorkQueue();
    // Release the blocked calls.
    BlockingFn.blocker.countDown();
    // Verify the output
    Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(numIters * 3);
    for (int i = 0; i < numIters; ++i) {
        assertTrue(result.containsKey((long) i));
        assertEquals(makeExpectedOutput(i, TimeUnit.MILLISECONDS.toMicros(i)).build(), result.get((long) i));
        assertTrue(result.containsKey((long) i + 1000));
        assertEquals(makeExpectedOutput(i + 1000, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY + 1, keyStringForIndex(i)).build(), result.get((long) i + 1000));
        assertTrue(result.containsKey((long) i + numIters));
        assertEquals(makeExpectedOutput(i + numIters, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY, keyStringForIndex(i)).build(), result.get((long) i + numIters));
    }
    // Re-add the work; it should be processed now that the keys are no longer active.
    for (int i = 0; i < numIters; ++i) {
        server.addWorkToOffer(makeInput(i + numIters * 2, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY));
    }
    result = server.waitForAndGetCommits(numIters);
    worker.stop();
    for (int i = 0; i < numIters; ++i) {
        assertTrue(result.containsKey((long) i + numIters * 2));
        assertEquals(makeExpectedOutput(i + numIters * 2, TimeUnit.MILLISECONDS.toMicros(i), keyStringForIndex(i), DEFAULT_SHARDING_KEY, keyStringForIndex(i)).build(), result.get((long) i + numIters * 2));
    }
}
Also used: ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction), WorkItemCommitRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest), AtomicLong (java.util.concurrent.atomic.AtomicLong), DataflowCounterUpdateExtractor.splitIntToLong (org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong), UnsignedLong (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong), StreamingDataflowWorkerOptions (org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions), Test (org.junit.Test)
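
The test coordinates with the pipeline through BlockingFn's static counter and blocker. The class is defined elsewhere in StreamingDataflowWorkerTest; a minimal sketch of the shape needed to support counter.acquire(...) and blocker.countDown() might look like the following (the field types and the processElement body are assumptions; imports of java.util.concurrent.Semaphore, java.util.concurrent.CountDownLatch, and org.apache.beam.sdk.transforms.DoFn are implied):

static class BlockingFn extends DoFn<String, String> {
    // Released once per element so the test can wait until all work is scheduled.
    public static Semaphore counter = new Semaphore(0);
    // Held closed until the test calls countDown(), keeping every key "active".
    public static CountDownLatch blocker = new CountDownLatch(1);

    @ProcessElement
    public void processElement(ProcessContext c) throws InterruptedException {
        counter.release();
        blocker.await();
        c.output(c.element());
    }
}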

Example 13 with ParallelInstruction

Use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

From class StreamingDataflowWorkerTest, method testBasicHarness.

@Test
public void testBasicHarness() throws Exception {
    List<ParallelInstruction> instructions = Arrays.asList(makeSourceInstruction(StringUtf8Coder.of()), makeSinkInstruction(StringUtf8Coder.of(), 0));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
    StreamingDataflowWorker worker = makeWorker(instructions, options, true);
    worker.start();
    final int numIters = 2000;
    for (int i = 0; i < numIters; ++i) {
        server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i)));
    }
    Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(numIters);
    worker.stop();
    for (int i = 0; i < numIters; ++i) {
        assertTrue(result.containsKey((long) i));
        assertEquals(makeExpectedOutput(i, TimeUnit.MILLISECONDS.toMicros(i)).build(), result.get((long) i));
    }
    verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any());
}
Also used: ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction), WorkItemCommitRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest), AtomicLong (java.util.concurrent.atomic.AtomicLong), DataflowCounterUpdateExtractor.splitIntToLong (org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong), UnsignedLong (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong), ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString), Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString), StreamingDataflowWorkerOptions (org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions), Test (org.junit.Test)
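
Helpers such as makeSourceInstruction and makeSinkInstruction merely assemble ParallelInstruction model objects; their definitions are not shown on this page. As a hedged illustration of the model API, a source instruction could be built roughly as follows (the system name, output name, source spec class name, and window coder choice are all illustrative assumptions):

static ParallelInstruction makeSourceInstruction(Coder<?> coder) {
    return new ParallelInstruction()
        // "source" is an illustrative system name, not necessarily the test's constant.
        .setSystemName("source")
        .setRead(new ReadInstruction().setSource(
            new Source().setSpec(CloudObject.forClassName("WindmillSource"))))
        .setOutputs(Arrays.asList(
            new InstructionOutput()
                .setName("source_output")
                .setCodec(CloudObjects.asCloudObject(
                    WindowedValue.getFullCoder(coder, IntervalWindow.getCoder()),
                    /* sdkComponents= */ null))));
}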

Example 14 with ParallelInstruction

Use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

From class StreamingDataflowWorkerTest, method testExceptions.

@Test(timeout = 30000)
public void testExceptions() throws Exception {
    if (streamingEngine) {
        // TODO: This test needs to be adapted to work with streamingEngine=true.
        return;
    }
    List<ParallelInstruction> instructions = Arrays.asList(makeSourceInstruction(StringUtf8Coder.of()), makeDoFnInstruction(new TestExceptionFn(), 0, StringUtf8Coder.of()), makeSinkInstruction(StringUtf8Coder.of(), 1));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    server.setExpectedExceptionCount(1);
    String keyString = keyStringForIndex(0);
    server.addWorkToOffer(
        buildInput(
            "work {"
                + "  computation_id: \"" + DEFAULT_COMPUTATION_ID + "\""
                + "  input_data_watermark: 0"
                + "  work {"
                + "    key: \"" + keyString + "\""
                + "    sharding_key: 1"
                + "    work_token: 0"
                + "    cache_token: 1"
                + "    message_bundles {"
                + "      source_computation_id: \"" + DEFAULT_SOURCE_COMPUTATION_ID + "\""
                + "      messages {"
                + "        timestamp: 0"
                + "        data: \"0\""
                + "      }"
                + "    }"
                + "  }"
                + "}",
            CoderUtils.encodeToByteArray(
                CollectionCoder.of(IntervalWindow.getCoder()), Arrays.asList(DEFAULT_WINDOW))));
    StreamingDataflowWorker worker = makeWorker(instructions, createTestingPipelineOptions(server), true);
    worker.start();
    server.waitForEmptyWorkQueue();
    // Wait until the worker has given up.
    int maxTries = 10;
    while (maxTries-- > 0 && !worker.workExecutorIsEmpty()) {
        Uninterruptibles.sleepUninterruptibly(1000, TimeUnit.MILLISECONDS);
    }
    assertTrue(worker.workExecutorIsEmpty());
    // Spam worker updates a few times.
    maxTries = 10;
    while (maxTries-- > 0) {
        worker.reportPeriodicWorkerUpdates();
        Uninterruptibles.sleepUninterruptibly(1000, TimeUnit.MILLISECONDS);
    }
    // We should see our update only one time with the exceptions we are expecting.
    ArgumentCaptor<WorkItemStatus> workItemStatusCaptor = ArgumentCaptor.forClass(WorkItemStatus.class);
    verify(mockWorkUnitClient, atLeast(1)).reportWorkItemStatus(workItemStatusCaptor.capture());
    List<WorkItemStatus> capturedStatuses = workItemStatusCaptor.getAllValues();
    boolean foundErrors = false;
    int lastUpdateWithoutErrors = 0;
    int lastUpdateWithErrors = 0;
    for (WorkItemStatus status : capturedStatuses) {
        if (status.getErrors().isEmpty()) {
            lastUpdateWithoutErrors++;
            continue;
        }
        lastUpdateWithErrors++;
        assertFalse(foundErrors);
        foundErrors = true;
        String stacktrace = status.getErrors().get(0).getMessage();
        assertThat(stacktrace, Matchers.containsString("Exception!"));
        assertThat(stacktrace, Matchers.containsString("Another exception!"));
        assertThat(stacktrace, Matchers.containsString("processElement"));
    }
    assertTrue(foundErrors);
    // The last update we see should not have any errors. This indicates we've retried the work item.
    assertTrue(lastUpdateWithoutErrors > lastUpdateWithErrors);
    // Confirm we've received the expected stats. There is no guarantee stats will only be reported
    // once.
    assertThat(server.getStatsReceived().size(), Matchers.greaterThanOrEqualTo(1));
    Windmill.ReportStatsRequest stats = server.getStatsReceived().get(0);
    assertEquals(DEFAULT_COMPUTATION_ID, stats.getComputationId());
    assertEquals(keyString, stats.getKey().toStringUtf8());
    assertEquals(0, stats.getWorkToken());
    assertEquals(1, stats.getShardingKey());
}
Also used: ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString), Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString), ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction), WorkItemStatus (com.google.api.services.dataflow.model.WorkItemStatus), Windmill (org.apache.beam.runners.dataflow.worker.windmill.Windmill), Test (org.junit.Test)
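
The assertions above expect the reported stack trace to contain "Exception!", "Another exception!", and "processElement", which constrains what TestExceptionFn must do. Its definition is not shown here; a minimal sketch consistent with those assertions (the exact wrapping is an assumption) would be:

static class TestExceptionFn extends DoFn<String, String> {
    @ProcessElement
    public void processElement(ProcessContext c) throws Exception {
        try {
            // The inner failure...
            throw new Exception("Exception!");
        } catch (Exception e) {
            // ...is wrapped so that both messages show up in the reported stack trace.
            throw new Exception("Another exception!", e);
        }
    }
}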

Example 15 with ParallelInstruction

Use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

From class StreamingDataflowWorkerTest, method testBasic.

@Test
public void testBasic() throws Exception {
    List<ParallelInstruction> instructions = Arrays.asList(makeSourceInstruction(StringUtf8Coder.of()), makeSinkInstruction(StringUtf8Coder.of(), 0));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    server.setIsReady(false);
    StreamingConfigTask streamingConfig = new StreamingConfigTask();
    streamingConfig.setStreamingComputationConfigs(ImmutableList.of(makeDefaultStreamingComputationConfig(instructions)));
    streamingConfig.setWindmillServiceEndpoint("foo");
    WorkItem workItem = new WorkItem();
    workItem.setStreamingConfigTask(streamingConfig);
    when(mockWorkUnitClient.getGlobalStreamingConfigWorkItem()).thenReturn(Optional.of(workItem));
    StreamingDataflowWorkerOptions options = createTestingPipelineOptions(server);
    StreamingDataflowWorker worker = makeWorker(instructions, options, true);
    worker.start();
    final int numIters = 2000;
    for (int i = 0; i < numIters; ++i) {
        server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i)));
    }
    Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(numIters);
    worker.stop();
    for (int i = 0; i < numIters; ++i) {
        assertTrue(result.containsKey((long) i));
        assertEquals(makeExpectedOutput(i, TimeUnit.MILLISECONDS.toMicros(i)).build(), result.get((long) i));
    }
    verify(hotKeyLogger, atLeastOnce()).logHotKeyDetection(nullable(String.class), any());
}
Also used: ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction), WorkItemCommitRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest), AtomicLong (java.util.concurrent.atomic.AtomicLong), DataflowCounterUpdateExtractor.splitIntToLong (org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong), UnsignedLong (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong), StreamingConfigTask (com.google.api.services.dataflow.model.StreamingConfigTask), ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString), Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString), WorkItem (com.google.api.services.dataflow.model.WorkItem), StreamingDataflowWorkerOptions (org.apache.beam.runners.dataflow.worker.options.StreamingDataflowWorkerOptions), Test (org.junit.Test)
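
testBasic differs from testBasicHarness mainly in routing the pipeline configuration through a StreamingConfigTask fetched from the work unit client. A plausible sketch of makeDefaultStreamingComputationConfig, which packages the instruction list into that config, might look like this (the system and stage names are illustrative assumptions; DEFAULT_COMPUTATION_ID reuses the test's constant):

static StreamingComputationConfig makeDefaultStreamingComputationConfig(
        List<ParallelInstruction> instructions) {
    StreamingComputationConfig config = new StreamingComputationConfig();
    config.setComputationId(DEFAULT_COMPUTATION_ID);
    // Illustrative names; the real helper may use different constants.
    config.setSystemName("computation");
    config.setStageName("computation");
    config.setInstructions(instructions);
    return config;
}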

Aggregations

ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction): 73
Test (org.junit.Test): 39
InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput): 27
ParallelInstructionNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode): 26
CloudObject (org.apache.beam.runners.dataflow.util.CloudObject): 24
Node (org.apache.beam.runners.dataflow.worker.graph.Nodes.Node): 22
InstructionOutputNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode): 21
Edge (org.apache.beam.runners.dataflow.worker.graph.Edges.Edge): 20
ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction): 18
ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction): 17
DefaultEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge): 17
MultiOutputInfoEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge): 16
Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString): 12
ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 12
InstructionInput (com.google.api.services.dataflow.model.InstructionInput): 11
MapTask (com.google.api.services.dataflow.model.MapTask): 11
AtomicLong (java.util.concurrent.atomic.AtomicLong): 11
DataflowCounterUpdateExtractor.splitIntToLong (org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong): 11
WorkItemCommitRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest): 11
UnsignedLong (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong): 11