
Example 11 with StreamEdge

use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.

the class OneInputStreamTaskTest method testWatermarksNotForwardedWithinChainWhenIdle.

/**
 * This test verifies that watermarks are not forwarded while the task is idle.
 * It also verifies that, while the task is idle, watermarks generated in the middle of the chain
 * are blocked and never forwarded.
 *
 * The tested chain is: (HEAD: normal operator) --> (watermark generating operator) --> (normal operator).
 * The normal operators throw an exception and fail the test if a watermark is forwarded to either
 * of them while the task is idle.
 */
@Test
public void testWatermarksNotForwardedWithinChainWhenIdle() throws Exception {
    final OneInputStreamTask<String, String> testTask = new OneInputStreamTask<>();
    final OneInputStreamTaskTestHarness<String, String> testHarness = new OneInputStreamTaskTestHarness<String, String>(testTask, 1, 1, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    // ------------------ setup the chain ------------------
    TriggerableFailOnWatermarkTestOperator headOperator = new TriggerableFailOnWatermarkTestOperator();
    StreamConfig headOperatorConfig = testHarness.getStreamConfig();
    WatermarkGeneratingTestOperator watermarkOperator = new WatermarkGeneratingTestOperator();
    StreamConfig watermarkOperatorConfig = new StreamConfig(new Configuration());
    TriggerableFailOnWatermarkTestOperator tailOperator = new TriggerableFailOnWatermarkTestOperator();
    StreamConfig tailOperatorConfig = new StreamConfig(new Configuration());
    headOperatorConfig.setStreamOperator(headOperator);
    headOperatorConfig.setChainStart();
    headOperatorConfig.setChainIndex(0);
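    // chained output edge from the head operator (node id 0) to the watermark generator (node id 1),
    // built from placeholder StreamNodes whose other constructor arguments are left null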
    headOperatorConfig.setChainedOutputs(Collections.singletonList(new StreamEdge(new StreamNode(null, 0, null, null, null, null, null), new StreamNode(null, 1, null, null, null, null, null), 0, Collections.<String>emptyList(), null, null)));
    watermarkOperatorConfig.setStreamOperator(watermarkOperator);
    watermarkOperatorConfig.setTypeSerializerIn1(StringSerializer.INSTANCE);
    watermarkOperatorConfig.setChainIndex(1);
    watermarkOperatorConfig.setChainedOutputs(Collections.singletonList(new StreamEdge(new StreamNode(null, 1, null, null, null, null, null), new StreamNode(null, 2, null, null, null, null, null), 0, Collections.<String>emptyList(), null, null)));
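    // the edge that leaves the chain (node id 2 -> node id 3), broadcast-partitioned;
    // registered below as the tail operator's non-chained output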
    List<StreamEdge> outEdgesInOrder = new LinkedList<StreamEdge>();
    outEdgesInOrder.add(new StreamEdge(new StreamNode(null, 2, null, null, null, null, null), new StreamNode(null, 3, null, null, null, null, null), 0, Collections.<String>emptyList(), new BroadcastPartitioner<Object>(), null));
    tailOperatorConfig.setStreamOperator(tailOperator);
    tailOperatorConfig.setTypeSerializerIn1(StringSerializer.INSTANCE);
    tailOperatorConfig.setBufferTimeout(0);
    tailOperatorConfig.setChainIndex(2);
    tailOperatorConfig.setChainEnd();
    tailOperatorConfig.setOutputSelectors(Collections.<OutputSelector<?>>emptyList());
    tailOperatorConfig.setNumberOfOutputs(1);
    tailOperatorConfig.setOutEdgesInOrder(outEdgesInOrder);
    tailOperatorConfig.setNonChainedOutputs(outEdgesInOrder);
    tailOperatorConfig.setTypeSerializerOut(StringSerializer.INSTANCE);
    Map<Integer, StreamConfig> chainedConfigs = new HashMap<>(2);
    chainedConfigs.put(1, watermarkOperatorConfig);
    chainedConfigs.put(2, tailOperatorConfig);
    headOperatorConfig.setTransitiveChainedTaskConfigs(chainedConfigs);
    headOperatorConfig.setOutEdgesInOrder(outEdgesInOrder);
    // -----------------------------------------------------
    // --------------------- begin test ---------------------
    ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<Object>();
    testHarness.invoke();
    testHarness.waitForTaskRunning();
    // the task starts as active, so all generated watermarks should be forwarded
    testHarness.processElement(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.EXPECT_FORWARDED_WATERMARKS_MARKER));
    testHarness.processElement(new StreamRecord<>("10"), 0, 0);
    // this watermark will be forwarded since the task is currently active,
    // but should not be in the final output because it should be blocked by the watermark generator in the chain
    testHarness.processElement(new Watermark(15));
    testHarness.processElement(new StreamRecord<>("20"), 0, 0);
    testHarness.processElement(new StreamRecord<>("30"), 0, 0);
    testHarness.waitForInputProcessing();
    expectedOutput.add(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.EXPECT_FORWARDED_WATERMARKS_MARKER));
    expectedOutput.add(new StreamRecord<>("10"));
    expectedOutput.add(new Watermark(10));
    expectedOutput.add(new StreamRecord<>("20"));
    expectedOutput.add(new Watermark(20));
    expectedOutput.add(new StreamRecord<>("30"));
    expectedOutput.add(new Watermark(30));
    TestHarnessUtil.assertOutputEquals("Output was not correct.", expectedOutput, testHarness.getOutput());
    // now, toggle the task to be idle, and let the watermark generator produce some watermarks
    testHarness.processElement(StreamStatus.IDLE);
    // after this, the operators will throw an exception if they are forwarded watermarks anywhere in the chain
    testHarness.processElement(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.NO_FORWARDED_WATERMARKS_MARKER));
    // NOTE: normally, tasks will not have records to process while idle;
    // we're doing this here only to mimic watermark generating in operators
    testHarness.processElement(new StreamRecord<>("40"), 0, 0);
    testHarness.processElement(new StreamRecord<>("50"), 0, 0);
    testHarness.processElement(new StreamRecord<>("60"), 0, 0);
    // the test will fail if any of the operators were forwarded this
    testHarness.processElement(new Watermark(65));
    testHarness.waitForInputProcessing();
    // the 40 - 60 watermarks should not be forwarded, only the stream status toggle element and records
    expectedOutput.add(StreamStatus.IDLE);
    expectedOutput.add(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.NO_FORWARDED_WATERMARKS_MARKER));
    expectedOutput.add(new StreamRecord<>("40"));
    expectedOutput.add(new StreamRecord<>("50"));
    expectedOutput.add(new StreamRecord<>("60"));
    TestHarnessUtil.assertOutputEquals("Output was not correct.", expectedOutput, testHarness.getOutput());
    // re-toggle the task to be active and see if new watermarks are correctly forwarded again
    testHarness.processElement(StreamStatus.ACTIVE);
    testHarness.processElement(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.EXPECT_FORWARDED_WATERMARKS_MARKER));
    testHarness.processElement(new StreamRecord<>("70"), 0, 0);
    testHarness.processElement(new StreamRecord<>("80"), 0, 0);
    testHarness.processElement(new StreamRecord<>("90"), 0, 0);
    testHarness.waitForInputProcessing();
    expectedOutput.add(StreamStatus.ACTIVE);
    expectedOutput.add(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.EXPECT_FORWARDED_WATERMARKS_MARKER));
    expectedOutput.add(new StreamRecord<>("70"));
    expectedOutput.add(new Watermark(70));
    expectedOutput.add(new StreamRecord<>("80"));
    expectedOutput.add(new Watermark(80));
    expectedOutput.add(new StreamRecord<>("90"));
    expectedOutput.add(new Watermark(90));
    TestHarnessUtil.assertOutputEquals("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.endInput();
    testHarness.waitForTaskCompletion();
    List<String> resultElements = TestHarnessUtil.getRawElementsFromOutput(testHarness.getOutput());
    assertEquals(12, resultElements.size());
}
Also used: Configuration(org.apache.flink.configuration.Configuration), StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig), StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge), BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner), StreamNode(org.apache.flink.streaming.api.graph.StreamNode), ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue), Watermark(org.apache.flink.streaming.api.watermark.Watermark), Test(org.junit.Test)
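
Each StreamEdge in this test is wired up by hand from two placeholder StreamNodes whose only meaningful field is the node id; the remaining constructor arguments are a constant 0, an empty list of selected output names, the partitioner (a BroadcastPartitioner for the edge that leaves the chain, null for the chained edges), and a final argument that is always null here. A small helper that mirrors this construction could look as follows; the helper and its parameter names are illustrative, not part of the Flink test, and the meaning of the constructor arguments is inferred only from the calls above.

import java.util.Collections;

import org.apache.flink.streaming.api.graph.StreamEdge;
import org.apache.flink.streaming.api.graph.StreamNode;
import org.apache.flink.streaming.runtime.partitioner.StreamPartitioner;

// Hypothetical helper (not part of the Flink test above): build an edge between
// two placeholder nodes, mirroring the constructor calls in the test.
class DummyEdges {

    static StreamEdge dummyEdge(int sourceId, int targetId, StreamPartitioner<Object> partitioner) {
        // only the node ids carry information; every other StreamNode argument is left null, as in the test
        StreamNode source = new StreamNode(null, sourceId, null, null, null, null, null);
        StreamNode target = new StreamNode(null, targetId, null, null, null, null, null);
        // constant 0, empty selected names, caller-supplied partitioner (may be null), final argument null
        return new StreamEdge(source, target, 0, Collections.<String>emptyList(), partitioner, null);
    }
}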

Example 12 with StreamEdge

use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.

the class OneInputStreamTaskTest method configureChainedTestingStreamOperator.

//==============================================================================================
// Utility functions and classes
//==============================================================================================
private void configureChainedTestingStreamOperator(StreamConfig streamConfig, int numberChainedTasks, long seed, long recoveryTimestamp) {
    Preconditions.checkArgument(numberChainedTasks >= 1, "The operator chain must contain at least one operator.");
    Random random = new Random(seed);
    TestingStreamOperator<Integer, Integer> previousOperator = new TestingStreamOperator<>(random.nextLong(), recoveryTimestamp);
    streamConfig.setStreamOperator(previousOperator);
    // create the chain of operators
    Map<Integer, StreamConfig> chainedTaskConfigs = new HashMap<>(numberChainedTasks - 1);
    List<StreamEdge> outputEdges = new ArrayList<>(numberChainedTasks - 1);
    for (int chainedIndex = 1; chainedIndex < numberChainedTasks; chainedIndex++) {
        TestingStreamOperator<Integer, Integer> chainedOperator = new TestingStreamOperator<>(random.nextLong(), recoveryTimestamp);
        StreamConfig chainedConfig = new StreamConfig(new Configuration());
        chainedConfig.setStreamOperator(chainedOperator);
        chainedTaskConfigs.put(chainedIndex, chainedConfig);
        StreamEdge outputEdge = new StreamEdge(new StreamNode(null, chainedIndex - 1, null, null, null, null, null), new StreamNode(null, chainedIndex, null, null, null, null, null), 0, Collections.<String>emptyList(), null, null);
        outputEdges.add(outputEdge);
    }
    streamConfig.setChainedOutputs(outputEdges);
    streamConfig.setTransitiveChainedTaskConfigs(chainedTaskConfigs);
}
Also used: Configuration(org.apache.flink.configuration.Configuration), StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig), StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge), StreamNode(org.apache.flink.streaming.api.graph.StreamNode)
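
This helper installs the head operator on the given StreamConfig and registers every further operator as a chained task config keyed by its chain index, with one placeholder StreamEdge per link. The test that drives it is not part of this example; within the same test class it would presumably be called on a harness's StreamConfig, roughly as below (the variable names and numeric values are placeholders, not taken from the Flink sources).

// Hypothetical call site (not shown in this example): chain three testing
// operators on the harness's StreamConfig before invoking the task.
long seed = 42L;              // placeholder seed for the operators' random state
long recoveryTimestamp = 0L;  // placeholder recovery timestamp
configureChainedTestingStreamOperator(testHarness.getStreamConfig(), 3, seed, recoveryTimestamp);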

Example 13 with StreamEdge

use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.

the class IterateITCase method testmultipleHeadsTailsSimple.

@Test
public void testmultipleHeadsTailsSimple() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> source1 = env.fromElements(1, 2, 3, 4, 5).shuffle().map(NoOpIntMap).name("ParallelizeMapShuffle");
    DataStream<Integer> source2 = env.fromElements(1, 2, 3, 4, 5).map(NoOpIntMap).name("ParallelizeMapRebalance");
    IterativeStream<Integer> iter1 = source1.union(source2).iterate();
    DataStream<Integer> head1 = iter1.map(NoOpIntMap).name("IterRebalanceMap").setParallelism(DEFAULT_PARALLELISM / 2);
    DataStream<Integer> head2 = iter1.map(NoOpIntMap).name("IterForwardMap");
    DataStreamSink<Integer> head3 = iter1.map(NoOpIntMap).setParallelism(DEFAULT_PARALLELISM / 2).addSink(new ReceiveCheckNoOpSink<Integer>());
    DataStreamSink<Integer> head4 = iter1.map(NoOpIntMap).addSink(new ReceiveCheckNoOpSink<Integer>());
    SplitStream<Integer> source3 = env.fromElements(1, 2, 3, 4, 5).map(NoOpIntMap).name("EvenOddSourceMap").split(new EvenOddOutputSelector());
    iter1.closeWith(source3.select("even").union(head1.rebalance().map(NoOpIntMap).broadcast(), head2.shuffle()));
    StreamGraph graph = env.getStreamGraph();
    JobGraph jg = graph.getJobGraph();
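    // the iteration should have produced exactly one implicit source/sink pair in the stream graph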
    assertEquals(1, graph.getIterationSourceSinkPairs().size());
    Tuple2<StreamNode, StreamNode> sourceSinkPair = graph.getIterationSourceSinkPairs().iterator().next();
    StreamNode itSource = sourceSinkPair.f0;
    StreamNode itSink = sourceSinkPair.f1;
    assertEquals(4, itSource.getOutEdges().size());
    assertEquals(3, itSink.getInEdges().size());
    assertEquals(itSource.getParallelism(), itSink.getParallelism());
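    // check the partitioners on the iteration source's outgoing edges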
    for (StreamEdge edge : itSource.getOutEdges()) {
        if (edge.getTargetVertex().getOperatorName().equals("IterRebalanceMap")) {
            assertTrue(edge.getPartitioner() instanceof RebalancePartitioner);
        } else if (edge.getTargetVertex().getOperatorName().equals("IterForwardMap")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        }
    }
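    // check the partitioners (and, for the split stream, the selected names) on the iteration sink's incoming edges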
    for (StreamEdge edge : itSink.getInEdges()) {
        if (graph.getStreamNode(edge.getSourceId()).getOperatorName().equals("ParallelizeMapShuffle")) {
            assertTrue(edge.getPartitioner() instanceof ShufflePartitioner);
        }
        if (graph.getStreamNode(edge.getSourceId()).getOperatorName().equals("ParallelizeMapForward")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        }
        if (graph.getStreamNode(edge.getSourceId()).getOperatorName().equals("EvenOddSourceMap")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
            assertTrue(edge.getSelectedNames().contains("even"));
        }
    }
    // Test co-location
    JobVertex itSource1 = null;
    JobVertex itSink1 = null;
    for (JobVertex vertex : jg.getVertices()) {
        if (vertex.getName().contains("IterationSource")) {
            itSource1 = vertex;
        } else if (vertex.getName().contains("IterationSink")) {
            itSink1 = vertex;
        }
    }
    assertTrue(itSource1.getCoLocationGroup() != null);
    assertEquals(itSource1.getCoLocationGroup(), itSink1.getCoLocationGroup());
}
Also used: RebalancePartitioner(org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner), StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge), JobGraph(org.apache.flink.runtime.jobgraph.JobGraph), JobVertex(org.apache.flink.runtime.jobgraph.JobVertex), ShufflePartitioner(org.apache.flink.streaming.runtime.partitioner.ShufflePartitioner), StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph), StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), StreamNode(org.apache.flink.streaming.api.graph.StreamNode), ForwardPartitioner(org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner), EvenOddOutputSelector(org.apache.flink.test.streaming.runtime.util.EvenOddOutputSelector), Test(org.junit.Test)
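
The assertion pattern above, walking a StreamNode's edges and checking each StreamEdge's partitioner, is also handy when debugging how a StreamGraph was wired. Below is a minimal sketch using only the accessors that already appear in the test (getOutEdges, getTargetVertex, getOperatorName, getPartitioner); the helper class itself is illustrative and not part of the Flink code base.

import org.apache.flink.streaming.api.graph.StreamEdge;
import org.apache.flink.streaming.api.graph.StreamNode;

// Hypothetical debugging helper (not part of Flink): print the target operator and
// partitioner of every outgoing edge of a stream graph node.
class EdgeInspector {

    static void printOutEdges(StreamNode node) {
        for (StreamEdge edge : node.getOutEdges()) {
            System.out.println(node.getOperatorName() + " -> "
                    + edge.getTargetVertex().getOperatorName()
                    + " via " + edge.getPartitioner().getClass().getSimpleName());
        }
    }
}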

Aggregations

StreamEdge (org.apache.flink.streaming.api.graph.StreamEdge) 13
StreamNode (org.apache.flink.streaming.api.graph.StreamNode) 8
BroadcastPartitioner (org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner) 5
Test (org.junit.Test) 5
OutputSelector (org.apache.flink.streaming.api.collector.selector.OutputSelector) 4
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) 4
StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig) 4
ArrayList (java.util.ArrayList) 3
StreamGraph (org.apache.flink.streaming.api.graph.StreamGraph) 3
ForwardPartitioner (org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner) 3
RebalancePartitioner (org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner) 3
LinkedList (java.util.LinkedList) 2
FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction) 2
MapFunction (org.apache.flink.api.common.functions.MapFunction) 2
Configuration (org.apache.flink.configuration.Configuration) 2
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph) 2
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex) 2
CoFlatMapFunction (org.apache.flink.streaming.api.functions.co.CoFlatMapFunction) 2
CoMapFunction (org.apache.flink.streaming.api.functions.co.CoMapFunction) 2
AbstractStreamOperator (org.apache.flink.streaming.api.operators.AbstractStreamOperator) 2