Search in sources :

Example 6 with StreamNode

use of org.apache.flink.streaming.api.graph.StreamNode in project flink by apache.

the class StreamGraphHasherV1 method generateDeterministicHash.

/**
 * Generates a deterministic hash for a stream node from its node-local
 * properties, the node-local properties of the targets of its chainable
 * out-edges, and the already computed hashes of its input nodes.
 *
 * @param node the node to compute the hash for
 * @param hasher the hasher that receives the node-local properties
 * @param hashes hashes computed so far, keyed by node id; every input of
 *        {@code node} must already have an entry
 * @param isChainingEnabled forwarded to {@code isChainable} when deciding
 *        whether an out-edge target is hashed together with this node
 * @return the hash bytes for {@code node}
 * @throws IllegalStateException if the hash of an input node is missing
 */
private byte[] generateDeterministicHash(StreamNode node, Hasher hasher, Map<Integer, byte[]> hashes, boolean isChainingEnabled) {
    // The number of hashes computed so far serves as the node's identifier.
    // The node's own ID is unsuitable: it comes from a static counter, so two
    // identical programs could end up with different hashes.
    final int positionalId = hashes.size();
    generateNodeLocalHash(node, hasher, positionalId);

    // Chained targets are folded into this node's hash under the same
    // identifier; no separate hash entry is produced for them here.
    for (StreamEdge outgoing : node.getOutEdges()) {
        if (!isChainable(outgoing, isChainingEnabled)) {
            continue;
        }
        generateNodeLocalHash(outgoing.getTargetVertex(), hasher, positionalId);
    }

    final byte[] result = hasher.hash().asBytes();

    // Mix in the hash of every input node. The caller has to make sure that
    // all input nodes have their hash set before calling this method.
    for (StreamEdge incoming : node.getInEdges()) {
        final byte[] inputHash = hashes.get(incoming.getSourceId());
        if (inputHash == null) {
            // Sanity check: inputs must have been hashed first.
            throw new IllegalStateException("Missing hash for input node " + incoming.getSourceVertex() + ". Cannot generate hash for " + node + ".");
        }
        for (int i = 0; i < result.length; i++) {
            result[i] = (byte) ((result[i] * 37) ^ inputHash[i]);
        }
    }

    if (LOG.isDebugEnabled()) {
        // Only UDF-based operators expose a user function class to report.
        final Object operator = node.getOperator();
        final String udfClassName = (operator instanceof AbstractUdfStreamOperator)
                ? ((AbstractUdfStreamOperator<?, ?>) operator).getUserFunction().getClass().getName()
                : "";
        LOG.debug("Generated hash '" + byteToHexString(result) + "' for node '" + node.toString() + "' {id: " + node.getId() + ", parallelism: " + node.getParallelism() + ", user function: " + udfClassName + "}");
    }
    return result;
}
Also used : AbstractUdfStreamOperator(org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) StreamNode(org.apache.flink.streaming.api.graph.StreamNode) StringUtils.byteToHexString(org.apache.flink.util.StringUtils.byteToHexString)

Example 7 with StreamNode

use of org.apache.flink.streaming.api.graph.StreamNode in project flink by apache.

the class StreamTaskTestHarness method setupOutputForSingletonOperatorChain.

/**
 * Convenience setup of the stream config for tests that exercise exactly one
 * operator. Besides the basic chain-start settings, it registers a single
 * outgoing, broadcast-partitioned edge between two dummy nodes as the
 * operator's network output.
 *
 * <p>Tests that need chains of multiple operators should configure the stream
 * config manually instead of calling this method.
 */
public void setupOutputForSingletonOperatorChain() {
    // Basic settings for a chain consisting of a single head operator.
    streamConfig.setChainStart();
    streamConfig.setBufferTimeout(0);
    streamConfig.setTimeCharacteristic(TimeCharacteristic.EventTime);
    streamConfig.setOutputSelectors(Collections.<OutputSelector<?>>emptyList());
    streamConfig.setNumberOfOutputs(1);
    streamConfig.setTypeSerializerOut(outputSerializer);
    streamConfig.setVertexID(0);

    // Placeholder operator shared by both dummy nodes of the output edge.
    StreamOperator<OUT> placeholderOperator = new AbstractStreamOperator<OUT>() {

        private static final long serialVersionUID = 1L;
    };

    // Two dummy nodes (ids 0 and 1) that merely serve as edge endpoints.
    StreamNode edgeSource = new StreamNode(null, 0, "group", placeholderOperator, "source dummy", new LinkedList<OutputSelector<?>>(), SourceStreamTask.class);
    StreamNode edgeTarget = new StreamNode(null, 1, "group", placeholderOperator, "target dummy", new LinkedList<OutputSelector<?>>(), SourceStreamTask.class);
    StreamEdge networkEdge = new StreamEdge(edgeSource, edgeTarget, 0, new LinkedList<String>(), new BroadcastPartitioner<Object>(), null);

    // The one edge is both the complete ordered output list and the only
    // non-chained output.
    List<StreamEdge> singleOutput = new LinkedList<StreamEdge>();
    singleOutput.add(networkEdge);
    streamConfig.setOutEdgesInOrder(singleOutput);
    streamConfig.setNonChainedOutputs(singleOutput);
}
Also used : OutputSelector(org.apache.flink.streaming.api.collector.selector.OutputSelector) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) StreamNode(org.apache.flink.streaming.api.graph.StreamNode) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) LinkedList(java.util.LinkedList) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner)

Example 8 with StreamNode

use of org.apache.flink.streaming.api.graph.StreamNode in project flink by apache.

the class OneInputStreamTaskTest method testWatermarksNotForwardedWithinChainWhenIdle.

/**
 * This test verifies that watermarks are not forwarded when the task is idle.
 * It also verifies that when the task is idle, watermarks generated in the middle of chains are also
 * blocked and never forwarded.
 *
 * <p>The tested chain will be: (HEAD: normal operator) --> (watermark generating operator) --> (normal operator).
 * The operators will throw an exception and fail the test if either of them were forwarded watermarks when
 * the task is idle.
 *
 * <p>The test runs in three phases (active, idle, active again) and compares the accumulated
 * harness output against the expected queue after each phase.
 */
@Test
public void testWatermarksNotForwardedWithinChainWhenIdle() throws Exception {
    final OneInputStreamTask<String, String> testTask = new OneInputStreamTask<>();
    final OneInputStreamTaskTestHarness<String, String> testHarness = new OneInputStreamTaskTestHarness<String, String>(testTask, 1, 1, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    // ------------------ setup the chain ------------------
    // The head operator uses the harness' own stream config; the two chained
    // operators each get a fresh config that is registered with the head below.
    TriggerableFailOnWatermarkTestOperator headOperator = new TriggerableFailOnWatermarkTestOperator();
    StreamConfig headOperatorConfig = testHarness.getStreamConfig();
    WatermarkGeneratingTestOperator watermarkOperator = new WatermarkGeneratingTestOperator();
    StreamConfig watermarkOperatorConfig = new StreamConfig(new Configuration());
    TriggerableFailOnWatermarkTestOperator tailOperator = new TriggerableFailOnWatermarkTestOperator();
    StreamConfig tailOperatorConfig = new StreamConfig(new Configuration());
    // head (chain index 0): chained edge from node 0 to node 1; the StreamNode
    // instances carry only an id, every other constructor argument is null
    headOperatorConfig.setStreamOperator(headOperator);
    headOperatorConfig.setChainStart();
    headOperatorConfig.setChainIndex(0);
    headOperatorConfig.setChainedOutputs(Collections.singletonList(new StreamEdge(new StreamNode(null, 0, null, null, null, null, null), new StreamNode(null, 1, null, null, null, null, null), 0, Collections.<String>emptyList(), null, null)));
    // watermark generator (chain index 1): chained edge from node 1 to node 2
    watermarkOperatorConfig.setStreamOperator(watermarkOperator);
    watermarkOperatorConfig.setTypeSerializerIn1(StringSerializer.INSTANCE);
    watermarkOperatorConfig.setChainIndex(1);
    watermarkOperatorConfig.setChainedOutputs(Collections.singletonList(new StreamEdge(new StreamNode(null, 1, null, null, null, null, null), new StreamNode(null, 2, null, null, null, null, null), 0, Collections.<String>emptyList(), null, null)));
    // the only non-chained output: a broadcast edge from node 2 to node 3
    List<StreamEdge> outEdgesInOrder = new LinkedList<StreamEdge>();
    outEdgesInOrder.add(new StreamEdge(new StreamNode(null, 2, null, null, null, null, null), new StreamNode(null, 3, null, null, null, null, null), 0, Collections.<String>emptyList(), new BroadcastPartitioner<Object>(), null));
    // tail (chain index 2): ends the chain and owns the single network output
    tailOperatorConfig.setStreamOperator(tailOperator);
    tailOperatorConfig.setTypeSerializerIn1(StringSerializer.INSTANCE);
    tailOperatorConfig.setBufferTimeout(0);
    tailOperatorConfig.setChainIndex(2);
    tailOperatorConfig.setChainEnd();
    tailOperatorConfig.setOutputSelectors(Collections.<OutputSelector<?>>emptyList());
    tailOperatorConfig.setNumberOfOutputs(1);
    tailOperatorConfig.setOutEdgesInOrder(outEdgesInOrder);
    tailOperatorConfig.setNonChainedOutputs(outEdgesInOrder);
    tailOperatorConfig.setTypeSerializerOut(StringSerializer.INSTANCE);
    // register the configs of the non-head operators with the head config,
    // keyed by the target node ids used in the chained edges above (1 and 2)
    Map<Integer, StreamConfig> chainedConfigs = new HashMap<>(2);
    chainedConfigs.put(1, watermarkOperatorConfig);
    chainedConfigs.put(2, tailOperatorConfig);
    headOperatorConfig.setTransitiveChainedTaskConfigs(chainedConfigs);
    headOperatorConfig.setOutEdgesInOrder(outEdgesInOrder);
    // -----------------------------------------------------
    // --------------------- begin test ---------------------
    // expectedOutput accumulates across all three phases; each assertOutputEquals
    // call below compares it with the complete harness output so far
    ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<Object>();
    testHarness.invoke();
    testHarness.waitForTaskRunning();
    // the task starts as active, so all generated watermarks should be forwarded
    testHarness.processElement(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.EXPECT_FORWARDED_WATERMARKS_MARKER));
    testHarness.processElement(new StreamRecord<>("10"), 0, 0);
    // this watermark will be forwarded since the task is currently active,
    // but should not be in the final output because it should be blocked by the watermark generator in the chain
    testHarness.processElement(new Watermark(15));
    testHarness.processElement(new StreamRecord<>("20"), 0, 0);
    testHarness.processElement(new StreamRecord<>("30"), 0, 0);
    testHarness.waitForInputProcessing();
    // expected: every numeric record is followed by a watermark with the same value
    // (presumably emitted by WatermarkGeneratingTestOperator - confirm against that class)
    expectedOutput.add(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.EXPECT_FORWARDED_WATERMARKS_MARKER));
    expectedOutput.add(new StreamRecord<>("10"));
    expectedOutput.add(new Watermark(10));
    expectedOutput.add(new StreamRecord<>("20"));
    expectedOutput.add(new Watermark(20));
    expectedOutput.add(new StreamRecord<>("30"));
    expectedOutput.add(new Watermark(30));
    TestHarnessUtil.assertOutputEquals("Output was not correct.", expectedOutput, testHarness.getOutput());
    // now, toggle the task to be idle, and let the watermark generator produce some watermarks
    testHarness.processElement(StreamStatus.IDLE);
    // after this, the operators will throw an exception if they are forwarded watermarks anywhere in the chain
    testHarness.processElement(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.NO_FORWARDED_WATERMARKS_MARKER));
    // NOTE: normally, tasks will not have records to process while idle;
    // we're doing this here only to mimic watermark generating in operators
    testHarness.processElement(new StreamRecord<>("40"), 0, 0);
    testHarness.processElement(new StreamRecord<>("50"), 0, 0);
    testHarness.processElement(new StreamRecord<>("60"), 0, 0);
    // the test will fail if any of the operators were forwarded this
    testHarness.processElement(new Watermark(65));
    testHarness.waitForInputProcessing();
    // the 40 - 60 watermarks should not be forwarded, only the stream status toggle element and records
    expectedOutput.add(StreamStatus.IDLE);
    expectedOutput.add(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.NO_FORWARDED_WATERMARKS_MARKER));
    expectedOutput.add(new StreamRecord<>("40"));
    expectedOutput.add(new StreamRecord<>("50"));
    expectedOutput.add(new StreamRecord<>("60"));
    TestHarnessUtil.assertOutputEquals("Output was not correct.", expectedOutput, testHarness.getOutput());
    // re-toggle the task to be active and see if new watermarks are correctly forwarded again
    testHarness.processElement(StreamStatus.ACTIVE);
    testHarness.processElement(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.EXPECT_FORWARDED_WATERMARKS_MARKER));
    testHarness.processElement(new StreamRecord<>("70"), 0, 0);
    testHarness.processElement(new StreamRecord<>("80"), 0, 0);
    testHarness.processElement(new StreamRecord<>("90"), 0, 0);
    testHarness.waitForInputProcessing();
    expectedOutput.add(StreamStatus.ACTIVE);
    expectedOutput.add(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.EXPECT_FORWARDED_WATERMARKS_MARKER));
    expectedOutput.add(new StreamRecord<>("70"));
    expectedOutput.add(new Watermark(70));
    expectedOutput.add(new StreamRecord<>("80"));
    expectedOutput.add(new Watermark(80));
    expectedOutput.add(new StreamRecord<>("90"));
    expectedOutput.add(new Watermark(90));
    TestHarnessUtil.assertOutputEquals("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.endInput();
    testHarness.waitForTaskCompletion();
    // 12 matches the number of StreamRecords sent above (3 marker records + 9 numeric records);
    // presumably getRawElementsFromOutput skips watermarks and stream status elements - TODO confirm
    List<String> resultElements = TestHarnessUtil.getRawElementsFromOutput(testHarness.getOutput());
    assertEquals(12, resultElements.size());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner) StreamNode(org.apache.flink.streaming.api.graph.StreamNode) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)

Example 9 with StreamNode

use of org.apache.flink.streaming.api.graph.StreamNode in project flink by apache.

the class OneInputStreamTaskTest method configureChainedTestingStreamOperator.

//==============================================================================================
// Utility functions and classes
//==============================================================================================
/**
 * Configures {@code streamConfig} with a chain of {@code numberChainedTasks}
 * {@code TestingStreamOperator} instances: one head operator set directly on
 * {@code streamConfig} plus {@code numberChainedTasks - 1} chained operators,
 * each with its own {@link StreamConfig} and a chained edge from its predecessor.
 *
 * @param streamConfig the config of the chain head, which also receives the chained outputs and configs
 * @param numberChainedTasks total number of operators in the chain; must be at least 1
 * @param seed seed of the random generator that supplies one long value per operator
 * @param recoveryTimestamp timestamp handed to every created operator
 */
private void configureChainedTestingStreamOperator(StreamConfig streamConfig, int numberChainedTasks, long seed, long recoveryTimestamp) {
    Preconditions.checkArgument(numberChainedTasks >= 1, "The operator chain must at least contain one operator.");

    final Random operatorSeeds = new Random(seed);
    final int followerCount = numberChainedTasks - 1;

    // Head of the chain: draws the first value from the seeded generator.
    streamConfig.setStreamOperator(new TestingStreamOperator<Integer, Integer>(operatorSeeds.nextLong(), recoveryTimestamp));

    // Followers: positions 1..followerCount, each drawing the next value.
    final Map<Integer, StreamConfig> configsByPosition = new HashMap<>(followerCount);
    final List<StreamEdge> chainEdges = new ArrayList<>(followerCount);
    for (int position = 1; position <= followerCount; position++) {
        final StreamConfig followerConfig = new StreamConfig(new Configuration());
        followerConfig.setStreamOperator(new TestingStreamOperator<Integer, Integer>(operatorSeeds.nextLong(), recoveryTimestamp));
        configsByPosition.put(position, followerConfig);

        // Edge endpoints are bare nodes that only carry their position as id.
        final StreamNode predecessor = new StreamNode(null, position - 1, null, null, null, null, null);
        final StreamNode successor = new StreamNode(null, position, null, null, null, null, null);
        chainEdges.add(new StreamEdge(predecessor, successor, 0, Collections.<String>emptyList(), null, null));
    }

    streamConfig.setChainedOutputs(chainEdges);
    streamConfig.setTransitiveChainedTaskConfigs(configsByPosition);
}
Also used : Configuration(org.apache.flink.configuration.Configuration) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) StreamNode(org.apache.flink.streaming.api.graph.StreamNode)

Example 10 with StreamNode

use of org.apache.flink.streaming.api.graph.StreamNode in project flink by apache.

the class IterateITCase method testmultipleHeadsTailsSimple.

/**
 * Builds a single iteration with four heads (two maps that feed back plus two
 * maps ending in sinks) and three feedback inputs, then checks the generated
 * stream graph: one iteration source/sink pair, the edge counts and
 * parallelism of that pair, the partitioners on selected edges, and that the
 * iteration source and sink job vertices share a co-location group.
 */
@Test
public void testmultipleHeadsTailsSimple() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> source1 = env.fromElements(1, 2, 3, 4, 5).shuffle().map(NoOpIntMap).name("ParallelizeMapShuffle");
    DataStream<Integer> source2 = env.fromElements(1, 2, 3, 4, 5).map(NoOpIntMap).name("ParallelizeMapRebalance");
    IterativeStream<Integer> iter1 = source1.union(source2).iterate();
    DataStream<Integer> head1 = iter1.map(NoOpIntMap).name("IterRebalanceMap").setParallelism(DEFAULT_PARALLELISM / 2);
    DataStream<Integer> head2 = iter1.map(NoOpIntMap).name("IterForwardMap");
    // Two further heads that end in sinks. Their results are not needed
    // afterwards; the calls only add the operators to the graph.
    iter1.map(NoOpIntMap).setParallelism(DEFAULT_PARALLELISM / 2).addSink(new ReceiveCheckNoOpSink<Integer>());
    iter1.map(NoOpIntMap).addSink(new ReceiveCheckNoOpSink<Integer>());
    SplitStream<Integer> source3 = env.fromElements(1, 2, 3, 4, 5).map(NoOpIntMap).name("EvenOddSourceMap").split(new EvenOddOutputSelector());
    // Three feedback inputs: the "even" output of source3, head1 (rebalanced,
    // mapped, broadcast) and head2 (shuffled).
    iter1.closeWith(source3.select("even").union(head1.rebalance().map(NoOpIntMap).broadcast(), head2.shuffle()));
    StreamGraph graph = env.getStreamGraph();
    JobGraph jg = graph.getJobGraph();
    assertEquals(1, graph.getIterationSourceSinkPairs().size());
    Tuple2<StreamNode, StreamNode> sourceSinkPair = graph.getIterationSourceSinkPairs().iterator().next();
    StreamNode itSource = sourceSinkPair.f0;
    StreamNode itSink = sourceSinkPair.f1;
    // Four heads read from the iteration source, three streams feed the sink.
    assertEquals(4, itSource.getOutEdges().size());
    assertEquals(3, itSink.getInEdges().size());
    assertEquals(itSource.getParallelism(), itSink.getParallelism());
    for (StreamEdge edge : itSource.getOutEdges()) {
        if (edge.getTargetVertex().getOperatorName().equals("IterRebalanceMap")) {
            assertTrue(edge.getPartitioner() instanceof RebalancePartitioner);
        } else if (edge.getTargetVertex().getOperatorName().equals("IterForwardMap")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        }
    }
    for (StreamEdge edge : itSink.getInEdges()) {
        if (graph.getStreamNode(edge.getSourceId()).getOperatorName().equals("ParallelizeMapShuffle")) {
            assertTrue(edge.getPartitioner() instanceof ShufflePartitioner);
        }
        // NOTE(review): no operator in this test is explicitly named
        // "ParallelizeMapForward" (source2 is named "ParallelizeMapRebalance"),
        // so this check appears never to run - confirm the intended name.
        if (graph.getStreamNode(edge.getSourceId()).getOperatorName().equals("ParallelizeMapForward")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        }
        if (graph.getStreamNode(edge.getSourceId()).getOperatorName().equals("EvenOddSourceMap")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
            assertTrue(edge.getSelectedNames().contains("even"));
        }
    }
    // Test co-location
    JobVertex itSource1 = null;
    JobVertex itSink1 = null;
    for (JobVertex vertex : jg.getVertices()) {
        if (vertex.getName().contains("IterationSource")) {
            itSource1 = vertex;
        } else if (vertex.getName().contains("IterationSink")) {
            itSink1 = vertex;
        }
    }
    // Fail with a clear message instead of a NullPointerException when one of
    // the iteration vertices is missing from the job graph.
    assertTrue("No IterationSource vertex found in the job graph.", itSource1 != null);
    assertTrue("No IterationSink vertex found in the job graph.", itSink1 != null);
    assertTrue(itSource1.getCoLocationGroup() != null);
    assertEquals(itSource1.getCoLocationGroup(), itSink1.getCoLocationGroup());
}
Also used : RebalancePartitioner(org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ShufflePartitioner(org.apache.flink.streaming.runtime.partitioner.ShufflePartitioner) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamNode(org.apache.flink.streaming.api.graph.StreamNode) ForwardPartitioner(org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner) EvenOddOutputSelector(org.apache.flink.test.streaming.runtime.util.EvenOddOutputSelector) Test(org.junit.Test)

Aggregations

StreamNode (org.apache.flink.streaming.api.graph.StreamNode)10 StreamEdge (org.apache.flink.streaming.api.graph.StreamEdge)8 BroadcastPartitioner (org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner)4 Test (org.junit.Test)4 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)3 StreamGraph (org.apache.flink.streaming.api.graph.StreamGraph)3 ForwardPartitioner (org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner)3 LinkedList (java.util.LinkedList)2 Configuration (org.apache.flink.configuration.Configuration)2 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)2 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)2 OutputSelector (org.apache.flink.streaming.api.collector.selector.OutputSelector)2 StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig)2 AbstractStreamOperator (org.apache.flink.streaming.api.operators.AbstractStreamOperator)2 RebalancePartitioner (org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner)2 ShufflePartitioner (org.apache.flink.streaming.runtime.partitioner.ShufflePartitioner)2 EvenOddOutputSelector (org.apache.flink.test.streaming.runtime.util.EvenOddOutputSelector)2 HashFunction (com.google.common.hash.HashFunction)1 ArrayDeque (java.util.ArrayDeque)1 ArrayList (java.util.ArrayList)1