use of org.apache.flink.streaming.api.graph.StreamNode in project flink by apache.
the class StreamGraphHasherV1 method generateDeterministicHash.
/**
 * Computes the deterministic hash of a single stream node.
 *
 * <p>The node-local properties of the node itself, followed by those of every node
 * reachable over a chainable out edge, are fed into the hasher. The resulting bytes are
 * then combined, in in-edge order, with the previously computed hash of each input node.
 *
 * @param node the node to compute the hash for
 * @param hasher the hasher that accumulates the node-local properties
 * @param hashes the hashes computed so far; input hashes are looked up by in-edge source ID
 * @param isChainingEnabled forwarded to the chainability check of each out edge
 * @return the hash bytes for {@code node}
 * @throws IllegalStateException if {@code hashes} holds no entry for one of the input nodes
 */
private byte[] generateDeterministicHash(StreamNode node, Hasher hasher, Map<Integer, byte[]> hashes, boolean isChainingEnabled) {
    // The number of hashes computed so far acts as the node's ID. The node's real ID
    // is unsuitable because it is assigned from a static counter, which would give two
    // identical programs different hashes.
    final int deterministicId = hashes.size();
    generateNodeLocalHash(node, hasher, deterministicId);

    // Fold chained nodes into the same hasher. They reuse this node's ID because they
    // are chained to it; no separate hash entry is added for them.
    for (StreamEdge outgoing : node.getOutEdges()) {
        if (!isChainable(outgoing, isChainingEnabled)) {
            continue;
        }
        generateNodeLocalHash(outgoing.getTargetVertex(), hasher, deterministicId);
    }

    final byte[] nodeHash = hasher.hash().asBytes();

    // Every input node must already have its hash in the map before this loop is
    // entered (i.e. before this method is called); the check below enforces that.
    for (StreamEdge incoming : node.getInEdges()) {
        final byte[] inputHash = hashes.get(incoming.getSourceId());
        if (inputHash == null) {
            throw new IllegalStateException("Missing hash for input node " + incoming.getSourceVertex() + ". Cannot generate hash for " + node + ".");
        }
        // Mix the input hash into this node's hash, byte by byte.
        for (int i = 0; i < nodeHash.length; i++) {
            nodeHash[i] = (byte) ((nodeHash[i] * 37) ^ inputHash[i]);
        }
    }

    if (LOG.isDebugEnabled()) {
        // Only UDF operators expose a user function whose class name can be reported.
        final String udfClassName = node.getOperator() instanceof AbstractUdfStreamOperator
                ? ((AbstractUdfStreamOperator<?, ?>) node.getOperator()).getUserFunction().getClass().getName()
                : "";
        LOG.debug("Generated hash '" + byteToHexString(nodeHash) + "' for node '" + node.toString() + "' {id: " + node.getId() + ", parallelism: " + node.getParallelism() + ", user function: " + udfClassName + "}");
    }
    return nodeHash;
}
use of org.apache.flink.streaming.api.graph.StreamNode in project flink by apache.
the class StreamTaskTestHarness method setupOutputForSingletonOperatorChain.
/**
 * Convenience setup of the stream config for tests that exercise exactly one operator.
 * The operator is marked as the start of its chain and is given a single outgoing
 * network connection.
 *
 * <p>Tests that need chains of several operators must configure the stream config
 * manually instead of calling this method.
 */
public void setupOutputForSingletonOperatorChain() {
    streamConfig.setChainStart();
    streamConfig.setBufferTimeout(0);
    streamConfig.setTimeCharacteristic(TimeCharacteristic.EventTime);
    streamConfig.setOutputSelectors(Collections.<OutputSelector<?>>emptyList());
    streamConfig.setNumberOfOutputs(1);
    streamConfig.setTypeSerializerOut(outputSerializer);
    streamConfig.setVertexID(0);

    // A do-nothing operator shared by the two dummy vertices; they exist only so that
    // an output edge can be constructed.
    StreamOperator<OUT> placeholderOperator = new AbstractStreamOperator<OUT>() {
        private static final long serialVersionUID = 1L;
    };
    StreamNode dummySource = new StreamNode(null, 0, "group", placeholderOperator, "source dummy", new LinkedList<OutputSelector<?>>(), SourceStreamTask.class);
    StreamNode dummyTarget = new StreamNode(null, 1, "group", placeholderOperator, "target dummy", new LinkedList<OutputSelector<?>>(), SourceStreamTask.class);
    StreamEdge singleOutEdge = new StreamEdge(dummySource, dummyTarget, 0, new LinkedList<String>(), new BroadcastPartitioner<Object>(), null);

    // The same one-element list serves as both the ordered out edges and the
    // non-chained outputs.
    List<StreamEdge> outEdges = new LinkedList<StreamEdge>();
    outEdges.add(singleOutEdge);
    streamConfig.setOutEdgesInOrder(outEdges);
    streamConfig.setNonChainedOutputs(outEdges);
}
use of org.apache.flink.streaming.api.graph.StreamNode in project flink by apache.
the class OneInputStreamTaskTest method testWatermarksNotForwardedWithinChainWhenIdle.
/**
 * This test verifies that watermarks are not forwarded when the task is idle.
 * It also verifies that when the task is idle, watermarks generated in the middle of chains are also blocked and
 * never forwarded.
 *
 * The tested chain will be: (HEAD: normal operator) --> (watermark generating operator) --> (normal operator).
 * The operators will throw an exception and fail the test if any of them is forwarded watermarks when
 * the task is idle.
 *
 * The test runs in three phases, checking the harness output after each one: active (watermarks expected),
 * idle (no watermarks expected), and active again (watermarks expected again).
 */
@Test
public void testWatermarksNotForwardedWithinChainWhenIdle() throws Exception {
// task and harness both use String as input and output type
final OneInputStreamTask<String, String> testTask = new OneInputStreamTask<>();
final OneInputStreamTaskTestHarness<String, String> testHarness = new OneInputStreamTaskTestHarness<String, String>(testTask, 1, 1, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
// ------------------ setup the chain ------------------
// the head operator reuses the harness's own stream config; the two chained operators get fresh configs
TriggerableFailOnWatermarkTestOperator headOperator = new TriggerableFailOnWatermarkTestOperator();
StreamConfig headOperatorConfig = testHarness.getStreamConfig();
WatermarkGeneratingTestOperator watermarkOperator = new WatermarkGeneratingTestOperator();
StreamConfig watermarkOperatorConfig = new StreamConfig(new Configuration());
TriggerableFailOnWatermarkTestOperator tailOperator = new TriggerableFailOnWatermarkTestOperator();
StreamConfig tailOperatorConfig = new StreamConfig(new Configuration());
// head operator: chain start, chained output edge from node 0 to node 1
headOperatorConfig.setStreamOperator(headOperator);
headOperatorConfig.setChainStart();
headOperatorConfig.setChainIndex(0);
headOperatorConfig.setChainedOutputs(Collections.singletonList(new StreamEdge(new StreamNode(null, 0, null, null, null, null, null), new StreamNode(null, 1, null, null, null, null, null), 0, Collections.<String>emptyList(), null, null)));
// watermark generating operator: middle of the chain, chained output edge from node 1 to node 2
watermarkOperatorConfig.setStreamOperator(watermarkOperator);
watermarkOperatorConfig.setTypeSerializerIn1(StringSerializer.INSTANCE);
watermarkOperatorConfig.setChainIndex(1);
watermarkOperatorConfig.setChainedOutputs(Collections.singletonList(new StreamEdge(new StreamNode(null, 1, null, null, null, null, null), new StreamNode(null, 2, null, null, null, null, null), 0, Collections.<String>emptyList(), null, null)));
// the single non-chained output: an edge from node 2 to node 3 using a broadcast partitioner
List<StreamEdge> outEdgesInOrder = new LinkedList<StreamEdge>();
outEdgesInOrder.add(new StreamEdge(new StreamNode(null, 2, null, null, null, null, null), new StreamNode(null, 3, null, null, null, null, null), 0, Collections.<String>emptyList(), new BroadcastPartitioner<Object>(), null));
// tail operator: chain end, owns the non-chained output edge
tailOperatorConfig.setStreamOperator(tailOperator);
tailOperatorConfig.setTypeSerializerIn1(StringSerializer.INSTANCE);
tailOperatorConfig.setBufferTimeout(0);
tailOperatorConfig.setChainIndex(2);
tailOperatorConfig.setChainEnd();
tailOperatorConfig.setOutputSelectors(Collections.<OutputSelector<?>>emptyList());
tailOperatorConfig.setNumberOfOutputs(1);
tailOperatorConfig.setOutEdgesInOrder(outEdgesInOrder);
tailOperatorConfig.setNonChainedOutputs(outEdgesInOrder);
tailOperatorConfig.setTypeSerializerOut(StringSerializer.INSTANCE);
// register the two chained configs on the head config under keys 1 and 2
// NOTE(review): the keys coincide with both the chain indices and the target node ids of the chained edges
// above; confirm which of the two the key is meant to be
Map<Integer, StreamConfig> chainedConfigs = new HashMap<>(2);
chainedConfigs.put(1, watermarkOperatorConfig);
chainedConfigs.put(2, tailOperatorConfig);
headOperatorConfig.setTransitiveChainedTaskConfigs(chainedConfigs);
headOperatorConfig.setOutEdgesInOrder(outEdgesInOrder);
// -----------------------------------------------------
// --------------------- begin test ---------------------
// accumulates the expected output across all three phases; each assertion compares the full output so far
ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<Object>();
testHarness.invoke();
testHarness.waitForTaskRunning();
// the task starts as active, so all generated watermarks should be forwarded
testHarness.processElement(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.EXPECT_FORWARDED_WATERMARKS_MARKER));
testHarness.processElement(new StreamRecord<>("10"), 0, 0);
// this watermark will be forwarded since the task is currently active,
// but should not be in the final output because it should be blocked by the watermark generator in the chain
testHarness.processElement(new Watermark(15));
testHarness.processElement(new StreamRecord<>("20"), 0, 0);
testHarness.processElement(new StreamRecord<>("30"), 0, 0);
testHarness.waitForInputProcessing();
// each numeric record is expected to be followed by a watermark carrying the same value
// (presumably emitted by WatermarkGeneratingTestOperator from the record's value; verify against that class);
// the injected Watermark(15) is not expected in the output
expectedOutput.add(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.EXPECT_FORWARDED_WATERMARKS_MARKER));
expectedOutput.add(new StreamRecord<>("10"));
expectedOutput.add(new Watermark(10));
expectedOutput.add(new StreamRecord<>("20"));
expectedOutput.add(new Watermark(20));
expectedOutput.add(new StreamRecord<>("30"));
expectedOutput.add(new Watermark(30));
TestHarnessUtil.assertOutputEquals("Output was not correct.", expectedOutput, testHarness.getOutput());
// now, toggle the task to be idle, and let the watermark generator produce some watermarks
testHarness.processElement(StreamStatus.IDLE);
// after this, the operators will throw an exception if they are forwarded watermarks anywhere in the chain
testHarness.processElement(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.NO_FORWARDED_WATERMARKS_MARKER));
// NOTE: normally, tasks will not have records to process while idle;
// we're doing this here only to mimic watermark generating in operators
testHarness.processElement(new StreamRecord<>("40"), 0, 0);
testHarness.processElement(new StreamRecord<>("50"), 0, 0);
testHarness.processElement(new StreamRecord<>("60"), 0, 0);
// the test will fail if any of the operators were forwarded this
testHarness.processElement(new Watermark(65));
testHarness.waitForInputProcessing();
// the 40 - 60 watermarks should not be forwarded, only the stream status toggle element and records
expectedOutput.add(StreamStatus.IDLE);
expectedOutput.add(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.NO_FORWARDED_WATERMARKS_MARKER));
expectedOutput.add(new StreamRecord<>("40"));
expectedOutput.add(new StreamRecord<>("50"));
expectedOutput.add(new StreamRecord<>("60"));
TestHarnessUtil.assertOutputEquals("Output was not correct.", expectedOutput, testHarness.getOutput());
// re-toggle the task to be active and see if new watermarks are correctly forwarded again
testHarness.processElement(StreamStatus.ACTIVE);
testHarness.processElement(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.EXPECT_FORWARDED_WATERMARKS_MARKER));
testHarness.processElement(new StreamRecord<>("70"), 0, 0);
testHarness.processElement(new StreamRecord<>("80"), 0, 0);
testHarness.processElement(new StreamRecord<>("90"), 0, 0);
testHarness.waitForInputProcessing();
expectedOutput.add(StreamStatus.ACTIVE);
expectedOutput.add(new StreamRecord<>(TriggerableFailOnWatermarkTestOperator.EXPECT_FORWARDED_WATERMARKS_MARKER));
expectedOutput.add(new StreamRecord<>("70"));
expectedOutput.add(new Watermark(70));
expectedOutput.add(new StreamRecord<>("80"));
expectedOutput.add(new Watermark(80));
expectedOutput.add(new StreamRecord<>("90"));
expectedOutput.add(new Watermark(90));
TestHarnessUtil.assertOutputEquals("Output was not correct.", expectedOutput, testHarness.getOutput());
testHarness.endInput();
testHarness.waitForTaskCompletion();
// 12 = 3 marker records + 9 numeric records ("10" through "90") sent above
// (presumably getRawElementsFromOutput counts only records, not watermarks or stream status elements;
// verify against TestHarnessUtil)
List<String> resultElements = TestHarnessUtil.getRawElementsFromOutput(testHarness.getOutput());
assertEquals(12, resultElements.size());
}
use of org.apache.flink.streaming.api.graph.StreamNode in project flink by apache.
the class OneInputStreamTaskTest method configureChainedTestingStreamOperator.
//==============================================================================================
// Utility functions and classes
//==============================================================================================
/**
 * Configures {@code streamConfig} as the head of a chain of {@code numberChainedTasks}
 * {@link TestingStreamOperator}s.
 *
 * <p>The head operator is set directly on {@code streamConfig}. Every further operator
 * gets its own fresh {@link StreamConfig}, registered under its position in the chain,
 * plus an edge from its predecessor's position to its own.
 *
 * @param streamConfig the config of the chain's head operator; modified in place
 * @param numberChainedTasks total number of operators in the chain, head included; a value
 *     below 1 is rejected by {@code Preconditions.checkArgument}
 * @param seed seed of the random generator that supplies each operator's first constructor argument
 * @param recoveryTimestamp passed unchanged to every operator
 */
private void configureChainedTestingStreamOperator(StreamConfig streamConfig, int numberChainedTasks, long seed, long recoveryTimestamp) {
    Preconditions.checkArgument(numberChainedTasks >= 1, "The operator chain must at least contain one operator.");

    // One draw per operator, head first, so a given seed always yields the same sequence.
    final Random operatorSeeds = new Random(seed);
    streamConfig.setStreamOperator(new TestingStreamOperator<Integer, Integer>(operatorSeeds.nextLong(), recoveryTimestamp));

    // Everything after the head: one config and one incoming edge per operator.
    final int followerCount = numberChainedTasks - 1;
    final Map<Integer, StreamConfig> followerConfigs = new HashMap<>(followerCount);
    final List<StreamEdge> chainEdges = new ArrayList<>(followerCount);

    for (int sourceIndex = 0; sourceIndex < followerCount; sourceIndex++) {
        final int targetIndex = sourceIndex + 1;

        final StreamConfig followerConfig = new StreamConfig(new Configuration());
        followerConfig.setStreamOperator(new TestingStreamOperator<Integer, Integer>(operatorSeeds.nextLong(), recoveryTimestamp));
        followerConfigs.put(targetIndex, followerConfig);

        final StreamNode edgeSource = new StreamNode(null, sourceIndex, null, null, null, null, null);
        final StreamNode edgeTarget = new StreamNode(null, targetIndex, null, null, null, null, null);
        chainEdges.add(new StreamEdge(edgeSource, edgeTarget, 0, Collections.<String>emptyList(), null, null));
    }

    streamConfig.setChainedOutputs(chainEdges);
    streamConfig.setTransitiveChainedTaskConfigs(followerConfigs);
}
use of org.apache.flink.streaming.api.graph.StreamNode in project flink by apache.
the class IterateITCase method testmultipleHeadsTailsSimple.
/**
 * Builds an iteration with several heads and several feedback inputs and checks the
 * resulting stream graph and job graph.
 *
 * <p>The iteration input is the union of two mapped sources. Four map operators consume
 * the iteration (two of them terminate in sinks), and the iteration is closed with the
 * union of three streams. The test then asserts:
 * <ul>
 *   <li>there is exactly one iteration source/sink pair with 4 out edges and 3 in edges
 *       and equal parallelism on both ends,</li>
 *   <li>the partitioners on selected edges have the expected types,</li>
 *   <li>the iteration source and sink job vertices share a non-null co-location group.</li>
 * </ul>
 */
@Test
public void testmultipleHeadsTailsSimple() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> source1 = env.fromElements(1, 2, 3, 4, 5).shuffle().map(NoOpIntMap).name("ParallelizeMapShuffle");
    DataStream<Integer> source2 = env.fromElements(1, 2, 3, 4, 5).map(NoOpIntMap).name("ParallelizeMapRebalance");
    IterativeStream<Integer> iter1 = source1.union(source2).iterate();

    DataStream<Integer> head1 = iter1.map(NoOpIntMap).name("IterRebalanceMap").setParallelism(DEFAULT_PARALLELISM / 2);
    DataStream<Integer> head2 = iter1.map(NoOpIntMap).name("IterForwardMap");
    // These two heads are only added to the graph; their sinks are never referenced
    // again, so the results are not kept in local variables.
    iter1.map(NoOpIntMap).setParallelism(DEFAULT_PARALLELISM / 2).addSink(new ReceiveCheckNoOpSink<Integer>());
    iter1.map(NoOpIntMap).addSink(new ReceiveCheckNoOpSink<Integer>());

    SplitStream<Integer> source3 = env.fromElements(1, 2, 3, 4, 5).map(NoOpIntMap).name("EvenOddSourceMap").split(new EvenOddOutputSelector());
    iter1.closeWith(source3.select("even").union(head1.rebalance().map(NoOpIntMap).broadcast(), head2.shuffle()));

    StreamGraph graph = env.getStreamGraph();
    JobGraph jg = graph.getJobGraph();

    assertEquals(1, graph.getIterationSourceSinkPairs().size());
    Tuple2<StreamNode, StreamNode> sourceSinkPair = graph.getIterationSourceSinkPairs().iterator().next();
    StreamNode itSource = sourceSinkPair.f0;
    StreamNode itSink = sourceSinkPair.f1;

    // Four heads consume the iteration; three streams feed back into it.
    assertEquals(4, itSource.getOutEdges().size());
    assertEquals(3, itSink.getInEdges().size());
    assertEquals(itSource.getParallelism(), itSink.getParallelism());

    // Only the two named heads are checked; the two heads ending in sinks carry no
    // explicit name and are skipped.
    for (StreamEdge edge : itSource.getOutEdges()) {
        String targetName = edge.getTargetVertex().getOperatorName();
        if (targetName.equals("IterRebalanceMap")) {
            assertTrue(edge.getPartitioner() instanceof RebalancePartitioner);
        } else if (targetName.equals("IterForwardMap")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        }
    }

    for (StreamEdge edge : itSink.getInEdges()) {
        String sourceName = graph.getStreamNode(edge.getSourceId()).getOperatorName();
        if (sourceName.equals("ParallelizeMapShuffle")) {
            assertTrue(edge.getPartitioner() instanceof ShufflePartitioner);
        }
        // NOTE(review): no operator in this test is named "ParallelizeMapForward"
        // (source2 is named "ParallelizeMapRebalance"), so this branch looks unreachable
        // here. Confirm whether the name or the check is stale.
        if (sourceName.equals("ParallelizeMapForward")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        }
        if (sourceName.equals("EvenOddSourceMap")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
            assertTrue(edge.getSelectedNames().contains("even"));
        }
    }

    // Test co-location
    JobVertex itSource1 = null;
    JobVertex itSink1 = null;
    for (JobVertex vertex : jg.getVertices()) {
        if (vertex.getName().contains("IterationSource")) {
            itSource1 = vertex;
        } else if (vertex.getName().contains("IterationSink")) {
            itSink1 = vertex;
        }
    }

    // Fail with a readable assertion rather than a NullPointerException when one of
    // the iteration vertices is missing from the job graph.
    assertTrue("No IterationSource vertex found in the job graph.", itSource1 != null);
    assertTrue("No IterationSink vertex found in the job graph.", itSink1 != null);
    assertTrue(itSource1.getCoLocationGroup() != null);
    assertEquals(itSource1.getCoLocationGroup(), itSink1.getCoLocationGroup());
}
Aggregations