Use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.
Class StreamGraphHasherV1, method traverseStreamGraphAndGenerateHashes.
@Override
public Map<Integer, byte[]> traverseStreamGraphAndGenerateHashes(StreamGraph streamGraph) {
    // The hash function used to generate the hash.
    final HashFunction hashFunction = Hashing.murmur3_128(0);
    final Map<Integer, byte[]> hashes = new HashMap<>();

    Set<Integer> visited = new HashSet<>();
    Queue<StreamNode> remaining = new ArrayDeque<>();

    // We need to make the source order deterministic. The source IDs are
    // not returned in the same order, which means that submitting the same
    // program twice might result in a different traversal, which breaks the
    // deterministic hash assignment.
    List<Integer> sources = new ArrayList<>();
    for (Integer sourceNodeId : streamGraph.getSourceIDs()) {
        sources.add(sourceNodeId);
    }
    Collections.sort(sources);

    // Start with the source nodes.
    for (Integer sourceNodeId : sources) {
        remaining.add(streamGraph.getStreamNode(sourceNodeId));
        visited.add(sourceNodeId);
    }

    StreamNode currentNode;
    while ((currentNode = remaining.poll()) != null) {
        // Generate the hash code. Because multiple paths exist to each
        // node, we might not have all required inputs available yet.
        if (generateNodeHash(currentNode, hashFunction, hashes, streamGraph.isChainingEnabled())) {
            // Add the child nodes.
            for (StreamEdge outEdge : currentNode.getOutEdges()) {
                StreamNode child = outEdge.getTargetVertex();
                if (!visited.contains(child.getId())) {
                    remaining.add(child);
                    visited.add(child.getId());
                }
            }
        } else {
            // We will revisit this node later.
            visited.remove(currentNode.getId());
        }
    }
    return hashes;
}
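A minimal sketch (not part of the Flink sources) of how such a hasher is driven: build a program, take its StreamGraph, and ask for one stable hash per node. It assumes StreamGraphHasherV1 is instantiable from the calling code in the Flink version at hand; the class name HasherSketch is ours.

import java.util.Map;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.graph.StreamGraph;
import org.apache.flink.streaming.api.graph.StreamGraphHasherV1;

public class HasherSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.fromElements(1, 2, 3).print();

        // Build the StreamGraph and compute one deterministic hash per node.
        StreamGraph streamGraph = env.getStreamGraph();
        Map<Integer, byte[]> hashes =
                new StreamGraphHasherV1().traverseStreamGraphAndGenerateHashes(streamGraph);

        // The hashes identify operators stably across resubmissions of the
        // same program, which is what makes savepoint state matching work.
        hashes.forEach((id, hash) -> System.out.println(id + " -> " + hash.length + " bytes"));
    }
}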
Use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.
Class StreamGraphHasherV1, method generateDeterministicHash.
/**
 * Generates a deterministic hash from node-local properties and input and
 * output edges.
 */
private byte[] generateDeterministicHash(StreamNode node, Hasher hasher, Map<Integer, byte[]> hashes, boolean isChainingEnabled) {
    // Include the stream node in the hash. We use the current size of the
    // computed hashes as the ID. We cannot use the node's ID, because it is
    // assigned from a static counter. This would result in two identical
    // programs having different hashes.
    generateNodeLocalHash(node, hasher, hashes.size());

    // Include chained nodes in the hash.
    for (StreamEdge outEdge : node.getOutEdges()) {
        if (isChainable(outEdge, isChainingEnabled)) {
            StreamNode chainedNode = outEdge.getTargetVertex();
            // Use the hash size again, because the nodes are chained to
            // this node. This does not add a separate hash for the chained nodes.
            generateNodeLocalHash(chainedNode, hasher, hashes.size());
        }
    }

    byte[] hash = hasher.hash().asBytes();

    // Make sure that all input nodes have their hash set before entering
    // this loop (calling this method).
    for (StreamEdge inEdge : node.getInEdges()) {
        byte[] otherHash = hashes.get(inEdge.getSourceId());

        // Sanity check
        if (otherHash == null) {
            throw new IllegalStateException("Missing hash for input node " + inEdge.getSourceVertex() + ". Cannot generate hash for " + node + ".");
        }

        // Mix the input's hash into this node's hash, byte by byte.
        for (int j = 0; j < hash.length; j++) {
            hash[j] = (byte) (hash[j] * 37 ^ otherHash[j]);
        }
    }

    if (LOG.isDebugEnabled()) {
        String udfClassName = "";
        if (node.getOperator() instanceof AbstractUdfStreamOperator) {
            udfClassName = ((AbstractUdfStreamOperator<?, ?>) node.getOperator()).getUserFunction().getClass().getName();
        }
        LOG.debug("Generated hash '" + byteToHexString(hash) + "' for node '" + node + "' {id: " + node.getId() + ", parallelism: " + node.getParallelism() + ", user function: " + udfClassName + "}");
    }
    return hash;
}
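The interesting step above is the input-folding loop: each input's hash is mixed into the node's own hash byte by byte. As a standalone illustration (combineWithInput is a hypothetical helper extracted for clarity, not part of the Flink code), note that the multiply makes the fold order-sensitive, whereas a plain XOR would be commutative across inputs:

static byte[] combineWithInput(byte[] ownHash, byte[] inputHash) {
    // Same combine step as the loop above, applied to a copy.
    byte[] combined = ownHash.clone();
    for (int j = 0; j < combined.length; j++) {
        // hash * 37 ^ input: the multiplication keeps the result dependent
        // on the order in which input hashes are folded in.
        combined[j] = (byte) (combined[j] * 37 ^ inputHash[j]);
    }
    return combined;
}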
Use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.
Class TwoInputStreamTask, method init.
@Override
public void init() throws Exception {
    StreamConfig configuration = getConfiguration();
    ClassLoader userClassLoader = getUserCodeClassLoader();

    TypeSerializer<IN1> inputDeserializer1 = configuration.getTypeSerializerIn1(userClassLoader);
    TypeSerializer<IN2> inputDeserializer2 = configuration.getTypeSerializerIn2(userClassLoader);

    int numberOfInputs = configuration.getNumberOfInputs();
    ArrayList<InputGate> inputList1 = new ArrayList<>();
    ArrayList<InputGate> inputList2 = new ArrayList<>();

    List<StreamEdge> inEdges = configuration.getInPhysicalEdges(userClassLoader);

    // Sort the input gates into the two logical inputs based on each
    // edge's type number.
    for (int i = 0; i < numberOfInputs; i++) {
        int inputType = inEdges.get(i).getTypeNumber();
        InputGate reader = getEnvironment().getInputGate(i);
        switch (inputType) {
            case 1:
                inputList1.add(reader);
                break;
            case 2:
                inputList2.add(reader);
                break;
            default:
                throw new RuntimeException("Invalid input type number: " + inputType);
        }
    }

    this.inputProcessor = new StreamTwoInputProcessor<>(inputList1, inputList2, inputDeserializer1, inputDeserializer2, this, configuration.getCheckpointMode(), getCheckpointLock(), getEnvironment().getIOManager(), getEnvironment().getTaskManagerInfo().getConfiguration(), getStreamStatusMaintainer(), this.headOperator);

    // Make sure that stream tasks report their I/O statistics.
    inputProcessor.setMetricGroup(getEnvironment().getMetricGroup().getIOMetricGroup());
}
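TwoInputStreamTask is the runtime task behind two-input operators, such as those created by DataStream#connect. A minimal sketch of a user program whose connected map ends up running on such a task (standard DataStream API; the names words and numbers are ours):

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoMapFunction;

public class TwoInputSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<String> words = env.fromElements("a", "b");
        DataStream<Integer> numbers = env.fromElements(1, 2);

        // connect() produces a two-input operator; at runtime its inputs are
        // the type-1 and type-2 gates that init() above sorts into two lists.
        words.connect(numbers)
                .map(new CoMapFunction<String, Integer, String>() {
                    @Override
                    public String map1(String value) {
                        return value;
                    }

                    @Override
                    public String map2(Integer value) {
                        return String.valueOf(value);
                    }
                })
                .print();

        env.execute("two-input sketch");
    }
}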
Use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.
Class ForwardForConsecutiveHashPartitionerTest, method testConvertToForwardPartitioner.
private void testConvertToForwardPartitioner(StreamExchangeMode streamExchangeMode) {
    JobGraph jobGraph = StreamPartitionerTestUtils.createJobGraph(
            "group1",
            "group1",
            new ForwardForConsecutiveHashPartitioner<>(new KeyGroupStreamPartitioner<>(record -> 0L, 100)));
    List<JobVertex> jobVertices = jobGraph.getVerticesSortedTopologicallyFromSources();
    assertThat(jobVertices.size(), is(1));
    JobVertex vertex = jobVertices.get(0);

    StreamConfig sourceConfig = new StreamConfig(vertex.getConfiguration());
    StreamEdge edge = sourceConfig.getChainedOutputs(getClass().getClassLoader()).get(0);
    assertThat(edge.getPartitioner(), instanceOf(ForwardPartitioner.class));
}
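Both nodes here share the group name "group1" (presumably the slot-sharing groups that createJobGraph assigns to source and sink), so the graph collapses into a single chained JobVertex: records never cross the network, and the wrapper can safely be resolved to a plain ForwardPartitioner.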
Use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.
Class ForwardForConsecutiveHashPartitionerTest, method testConvertToHashPartitioner.
private void testConvertToHashPartitioner(StreamExchangeMode streamExchangeMode) {
    JobGraph jobGraph = StreamPartitionerTestUtils.createJobGraph(
            "group1",
            "group2",
            new ForwardForConsecutiveHashPartitioner<>(new KeyGroupStreamPartitioner<>(record -> 0L, 100)));
    List<JobVertex> jobVertices = jobGraph.getVerticesSortedTopologicallyFromSources();
    assertThat(jobVertices.size(), is(2));
    JobVertex sourceVertex = jobVertices.get(0);

    StreamConfig sourceConfig = new StreamConfig(sourceVertex.getConfiguration());
    StreamEdge edge = sourceConfig.getNonChainedOutputs(getClass().getClassLoader()).get(0);
    assertThat(edge.getPartitioner(), instanceOf(KeyGroupStreamPartitioner.class));
}
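With two distinct groups the operators land in separate JobVertices and a forward connection is impossible, so the wrapper resolves to the KeyGroupStreamPartitioner it wraps and records stay hash-partitioned by key across the exchange. Together, the two tests pin down the contract of ForwardForConsecutiveHashPartitioner: forward when the consecutive operators end up chained, hash otherwise.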