Search in sources :

Example 1 with StreamEdge

use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.

the class StreamGraphHasherV1 method traverseStreamGraphAndGenerateHashes.

/**
 * Walks the stream graph breadth-first, starting at its source nodes, and
 * collects the hashes produced by {@code generateNodeHash}.
 *
 * <p>Source IDs are sorted before the walk starts so that the traversal order
 * does not depend on the order in which the graph hands them out; otherwise
 * submitting the same program twice could yield different hash assignments.
 *
 * @param streamGraph the graph whose nodes are hashed
 * @return the map that {@code generateNodeHash} filled during the traversal
 */
@Override
public Map<Integer, byte[]> traverseStreamGraphAndGenerateHashes(StreamGraph streamGraph) {
    // Hash function shared by all nodes.
    final HashFunction hashFunction = Hashing.murmur3_128(0);
    final Map<Integer, byte[]> hashes = new HashMap<>();
    final Set<Integer> enqueuedIds = new HashSet<>();
    final Queue<StreamNode> pending = new ArrayDeque<>();

    // Copy and sort the source IDs to obtain a deterministic starting order.
    final List<Integer> sortedSourceIds = new ArrayList<>();
    for (Integer sourceId : streamGraph.getSourceIDs()) {
        sortedSourceIds.add(sourceId);
    }
    Collections.sort(sortedSourceIds);

    // Seed the work queue with the sources.
    for (Integer sourceId : sortedSourceIds) {
        pending.add(streamGraph.getStreamNode(sourceId));
        enqueuedIds.add(sourceId);
    }

    for (StreamNode node = pending.poll(); node != null; node = pending.poll()) {
        boolean hashed = generateNodeHash(node, hashFunction, hashes, streamGraph.isChainingEnabled());
        if (!hashed) {
            // No hash was generated this time: forget the node so that it can
            // be enqueued again when it is reached later.
            enqueuedIds.remove(node.getId());
            continue;
        }
        // Enqueue every child that is not already scheduled.
        for (StreamEdge outEdge : node.getOutEdges()) {
            StreamNode target = outEdge.getTargetVertex();
            // Set.add returns true only when the ID was not present yet.
            if (enqueuedIds.add(target.getId())) {
                pending.add(target);
            }
        }
    }
    return hashes;
}
Also used : HashFunction(com.google.common.hash.HashFunction) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) StreamNode(org.apache.flink.streaming.api.graph.StreamNode) ArrayDeque(java.util.ArrayDeque) HashSet(java.util.HashSet)

Example 2 with StreamEdge

use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.

the class StreamGraphHasherV1 method generateDeterministicHash.

/**
 * Generates a deterministic hash from node-local properties and input and
 * output edges.
 *
 * @param node the node to hash
 * @param hasher hasher that accumulates the node-local properties
 * @param hashes hashes computed so far; must already contain an entry for
 *     the source of every input edge of {@code node}
 * @param isChainingEnabled passed to {@code isChainable} when deciding which
 *     output targets are folded into this node's hash
 * @return the hash bytes for {@code node}
 * @throws IllegalStateException if the hash of an input node is missing
 */
private byte[] generateDeterministicHash(StreamNode node, Hasher hasher, Map<Integer, byte[]> hashes, boolean isChainingEnabled) {
    // The number of hashes computed so far serves as the ID that goes into
    // the hash. The node's own ID is unsuitable: it is assigned from a static
    // counter, so two identical programs would end up with different hashes.
    final int hashId = hashes.size();
    generateNodeLocalHash(node, hasher, hashId);

    // Fold chainable targets into this node's hash. They reuse the same ID
    // because they are chained to this node; no separate hash is added for them.
    for (StreamEdge outEdge : node.getOutEdges()) {
        if (!isChainable(outEdge, isChainingEnabled)) {
            continue;
        }
        generateNodeLocalHash(outEdge.getTargetVertex(), hasher, hashId);
    }

    final byte[] hash = hasher.hash().asBytes();

    // Mix in the hash of every input node. All input nodes must have their
    // hash set before this loop runs (i.e. before this method is called).
    for (StreamEdge inEdge : node.getInEdges()) {
        final byte[] inputHash = hashes.get(inEdge.getSourceId());
        // Sanity check
        if (inputHash == null) {
            throw new IllegalStateException("Missing hash for input node " + inEdge.getSourceVertex() + ". Cannot generate hash for " + node + ".");
        }
        int index = 0;
        while (index < hash.length) {
            hash[index] = (byte) ((hash[index] * 37) ^ inputHash[index]);
            index++;
        }
    }

    if (!LOG.isDebugEnabled()) {
        return hash;
    }

    final String udfClassName = node.getOperator() instanceof AbstractUdfStreamOperator
            ? ((AbstractUdfStreamOperator<?, ?>) node.getOperator()).getUserFunction().getClass().getName()
            : "";
    LOG.debug("Generated hash '" + byteToHexString(hash) + "' for node " + "'" + node.toString() + "' {id: " + node.getId() + ", " + "parallelism: " + node.getParallelism() + ", " + "user function: " + udfClassName + "}");
    return hash;
}
Also used : AbstractUdfStreamOperator(org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) StreamNode(org.apache.flink.streaming.api.graph.StreamNode) StringUtils.byteToHexString(org.apache.flink.util.StringUtils.byteToHexString)

Example 3 with StreamEdge

use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.

the class TwoInputStreamTask method init.

/**
 * Sets up the two-input processor of this task.
 *
 * <p>Every input gate is assigned to the first or the second logical input
 * according to the type number of the corresponding physical in-edge, and the
 * resulting gate lists are handed to a new {@code StreamTwoInputProcessor}.
 *
 * @throws RuntimeException if an in-edge carries a type number other than 1 or 2
 * @throws Exception declared by the signature; propagated from the calls made here
 */
@Override
public void init() throws Exception {
    final StreamConfig configuration = getConfiguration();
    final ClassLoader userClassLoader = getUserCodeClassLoader();

    final TypeSerializer<IN1> inputDeserializer1 = configuration.getTypeSerializerIn1(userClassLoader);
    final TypeSerializer<IN2> inputDeserializer2 = configuration.getTypeSerializerIn2(userClassLoader);

    final int numberOfInputs = configuration.getNumberOfInputs();
    final ArrayList<InputGate> inputList1 = new ArrayList<>();
    final ArrayList<InputGate> inputList2 = new ArrayList<>();
    final List<StreamEdge> inEdges = configuration.getInPhysicalEdges(userClassLoader);

    // Route gate i to the logical input named by the type number of edge i.
    for (int gateIndex = 0; gateIndex < numberOfInputs; gateIndex++) {
        final int inputType = inEdges.get(gateIndex).getTypeNumber();
        final InputGate reader = getEnvironment().getInputGate(gateIndex);
        if (inputType == 1) {
            inputList1.add(reader);
        } else if (inputType == 2) {
            inputList2.add(reader);
        } else {
            throw new RuntimeException("Invalid input type number: " + inputType);
        }
    }

    this.inputProcessor = new StreamTwoInputProcessor<>(
            inputList1,
            inputList2,
            inputDeserializer1,
            inputDeserializer2,
            this,
            configuration.getCheckpointMode(),
            getCheckpointLock(),
            getEnvironment().getIOManager(),
            getEnvironment().getTaskManagerInfo().getConfiguration(),
            getStreamStatusMaintainer(),
            this.headOperator);

    // make sure that stream tasks report their I/O statistics
    inputProcessor.setMetricGroup(getEnvironment().getMetricGroup().getIOMetricGroup());
}
Also used : ArrayList(java.util.ArrayList) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) InputGate(org.apache.flink.runtime.io.network.partition.consumer.InputGate)

Example 4 with StreamEdge

use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.

the class ForwardForConsecutiveHashPartitionerTest method testConvertToForwardPartitioner.

/**
 * Builds a job graph via {@code StreamPartitionerTestUtils.createJobGraph}
 * with the group name "group1" for both arguments and a
 * {@code ForwardForConsecutiveHashPartitioner} wrapping a key-group
 * partitioner, then checks that the graph has exactly one job vertex and that
 * its first chained output edge uses a {@code ForwardPartitioner}.
 *
 * @param streamExchangeMode not read by this method body
 *     (NOTE(review): confirm whether it was meant to be applied to the graph)
 */
private void testConvertToForwardPartitioner(StreamExchangeMode streamExchangeMode) {
    JobGraph jobGraph = StreamPartitionerTestUtils.createJobGraph("group1", "group1", new ForwardForConsecutiveHashPartitioner<>(new KeyGroupStreamPartitioner<>(record -> 0L, 100)));
    // Sort the vertices once and reuse the list instead of sorting again below.
    List<JobVertex> jobVertices = jobGraph.getVerticesSortedTopologicallyFromSources();
    assertThat(jobVertices.size(), is(1));
    JobVertex vertex = jobVertices.get(0);
    StreamConfig sourceConfig = new StreamConfig(vertex.getConfiguration());
    StreamEdge edge = sourceConfig.getChainedOutputs(getClass().getClassLoader()).get(0);
    assertThat(edge.getPartitioner(), instanceOf(ForwardPartitioner.class));
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge)

Example 5 with StreamEdge

use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.

the class ForwardForConsecutiveHashPartitionerTest method testConvertToHashPartitioner.

/**
 * Builds a job graph via {@code StreamPartitionerTestUtils.createJobGraph}
 * with the distinct group names "group1" and "group2" and a
 * {@code ForwardForConsecutiveHashPartitioner} wrapping a key-group
 * partitioner, then checks that the graph has exactly two job vertices and
 * that the first non-chained output edge of the first vertex uses a
 * {@code KeyGroupStreamPartitioner}.
 *
 * @param streamExchangeMode not read by this method body
 *     (NOTE(review): confirm whether it was meant to be applied to the graph)
 */
private void testConvertToHashPartitioner(StreamExchangeMode streamExchangeMode) {
    JobGraph jobGraph = StreamPartitionerTestUtils.createJobGraph("group1", "group2", new ForwardForConsecutiveHashPartitioner<>(new KeyGroupStreamPartitioner<>(record -> 0L, 100)));
    // Sort the vertices once and reuse the list instead of sorting again below.
    List<JobVertex> jobVertices = jobGraph.getVerticesSortedTopologicallyFromSources();
    assertThat(jobVertices.size(), is(2));
    JobVertex sourceVertex = jobVertices.get(0);
    StreamConfig sourceConfig = new StreamConfig(sourceVertex.getConfiguration());
    StreamEdge edge = sourceConfig.getNonChainedOutputs(getClass().getClassLoader()).get(0);
    assertThat(edge.getPartitioner(), instanceOf(KeyGroupStreamPartitioner.class));
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge)

Aggregations

StreamEdge (org.apache.flink.streaming.api.graph.StreamEdge)27 StreamNode (org.apache.flink.streaming.api.graph.StreamNode)14 StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig)13 ArrayList (java.util.ArrayList)8 LinkedList (java.util.LinkedList)6 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)6 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)6 AbstractStreamOperator (org.apache.flink.streaming.api.operators.AbstractStreamOperator)6 BroadcastPartitioner (org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner)5 Test (org.junit.Test)5 Configuration (org.apache.flink.configuration.Configuration)4 StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator)4 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)3 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)3 StreamGraph (org.apache.flink.streaming.api.graph.StreamGraph)3 OneInputStreamOperator (org.apache.flink.streaming.api.operators.OneInputStreamOperator)3 ForwardPartitioner (org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner)3 RebalancePartitioner (org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner)3 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)3 HashMap (java.util.HashMap)2