Search in sources :

Example 86 with JobGraph

use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.

the class RescalingITCase method testSavepointRescalingKeyedState.

/**
	 * Tests that a job with purely keyed state can be restarted from a savepoint
	 * with a different parallelism.
	 *
	 * @param scaleOut if {@code true}, the restored job runs with higher parallelism
	 *                 than the original; if {@code false}, with lower parallelism
	 * @param deriveMaxParallelism if {@code true}, the restored job does not set an
	 *                 explicit max parallelism and must derive it from the savepoint
	 */
public void testSavepointRescalingKeyedState(boolean scaleOut, boolean deriveMaxParallelism) throws Exception {
    final int numberKeys = 42;
    final int numberElements = 1000;
    final int numberElements2 = 500;
    // first run uses half the slots when scaling out (all slots when scaling in);
    // the second run uses the complementary parallelism
    final int parallelism = scaleOut ? numSlots / 2 : numSlots;
    final int parallelism2 = scaleOut ? numSlots : numSlots / 2;
    final int maxParallelism = 13;
    FiniteDuration timeout = new FiniteDuration(3, TimeUnit.MINUTES);
    Deadline deadline = timeout.fromNow();
    ActorGateway jobManager = null;
    JobID jobID = null;
    try {
        jobManager = cluster.getLeaderGateway(deadline.timeLeft());
        JobGraph jobGraph = createJobGraphWithKeyedState(parallelism, maxParallelism, numberKeys, numberElements, false, 100);
        jobID = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // wait until the sources have emitted numberElements for each key and completed a checkpoint
        SubtaskIndexFlatMapper.workCompletedLatch.await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        // verify the current state: each key's sum should land on the operator
        // subtask that owns the key's key group
        Set<Tuple2<Integer, Integer>> actualResult = CollectionSink.getElementsSet();
        Set<Tuple2<Integer, Integer>> expectedResult = new HashSet<>();
        for (int key = 0; key < numberKeys; key++) {
            int keyGroupIndex = KeyGroupRangeAssignment.assignToKeyGroup(key, maxParallelism);
            expectedResult.add(Tuple2.of(KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, parallelism, keyGroupIndex), numberElements * key));
        }
        assertEquals(expectedResult, actualResult);
        // clear the CollectionSink set for the restarted job
        CollectionSink.clearElementsSet();
        // take a savepoint, then cancel the job and wait for its full removal
        // before resubmitting with a different parallelism
        Future<Object> savepointPathFuture = jobManager.ask(new JobManagerMessages.TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
        final String savepointPath = ((JobManagerMessages.TriggerSavepointSuccess) Await.result(savepointPathFuture, deadline.timeLeft())).savepointPath();
        Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), deadline.timeLeft());
        Future<Object> cancellationResponseFuture = jobManager.ask(new JobManagerMessages.CancelJob(jobID), deadline.timeLeft());
        Object cancellationResponse = Await.result(cancellationResponseFuture, deadline.timeLeft());
        assertTrue(cancellationResponse instanceof JobManagerMessages.CancellationSuccess);
        Await.ready(jobRemovedFuture, deadline.timeLeft());
        // job is gone; null out so the finally block does not wait for it again
        jobID = null;
        int restoreMaxParallelism = deriveMaxParallelism ? ExecutionJobVertex.VALUE_NOT_SET : maxParallelism;
        JobGraph scaledJobGraph = createJobGraphWithKeyedState(parallelism2, restoreMaxParallelism, numberKeys, numberElements2, true, 100);
        scaledJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        jobID = scaledJobGraph.getJobID();
        cluster.submitJobAndWait(scaledJobGraph, false);
        jobID = null;
        // after restore the sums include both runs: key * (numberElements + numberElements2),
        // assigned according to the new parallelism
        Set<Tuple2<Integer, Integer>> actualResult2 = CollectionSink.getElementsSet();
        Set<Tuple2<Integer, Integer>> expectedResult2 = new HashSet<>();
        for (int key = 0; key < numberKeys; key++) {
            int keyGroupIndex = KeyGroupRangeAssignment.assignToKeyGroup(key, maxParallelism);
            expectedResult2.add(Tuple2.of(KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, parallelism2, keyGroupIndex), key * (numberElements + numberElements2)));
        }
        assertEquals(expectedResult2, actualResult2);
    } finally {
        // clear the CollectionSink set for the restarted job
        CollectionSink.clearElementsSet();
        // clear any left overs from a possibly failed job
        if (jobID != null && jobManager != null) {
            Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), timeout);
            try {
                Await.ready(jobRemovedFuture, timeout);
            } catch (InterruptedException ie) {
                // restore the interrupt flag so callers can observe the interruption
                Thread.currentThread().interrupt();
                fail("Failed while cleaning up the cluster.");
            } catch (TimeoutException te) {
                fail("Failed while cleaning up the cluster.");
            }
        }
    }
}
Also used : Deadline(scala.concurrent.duration.Deadline) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) JobID(org.apache.flink.api.common.JobID) HashSet(java.util.HashSet) TimeoutException(java.util.concurrent.TimeoutException)

Example 87 with JobGraph

use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.

the class TestStreamEnvironment method execute.

@Override
public JobExecutionResult execute(String jobName) throws Exception {
    // Translate the fluent program into a stream graph, attach the given name,
    // and run the resulting job graph synchronously on the test executor.
    StreamGraph graph = getStreamGraph();
    graph.setJobName(jobName);
    return executor.submitJobAndWait(graph.getJobGraph(), false);
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph)

Example 88 with JobGraph

use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.

the class MiniClusterITCase method executeJob.

// ------------------------------------------------------------------------
//  Utilities
// ------------------------------------------------------------------------
private static void executeJob(MiniCluster miniCluster) throws Exception {
    // Build the trivial test job and run it to completion on the given cluster.
    miniCluster.runJobBlocking(getSimpleJob());
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph)

Example 89 with JobGraph

use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.

the class StreamOperatorChainingTest method testMultiChaining.

/**
	 * Verify that multi-chaining works: one head map operator feeding two
	 * chained map-then-sink branches, all running inside a single task.
	 */
private void testMultiChaining(StreamExecutionEnvironment env) throws Exception {
    // the actual elements will not be used
    DataStream<Integer> input = env.fromElements(1, 2, 3);
    sink1Results = new ArrayList<>();
    sink2Results = new ArrayList<>();
    // identity map becomes the head operator of the chain
    input = input.map(new MapFunction<Integer, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Integer map(Integer value) throws Exception {
            return value;
        }
    });
    // first branch: map to "First: <value>" and collect in sink1Results
    input.map(new MapFunction<Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map(Integer value) throws Exception {
            return "First: " + value;
        }
    }).addSink(new SinkFunction<String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void invoke(String value) throws Exception {
            sink1Results.add(value);
        }
    });
    // second branch: map to "Second: <value>" and collect in sink2Results
    input.map(new MapFunction<Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map(Integer value) throws Exception {
            return "Second: " + value;
        }
    }).addSink(new SinkFunction<String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void invoke(String value) throws Exception {
            sink2Results.add(value);
        }
    });
    // we build our own StreamTask and OperatorChain
    JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    // exactly two vertices expected: the source, then the chained map/branches;
    // assertEquals reports the actual size on failure, unlike assertTrue(size == 2)
    Assert.assertEquals(2, jobGraph.getVerticesSortedTopologicallyFromSources().size());
    JobVertex chainedVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
    Configuration configuration = chainedVertex.getConfiguration();
    StreamConfig streamConfig = new StreamConfig(configuration);
    StreamMap<Integer, Integer> headOperator = streamConfig.getStreamOperator(Thread.currentThread().getContextClassLoader());
    StreamTask<Integer, StreamMap<Integer, Integer>> mockTask = createMockTask(streamConfig, chainedVertex.getName());
    OperatorChain<Integer, StreamMap<Integer, Integer>> operatorChain = new OperatorChain<>(mockTask);
    headOperator.setup(mockTask, streamConfig, operatorChain.getChainEntryPoint());
    // open every operator in the chain before pushing elements through it
    for (StreamOperator<?> operator : operatorChain.getAllOperators()) {
        if (operator != null) {
            operator.open();
        }
    }
    headOperator.processElement(new StreamRecord<>(1));
    headOperator.processElement(new StreamRecord<>(2));
    headOperator.processElement(new StreamRecord<>(3));
    // both branches must have seen every element, in order
    assertThat(sink1Results, contains("First: 1", "First: 2", "First: 3"));
    assertThat(sink2Results, contains("Second: 1", "Second: 2", "Second: 3"));
}
Also used : Configuration(org.apache.flink.configuration.Configuration) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) MapFunction(org.apache.flink.api.common.functions.MapFunction) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) OperatorChain(org.apache.flink.streaming.runtime.tasks.OperatorChain) StreamMap(org.apache.flink.streaming.api.operators.StreamMap)

Example 90 with JobGraph

use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.

the class StreamingJobGraphGeneratorNodeHashTest method testNodeHashIdenticalSources.

/**
	 * Tests that there are no collisions with two identical sources.
	 *
	 * <pre>
	 * [ (src0) ] --\
	 *               +--> [ (sink) ]
	 * [ (src1) ] --/
	 * </pre>
	 */
@Test
public void testNodeHashIdenticalSources() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(4);
    env.disableOperatorChaining();
    // two structurally identical sources feeding a single sink
    DataStream<String> sourceA = env.addSource(new NoOpSourceFunction());
    DataStream<String> sourceB = env.addSource(new NoOpSourceFunction());
    sourceA.union(sourceB).addSink(new NoOpSinkFunction());
    List<JobVertex> sorted = env.getStreamGraph().getJobGraph().getVerticesSortedTopologicallyFromSources();
    JobVertex firstSource = sorted.get(0);
    JobVertex secondSource = sorted.get(1);
    assertTrue(firstSource.isInputVertex());
    assertTrue(secondSource.isInputVertex());
    // both sources must receive IDs, and the IDs must not collide
    assertNotNull(firstSource.getID());
    assertNotNull(secondSource.getID());
    assertNotEquals(firstSource.getID(), secondSource.getID());
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Aggregations

JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)131 Test (org.junit.Test)95 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)78 Configuration (org.apache.flink.configuration.Configuration)45 JobID (org.apache.flink.api.common.JobID)39 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)34 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)32 Deadline (scala.concurrent.duration.Deadline)31 FiniteDuration (scala.concurrent.duration.FiniteDuration)27 JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages)20 AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway)18 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)17 SubmitJob (org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob)15 TestingCluster (org.apache.flink.runtime.testingUtils.TestingCluster)15 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)14 TestingJobManagerMessages (org.apache.flink.runtime.testingUtils.TestingJobManagerMessages)14 IOException (java.io.IOException)13 ActorRef (akka.actor.ActorRef)12 Scheduler (org.apache.flink.runtime.jobmanager.scheduler.Scheduler)11 StreamGraph (org.apache.flink.streaming.api.graph.StreamGraph)11