Search in sources :

Example 91 with JobGraph

use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.

the class StreamingJobGraphGeneratorNodeHashTest method testManualHashAssignment.

// ------------------------------------------------------------------------
// Manual hash assignment
// ------------------------------------------------------------------------
/**
 * Verifies that operators carrying a manually assigned uid end up with the same vertex ID
 * when a modified version of the program is translated a second time.
 *
 * <p>First program:
 *
 * <pre>
 *                    /-> [ (map) ] -> [ (sink)@sink0 ]
 * [ (src)@source ] -+
 *                    \-> [ (map) ] -> [ (sink)@sink1 ]
 * </pre>
 *
 * <p>Second program (the uid "source" is now attached to the map that follows the source,
 * and each branch gained a keyed reduce):
 *
 * <pre>
 *                                 /-> [ (map) ] -> [ (reduce) ] -> [ (sink)@sink0 ]
 * [ (src) ] -> [ (map)@source ] -+
 *                                 \-> [ (map) ] -> [ (reduce) ] -> [ (sink)@sink1 ]
 * </pre>
 *
 * @throws Exception if building or translating either program fails
 */
@Test
public void testManualHashAssignment() throws Exception {
    // --- first program ---------------------------------------------------
    StreamExecutionEnvironment firstEnv = StreamExecutionEnvironment.createLocalEnvironment();
    firstEnv.setParallelism(4);
    firstEnv.disableOperatorChaining();

    DataStream<String> firstSource = firstEnv
            .addSource(new NoOpSourceFunction())
            .name("source")
            .uid("source");
    firstSource.map(new NoOpMapFunction())
            .addSink(new NoOpSinkFunction())
            .name("sink0")
            .uid("sink0");
    firstSource.map(new NoOpMapFunction())
            .addSink(new NoOpSinkFunction())
            .name("sink1")
            .uid("sink1");

    JobGraph firstJobGraph = firstEnv.getStreamGraph().getJobGraph();

    // Collect every vertex ID of the first program; each one must be unique.
    Set<JobVertexID> firstVertexIds = new HashSet<>();
    for (JobVertex vertex : firstJobGraph.getVertices()) {
        boolean notSeenBefore = firstVertexIds.add(vertex.getID());
        assertTrue(notSeenBefore);
    }

    // --- second, slightly different program ---------------------------------
    StreamExecutionEnvironment secondEnv = StreamExecutionEnvironment.createLocalEnvironment();
    secondEnv.setParallelism(4);
    secondEnv.disableOperatorChaining();

    // Note: name/uid "source" are applied to the map operator here, not to the source itself.
    DataStream<String> secondSource = secondEnv
            .addSource(new NoOpSourceFunction())
            .map(new NoOpMapFunction())
            .name("source")
            .uid("source");
    secondSource.map(new NoOpMapFunction())
            .keyBy(new NoOpKeySelector())
            .reduce(new NoOpReduceFunction())
            .addSink(new NoOpSinkFunction())
            .name("sink0")
            .uid("sink0");
    secondSource.map(new NoOpMapFunction())
            .keyBy(new NoOpKeySelector())
            .reduce(new NoOpReduceFunction())
            .addSink(new NoOpSinkFunction())
            .name("sink1")
            .uid("sink1");

    JobGraph secondJobGraph = secondEnv.getStreamGraph().getJobGraph();
    assertNotEquals(firstJobGraph.getJobID(), secondJobGraph.getJobID());

    // Every vertex whose name ends in one of the manually assigned names must reuse an ID
    // that was already present in the first job graph.
    for (JobVertex vertex : secondJobGraph.getVertices()) {
        String vertexName = vertex.getName();
        boolean hasManualUid = vertexName.endsWith("source")
                || vertexName.endsWith("sink0")
                || vertexName.endsWith("sink1");
        if (!hasManualUid) {
            continue;
        }
        assertTrue(firstVertexIds.contains(vertex.getID()));
    }
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 92 with JobGraph

use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.

the class StreamingJobGraphGeneratorNodeHashTest method testNodeHashIdenticalNodes.

/**
 * Checks that two identical intermediate nodes hanging off the same predecessor do not
 * collide, i.e. every vertex of the generated job graph has a distinct ID.
 *
 * <pre>
 *             /-> [ (map) ] -> [ (sink) ]
 * [ (src) ] -+
 *             \-> [ (map) ] -> [ (sink) ]
 * </pre>
 *
 * @throws Exception if building or translating the program fails
 */
@Test
public void testNodeHashIdenticalNodes() throws Exception {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.createLocalEnvironment();
    environment.setParallelism(4);
    environment.disableOperatorChaining();

    // Two structurally identical branches off the same source.
    DataStream<String> source = environment.addSource(new NoOpSourceFunction());
    source.map(new NoOpMapFunction())
            .addSink(new NoOpSinkFunction());
    source.map(new NoOpMapFunction())
            .addSink(new NoOpSinkFunction());

    JobGraph graph = environment.getStreamGraph().getJobGraph();

    // Set.add returns false for a duplicate, which would indicate an ID collision.
    Set<JobVertexID> seenIds = new HashSet<>();
    for (JobVertex jobVertex : graph.getVertices()) {
        JobVertexID id = jobVertex.getID();
        boolean unique = seenIds.add(id);
        assertTrue(unique);
    }
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 93 with JobGraph

use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.

the class RescalingITCase method testSavepointRescalingNonPartitionedStateCausesException.

/**
 * Tests that a job cannot be restarted from a savepoint with a different parallelism if the
 * rescaled operator has non-partitioned state.
 *
 * <p>The test runs a job with parallelism {@code numSlots / 2}, takes a savepoint, cancels the
 * job and resubmits it from that savepoint with parallelism {@code numSlots}. The resubmission
 * must fail with a {@link JobExecutionException} caused by an {@link IllegalStateException};
 * a resubmission that completes without an exception fails the test.
 *
 * @throws Exception if any step fails in a way other than the expected restore failure
 */
@Test
public void testSavepointRescalingNonPartitionedStateCausesException() throws Exception {
    final int parallelism = numSlots / 2;
    final int parallelism2 = numSlots;
    final int maxParallelism = 13;
    FiniteDuration timeout = new FiniteDuration(3, TimeUnit.MINUTES);
    Deadline deadline = timeout.fromNow();
    // jobID is non-null only while a submitted job may still be running; the finally block
    // uses it to decide whether it has to wait for a leftover job to be removed.
    JobID jobID = null;
    ActorGateway jobManager = null;
    try {
        jobManager = cluster.getLeaderGateway(deadline.timeLeft());
        JobGraph jobGraph = createJobGraphWithOperatorState(parallelism, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED);
        jobID = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // wait until the operator is started
        StateSourceBase.workStartedLatch.await();
        Future<Object> savepointPathFuture = jobManager.ask(new JobManagerMessages.TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
        FiniteDuration waitingTime = new FiniteDuration(10, TimeUnit.SECONDS);
        Object savepointResponse = Await.result(savepointPathFuture, waitingTime);
        // Include the actual response in the failure message so a failed savepoint is diagnosable.
        assertTrue(String.valueOf(savepointResponse), savepointResponse instanceof JobManagerMessages.TriggerSavepointSuccess);
        final String savepointPath = ((JobManagerMessages.TriggerSavepointSuccess) savepointResponse).savepointPath();
        // Register for the removal notification before cancelling so it cannot be missed.
        Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), deadline.timeLeft());
        Future<Object> cancellationResponseFuture = jobManager.ask(new JobManagerMessages.CancelJob(jobID), deadline.timeLeft());
        Object cancellationResponse = Await.result(cancellationResponseFuture, deadline.timeLeft());
        assertTrue(cancellationResponse instanceof JobManagerMessages.CancellationSuccess);
        Await.ready(jobRemovedFuture, deadline.timeLeft());
        // job successfully removed
        jobID = null;
        JobGraph scaledJobGraph = createJobGraphWithOperatorState(parallelism2, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED);
        scaledJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        jobID = scaledJobGraph.getJobID();
        cluster.submitJobAndWait(scaledJobGraph, false);
        jobID = null;
        // Reaching this point means the rescaled job was restored without an error, which is
        // exactly the situation this test is supposed to rule out.
        fail("Expected the rescaled job with non-partitioned state to fail with a JobExecutionException caused by an IllegalStateException.");
    } catch (JobExecutionException exception) {
        // We expect an IllegalStateException wrapped in a JobExecutionException, because the
        // job containing non-partitioned state is being rescaled. Anything else is a real failure.
        if (!(exception.getCause() instanceof IllegalStateException)) {
            throw exception;
        }
    } finally {
        // clear any left overs from a possibly failed job
        if (jobID != null && jobManager != null) {
            Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), timeout);
            try {
                Await.ready(jobRemovedFuture, timeout);
            } catch (TimeoutException | InterruptedException ie) {
                fail("Failed while cleaning up the cluster.");
            }
        }
    }
}
Also used : Deadline(scala.concurrent.duration.Deadline) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) JobID(org.apache.flink.api.common.JobID) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)

Example 94 with JobGraph

use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.

the class RescalingITCase method testSavepointRescalingWithKeyedAndNonPartitionedState.

/**
 * Tests that a job with non-partitioned state can be restarted from a savepoint with a
 * different parallelism if the operators with non-partitioned state are not rescaled.
 *
 * <p>The first job is created with parallelism {@code numSlots / 2}; the job restored from its
 * savepoint is created with parallelism {@code numSlots}, while the third argument passed to
 * the job graph factory stays at the original parallelism. After each run the elements
 * collected by {@code CollectionSink} are compared against the expected
 * (operator index, value) pair per key.
 *
 * @throws Exception if job submission, the savepoint, cancellation or verification fails
 */
@Test
public void testSavepointRescalingWithKeyedAndNonPartitionedState() throws Exception {
    int numberKeys = 42;
    int numberElements = 1000;
    int numberElements2 = 500;
    int parallelism = numSlots / 2;
    int parallelism2 = numSlots;
    int maxParallelism = 13;
    FiniteDuration timeout = new FiniteDuration(3, TimeUnit.MINUTES);
    Deadline deadline = timeout.fromNow();
    ActorGateway jobManager = null;
    // jobID is non-null only while a submitted job may still be running; the finally block
    // uses it to decide whether it has to wait for a leftover job to be removed.
    JobID jobID = null;
    try {
        jobManager = cluster.getLeaderGateway(deadline.timeLeft());
        JobGraph jobGraph = createJobGraphWithKeyedAndNonPartitionedOperatorState(parallelism, maxParallelism, parallelism, numberKeys, numberElements, false, 100);
        jobID = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // wait til the sources have emitted numberElements for each key and completed a checkpoint
        // NOTE(review): the result of this timed wait is ignored; if the latch reports a
        // timeout via its return value, the test only fails later at the assertEquals below.
        SubtaskIndexFlatMapper.workCompletedLatch.await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        // verify the current state
        Set<Tuple2<Integer, Integer>> actualResult = CollectionSink.getElementsSet();
        Set<Tuple2<Integer, Integer>> expectedResult = new HashSet<>();
        for (int key = 0; key < numberKeys; key++) {
            int keyGroupIndex = KeyGroupRangeAssignment.assignToKeyGroup(key, maxParallelism);
            expectedResult.add(Tuple2.of(KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, parallelism, keyGroupIndex), numberElements * key));
        }
        assertEquals(expectedResult, actualResult);
        // clear the CollectionSink set for the restarted job
        CollectionSink.clearElementsSet();
        Future<Object> savepointPathFuture = jobManager.ask(new JobManagerMessages.TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
        Object savepointResponse = Await.result(savepointPathFuture, deadline.timeLeft());
        // Check the response type before casting so that a failed savepoint is reported with
        // the actual response instead of an opaque ClassCastException.
        assertTrue(String.valueOf(savepointResponse), savepointResponse instanceof JobManagerMessages.TriggerSavepointSuccess);
        final String savepointPath = ((JobManagerMessages.TriggerSavepointSuccess) savepointResponse).savepointPath();
        // Register for the removal notification before cancelling so it cannot be missed.
        Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), deadline.timeLeft());
        Future<Object> cancellationResponseFuture = jobManager.ask(new JobManagerMessages.CancelJob(jobID), deadline.timeLeft());
        Object cancellationResponse = Await.result(cancellationResponseFuture, deadline.timeLeft());
        assertTrue(cancellationResponse instanceof JobManagerMessages.CancellationSuccess);
        Await.ready(jobRemovedFuture, deadline.timeLeft());
        // job successfully removed
        jobID = null;
        JobGraph scaledJobGraph = createJobGraphWithKeyedAndNonPartitionedOperatorState(parallelism2, maxParallelism, parallelism, numberKeys, numberElements + numberElements2, true, 100);
        scaledJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        jobID = scaledJobGraph.getJobID();
        cluster.submitJobAndWait(scaledJobGraph, false);
        jobID = null;
        // verify the state after the rescaled run: keys are now distributed over parallelism2 subtasks
        Set<Tuple2<Integer, Integer>> actualResult2 = CollectionSink.getElementsSet();
        Set<Tuple2<Integer, Integer>> expectedResult2 = new HashSet<>();
        for (int key = 0; key < numberKeys; key++) {
            int keyGroupIndex = KeyGroupRangeAssignment.assignToKeyGroup(key, maxParallelism);
            expectedResult2.add(Tuple2.of(KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, parallelism2, keyGroupIndex), key * (numberElements + numberElements2)));
        }
        assertEquals(expectedResult2, actualResult2);
    } finally {
        // clear the CollectionSink set for the restarted job
        CollectionSink.clearElementsSet();
        // clear any left overs from a possibly failed job
        if (jobID != null && jobManager != null) {
            Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), timeout);
            try {
                Await.ready(jobRemovedFuture, timeout);
            } catch (TimeoutException | InterruptedException ie) {
                fail("Failed while cleaning up the cluster.");
            }
        }
    }
}
Also used : Deadline(scala.concurrent.duration.Deadline) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) JobID(org.apache.flink.api.common.JobID) HashSet(java.util.HashSet) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)

Example 95 with JobGraph

use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.

the class SavepointITCase method testSavepointForJobWithIteration.

/**
 * Takes a savepoint of a job that contains an iteration and resubmits the job from that
 * savepoint.
 *
 * <p>The job is submitted detached to a dedicated {@link TestingCluster}. The test waits for
 * every latch in {@code ITER_TEST_SNAPSHOT_WAIT}, triggers a savepoint, cancels the source,
 * resubmits the job with the savepoint as restore path and then waits for every latch in
 * {@code ITER_TEST_RESTORE_WAIT}. The savepoint, the cluster and the temporary directories are
 * cleaned up afterwards, also when the test fails.
 *
 * @throws Exception if setup, job submission, the savepoint or the restore fails
 */
@Test
public void testSavepointForJobWithIteration() throws Exception {
    // Reset the static per-subtask latches and counters used by the test operators.
    for (int i = 0; i < ITER_TEST_PARALLELISM; ++i) {
        ITER_TEST_SNAPSHOT_WAIT[i] = new OneShotLatch();
        ITER_TEST_RESTORE_WAIT[i] = new OneShotLatch();
        ITER_TEST_CHECKPOINT_VERIFY[i] = 0;
    }
    // The folder is created by hand inside the method (no JUnit rule manages it here), so it
    // has to be deleted explicitly; the outer try/finally below takes care of that.
    TemporaryFolder folder = new TemporaryFolder();
    folder.create();
    try {
        // Temporary directory for file state backend
        final File tmpDir = folder.newFolder();
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        final IntegerStreamSource source = new IntegerStreamSource();
        IterativeStream<Integer> iteration = env.addSource(source).flatMap(new RichFlatMapFunction<Integer, Integer>() {

            private static final long serialVersionUID = 1L;

            @Override
            public void flatMap(Integer in, Collector<Integer> clctr) throws Exception {
                clctr.collect(in);
            }
        }).setParallelism(ITER_TEST_PARALLELISM).keyBy(new KeySelector<Integer, Object>() {

            private static final long serialVersionUID = 1L;

            @Override
            public Object getKey(Integer value) throws Exception {
                return value;
            }
        }).flatMap(new DuplicateFilter()).setParallelism(ITER_TEST_PARALLELISM).iterate();
        // The iteration body is an identity map whose output is fed back into the iteration.
        DataStream<Integer> iterationBody = iteration.map(new MapFunction<Integer, Integer>() {

            private static final long serialVersionUID = 1L;

            @Override
            public Integer map(Integer value) throws Exception {
                return value;
            }
        }).setParallelism(ITER_TEST_PARALLELISM);
        iteration.closeWith(iterationBody);
        StreamGraph streamGraph = env.getStreamGraph();
        streamGraph.setJobName("Test");
        JobGraph jobGraph = streamGraph.getJobGraph();
        Configuration config = new Configuration();
        config.addAll(jobGraph.getJobConfiguration());
        config.setLong(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, -1L);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 2 * jobGraph.getMaximumParallelism());
        final File checkpointDir = new File(tmpDir, "checkpoints");
        final File savepointDir = new File(tmpDir, "savepoints");
        if (!checkpointDir.mkdir() || !savepointDir.mkdirs()) {
            fail("Test setup failed: failed to create temporary directories.");
        }
        // Use the filesystem state backend with checkpoints and savepoints below tmpDir.
        config.setString(CoreOptions.STATE_BACKEND, "filesystem");
        config.setString(FsStateBackendFactory.CHECKPOINT_DIRECTORY_URI_CONF_KEY, checkpointDir.toURI().toString());
        config.setString(FsStateBackendFactory.MEMORY_THRESHOLD_CONF_KEY, "0");
        config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointDir.toURI().toString());
        TestingCluster cluster = new TestingCluster(config, false);
        String savepointPath = null;
        try {
            cluster.start();
            cluster.submitJobDetached(jobGraph);
            // Wait for all snapshot latches before triggering the savepoint.
            for (OneShotLatch latch : ITER_TEST_SNAPSHOT_WAIT) {
                latch.await();
            }
            savepointPath = cluster.triggerSavepoint(jobGraph.getJobID());
            source.cancel();
            // Translate the stream graph again and restore the new job graph from the savepoint.
            jobGraph = streamGraph.getJobGraph();
            jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
            cluster.submitJobDetached(jobGraph);
            // Wait for all restore latches before shutting the job down.
            for (OneShotLatch latch : ITER_TEST_RESTORE_WAIT) {
                latch.await();
            }
            source.cancel();
        } finally {
            if (null != savepointPath) {
                cluster.disposeSavepoint(savepointPath);
            }
            cluster.stop();
            cluster.awaitTermination();
        }
    } finally {
        // Remove the temporary directories only after the cluster has been shut down.
        folder.delete();
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) KeySelector(org.apache.flink.api.java.functions.KeySelector) MapFunction(org.apache.flink.api.common.functions.MapFunction) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) TriggerSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint) ResponseSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ResponseSavepoint) RequestSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestSavepoint) DisposeSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.DisposeSavepoint) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) TemporaryFolder(org.junit.rules.TemporaryFolder) Collector(org.apache.flink.util.Collector) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) File(java.io.File) Test(org.junit.Test)

Aggregations

JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)131 Test (org.junit.Test)95 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)78 Configuration (org.apache.flink.configuration.Configuration)45 JobID (org.apache.flink.api.common.JobID)39 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)34 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)32 Deadline (scala.concurrent.duration.Deadline)31 FiniteDuration (scala.concurrent.duration.FiniteDuration)27 JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages)20 AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway)18 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)17 SubmitJob (org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob)15 TestingCluster (org.apache.flink.runtime.testingUtils.TestingCluster)15 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)14 TestingJobManagerMessages (org.apache.flink.runtime.testingUtils.TestingJobManagerMessages)14 IOException (java.io.IOException)13 ActorRef (akka.actor.ActorRef)12 Scheduler (org.apache.flink.runtime.jobmanager.scheduler.Scheduler)11 StreamGraph (org.apache.flink.streaming.api.graph.StreamGraph)11