Example 71 with JobGraph

Use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.

From the class CoordinatorShutdownTest, method testCoordinatorShutsDownOnFailure:

@Test
public void testCoordinatorShutsDownOnFailure() {
    LocalFlinkMiniCluster cluster = null;
    try {
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
        cluster = new LocalFlinkMiniCluster(config, true);
        cluster.start();
        // build a test graph with snapshotting enabled
        JobVertex vertex = new JobVertex("Test Vertex");
        vertex.setInvokableClass(FailingBlockingInvokable.class);
        List<JobVertexID> vertexIdList = Collections.singletonList(vertex.getID());
        JobGraph testGraph = new JobGraph("test job", vertex);
        testGraph.setSnapshotSettings(new JobSnapshottingSettings(vertexIdList, vertexIdList, vertexIdList, 5000, 60000, 0L, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), null, true));
        ActorGateway jmGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
        FiniteDuration timeout = new FiniteDuration(60, TimeUnit.SECONDS);
        JobManagerMessages.SubmitJob submitMessage = new JobManagerMessages.SubmitJob(testGraph, ListeningBehaviour.EXECUTION_RESULT);
        // submit is successful, but then the job blocks due to the invokable
        Future<Object> submitFuture = jmGateway.ask(submitMessage, timeout);
        Await.result(submitFuture, timeout);
        // get the execution graph and store the ExecutionGraph reference
        Future<Object> jobRequestFuture = jmGateway.ask(new JobManagerMessages.RequestJob(testGraph.getJobID()), timeout);
        ExecutionGraph graph = (ExecutionGraph) ((JobManagerMessages.JobFound) Await.result(jobRequestFuture, timeout)).executionGraph();
        assertNotNull(graph);
        FailingBlockingInvokable.unblock();
        graph.waitUntilFinished();
        // verify that the coordinator was shut down
        CheckpointCoordinator coord = graph.getCheckpointCoordinator();
        assertTrue(coord == null || coord.isShutdown());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    } finally {
        if (cluster != null) {
            cluster.shutdown();
            cluster.awaitTermination();
        }
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) JobSnapshottingSettings(org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) LocalFlinkMiniCluster(org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)
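
For context, FailingBlockingInvokable is not shown above; judging from the test it presumably blocks all tasks until the static unblock() call releases them and then fails, which is what lets the test observe the CheckpointCoordinator shutting down. The following is a minimal, illustrative sketch of such an invokable, assuming the Flink 1.2/1.3-era AbstractInvokable API; it is not the actual class from CoordinatorShutdownTest.

// Illustrative sketch only (assumed behavior, not the real test class).
// Assumes: import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;
// Intended as a static nested class inside a test.
public static class FailingBlockingInvokableSketch extends AbstractInvokable {

    private static final Object LOCK = new Object();
    private static volatile boolean blocked = true;

    @Override
    public void invoke() throws Exception {
        // block until the test calls unblock()
        synchronized (LOCK) {
            while (blocked) {
                LOCK.wait();
            }
        }
        // failing afterwards drives the job into a terminal state,
        // at which point the CheckpointCoordinator should be shut down
        throw new RuntimeException("expected test failure");
    }

    public static void unblock() {
        synchronized (LOCK) {
            blocked = false;
            LOCK.notifyAll();
        }
    }
}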

Example 72 with JobGraph

Use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.

From the class LeaderChangeJobRecoveryTest, method createBlockingJob:

public JobGraph createBlockingJob(int parallelism) {
    // make BlockingOnceReceiver block on its first execution attempt
    Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(true);
    JobVertex sender = new JobVertex("sender");
    JobVertex receiver = new JobVertex("receiver");
    sender.setInvokableClass(Tasks.Sender.class);
    receiver.setInvokableClass(Tasks.BlockingOnceReceiver.class);
    sender.setParallelism(parallelism);
    receiver.setParallelism(parallelism);
    // connect receiver to sender with a pipelined, pointwise edge
    receiver.connectNewDataSetAsInput(sender, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
    // put sender and receiver in the same slot sharing group so their subtasks can share slots
    SlotSharingGroup slotSharingGroup = new SlotSharingGroup();
    sender.setSlotSharingGroup(slotSharingGroup);
    receiver.setSlotSharingGroup(slotSharingGroup);
    return new JobGraph("Blocking test job", sender, receiver);
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Tasks(org.apache.flink.runtime.jobmanager.Tasks) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup)
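
As a hedged usage sketch, a blocking job graph built this way would typically be submitted to a test cluster and then exercised by a leader change; the cluster variable and detached submission below follow the pattern of the other examples on this page, not necessarily the exact harness of LeaderChangeJobRecoveryTest.

// Hypothetical usage, assuming a TestingCluster as in the other examples:
JobGraph jobGraph = createBlockingJob(2);
cluster.submitJobDetached(jobGraph);
// ... then revoke/grant JobManager leadership and assert that the job
// either recovers or has its state cleaned up, depending on the test.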

Example 73 with JobGraph

Use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.

From the class LeaderChangeStateCleanupTest, method createBlockingJob:

public JobGraph createBlockingJob(int parallelism) {
    Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(true);
    JobVertex sender = new JobVertex("sender");
    JobVertex receiver = new JobVertex("receiver");
    sender.setInvokableClass(Tasks.Sender.class);
    receiver.setInvokableClass(Tasks.BlockingOnceReceiver.class);
    sender.setParallelism(parallelism);
    receiver.setParallelism(parallelism);
    receiver.connectNewDataSetAsInput(sender, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
    SlotSharingGroup slotSharingGroup = new SlotSharingGroup();
    sender.setSlotSharingGroup(slotSharingGroup);
    receiver.setSlotSharingGroup(slotSharingGroup);
    return new JobGraph("Blocking test job", sender, receiver);
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Tasks(org.apache.flink.runtime.jobmanager.Tasks) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup)

Example 74 with JobGraph

Use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.

From the class TaskCancelAsyncProducerConsumerITCase, method testCancelAsyncProducerAndConsumer:

/**
	 * Tests that a task waiting on an async producer/consumer that is stuck
	 * in a blocking buffer request can be properly cancelled.
	 *
	 * <p>This is currently required for the Flink Kafka sources, which spawn
	 * a separate Thread consuming from Kafka and producing the intermediate
	 * streams in the spawned Thread instead of the main task Thread.
	 */
@Test
public void testCancelAsyncProducerAndConsumer() throws Exception {
    Deadline deadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();
    TestingCluster flink = null;
    try {
        // Cluster
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY, 4096);
        config.setInteger(ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY, 8);
        flink = new TestingCluster(config, true);
        flink.start();
        // Job with async producer and consumer
        JobVertex producer = new JobVertex("AsyncProducer");
        producer.setParallelism(1);
        producer.setInvokableClass(AsyncProducer.class);
        JobVertex consumer = new JobVertex("AsyncConsumer");
        consumer.setParallelism(1);
        consumer.setInvokableClass(AsyncConsumer.class);
        consumer.connectNewDataSetAsInput(producer, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
        SlotSharingGroup slot = new SlotSharingGroup(producer.getID(), consumer.getID());
        producer.setSlotSharingGroup(slot);
        consumer.setSlotSharingGroup(slot);
        JobGraph jobGraph = new JobGraph(producer, consumer);
        // Submit job and wait until running
        ActorGateway jobManager = flink.getLeaderGateway(deadline.timeLeft());
        flink.submitJobDetached(jobGraph);
        Object msg = new WaitForAllVerticesToBeRunning(jobGraph.getJobID());
        Future<?> runningFuture = jobManager.ask(msg, deadline.timeLeft());
        Await.ready(runningFuture, deadline.timeLeft());
        // Wait for blocking requests, cancel and wait for cancellation
        msg = new NotifyWhenJobStatus(jobGraph.getJobID(), JobStatus.CANCELED);
        Future<?> cancelledFuture = jobManager.ask(msg, deadline.timeLeft());
        boolean producerBlocked = false;
        for (int i = 0; i < 50; i++) {
            Thread thread = ASYNC_PRODUCER_THREAD;
            if (thread != null && thread.isAlive()) {
                StackTraceElement[] stackTrace = thread.getStackTrace();
                producerBlocked = isInBlockingBufferRequest(stackTrace);
            }
            if (producerBlocked) {
                break;
            } else {
                // Retry
                Thread.sleep(500);
            }
        }
        // Verify that async producer is in blocking request
        assertTrue("Producer thread is not blocked: " + Arrays.toString(ASYNC_CONSUMER_THREAD.getStackTrace()), producerBlocked);
        boolean consumerWaiting = false;
        for (int i = 0; i < 50; i++) {
            Thread thread = ASYNC_CONSUMER_THREAD;
            if (thread != null && thread.isAlive()) {
                consumerWaiting = thread.getState() == Thread.State.WAITING;
            }
            if (consumerWaiting) {
                break;
            } else {
                // Retry
                Thread.sleep(500);
            }
        }
        // Verify that the async consumer thread is waiting
        assertTrue("Consumer thread is not blocked.", consumerWaiting);
        msg = new CancelJob(jobGraph.getJobID());
        Future<?> cancelFuture = jobManager.ask(msg, deadline.timeLeft());
        Await.ready(cancelFuture, deadline.timeLeft());
        Await.ready(cancelledFuture, deadline.timeLeft());
        // Verify the expected Exceptions
        assertNotNull(ASYNC_PRODUCER_EXCEPTION);
        assertEquals(IllegalStateException.class, ASYNC_PRODUCER_EXCEPTION.getClass());
        assertNotNull(ASYNC_CONSUMER_EXCEPTION);
        assertEquals(IllegalStateException.class, ASYNC_CONSUMER_EXCEPTION.getClass());
    } finally {
        if (flink != null) {
            flink.shutdown();
        }
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) WaitForAllVerticesToBeRunning(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) CancelJob(org.apache.flink.runtime.messages.JobManagerMessages.CancelJob) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) NotifyWhenJobStatus(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.NotifyWhenJobStatus) Test(org.junit.Test)
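
The helper isInBlockingBufferRequest(stackTrace) is referenced above but not shown. A plausible sketch follows; the class and method names it matches against are assumptions for illustration, not necessarily what the real test checks.

// Sketch only: returns true if any stack frame looks like a blocking buffer
// request inside the network buffer pool (matched names are assumed).
private static boolean isInBlockingBufferRequest(StackTraceElement[] stackTrace) {
    for (StackTraceElement element : stackTrace) {
        if (element.getClassName().endsWith("LocalBufferPool")
                && element.getMethodName().startsWith("requestBuffer")) {
            return true;
        }
    }
    return false;
}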

Example 75 with JobGraph

Use of org.apache.flink.runtime.jobgraph.JobGraph in project flink by apache.

From the class StreamingJobGraphGenerator, method createJobGraph:

public JobGraph createJobGraph() {
    jobGraph = new JobGraph(streamGraph.getJobName());
    // make sure that all vertices start immediately
    jobGraph.setScheduleMode(ScheduleMode.EAGER);
    init();
    // Generate deterministic hashes for the nodes in order to identify them across
    // submissions, iff they didn't change.
    Map<Integer, byte[]> hashes = defaultStreamGraphHasher.traverseStreamGraphAndGenerateHashes(streamGraph);
    // Generate legacy version hashes for backwards compatibility
    List<Map<Integer, byte[]>> legacyHashes = new ArrayList<>(legacyStreamGraphHashers.size());
    for (StreamGraphHasher hasher : legacyStreamGraphHashers) {
        legacyHashes.add(hasher.traverseStreamGraphAndGenerateHashes(streamGraph));
    }
    setChaining(hashes, legacyHashes);
    setPhysicalEdges();
    setSlotSharing();
    configureCheckpointing();
    // set the ExecutionConfig last when it has been finalized
    try {
        jobGraph.setExecutionConfig(streamGraph.getExecutionConfig());
    } catch (IOException e) {
        throw new IllegalConfigurationException(
            "Could not serialize the ExecutionConfig. " +
            "This indicates that non-serializable types (like custom serializers) were registered.", e);
    }
    return jobGraph;
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) ArrayList(java.util.ArrayList) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) IOException(java.io.IOException) HashMap(java.util.HashMap) Map(java.util.Map)
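
For reference, this generator is normally not called directly; a streaming program reaches it through the StreamGraph. A minimal sketch using the Flink 1.2/1.3-era API, where StreamGraph.getJobGraph() instantiates StreamingJobGraphGenerator internally:

// Build a trivial streaming program and turn it into a JobGraph.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.fromElements(1, 2, 3).print();
StreamGraph streamGraph = env.getStreamGraph();
// getJobGraph() creates a StreamingJobGraphGenerator and calls createJobGraph()
JobGraph jobGraph = streamGraph.getJobGraph();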

Aggregations (types used together with JobGraph in the indexed examples, with usage counts)

JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 131 usages
Test (org.junit.Test): 95 usages
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 78 usages
Configuration (org.apache.flink.configuration.Configuration): 45 usages
JobID (org.apache.flink.api.common.JobID): 39 usages
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 34 usages
ActorGateway (org.apache.flink.runtime.instance.ActorGateway): 32 usages
Deadline (scala.concurrent.duration.Deadline): 31 usages
FiniteDuration (scala.concurrent.duration.FiniteDuration): 27 usages
JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages): 20 usages
AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway): 18 usages
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 17 usages
SubmitJob (org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob): 15 usages
TestingCluster (org.apache.flink.runtime.testingUtils.TestingCluster): 15 usages
JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID): 14 usages
TestingJobManagerMessages (org.apache.flink.runtime.testingUtils.TestingJobManagerMessages): 14 usages
IOException (java.io.IOException): 13 usages
ActorRef (akka.actor.ActorRef): 12 usages
Scheduler (org.apache.flink.runtime.jobmanager.scheduler.Scheduler): 11 usages
StreamGraph (org.apache.flink.streaming.api.graph.StreamGraph): 11 usages