Search in sources :

Example 1 with JobSnapshottingSettings

use of org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings in project flink by apache.

the class JobManagerHARecoveryTest method testJobRecoveryWhenLosingLeadership.

/**
	 * Tests that the persisted job is not removed from the SubmittedJobGraphStore if the JobManager
	 * loses its leadership. Furthermore, it tests that the job manager can recover the job from
	 * the SubmittedJobGraphStore and checkpoint state is recovered as well.
	 */
@Test
public void testJobRecoveryWhenLosingLeadership() throws Exception {
    FiniteDuration timeout = new FiniteDuration(30, TimeUnit.SECONDS);
    FiniteDuration jobRecoveryTimeout = new FiniteDuration(3, TimeUnit.SECONDS);
    Deadline deadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();
    Configuration flinkConfiguration = new Configuration();
    UUID leaderSessionID = UUID.randomUUID();
    UUID newLeaderSessionID = UUID.randomUUID();
    int slots = 2;
    ActorRef archive = null;
    ActorRef jobManager = null;
    ActorRef taskManager = null;
    flinkConfiguration.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");
    flinkConfiguration.setString(HighAvailabilityOptions.HA_STORAGE_PATH, temporaryFolder.newFolder().toString());
    flinkConfiguration.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, slots);
    try {
        Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
        MySubmittedJobGraphStore mySubmittedJobGraphStore = new MySubmittedJobGraphStore();
        MyCheckpointStore checkpointStore = new MyCheckpointStore();
        CheckpointIDCounter checkpointCounter = new StandaloneCheckpointIDCounter();
        CheckpointRecoveryFactory checkpointStateFactory = new MyCheckpointRecoveryFactory(checkpointStore, checkpointCounter);
        TestingLeaderElectionService myLeaderElectionService = new TestingLeaderElectionService();
        TestingLeaderRetrievalService myLeaderRetrievalService = new TestingLeaderRetrievalService();
        InstanceManager instanceManager = new InstanceManager();
        instanceManager.addInstanceListener(scheduler);
        archive = system.actorOf(Props.create(MemoryArchivist.class, 10));
        Props jobManagerProps = Props.create(TestingJobManager.class, flinkConfiguration, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), instanceManager, scheduler, new BlobLibraryCacheManager(new BlobServer(flinkConfiguration), 3600000), archive, new FixedDelayRestartStrategy.FixedDelayRestartStrategyFactory(Int.MaxValue(), 100), timeout, myLeaderElectionService, mySubmittedJobGraphStore, checkpointStateFactory, jobRecoveryTimeout, Option.apply(null));
        jobManager = system.actorOf(jobManagerProps);
        ActorGateway gateway = new AkkaActorGateway(jobManager, leaderSessionID);
        taskManager = TaskManager.startTaskManagerComponentsAndActor(flinkConfiguration, ResourceID.generate(), system, "localhost", Option.apply("taskmanager"), Option.apply((LeaderRetrievalService) myLeaderRetrievalService), true, TestingTaskManager.class);
        ActorGateway tmGateway = new AkkaActorGateway(taskManager, leaderSessionID);
        Future<Object> tmAlive = tmGateway.ask(TestingMessages.getAlive(), deadline.timeLeft());
        Await.ready(tmAlive, deadline.timeLeft());
        JobVertex sourceJobVertex = new JobVertex("Source");
        sourceJobVertex.setInvokableClass(BlockingStatefulInvokable.class);
        sourceJobVertex.setParallelism(slots);
        JobGraph jobGraph = new JobGraph("TestingJob", sourceJobVertex);
        List<JobVertexID> vertexId = Collections.singletonList(sourceJobVertex.getID());
        jobGraph.setSnapshotSettings(new JobSnapshottingSettings(vertexId, vertexId, vertexId, 100, 10 * 60 * 1000, 0, 1, ExternalizedCheckpointSettings.none(), null, true));
        BlockingStatefulInvokable.initializeStaticHelpers(slots);
        Future<Object> isLeader = gateway.ask(TestingJobManagerMessages.getNotifyWhenLeader(), deadline.timeLeft());
        Future<Object> isConnectedToJobManager = tmGateway.ask(new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager), deadline.timeLeft());
        // tell jobManager that he's the leader
        myLeaderElectionService.isLeader(leaderSessionID);
        // tell taskManager who's the leader
        myLeaderRetrievalService.notifyListener(gateway.path(), leaderSessionID);
        Await.ready(isLeader, deadline.timeLeft());
        Await.ready(isConnectedToJobManager, deadline.timeLeft());
        // submit blocking job
        Future<Object> jobSubmitted = gateway.ask(new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED), deadline.timeLeft());
        Await.ready(jobSubmitted, deadline.timeLeft());
        // Wait for some checkpoints to complete
        BlockingStatefulInvokable.awaitCompletedCheckpoints();
        Future<Object> jobRemoved = gateway.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobGraph.getJobID()), deadline.timeLeft());
        // Revoke leadership
        myLeaderElectionService.notLeader();
        // check that the job gets removed from the JobManager
        Await.ready(jobRemoved, deadline.timeLeft());
        // but stays in the submitted job graph store
        assertTrue(mySubmittedJobGraphStore.contains(jobGraph.getJobID()));
        Future<Object> jobRunning = gateway.ask(new TestingJobManagerMessages.NotifyWhenJobStatus(jobGraph.getJobID(), JobStatus.RUNNING), deadline.timeLeft());
        // Make JobManager again a leader
        myLeaderElectionService.isLeader(newLeaderSessionID);
        // tell the TaskManager about it
        myLeaderRetrievalService.notifyListener(gateway.path(), newLeaderSessionID);
        // wait that the job is recovered and reaches state RUNNING
        Await.ready(jobRunning, deadline.timeLeft());
        Future<Object> jobFinished = gateway.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobGraph.getJobID()), deadline.timeLeft());
        BlockingInvokable.unblock();
        // wait til the job has finished
        Await.ready(jobFinished, deadline.timeLeft());
        // check that the job has been removed from the submitted job graph store
        assertFalse(mySubmittedJobGraphStore.contains(jobGraph.getJobID()));
        // Check that state has been recovered
        long[] recoveredStates = BlockingStatefulInvokable.getRecoveredStates();
        for (long state : recoveredStates) {
            boolean isExpected = state >= BlockingStatefulInvokable.NUM_CHECKPOINTS_TO_COMPLETE;
            assertTrue("Did not recover checkpoint state correctly, expecting >= " + BlockingStatefulInvokable.NUM_CHECKPOINTS_TO_COMPLETE + ", but state was " + state, isExpected);
        }
    } finally {
        if (archive != null) {
            archive.tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
        if (jobManager != null) {
            jobManager.tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
        if (taskManager != null) {
            taskManager.tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
    }
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) BlobLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager) Configuration(org.apache.flink.configuration.Configuration) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) FixedDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.FixedDelayRestartStrategy) ActorRef(akka.actor.ActorRef) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) InstanceManager(org.apache.flink.runtime.instance.InstanceManager) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) Props(akka.actor.Props) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) TestingTaskManager(org.apache.flink.runtime.testingUtils.TestingTaskManager) BlobServer(org.apache.flink.runtime.blob.BlobServer) CheckpointIDCounter(org.apache.flink.runtime.checkpoint.CheckpointIDCounter) StandaloneCheckpointIDCounter(org.apache.flink.runtime.checkpoint.StandaloneCheckpointIDCounter) UUID(java.util.UUID) TestingTaskManagerMessages(org.apache.flink.runtime.testingUtils.TestingTaskManagerMessages) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) Deadline(scala.concurrent.duration.Deadline) JobSnapshottingSettings(org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) CheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory) CompletedCheckpoint(org.apache.flink.runtime.checkpoint.CompletedCheckpoint) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) StandaloneCheckpointIDCounter(org.apache.flink.runtime.checkpoint.StandaloneCheckpointIDCounter) Test(org.junit.Test)

Example 2 with JobSnapshottingSettings

use of org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings in project flink by apache.

the class ExecutionGraphBuilder method buildGraph.

/**
	 * Builds the ExecutionGraph from the JobGraph.
	 * If a prior execution graph exists, the JobGraph will be attached. If no prior execution
	 * graph exists, then the JobGraph will become attach to a new empty execution graph.
	 */
public static ExecutionGraph buildGraph(@Nullable ExecutionGraph prior, JobGraph jobGraph, Configuration jobManagerConfig, ScheduledExecutorService futureExecutor, Executor ioExecutor, SlotProvider slotProvider, ClassLoader classLoader, CheckpointRecoveryFactory recoveryFactory, Time timeout, RestartStrategy restartStrategy, MetricGroup metrics, int parallelismForAutoMax, Logger log) throws JobExecutionException, JobException {
    checkNotNull(jobGraph, "job graph cannot be null");
    final String jobName = jobGraph.getName();
    final JobID jobId = jobGraph.getJobID();
    // create a new execution graph, if none exists so far
    final ExecutionGraph executionGraph;
    try {
        executionGraph = (prior != null) ? prior : new ExecutionGraph(futureExecutor, ioExecutor, jobId, jobName, jobGraph.getJobConfiguration(), jobGraph.getSerializedExecutionConfig(), timeout, restartStrategy, jobGraph.getUserJarBlobKeys(), jobGraph.getClasspaths(), slotProvider, classLoader, metrics);
    } catch (IOException e) {
        throw new JobException("Could not create the execution graph.", e);
    }
    // set the basic properties
    executionGraph.setScheduleMode(jobGraph.getScheduleMode());
    executionGraph.setQueuedSchedulingAllowed(jobGraph.getAllowQueuedScheduling());
    try {
        executionGraph.setJsonPlan(JsonPlanGenerator.generatePlan(jobGraph));
    } catch (Throwable t) {
        log.warn("Cannot create JSON plan for job", t);
        // give the graph an empty plan
        executionGraph.setJsonPlan("{}");
    }
    // initialize the vertices that have a master initialization hook
    // file output formats create directories here, input formats create splits
    final long initMasterStart = System.nanoTime();
    log.info("Running initialization on master for job {} ({}).", jobName, jobId);
    for (JobVertex vertex : jobGraph.getVertices()) {
        String executableClass = vertex.getInvokableClassName();
        if (executableClass == null || executableClass.isEmpty()) {
            throw new JobSubmissionException(jobId, "The vertex " + vertex.getID() + " (" + vertex.getName() + ") has no invokable class.");
        }
        if (vertex.getParallelism() == ExecutionConfig.PARALLELISM_AUTO_MAX) {
            vertex.setParallelism(parallelismForAutoMax);
        }
        try {
            vertex.initializeOnMaster(classLoader);
        } catch (Throwable t) {
            throw new JobExecutionException(jobId, "Cannot initialize task '" + vertex.getName() + "': " + t.getMessage(), t);
        }
    }
    log.info("Successfully ran initialization on master in {} ms.", (System.nanoTime() - initMasterStart) / 1_000_000);
    // topologically sort the job vertices and attach the graph to the existing one
    List<JobVertex> sortedTopology = jobGraph.getVerticesSortedTopologicallyFromSources();
    if (log.isDebugEnabled()) {
        log.debug("Adding {} vertices from job graph {} ({}).", sortedTopology.size(), jobName, jobId);
    }
    executionGraph.attachJobGraph(sortedTopology);
    if (log.isDebugEnabled()) {
        log.debug("Successfully created execution graph from job graph {} ({}).", jobName, jobId);
    }
    // configure the state checkpointing
    JobSnapshottingSettings snapshotSettings = jobGraph.getSnapshotSettings();
    if (snapshotSettings != null) {
        List<ExecutionJobVertex> triggerVertices = idToVertex(snapshotSettings.getVerticesToTrigger(), executionGraph);
        List<ExecutionJobVertex> ackVertices = idToVertex(snapshotSettings.getVerticesToAcknowledge(), executionGraph);
        List<ExecutionJobVertex> confirmVertices = idToVertex(snapshotSettings.getVerticesToConfirm(), executionGraph);
        CompletedCheckpointStore completedCheckpoints;
        CheckpointIDCounter checkpointIdCounter;
        try {
            int maxNumberOfCheckpointsToRetain = jobManagerConfig.getInteger(CoreOptions.MAX_RETAINED_CHECKPOINTS);
            if (maxNumberOfCheckpointsToRetain <= 0) {
                // warning and use 1 as the default value if the setting in
                // state.checkpoints.max-retained-checkpoints is not greater than 0.
                log.warn("The setting for '{} : {}' is invalid. Using default value of {}", CoreOptions.MAX_RETAINED_CHECKPOINTS.key(), maxNumberOfCheckpointsToRetain, CoreOptions.MAX_RETAINED_CHECKPOINTS.defaultValue());
                maxNumberOfCheckpointsToRetain = CoreOptions.MAX_RETAINED_CHECKPOINTS.defaultValue();
            }
            completedCheckpoints = recoveryFactory.createCheckpointStore(jobId, maxNumberOfCheckpointsToRetain, classLoader);
            checkpointIdCounter = recoveryFactory.createCheckpointIDCounter(jobId);
        } catch (Exception e) {
            throw new JobExecutionException(jobId, "Failed to initialize high-availability checkpoint handler", e);
        }
        // Maximum number of remembered checkpoints
        int historySize = jobManagerConfig.getInteger(ConfigConstants.JOB_MANAGER_WEB_CHECKPOINTS_HISTORY_SIZE, ConfigConstants.DEFAULT_JOB_MANAGER_WEB_CHECKPOINTS_HISTORY_SIZE);
        CheckpointStatsTracker checkpointStatsTracker = new CheckpointStatsTracker(historySize, ackVertices, snapshotSettings, metrics);
        // The default directory for externalized checkpoints
        String externalizedCheckpointsDir = jobManagerConfig.getString(ConfigConstants.CHECKPOINTS_DIRECTORY_KEY, null);
        // load the state backend for checkpoint metadata.
        // if specified in the application, use from there, otherwise load from configuration
        final StateBackend metadataBackend;
        final StateBackend applicationConfiguredBackend = snapshotSettings.getDefaultStateBackend();
        if (applicationConfiguredBackend != null) {
            metadataBackend = applicationConfiguredBackend;
            log.info("Using application-defined state backend for checkpoint/savepoint metadata: {}.", applicationConfiguredBackend);
        } else {
            try {
                metadataBackend = AbstractStateBackend.loadStateBackendFromConfigOrCreateDefault(jobManagerConfig, classLoader, log);
            } catch (IllegalConfigurationException | IOException | DynamicCodeLoadingException e) {
                throw new JobExecutionException(jobId, "Could not instantiate configured state backend", e);
            }
        }
        executionGraph.enableCheckpointing(snapshotSettings.getCheckpointInterval(), snapshotSettings.getCheckpointTimeout(), snapshotSettings.getMinPauseBetweenCheckpoints(), snapshotSettings.getMaxConcurrentCheckpoints(), snapshotSettings.getExternalizedCheckpointSettings(), triggerVertices, ackVertices, confirmVertices, checkpointIdCounter, completedCheckpoints, externalizedCheckpointsDir, metadataBackend, checkpointStatsTracker);
    }
    return executionGraph;
}
Also used : CheckpointStatsTracker(org.apache.flink.runtime.checkpoint.CheckpointStatsTracker) JobSnapshottingSettings(org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) IOException(java.io.IOException) JobSubmissionException(org.apache.flink.runtime.client.JobSubmissionException) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) JobSubmissionException(org.apache.flink.runtime.client.JobSubmissionException) JobException(org.apache.flink.runtime.JobException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) IOException(java.io.IOException) DynamicCodeLoadingException(org.apache.flink.util.DynamicCodeLoadingException) StateBackend(org.apache.flink.runtime.state.StateBackend) AbstractStateBackend(org.apache.flink.runtime.state.AbstractStateBackend) JobException(org.apache.flink.runtime.JobException) DynamicCodeLoadingException(org.apache.flink.util.DynamicCodeLoadingException) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) CheckpointIDCounter(org.apache.flink.runtime.checkpoint.CheckpointIDCounter) JobID(org.apache.flink.api.common.JobID) CompletedCheckpointStore(org.apache.flink.runtime.checkpoint.CompletedCheckpointStore)

Example 3 with JobSnapshottingSettings

use of org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings in project flink by apache.

the class CheckpointConfigHandlerTest method createGraphAndSettings.

private static GraphAndSettings createGraphAndSettings(boolean externalized, boolean exactlyOnce) {
    long interval = 18231823L;
    long timeout = 996979L;
    long minPause = 119191919L;
    int maxConcurrent = 12929329;
    ExternalizedCheckpointSettings externalizedSetting = externalized ? ExternalizedCheckpointSettings.externalizeCheckpoints(true) : ExternalizedCheckpointSettings.none();
    JobSnapshottingSettings settings = new JobSnapshottingSettings(Collections.<JobVertexID>emptyList(), Collections.<JobVertexID>emptyList(), Collections.<JobVertexID>emptyList(), interval, timeout, minPause, maxConcurrent, externalizedSetting, null, exactlyOnce);
    AccessExecutionGraph graph = mock(AccessExecutionGraph.class);
    when(graph.getJobSnapshottingSettings()).thenReturn(settings);
    return new GraphAndSettings(graph, settings, externalizedSetting);
}
Also used : ExternalizedCheckpointSettings(org.apache.flink.runtime.jobgraph.tasks.ExternalizedCheckpointSettings) JobSnapshottingSettings(org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings) AccessExecutionGraph(org.apache.flink.runtime.executiongraph.AccessExecutionGraph)

Example 4 with JobSnapshottingSettings

use of org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings in project flink by apache.

the class CoordinatorShutdownTest method testCoordinatorShutsDownOnSuccess.

@Test
public void testCoordinatorShutsDownOnSuccess() {
    LocalFlinkMiniCluster cluster = null;
    try {
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
        cluster = new LocalFlinkMiniCluster(config, true);
        cluster.start();
        // build a test graph with snapshotting enabled
        JobVertex vertex = new JobVertex("Test Vertex");
        vertex.setInvokableClass(BlockingInvokable.class);
        List<JobVertexID> vertexIdList = Collections.singletonList(vertex.getID());
        JobGraph testGraph = new JobGraph("test job", vertex);
        testGraph.setSnapshotSettings(new JobSnapshottingSettings(vertexIdList, vertexIdList, vertexIdList, 5000, 60000, 0L, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), null, true));
        ActorGateway jmGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
        FiniteDuration timeout = new FiniteDuration(60, TimeUnit.SECONDS);
        JobManagerMessages.SubmitJob submitMessage = new JobManagerMessages.SubmitJob(testGraph, ListeningBehaviour.EXECUTION_RESULT);
        // submit is successful, but then the job blocks due to the invokable
        Future<Object> submitFuture = jmGateway.ask(submitMessage, timeout);
        Await.result(submitFuture, timeout);
        // get the execution graph and store the ExecutionGraph reference
        Future<Object> jobRequestFuture = jmGateway.ask(new JobManagerMessages.RequestJob(testGraph.getJobID()), timeout);
        ExecutionGraph graph = (ExecutionGraph) ((JobManagerMessages.JobFound) Await.result(jobRequestFuture, timeout)).executionGraph();
        assertNotNull(graph);
        BlockingInvokable.unblock();
        graph.waitUntilFinished();
        // verify that the coordinator was shut down
        CheckpointCoordinator coord = graph.getCheckpointCoordinator();
        assertTrue(coord == null || coord.isShutdown());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    } finally {
        if (cluster != null) {
            cluster.shutdown();
            cluster.awaitTermination();
        }
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) JobSnapshottingSettings(org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) LocalFlinkMiniCluster(org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 5 with JobSnapshottingSettings

use of org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings in project flink by apache.

the class JMXJobManagerMetricTest method testJobManagerJMXMetricAccess.

/**
	 * Tests that metrics registered on the JobManager are actually accessible via JMX.
	 *
	 * @throws Exception
	 */
@Test
public void testJobManagerJMXMetricAccess() throws Exception {
    Deadline deadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();
    Configuration flinkConfiguration = new Configuration();
    flinkConfiguration.setString(ConfigConstants.METRICS_REPORTERS_LIST, "test");
    flinkConfiguration.setString(ConfigConstants.METRICS_REPORTER_PREFIX + "test." + ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX, JMXReporter.class.getName());
    flinkConfiguration.setString(ConfigConstants.METRICS_REPORTER_PREFIX + "test.port", "9060-9075");
    flinkConfiguration.setString(ConfigConstants.METRICS_SCOPE_NAMING_JM_JOB, "jobmanager.<job_name>");
    TestingCluster flink = new TestingCluster(flinkConfiguration);
    try {
        flink.start();
        JobVertex sourceJobVertex = new JobVertex("Source");
        sourceJobVertex.setInvokableClass(BlockingInvokable.class);
        JobGraph jobGraph = new JobGraph("TestingJob", sourceJobVertex);
        jobGraph.setSnapshotSettings(new JobSnapshottingSettings(Collections.<JobVertexID>emptyList(), Collections.<JobVertexID>emptyList(), Collections.<JobVertexID>emptyList(), 500, 500, 50, 5, ExternalizedCheckpointSettings.none(), null, true));
        flink.waitForActorsToBeAlive();
        flink.submitJobDetached(jobGraph);
        Future<Object> jobRunning = flink.getLeaderGateway(deadline.timeLeft()).ask(new TestingJobManagerMessages.WaitForAllVerticesToBeRunning(jobGraph.getJobID()), deadline.timeLeft());
        Await.ready(jobRunning, deadline.timeLeft());
        MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer();
        Set<ObjectName> nameSet = mBeanServer.queryNames(new ObjectName("org.apache.flink.jobmanager.job.lastCheckpointSize:job_name=TestingJob,*"), null);
        Assert.assertEquals(1, nameSet.size());
        assertEquals(-1L, mBeanServer.getAttribute(nameSet.iterator().next(), "Value"));
        Future<Object> jobFinished = flink.getLeaderGateway(deadline.timeLeft()).ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobGraph.getJobID()), deadline.timeLeft());
        BlockingInvokable.unblock();
        // wait til the job has finished
        Await.ready(jobFinished, deadline.timeLeft());
    } finally {
        flink.stop();
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) Deadline(scala.concurrent.duration.Deadline) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) JobSnapshottingSettings(org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings) FiniteDuration(scala.concurrent.duration.FiniteDuration) JMXReporter(org.apache.flink.metrics.jmx.JMXReporter) ObjectName(javax.management.ObjectName) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) MBeanServer(javax.management.MBeanServer) Test(org.junit.Test)

Aggregations

JobSnapshottingSettings (org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings)18 Test (org.junit.Test)12 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)11 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)11 Configuration (org.apache.flink.configuration.Configuration)8 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)8 FiniteDuration (scala.concurrent.duration.FiniteDuration)8 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)7 JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages)7 TestingJobManagerMessages (org.apache.flink.runtime.testingUtils.TestingJobManagerMessages)6 ActorRef (akka.actor.ActorRef)5 AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway)5 ActorSystem (akka.actor.ActorSystem)4 ExternalizedCheckpointSettings (org.apache.flink.runtime.jobgraph.tasks.ExternalizedCheckpointSettings)4 StandaloneLeaderRetrievalService (org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService)4 SubmitJob (org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob)4 WaitForAllVerticesToBeRunning (org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning)4 File (java.io.File)3 JobID (org.apache.flink.api.common.JobID)3 AccessExecutionGraph (org.apache.flink.runtime.executiongraph.AccessExecutionGraph)3