Search in sources :

Example 21 with ClusterClient

use of org.apache.flink.client.program.ClusterClient in project flink by apache.

the class JMXJobManagerMetricTest method testJobManagerJMXMetricAccess.

/**
 * Verifies that JobManager metrics can be read through the platform MBean server.
 *
 * <p>Submits a single-vertex streaming job with checkpointing configured, polls the client
 * until it reports {@code RUNNING}, and then checks that exactly one {@code lastCheckpointSize}
 * MBean is registered for the job {@code TestingJob} and that its {@code Value} attribute
 * equals {@code -1L}.
 *
 * @param client injected cluster client used to submit the job and to poll its status
 * @throws Exception if job submission, status polling or the JMX lookup fails
 */
@Test
void testJobManagerJMXMetricAccess(@InjectClusterClient ClusterClient<?> client) throws Exception {
    // Upper bound for the whole "wait until RUNNING" phase.
    final Deadline timeout = Deadline.now().plus(Duration.ofMinutes(2));
    try {
        final JobVertex blockingSource = new JobVertex("Source");
        blockingSource.setInvokableClass(BlockingInvokable.class);
        blockingSource.setParallelism(1);

        final CheckpointCoordinatorConfiguration coordinatorConfig =
                new CheckpointCoordinatorConfiguration(
                        500,
                        500,
                        50,
                        5,
                        CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
                        true,
                        false,
                        0,
                        0);
        final JobCheckpointingSettings checkpointing =
                new JobCheckpointingSettings(coordinatorConfig, null);

        final JobGraph testingJob =
                JobGraphBuilder.newStreamingJobGraphBuilder()
                        .setJobName("TestingJob")
                        .addJobVertex(blockingSource)
                        .setJobCheckpointingSettings(checkpointing)
                        .build();

        client.submitJob(testingJob).get();

        // Re-query the job status every 10 ms until it is RUNNING or the deadline expires.
        FutureUtils.retrySuccessfulWithDelay(
                        () -> client.getJobStatus(testingJob.getJobID()),
                        Time.milliseconds(10),
                        timeout,
                        current -> JobStatus.RUNNING == current,
                        TestingUtils.defaultScheduledExecutor())
                .get(timeout.timeLeft().toMillis(), TimeUnit.MILLISECONDS);

        final MBeanServer platformServer = ManagementFactory.getPlatformMBeanServer();
        final ObjectName metricPattern =
                new ObjectName("org.apache.flink.jobmanager.job.lastCheckpointSize:job_name=TestingJob,*");
        final Set<ObjectName> matchingBeans = platformServer.queryNames(metricPattern, null);

        assertThat(matchingBeans).hasSize(1);
        assertThat(platformServer.getAttribute(matchingBeans.iterator().next(), "Value"))
                .isEqualTo(-1L);

        // NOTE(review): the finally block below issues the same call again.
        BlockingInvokable.unblock();
    } finally {
        // Always release the blocking task, including when an assertion above fails.
        BlockingInvokable.unblock();
    }
}
Also used : OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) Deadline(org.apache.flink.api.common.time.Deadline) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) JobStatus(org.apache.flink.api.common.JobStatus) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) InjectClusterClient(org.apache.flink.test.junit5.InjectClusterClient) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) RegisterExtension(org.junit.jupiter.api.extension.RegisterExtension) MBeanServer(javax.management.MBeanServer) Duration(java.time.Duration) ConfigConstants(org.apache.flink.configuration.ConfigConstants) ManagementFactory(java.lang.management.ManagementFactory) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) JobGraphBuilder(org.apache.flink.runtime.jobgraph.JobGraphBuilder) Configuration(org.apache.flink.configuration.Configuration) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) MiniClusterExtension(org.apache.flink.test.junit5.MiniClusterExtension) Set(java.util.Set) MetricOptions(org.apache.flink.configuration.MetricOptions) ObjectName(javax.management.ObjectName) Test(org.junit.jupiter.api.Test) TimeUnit(java.util.concurrent.TimeUnit) TestingUtils(org.apache.flink.testutils.TestingUtils) ClusterClient(org.apache.flink.client.program.ClusterClient) JMXReporter(org.apache.flink.metrics.jmx.JMXReporter) Time(org.apache.flink.api.common.time.Time) CheckpointRetentionPolicy(org.apache.flink.runtime.checkpoint.CheckpointRetentionPolicy) Environment(org.apache.flink.runtime.execution.Environment) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) 
Deadline(org.apache.flink.api.common.time.Deadline) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) MBeanServer(javax.management.MBeanServer) ObjectName(javax.management.ObjectName) Test(org.junit.jupiter.api.Test)

Example 22 with ClusterClient

use of org.apache.flink.client.program.ClusterClient in project flink by apache.

the class SavepointITCase method testStopWithFailingSourceInOnePipeline.

/**
 * FLINK-21030
 *
 * <p>Exercises stop-with-savepoint on a job made of two independent pipelines, one of which
 * uses a failing source. The stop request is expected to complete exceptionally; afterwards
 * the test waits until all tasks of the job are running again.
 *
 * @param failingSource the source used for the pipeline named "Failing Source".
 * @param savepointDir directory whose absolute path is passed as the savepoint target of the
 *     stop-with-savepoint call.
 * @param expectedMaximumNumberOfRestarts number of restart attempts handed to the fixed-delay
 *     restart strategy (the delay is {@code 0}).
 * @param exceptionAssertion receives the job's ID and the {@link ExecutionException} thrown by
 *     the client call and returns whether it is the expected failure.
 * @see SavepointITCase#failingPipelineLatch Triggered by the map step of the failing pipeline
 *     once a record passes through it.
 * @see SavepointITCase#succeedingPipelineLatch Triggered by the map step of the succeeding
 *     pipeline once a record passes through it.
 * @throws Exception if an error occurred while running the test.
 */
private static void testStopWithFailingSourceInOnePipeline(InfiniteTestSource failingSource, File savepointDir, int expectedMaximumNumberOfRestarts, BiFunction<JobID, ExecutionException, Boolean> exceptionAssertion) throws Exception {
    final MiniClusterResourceConfiguration clusterConfig =
            new MiniClusterResourceConfiguration.Builder().build();
    final MiniClusterWithClientResource miniCluster =
            new MiniClusterWithClientResource(clusterConfig);

    // Fresh latches for this run; the map lambdas below trigger them.
    failingPipelineLatch = new OneShotLatch();
    succeedingPipelineLatch = new OneShotLatch();

    final StreamExecutionEnvironment execEnv =
            StreamExecutionEnvironment.getExecutionEnvironment();
    execEnv.setParallelism(1);
    execEnv.getConfig()
            .setRestartStrategy(
                    RestartStrategies.fixedDelayRestart(expectedMaximumNumberOfRestarts, 0));

    // Pipeline 1: the failing source.
    execEnv.addSource(failingSource)
            .name("Failing Source")
            .map(record -> {
                failingPipelineLatch.trigger();
                return record;
            })
            .addSink(new DiscardingSink<>());

    // Pipeline 2: a plain infinite source.
    execEnv.addSource(new InfiniteTestSource())
            .name("Succeeding Source")
            .map(record -> {
                succeedingPipelineLatch.trigger();
                return record;
            })
            .addSink(new DiscardingSink<>());

    final JobGraph jobGraph = execEnv.getStreamGraph().getJobGraph();

    miniCluster.before();
    try {
        final ClusterClient<?> clusterClient = miniCluster.getClusterClient();
        final JobID submittedJobId = clusterClient.submitJob(jobGraph).get();

        // Both pipelines have to be RUNNING before a savepoint can be requested, so wait
        // for both latches and then for all tasks of the job.
        failingPipelineLatch.await();
        succeedingPipelineLatch.await();
        waitForAllTaskRunning(miniCluster.getMiniCluster(), submittedJobId, false);

        try {
            clusterClient
                    .stopWithSavepoint(
                            jobGraph.getJobID(),
                            false,
                            savepointDir.getAbsolutePath(),
                            SavepointFormatType.CANONICAL)
                    .get();
            fail("The future should fail exceptionally.");
        } catch (ExecutionException stopFailure) {
            // The predicate ignores its own argument and always inspects the caught exception.
            assertThrowable(
                    stopFailure,
                    ignored -> exceptionAssertion.apply(jobGraph.getJobID(), stopFailure));
        }

        waitUntilAllTasksAreRunning(miniCluster.getRestClusterClient(), jobGraph.getJobID());
    } finally {
        miniCluster.after();
    }
}
Also used : Arrays(java.util.Arrays) SharedObjects(org.apache.flink.testutils.junit.SharedObjects) MemorySize(org.apache.flink.configuration.MemorySize) EmptyRequestBody(org.apache.flink.runtime.rest.messages.EmptyRequestBody) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) ExceptionUtils.findThrowable(org.apache.flink.util.ExceptionUtils.findThrowable) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) TestUtils.submitJobAndWaitForResult(org.apache.flink.test.util.TestUtils.submitJobAndWaitForResult) FSDataOutputStream(org.apache.flink.core.fs.FSDataOutputStream) CheckpointListener(org.apache.flink.api.common.state.CheckpointListener) Duration(java.time.Duration) Map(java.util.Map) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph) ExceptionUtils.assertThrowable(org.apache.flink.util.ExceptionUtils.assertThrowable) RichSourceFunction(org.apache.flink.streaming.api.functions.source.RichSourceFunction) Path(java.nio.file.Path) StateSnapshotContext(org.apache.flink.runtime.state.StateSnapshotContext) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) BoundedOneInput(org.apache.flink.streaming.api.operators.BoundedOneInput) FileSystemFactory(org.apache.flink.core.fs.FileSystemFactory) CountDownLatch(java.util.concurrent.CountDownLatch) JobMessageParameters(org.apache.flink.runtime.rest.messages.JobMessageParameters) Stream(java.util.stream.Stream) ValueState(org.apache.flink.api.common.state.ValueState) ClusterClient(org.apache.flink.client.program.ClusterClient) Assert.assertFalse(org.junit.Assert.assertFalse) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) Time(org.apache.flink.api.common.time.Time) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) FlinkException(org.apache.flink.util.FlinkException) LocalFileSystem(org.apache.flink.core.fs.local.LocalFileSystem) 
JobStatus(org.apache.flink.api.common.JobStatus) KeyedProcessFunction(org.apache.flink.streaming.api.functions.KeyedProcessFunction) TypeSafeDiagnosingMatcher(org.hamcrest.TypeSafeDiagnosingMatcher) TaskManagerOptions(org.apache.flink.configuration.TaskManagerOptions) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Collector(org.apache.flink.util.Collector) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) Before(org.junit.Before) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) Files(java.nio.file.Files) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream) File(java.io.File) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) ExecutionException(java.util.concurrent.ExecutionException) JobID(org.apache.flink.api.common.JobID) Paths(java.nio.file.Paths) Matcher(org.hamcrest.Matcher) Assert(org.junit.Assert) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) Assert.assertEquals(org.junit.Assert.assertEquals) StateBackendOptions(org.apache.flink.configuration.StateBackendOptions) EntropyInjectingTestFileSystem(org.apache.flink.testutils.EntropyInjectingTestFileSystem) Deadline(org.apache.flink.api.common.time.Deadline) ExceptionUtils.findThrowableWithMessage(org.apache.flink.util.ExceptionUtils.findThrowableWithMessage) ClusterOptions(org.apache.flink.configuration.ClusterOptions) FileUtils(org.apache.flink.util.FileUtils) URISyntaxException(java.net.URISyntaxException) BiFunction(java.util.function.BiFunction) 
JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) LoggerFactory(org.slf4j.LoggerFactory) BlockingNoOpInvokable(org.apache.flink.runtime.testtasks.BlockingNoOpInvokable) Random(java.util.Random) FunctionSnapshotContext(org.apache.flink.runtime.state.FunctionSnapshotContext) EmbeddedRocksDBStateBackend(org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend) MapFunction(org.apache.flink.api.common.functions.MapFunction) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Assert.assertThat(org.junit.Assert.assertThat) ListState(org.apache.flink.api.common.state.ListState) CommonTestUtils.waitForAllTaskRunning(org.apache.flink.runtime.testutils.CommonTestUtils.waitForAllTaskRunning) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) TestLogger(org.apache.flink.util.TestLogger) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) Assert.fail(org.junit.Assert.fail) URI(java.net.URI) KeySelector(org.apache.flink.api.java.functions.KeySelector) CheckpointedFunction(org.apache.flink.streaming.api.checkpoint.CheckpointedFunction) FunctionInitializationContext(org.apache.flink.runtime.state.FunctionInitializationContext) Collection(java.util.Collection) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) CheckpointingOptions(org.apache.flink.configuration.CheckpointingOptions) Objects(java.util.Objects) TestingUtils(org.apache.flink.testutils.TestingUtils) List(java.util.List) FileSystem(org.apache.flink.core.fs.FileSystem) FlinkJobNotFoundException(org.apache.flink.runtime.messages.FlinkJobNotFoundException) Optional(java.util.Optional) CheckpointConfig(org.apache.flink.streaming.api.environment.CheckpointConfig) ParallelSourceFunction(org.apache.flink.streaming.api.functions.source.ParallelSourceFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) 
RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) IterativeStream(org.apache.flink.streaming.api.datastream.IterativeStream) CompletableFuture(java.util.concurrent.CompletableFuture) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) RestClusterClient(org.apache.flink.client.program.rest.RestClusterClient) RestoreMode(org.apache.flink.runtime.jobgraph.RestoreMode) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) CompletableFuture.allOf(java.util.concurrent.CompletableFuture.allOf) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) JobDetailsHeaders(org.apache.flink.runtime.rest.messages.job.JobDetailsHeaders) SharedReference(org.apache.flink.testutils.junit.SharedReference) Description(org.hamcrest.Description) Logger(org.slf4j.Logger) LocalRecoverableWriter(org.apache.flink.core.fs.local.LocalRecoverableWriter) DiscardingSink(org.apache.flink.streaming.api.functions.sink.DiscardingSink) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) ExceptionUtils.assertThrowableWithMessage(org.apache.flink.util.ExceptionUtils.assertThrowableWithMessage) DataStream(org.apache.flink.streaming.api.datastream.DataStream) TimeUnit(java.util.concurrent.TimeUnit) Rule(org.junit.Rule) Ignore(org.junit.Ignore) ListCheckpointed(org.apache.flink.streaming.api.checkpoint.ListCheckpointed) FileVisitOption(java.nio.file.FileVisitOption) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) 
OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) ExecutionException(java.util.concurrent.ExecutionException) JobID(org.apache.flink.api.common.JobID)

Aggregations

ClusterClient (org.apache.flink.client.program.ClusterClient)22 Configuration (org.apache.flink.configuration.Configuration)14 Test (org.junit.Test)14 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)10 IOException (java.io.IOException)7 JobID (org.apache.flink.api.common.JobID)7 Arrays (java.util.Arrays)6 CompletableFuture (java.util.concurrent.CompletableFuture)6 ValueState (org.apache.flink.api.common.state.ValueState)6 ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor)6 StandaloneClusterClient (org.apache.flink.client.program.StandaloneClusterClient)6 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)6 List (java.util.List)5 ListState (org.apache.flink.api.common.state.ListState)5 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)5 DataStream (org.apache.flink.streaming.api.datastream.DataStream)5 Collector (org.apache.flink.util.Collector)5 File (java.io.File)4 FileNotFoundException (java.io.FileNotFoundException)4 Collection (java.util.Collection)4