Search in sources :

Example 1 with FlinkDeploymentStatus

use of org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus in project flink-kubernetes-operator by apache.

the class ObserverTest method observeApplicationCluster.

/**
 * Drives an application cluster deployment through repeated observations and checks the
 * JobManager deployment status and job state recorded on the deployment after each one.
 */
@Test
public void observeApplicationCluster() {
    TestingFlinkService service = new TestingFlinkService();
    FlinkOperatorConfiguration operatorConfig = FlinkOperatorConfiguration.fromConfiguration(new Configuration());
    Observer observer = new Observer(service, operatorConfig);
    FlinkDeployment app = TestUtils.buildApplicationCluster();
    Configuration effectiveConfig = FlinkUtils.getEffectiveConfig(app, new Configuration());

    // First observation runs against an empty context, before any status is attached.
    observer.observe(app, TestUtils.createEmptyContext(), effectiveConfig);

    // Attach a fresh status, mark the current spec as reconciled and submit the cluster.
    app.setStatus(new FlinkDeploymentStatus());
    app.getStatus().getReconciliationStatus().setLastReconciledSpec(app.getSpec());
    app.getStatus().setJobStatus(new JobStatus());
    service.submitApplicationCluster(app, effectiveConfig);

    // While the port is reported as not ready, every observation must leave us in DEPLOYING.
    service.setPortReady(false);
    observer.observe(app, readyContext, effectiveConfig);
    assertEquals(JobManagerDeploymentStatus.DEPLOYING, app.getStatus().getJobManagerDeploymentStatus());
    observer.observe(app, readyContext, effectiveConfig);
    assertEquals(JobManagerDeploymentStatus.DEPLOYING, app.getStatus().getJobManagerDeploymentStatus());

    // Once the port is ready, the first observation only reaches DEPLOYED_NOT_READY ...
    service.setPortReady(true);
    observer.observe(app, readyContext, effectiveConfig);
    assertEquals(JobManagerDeploymentStatus.DEPLOYED_NOT_READY, app.getStatus().getJobManagerDeploymentStatus());

    // ... and the following one reports READY together with a RUNNING job.
    observer.observe(app, readyContext, effectiveConfig);
    assertEquals(JobManagerDeploymentStatus.READY, app.getStatus().getJobManagerDeploymentStatus());
    assertEquals(JobState.RUNNING.name(), app.getStatus().getJobStatus().getState());

    // A further observation keeps the same result; the job name equals the deployment name.
    observer.observe(app, readyContext, effectiveConfig);
    assertEquals(JobManagerDeploymentStatus.READY, app.getStatus().getJobManagerDeploymentStatus());
    assertEquals(JobState.RUNNING.name(), app.getStatus().getJobStatus().getState());
    assertEquals(app.getMetadata().getName(), app.getStatus().getJobStatus().getJobName());

    // After clearing the testing service the deployment stays READY but the job state is unknown.
    service.clear();
    observer.observe(app, readyContext, effectiveConfig);
    assertEquals(JobManagerDeploymentStatus.READY, app.getStatus().getJobManagerDeploymentStatus());
    assertEquals(Observer.JOB_STATE_UNKNOWN, app.getStatus().getJobStatus().getState());
}
Also used : JobStatus(org.apache.flink.kubernetes.operator.crd.status.JobStatus) FlinkDeployment(org.apache.flink.kubernetes.operator.crd.FlinkDeployment) FlinkDeploymentStatus(org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus) Configuration(org.apache.flink.configuration.Configuration) FlinkOperatorConfiguration(org.apache.flink.kubernetes.operator.config.FlinkOperatorConfiguration) TestingFlinkService(org.apache.flink.kubernetes.operator.TestingFlinkService) Test(org.junit.jupiter.api.Test)

Example 2 with FlinkDeploymentStatus

use of org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus in project flink-kubernetes-operator by apache.

the class DeploymentValidatorTest method testValidation.

/**
 * Exercises the deployment validator with a series of spec/status mutations.
 *
 * <p>Each case passes a lambda that mutates a deployment to {@code testSuccess} or
 * {@code testError}; the latter also receives the expected error message. Both helpers are
 * defined outside this snippet. NOTE(review): presumably each call starts from a fresh
 * deployment and {@code testError} matches the message against the validator output; confirm
 * against the helper implementations.
 */
@Test
public void testValidation() {
    // Baseline: an unmodified deployment is expected to pass.
    testSuccess(dep -> {
    });
    // Test job validation
    testError(dep -> dep.getSpec().getJob().setJarURI(null), "Jar URI must be defined");
    testError(dep -> dep.getSpec().getJob().setState(JobState.SUSPENDED), "Job must start in running state");
    testError(dep -> dep.getSpec().getJob().setParallelism(0), "Job parallelism must be larger than 0");
    testError(dep -> dep.getSpec().getJob().setParallelism(-1), "Job parallelism must be larger than 0");
    // LAST_STATE upgrade mode combined with an empty Flink configuration (no HA settings).
    testError(dep -> {
        dep.getSpec().setFlinkConfiguration(new HashMap<>());
        dep.getSpec().getJob().setUpgradeMode(UpgradeMode.LAST_STATE);
    }, "Job could not be upgraded with last-state while HA disabled");
    // Test conf validation
    testSuccess(dep -> dep.getSpec().setFlinkConfiguration(Collections.singletonMap("random", "config")));
    testError(dep -> dep.getSpec().setFlinkConfiguration(Collections.singletonMap(KubernetesConfigOptions.NAMESPACE.key(), "myns")), "Forbidden Flink config key");
    // Test log config validation
    testSuccess(dep -> dep.getSpec().setLogConfiguration(Map.of(Constants.CONFIG_FILE_LOG4J_NAME, "rootLogger.level = INFO")));
    testError(dep -> dep.getSpec().setLogConfiguration(Map.of("random", "config")), "Invalid log config key");
    // Test JobManager replica validation: two replicas with an empty Flink configuration,
    // then a replica count of zero.
    testError(dep -> {
        dep.getSpec().setFlinkConfiguration(new HashMap<>());
        dep.getSpec().getJobManager().setReplicas(2);
    }, "High availability should be enabled when starting standby JobManagers.");
    testError(dep -> dep.getSpec().getJobManager().setReplicas(0), "JobManager replicas should not be configured less than one.");
    // Test resource validation
    testSuccess(dep -> dep.getSpec().getTaskManager().getResource().setMemory("1G"));
    testSuccess(dep -> dep.getSpec().getTaskManager().getResource().setMemory("100"));
    testError(dep -> dep.getSpec().getTaskManager().getResource().setMemory("invalid"), "TaskManager resource memory parse error");
    testError(dep -> dep.getSpec().getJobManager().getResource().setMemory("invalid"), "JobManager resource memory parse error");
    testError(dep -> dep.getSpec().getTaskManager().getResource().setMemory(null), "TaskManager resource memory must be defined");
    testError(dep -> dep.getSpec().getJobManager().getResource().setMemory(null), "JobManager resource memory must be defined");
    // Test savepoint restore validation
    // Success case: last reconciled spec is SUSPENDED, upgrade mode is SAVEPOINT and a last
    // savepoint ("sp") is recorded in the job status.
    testSuccess(dep -> {
        dep.setStatus(new FlinkDeploymentStatus());
        dep.getStatus().setJobStatus(new JobStatus());
        dep.getStatus().getJobStatus().getSavepointInfo().setLastSavepoint(Savepoint.of("sp"));
        dep.getStatus().setReconciliationStatus(new ReconciliationStatus());
        dep.getStatus().getReconciliationStatus().setLastReconciledSpec(ReconciliationUtils.clone(dep.getSpec()));
        dep.getStatus().getReconciliationStatus().getLastReconciledSpec().getJob().setState(JobState.SUSPENDED);
        dep.getSpec().getJob().setUpgradeMode(UpgradeMode.SAVEPOINT);
    });
    // Error case: identical setup except that no last savepoint is set.
    testError(dep -> {
        dep.setStatus(new FlinkDeploymentStatus());
        dep.getStatus().setJobStatus(new JobStatus());
        dep.getStatus().setReconciliationStatus(new ReconciliationStatus());
        dep.getStatus().getReconciliationStatus().setLastReconciledSpec(ReconciliationUtils.clone(dep.getSpec()));
        dep.getStatus().getReconciliationStatus().getLastReconciledSpec().getJob().setState(JobState.SUSPENDED);
        dep.getSpec().getJob().setUpgradeMode(UpgradeMode.SAVEPOINT);
    }, "Cannot perform savepoint restore without a valid savepoint");
    // Test cluster type validation
    // Last reconciled spec has a job, the current spec has none.
    testError(dep -> {
        dep.setStatus(new FlinkDeploymentStatus());
        dep.getStatus().setJobStatus(new JobStatus());
        dep.getStatus().setReconciliationStatus(new ReconciliationStatus());
        dep.getStatus().getReconciliationStatus().setLastReconciledSpec(ReconciliationUtils.clone(dep.getSpec()));
        dep.getSpec().setJob(null);
    }, "Cannot switch from job to session cluster");
    // Last reconciled spec has no job, the current spec has one.
    testError(dep -> {
        dep.setStatus(new FlinkDeploymentStatus());
        dep.getStatus().setJobStatus(new JobStatus());
        dep.getStatus().setReconciliationStatus(new ReconciliationStatus());
        dep.getStatus().getReconciliationStatus().setLastReconciledSpec(ReconciliationUtils.clone(dep.getSpec()));
        dep.getStatus().getReconciliationStatus().getLastReconciledSpec().setJob(null);
    }, "Cannot switch from session to job cluster");
}
Also used : JobStatus(org.apache.flink.kubernetes.operator.crd.status.JobStatus) FlinkDeploymentStatus(org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus) ReconciliationStatus(org.apache.flink.kubernetes.operator.crd.status.ReconciliationStatus) Test(org.junit.jupiter.api.Test)

Example 3 with FlinkDeploymentStatus

use of org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus in project flink-kubernetes-operator by apache.

the class TestUtils method buildSessionCluster.

/**
 * Creates a {@link FlinkDeployment} for tests, populated with an empty
 * {@link FlinkDeploymentStatus}, metadata naming it {@code "test-cluster"} in
 * {@code TEST_NAMESPACE}, and the spec returned by {@code getTestFlinkDeploymentSpec()}.
 *
 * @return the newly built deployment
 */
public static FlinkDeployment buildSessionCluster() {
    final FlinkDeployment sessionCluster = new FlinkDeployment();

    sessionCluster.setStatus(new FlinkDeploymentStatus());
    sessionCluster.setMetadata(
            new ObjectMetaBuilder()
                    .withName("test-cluster")
                    .withNamespace(TEST_NAMESPACE)
                    .build());
    sessionCluster.setSpec(getTestFlinkDeploymentSpec());

    return sessionCluster;
}
Also used : FlinkDeployment(org.apache.flink.kubernetes.operator.crd.FlinkDeployment) FlinkDeploymentStatus(org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus) ObjectMetaBuilder(io.fabric8.kubernetes.api.model.ObjectMetaBuilder)

Example 4 with FlinkDeploymentStatus

use of org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus in project flink-kubernetes-operator by apache.

the class ApplicationReconcilerTest method testRandomJobResultStorePath.

/**
 * Deploys the same job twice with an HA storage path configured and checks that the job result
 * store path read from the deploy configuration starts with that HA path both times, yet
 * differs between the two deployments.
 */
@Test
public void testRandomJobResultStorePath() throws Exception {
    TestingFlinkService service = new TestingFlinkService();
    ApplicationReconciler appReconciler = new ApplicationReconciler(kubernetesClient, service, configManager);
    FlinkDeployment app = TestUtils.buildApplicationCluster();
    final String haRoot = "file:///flink-data/ha";
    app.getSpec().getFlinkConfiguration().put(HighAvailabilityOptions.HA_STORAGE_PATH.key(), haRoot);

    ObjectMeta meta = app.getMetadata();
    FlinkDeploymentStatus appStatus = app.getStatus();
    FlinkDeploymentSpec appSpec = app.getSpec();
    JobSpec job = appSpec.getJob();
    Configuration deployConf = configManager.getDeployConfig(meta, appSpec);
    final String finishedState = org.apache.flink.api.common.JobStatus.FINISHED.name();

    // First deployment: status is put into FINISHED / READY before deploying.
    appStatus.getJobStatus().setState(finishedState);
    appStatus.setJobManagerDeploymentStatus(JobManagerDeploymentStatus.READY);
    appReconciler.deployFlinkJob(meta, job, appStatus, deployConf, Optional.empty(), false);
    String firstPath = deployConf.get(JobResultStoreOptions.STORAGE_PATH);
    Assertions.assertTrue(firstPath.startsWith(haRoot));

    // Second deployment with the same inputs and the same configuration object.
    appStatus.getJobStatus().setState(finishedState);
    appStatus.setJobManagerDeploymentStatus(JobManagerDeploymentStatus.READY);
    appReconciler.deployFlinkJob(meta, job, appStatus, deployConf, Optional.empty(), false);
    String secondPath = deployConf.get(JobResultStoreOptions.STORAGE_PATH);
    Assertions.assertTrue(secondPath.startsWith(haRoot));

    // Each deployment must have produced its own storage path.
    assertNotEquals(firstPath, secondPath);
}
Also used : ObjectMeta(io.fabric8.kubernetes.api.model.ObjectMeta) FlinkDeployment(org.apache.flink.kubernetes.operator.crd.FlinkDeployment) FlinkDeploymentStatus(org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus) FlinkDeploymentSpec(org.apache.flink.kubernetes.operator.crd.spec.FlinkDeploymentSpec) Configuration(org.apache.flink.configuration.Configuration) TestingFlinkService(org.apache.flink.kubernetes.operator.TestingFlinkService) JobSpec(org.apache.flink.kubernetes.operator.crd.spec.JobSpec) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 5 with FlinkDeploymentStatus

use of org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus in project flink-kubernetes-operator by apache.

the class ApplicationReconciler method reconcile.

/**
 * Reconciles the given application deployment against its last reconciled spec.
 *
 * <p>Flow, as implemented below:
 * <ol>
 *   <li>No last reconciled spec: deploy the job, update ingress rules, record success with
 *       {@link JobState#RUNNING} and return.</li>
 *   <li>A savepoint is in progress and
 *       {@code JOB_UPGRADE_IGNORE_PENDING_SAVEPOINT} is not enabled: return without changes.</li>
 *   <li>Spec changed: cancel a running job and/or restore a suspended one, then record the
 *       resulting state and update ingress rules.</li>
 *   <li>Spec unchanged: check, in this order, rollback, JobManager deployment recovery, and
 *       savepoint triggering.</li>
 * </ol>
 *
 * @param flinkApp deployment whose metadata, spec and status are read (and updated)
 * @param context reconciliation context; not referenced in this method body
 * @throws Exception propagated from the deploy/cancel/restore helpers invoked here
 */
@Override
public void reconcile(FlinkDeployment flinkApp, Context context) throws Exception {
    ObjectMeta deployMeta = flinkApp.getMetadata();
    FlinkDeploymentStatus status = flinkApp.getStatus();
    ReconciliationStatus<FlinkDeploymentSpec> reconciliationStatus = status.getReconciliationStatus();
    FlinkDeploymentSpec lastReconciledSpec = reconciliationStatus.deserializeLastReconciledSpec();
    FlinkDeploymentSpec currentDeploySpec = flinkApp.getSpec();
    JobSpec desiredJobSpec = currentDeploySpec.getJob();
    Configuration deployConfig = configManager.getDeployConfig(deployMeta, currentDeploySpec);
    // A null last reconciled spec is treated as the very first deployment.
    if (lastReconciledSpec == null) {
        LOG.debug("Deploying application for the first time");
        deployFlinkJob(deployMeta, desiredJobSpec, status, deployConfig, Optional.ofNullable(desiredJobSpec.getInitialSavepointPath()), false);
        IngressUtils.updateIngressRules(deployMeta, currentDeploySpec, deployConfig, kubernetesClient);
        ReconciliationUtils.updateForSpecReconciliationSuccess(flinkApp, JobState.RUNNING);
        return;
    }
    // Unless configured to ignore it, a pending savepoint postpones any further reconciliation.
    if (!deployConfig.getBoolean(KubernetesOperatorConfigOptions.JOB_UPGRADE_IGNORE_PENDING_SAVEPOINT) && SavepointUtils.savepointInProgress(status.getJobStatus())) {
        LOG.info("Delaying job reconciliation until pending savepoint is completed.");
        return;
    }
    Configuration observeConfig = configManager.getObserveConfig(flinkApp);
    boolean specChanged = !currentDeploySpec.equals(lastReconciledSpec);
    if (specChanged) {
        // Nothing to do when the helper reports that the new spec is already deployed.
        if (newSpecIsAlreadyDeployed(flinkApp)) {
            return;
        }
        LOG.debug("Detected spec change, starting upgrade process.");
        // "Current" state is taken from the last reconciled spec, "desired" from the new spec.
        JobState currentJobState = lastReconciledSpec.getJob().getState();
        JobState desiredJobState = desiredJobSpec.getState();
        JobState stateAfterReconcile = currentJobState;
        if (currentJobState == JobState.RUNNING) {
            if (desiredJobState == JobState.RUNNING) {
                LOG.info("Upgrading/Restarting running job, suspending first...");
            }
            Optional<UpgradeMode> availableUpgradeMode = getAvailableUpgradeMode(flinkApp, deployConfig);
            // No upgrade mode available: leave everything untouched and return.
            if (availableUpgradeMode.isEmpty()) {
                return;
            }
            // We must record the upgrade mode used to the status later
            desiredJobSpec.setUpgradeMode(availableUpgradeMode.get());
            flinkService.cancelJob(flinkApp, availableUpgradeMode.get());
            stateAfterReconcile = JobState.SUSPENDED;
        }
        // currentJobState is not reassigned above, so a job that was RUNNING is only cancelled
        // in this pass; this restore branch applies only when the last reconciled state was
        // already SUSPENDED.
        if (currentJobState == JobState.SUSPENDED && desiredJobState == JobState.RUNNING) {
            restoreJob(deployMeta, desiredJobSpec, status, deployConfig, // We decide to enforce HA based on how job was previously suspended
            lastReconciledSpec.getJob().getUpgradeMode() == UpgradeMode.LAST_STATE);
            stateAfterReconcile = JobState.RUNNING;
        }
        ReconciliationUtils.updateForSpecReconciliationSuccess(flinkApp, stateAfterReconcile);
        IngressUtils.updateIngressRules(deployMeta, currentDeploySpec, deployConfig, kubernetesClient);
    } else if (ReconciliationUtils.shouldRollBack(flinkService, reconciliationStatus, observeConfig)) {
        // Spec unchanged: rollback takes precedence over recovery and savepoint triggering.
        rollbackApplication(flinkApp);
    } else if (ReconciliationUtils.shouldRecoverDeployment(observeConfig, flinkApp)) {
        recoverJmDeployment(flinkApp, observeConfig);
    } else {
        // Only log "nothing to do" when no savepoint was triggered either.
        if (!SavepointUtils.triggerSavepointIfNeeded(flinkService, flinkApp, observeConfig)) {
            LOG.info("Deployment is fully reconciled, nothing to do.");
        }
    }
}
Also used : ObjectMeta(io.fabric8.kubernetes.api.model.ObjectMeta) FlinkDeploymentStatus(org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus) FlinkDeploymentSpec(org.apache.flink.kubernetes.operator.crd.spec.FlinkDeploymentSpec) Configuration(org.apache.flink.configuration.Configuration) JobState(org.apache.flink.kubernetes.operator.crd.spec.JobState) JobSpec(org.apache.flink.kubernetes.operator.crd.spec.JobSpec) UpgradeMode(org.apache.flink.kubernetes.operator.crd.spec.UpgradeMode)

Aggregations

FlinkDeploymentStatus (org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus): 17; FlinkDeployment (org.apache.flink.kubernetes.operator.crd.FlinkDeployment): 8; FlinkDeploymentSpec (org.apache.flink.kubernetes.operator.crd.spec.FlinkDeploymentSpec): 7; Configuration (org.apache.flink.configuration.Configuration): 6; Test (org.junit.jupiter.api.Test): 5; JobStatus (org.apache.flink.kubernetes.operator.crd.status.JobStatus): 4; TestingFlinkService (org.apache.flink.kubernetes.operator.TestingFlinkService): 3; JobSpec (org.apache.flink.kubernetes.operator.crd.spec.JobSpec): 3; DeploymentFailedException (org.apache.flink.kubernetes.operator.exception.DeploymentFailedException): 3; ObjectMeta (io.fabric8.kubernetes.api.model.ObjectMeta): 2; ObjectMetaBuilder (io.fabric8.kubernetes.api.model.ObjectMetaBuilder): 2; Deployment (io.fabric8.kubernetes.api.model.apps.Deployment): 2; DeploymentSpec (io.fabric8.kubernetes.api.model.apps.DeploymentSpec): 2; DeploymentStatus (io.fabric8.kubernetes.api.model.apps.DeploymentStatus): 2; ReconciliationStatus (org.apache.flink.kubernetes.operator.crd.status.ReconciliationStatus): 2; DeploymentCondition (io.fabric8.kubernetes.api.model.apps.DeploymentCondition): 1; FlinkOperatorConfiguration (org.apache.flink.kubernetes.operator.config.FlinkOperatorConfiguration): 1; FlinkControllerConfig (org.apache.flink.kubernetes.operator.controller.FlinkControllerConfig): 1; FlinkDeploymentController (org.apache.flink.kubernetes.operator.controller.FlinkDeploymentController): 1; IngressSpec (org.apache.flink.kubernetes.operator.crd.spec.IngressSpec): 1