Usage of org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus in the Apache flink-kubernetes-operator project: class ObserverTest, method observeApplicationCluster.
@Test
public void observeApplicationCluster() {
    // Testing double for the Flink service plus the observer under test.
    TestingFlinkService service = new TestingFlinkService();
    Observer statusObserver =
            new Observer(service, FlinkOperatorConfiguration.fromConfiguration(new Configuration()));
    FlinkDeployment appDeployment = TestUtils.buildApplicationCluster();
    Configuration effectiveConf = FlinkUtils.getEffectiveConfig(appDeployment, new Configuration());

    // Observing with an empty context before anything is submitted.
    statusObserver.observe(appDeployment, TestUtils.createEmptyContext(), effectiveConf);

    // Reset status to a freshly reconciled state and submit the application.
    appDeployment.setStatus(new FlinkDeploymentStatus());
    appDeployment.getStatus().getReconciliationStatus().setLastReconciledSpec(appDeployment.getSpec());
    appDeployment.getStatus().setJobStatus(new JobStatus());
    service.submitApplicationCluster(appDeployment, effectiveConf);

    // While the REST port is not reachable the deployment stays DEPLOYING,
    // no matter how many times we observe.
    service.setPortReady(false);
    statusObserver.observe(appDeployment, readyContext, effectiveConf);
    assertEquals(JobManagerDeploymentStatus.DEPLOYING, appDeployment.getStatus().getJobManagerDeploymentStatus());
    statusObserver.observe(appDeployment, readyContext, effectiveConf);
    assertEquals(JobManagerDeploymentStatus.DEPLOYING, appDeployment.getStatus().getJobManagerDeploymentStatus());

    // Once the port becomes ready the observer needs one extra pass:
    // first DEPLOYED_NOT_READY, then READY on the following observation.
    service.setPortReady(true);
    statusObserver.observe(appDeployment, readyContext, effectiveConf);
    assertEquals(JobManagerDeploymentStatus.DEPLOYED_NOT_READY, appDeployment.getStatus().getJobManagerDeploymentStatus());
    statusObserver.observe(appDeployment, readyContext, effectiveConf);
    assertEquals(JobManagerDeploymentStatus.READY, appDeployment.getStatus().getJobManagerDeploymentStatus());
    assertEquals(JobState.RUNNING.name(), appDeployment.getStatus().getJobStatus().getState());

    // A READY deployment stays READY on subsequent observations and the
    // job name is picked up from the CR metadata.
    statusObserver.observe(appDeployment, readyContext, effectiveConf);
    assertEquals(JobManagerDeploymentStatus.READY, appDeployment.getStatus().getJobManagerDeploymentStatus());
    assertEquals(JobState.RUNNING.name(), appDeployment.getStatus().getJobStatus().getState());
    assertEquals(appDeployment.getMetadata().getName(), appDeployment.getStatus().getJobStatus().getJobName());

    // If listing jobs fails (service cleared), the state flips to unknown
    // while the JM deployment itself is still considered READY.
    service.clear();
    statusObserver.observe(appDeployment, readyContext, effectiveConf);
    assertEquals(JobManagerDeploymentStatus.READY, appDeployment.getStatus().getJobManagerDeploymentStatus());
    assertEquals(Observer.JOB_STATE_UNKNOWN, appDeployment.getStatus().getJobStatus().getState());
}
Usage of org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus in the Apache flink-kubernetes-operator project: class DeploymentValidatorTest, method testValidation.
@Test
public void testValidation() {
    // An untouched default spec must validate cleanly.
    testSuccess(d -> {
    });

    // --- Job spec validation ---
    testError(d -> d.getSpec().getJob().setJarURI(null), "Jar URI must be defined");
    testError(d -> d.getSpec().getJob().setState(JobState.SUSPENDED), "Job must start in running state");
    testError(d -> d.getSpec().getJob().setParallelism(0), "Job parallelism must be larger than 0");
    testError(d -> d.getSpec().getJob().setParallelism(-1), "Job parallelism must be larger than 0");
    testError(
            d -> {
                d.getSpec().setFlinkConfiguration(new HashMap<>());
                d.getSpec().getJob().setUpgradeMode(UpgradeMode.LAST_STATE);
            },
            "Job could not be upgraded with last-state while HA disabled");

    // --- Flink configuration validation ---
    testSuccess(d -> d.getSpec().setFlinkConfiguration(Collections.singletonMap("random", "config")));
    testError(
            d -> d.getSpec()
                    .setFlinkConfiguration(
                            Collections.singletonMap(KubernetesConfigOptions.NAMESPACE.key(), "myns")),
            "Forbidden Flink config key");

    // --- Logging configuration validation ---
    testSuccess(d -> d.getSpec().setLogConfiguration(Map.of(Constants.CONFIG_FILE_LOG4J_NAME, "rootLogger.level = INFO")));
    testError(d -> d.getSpec().setLogConfiguration(Map.of("random", "config")), "Invalid log config key");

    // --- JobManager replica validation ---
    testError(
            d -> {
                d.getSpec().setFlinkConfiguration(new HashMap<>());
                d.getSpec().getJobManager().setReplicas(2);
            },
            "High availability should be enabled when starting standby JobManagers.");
    testError(d -> d.getSpec().getJobManager().setReplicas(0), "JobManager replicas should not be configured less than one.");

    // --- Resource validation ---
    testSuccess(d -> d.getSpec().getTaskManager().getResource().setMemory("1G"));
    testSuccess(d -> d.getSpec().getTaskManager().getResource().setMemory("100"));
    testError(d -> d.getSpec().getTaskManager().getResource().setMemory("invalid"), "TaskManager resource memory parse error");
    testError(d -> d.getSpec().getJobManager().getResource().setMemory("invalid"), "JobManager resource memory parse error");
    testError(d -> d.getSpec().getTaskManager().getResource().setMemory(null), "TaskManager resource memory must be defined");
    testError(d -> d.getSpec().getJobManager().getResource().setMemory(null), "JobManager resource memory must be defined");

    // --- Savepoint restore validation ---
    // Restoring from savepoint succeeds when a last savepoint is recorded.
    testSuccess(d -> {
        d.setStatus(new FlinkDeploymentStatus());
        d.getStatus().setJobStatus(new JobStatus());
        d.getStatus().getJobStatus().getSavepointInfo().setLastSavepoint(Savepoint.of("sp"));
        d.getStatus().setReconciliationStatus(new ReconciliationStatus());
        d.getStatus().getReconciliationStatus().setLastReconciledSpec(ReconciliationUtils.clone(d.getSpec()));
        d.getStatus().getReconciliationStatus().getLastReconciledSpec().getJob().setState(JobState.SUSPENDED);
        d.getSpec().getJob().setUpgradeMode(UpgradeMode.SAVEPOINT);
    });
    // ...and fails when no savepoint is available.
    testError(d -> {
        d.setStatus(new FlinkDeploymentStatus());
        d.getStatus().setJobStatus(new JobStatus());
        d.getStatus().setReconciliationStatus(new ReconciliationStatus());
        d.getStatus().getReconciliationStatus().setLastReconciledSpec(ReconciliationUtils.clone(d.getSpec()));
        d.getStatus().getReconciliationStatus().getLastReconciledSpec().getJob().setState(JobState.SUSPENDED);
        d.getSpec().getJob().setUpgradeMode(UpgradeMode.SAVEPOINT);
    }, "Cannot perform savepoint restore without a valid savepoint");

    // --- Cluster type validation: switching between job and session is forbidden ---
    testError(d -> {
        d.setStatus(new FlinkDeploymentStatus());
        d.getStatus().setJobStatus(new JobStatus());
        d.getStatus().setReconciliationStatus(new ReconciliationStatus());
        d.getStatus().getReconciliationStatus().setLastReconciledSpec(ReconciliationUtils.clone(d.getSpec()));
        d.getSpec().setJob(null);
    }, "Cannot switch from job to session cluster");
    testError(d -> {
        d.setStatus(new FlinkDeploymentStatus());
        d.getStatus().setJobStatus(new JobStatus());
        d.getStatus().setReconciliationStatus(new ReconciliationStatus());
        d.getStatus().getReconciliationStatus().setLastReconciledSpec(ReconciliationUtils.clone(d.getSpec()));
        d.getStatus().getReconciliationStatus().getLastReconciledSpec().setJob(null);
    }, "Cannot switch from session to job cluster");
}
Usage of org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus in the Apache flink-kubernetes-operator project: class TestUtils, method buildSessionCluster.
/** Builds a minimal session-cluster CR: fixed name/namespace, default test spec, empty status. */
public static FlinkDeployment buildSessionCluster() {
    FlinkDeployment sessionCluster = new FlinkDeployment();
    sessionCluster.setMetadata(
            new ObjectMetaBuilder()
                    .withName("test-cluster")
                    .withNamespace(TEST_NAMESPACE)
                    .build());
    sessionCluster.setSpec(getTestFlinkDeploymentSpec());
    sessionCluster.setStatus(new FlinkDeploymentStatus());
    return sessionCluster;
}
Usage of org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus in the Apache flink-kubernetes-operator project: class ApplicationReconcilerTest, method testRandomJobResultStorePath.
@Test
public void testRandomJobResultStorePath() throws Exception {
    TestingFlinkService service = new TestingFlinkService();
    ApplicationReconciler appReconciler =
            new ApplicationReconciler(kubernetesClient, service, configManager);
    FlinkDeployment application = TestUtils.buildApplicationCluster();
    final String haPath = "file:///flink-data/ha";
    application.getSpec().getFlinkConfiguration().put(HighAvailabilityOptions.HA_STORAGE_PATH.key(), haPath);

    ObjectMeta meta = application.getMetadata();
    FlinkDeploymentStatus appStatus = application.getStatus();
    FlinkDeploymentSpec appSpec = application.getSpec();
    JobSpec job = appSpec.getJob();
    Configuration deployConf = configManager.getDeployConfig(meta, appSpec);

    // First deployment: the generated job-result-store path must live under the HA dir.
    appStatus.getJobStatus().setState(org.apache.flink.api.common.JobStatus.FINISHED.name());
    appStatus.setJobManagerDeploymentStatus(JobManagerDeploymentStatus.READY);
    appReconciler.deployFlinkJob(meta, job, appStatus, deployConf, Optional.empty(), false);
    String firstPath = deployConf.get(JobResultStoreOptions.STORAGE_PATH);
    Assertions.assertTrue(firstPath.startsWith(haPath));

    // Second deployment: still under the HA dir, but randomized so it differs from the first.
    appStatus.getJobStatus().setState(org.apache.flink.api.common.JobStatus.FINISHED.name());
    appStatus.setJobManagerDeploymentStatus(JobManagerDeploymentStatus.READY);
    appReconciler.deployFlinkJob(meta, job, appStatus, deployConf, Optional.empty(), false);
    String secondPath = deployConf.get(JobResultStoreOptions.STORAGE_PATH);
    Assertions.assertTrue(secondPath.startsWith(haPath));
    assertNotEquals(firstPath, secondPath);
}
Usage of org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus in the Apache flink-kubernetes-operator project: class ApplicationReconciler, method reconcile.
// Reconciles the observed FlinkDeployment against its desired spec. Handles, in order:
// first-time deployment, pending-savepoint delay, spec-change upgrades (suspend/restore),
// rollback, JobManager deployment recovery, and on-demand savepoint triggering.
@Override
public void reconcile(FlinkDeployment flinkApp, Context context) throws Exception {
ObjectMeta deployMeta = flinkApp.getMetadata();
FlinkDeploymentStatus status = flinkApp.getStatus();
ReconciliationStatus<FlinkDeploymentSpec> reconciliationStatus = status.getReconciliationStatus();
FlinkDeploymentSpec lastReconciledSpec = reconciliationStatus.deserializeLastReconciledSpec();
FlinkDeploymentSpec currentDeploySpec = flinkApp.getSpec();
JobSpec desiredJobSpec = currentDeploySpec.getJob();
Configuration deployConfig = configManager.getDeployConfig(deployMeta, currentDeploySpec);
// No previously reconciled spec recorded -> this is the very first deployment.
if (lastReconciledSpec == null) {
LOG.debug("Deploying application for the first time");
// Initial deploy may restore from a user-provided savepoint path if one is set in the spec.
deployFlinkJob(deployMeta, desiredJobSpec, status, deployConfig, Optional.ofNullable(desiredJobSpec.getInitialSavepointPath()), false);
IngressUtils.updateIngressRules(deployMeta, currentDeploySpec, deployConfig, kubernetesClient);
ReconciliationUtils.updateForSpecReconciliationSuccess(flinkApp, JobState.RUNNING);
return;
}
// Unless explicitly configured to ignore them, wait for any in-flight savepoint
// to complete before acting on spec changes.
if (!deployConfig.getBoolean(KubernetesOperatorConfigOptions.JOB_UPGRADE_IGNORE_PENDING_SAVEPOINT) && SavepointUtils.savepointInProgress(status.getJobStatus())) {
LOG.info("Delaying job reconciliation until pending savepoint is completed.");
return;
}
Configuration observeConfig = configManager.getObserveConfig(flinkApp);
// Any difference between the desired spec and the last reconciled spec triggers the upgrade path.
boolean specChanged = !currentDeploySpec.equals(lastReconciledSpec);
if (specChanged) {
// Skip if the new spec has effectively been deployed already (e.g. a previous
// reconcile pass got this far before the status update landed).
if (newSpecIsAlreadyDeployed(flinkApp)) {
return;
}
LOG.debug("Detected spec change, starting upgrade process.");
JobState currentJobState = lastReconciledSpec.getJob().getState();
JobState desiredJobState = desiredJobSpec.getState();
JobState stateAfterReconcile = currentJobState;
// A running job must be suspended (cancelled) before it can be upgraded or restarted.
if (currentJobState == JobState.RUNNING) {
if (desiredJobState == JobState.RUNNING) {
LOG.info("Upgrading/Restarting running job, suspending first...");
}
// getAvailableUpgradeMode presumably decides how the job can safely be taken down
// (savepoint / last-state / stateless); empty means we cannot upgrade right now,
// so bail out and retry on a later reconcile pass.
Optional<UpgradeMode> availableUpgradeMode = getAvailableUpgradeMode(flinkApp, deployConfig);
if (availableUpgradeMode.isEmpty()) {
return;
}
// We must record the upgrade mode used to the status later
desiredJobSpec.setUpgradeMode(availableUpgradeMode.get());
flinkService.cancelJob(flinkApp, availableUpgradeMode.get());
stateAfterReconcile = JobState.SUSPENDED;
}
// A suspended job (either just cancelled above, or suspended in an earlier pass)
// is restored when the desired state is RUNNING.
if (currentJobState == JobState.SUSPENDED && desiredJobState == JobState.RUNNING) {
restoreJob(deployMeta, desiredJobSpec, status, deployConfig, // We decide to enforce HA based on how job was previously suspended
lastReconciledSpec.getJob().getUpgradeMode() == UpgradeMode.LAST_STATE);
stateAfterReconcile = JobState.RUNNING;
}
ReconciliationUtils.updateForSpecReconciliationSuccess(flinkApp, stateAfterReconcile);
IngressUtils.updateIngressRules(deployMeta, currentDeploySpec, deployConfig, kubernetesClient);
} else if (ReconciliationUtils.shouldRollBack(flinkService, reconciliationStatus, observeConfig)) {
// Spec unchanged but the deployment is deemed unhealthy -> roll back to the last stable spec.
rollbackApplication(flinkApp);
} else if (ReconciliationUtils.shouldRecoverDeployment(observeConfig, flinkApp)) {
// JobManager deployment is missing/broken while the spec is unchanged -> redeploy it.
recoverJmDeployment(flinkApp, observeConfig);
} else {
// Nothing structural to do; optionally trigger a periodic/manual savepoint.
if (!SavepointUtils.triggerSavepointIfNeeded(flinkService, flinkApp, observeConfig)) {
LOG.info("Deployment is fully reconciled, nothing to do.");
}
}
}
Aggregations