use of org.apache.flink.kubernetes.operator.crd.FlinkDeployment in project flink-kubernetes-operator by apache.
the class FlinkValidator method validate.
/**
 * Validates an incoming resource by converting it to a {@link FlinkDeployment} and running the
 * deployment validator on it.
 *
 * @param resource the generic Kubernetes resource to check
 * @param operation the admission operation; not consulted by this implementation
 * @throws NotAllowedException carrying the validator's message when the validator reports an error
 */
@Override
public void validate(GenericKubernetesResource resource, Operation operation) throws NotAllowedException {
    LOG.debug("Validating resource {}", resource);

    final FlinkDeployment candidate = objectMapper.convertValue(resource, FlinkDeployment.class);
    final Optional<String> problem = deploymentValidator.validate(candidate);

    // Guard clause: nothing reported, so the resource is accepted.
    if (!problem.isPresent()) {
        return;
    }
    throw new NotAllowedException(problem.get());
}
use of org.apache.flink.kubernetes.operator.crd.FlinkDeployment in project flink-kubernetes-operator by apache.
the class ApplicationObserverTest method observeSavepoint.
/**
 * Walks an application deployment through a sequence of savepoint scenarios and checks, after each
 * observation, the savepoint info stored in the job status and the events visible through the
 * Kubernetes client.
 */
@Test
public void observeSavepoint() throws Exception {
    final TestingFlinkService flinkService = new TestingFlinkService(kubernetesClient);
    final ApplicationObserver observer =
            new ApplicationObserver(
                    null, flinkService, configManager, new TestingStatusHelper<>());
    final FlinkDeployment deployment = TestUtils.buildApplicationCluster();
    final Configuration conf =
            configManager.getDeployConfig(deployment.getMetadata(), deployment.getSpec());

    flinkService.submitApplicationCluster(deployment.getSpec().getJob(), conf, false);
    bringToReadyStatus(deployment);
    assertTrue(ReconciliationUtils.isJobRunning(deployment.getStatus()));

    // Pending savepoint: the trigger is recorded and still in progress after one observation.
    triggerManualSavepointForObserveTest(flinkService, deployment, conf);
    assertEquals("trigger_0", recordedTriggerIdOf(deployment));
    observer.observe(deployment, readyContext);
    assertTrue(isSavepointPendingFor(deployment));

    // Savepoint error within grace period: the trigger is cleared but no event is listed.
    deployment.getStatus().getJobStatus().getSavepointInfo().setTriggerId("unknown");
    assertEquals(0, listedEventCountFor(deployment));
    observer.observe(deployment, readyContext);
    assertFalse(isSavepointPendingFor(deployment));
    assertEquals(0, listedEventCountFor(deployment));

    // Same unknown trigger, now MANUAL and timestamped one hour in the past: one event appears.
    deployment.getStatus().getJobStatus().getSavepointInfo().setTriggerId("unknown");
    deployment
            .getStatus()
            .getJobStatus()
            .getSavepointInfo()
            .setTriggerType(SavepointTriggerType.MANUAL);
    deployment
            .getStatus()
            .getJobStatus()
            .getSavepointInfo()
            .setTriggerTimestamp(Instant.now().minus(Duration.ofHours(1)).toEpochMilli());
    observer.observe(deployment, readyContext);
    assertFalse(isSavepointPendingFor(deployment));
    assertEquals(1, listedEventCountFor(deployment));
    assertEquals(
            1,
            kubernetesClient
                    .v1()
                    .events()
                    .inNamespace(deployment.getMetadata().getNamespace())
                    .list()
                    .getItems()
                    .get(0)
                    .getCount());

    // Savepoint success: after two observations the first savepoint location is stored.
    triggerManualSavepointForObserveTest(flinkService, deployment, conf);
    assertEquals("trigger_1", recordedTriggerIdOf(deployment));
    observer.observe(deployment, readyContext);
    observer.observe(deployment, readyContext);
    assertEquals("savepoint_0", lastSavepointLocationOf(deployment));
    assertFalse(isSavepointPendingFor(deployment));

    // Second attempt success.
    triggerManualSavepointForObserveTest(flinkService, deployment, conf);
    assertEquals("trigger_2", recordedTriggerIdOf(deployment));
    assertTrue(isSavepointPendingFor(deployment));
    observer.observe(deployment, readyContext);
    observer.observe(deployment, readyContext);
    assertEquals("savepoint_1", lastSavepointLocationOf(deployment));
    assertFalse(isSavepointPendingFor(deployment));

    // Port reported as not ready after a savepoint trigger: the last savepoint is kept, the
    // pending trigger is cleared, and the existing event's count goes up to 2.
    triggerManualSavepointForObserveTest(flinkService, deployment, conf);
    assertEquals("trigger_3", recordedTriggerIdOf(deployment));
    assertTrue(isSavepointPendingFor(deployment));
    flinkService.setPortReady(false);
    observer.observe(deployment, readyContext);
    assertEquals("savepoint_1", lastSavepointLocationOf(deployment));
    assertFalse(isSavepointPendingFor(deployment));
    assertEquals(1, listedEventCountFor(deployment));
    assertEquals(
            2,
            kubernetesClient
                    .v1()
                    .events()
                    .inNamespace(deployment.getMetadata().getNamespace())
                    .list()
                    .getItems()
                    .get(0)
                    .getCount());
    flinkService.setPortReady(true);
    observer.observe(deployment, readyContext);

    // Simulate Failed job: rewrite the first listed job in place as FAILED with location
    // "last-SP", then observe with an arbitrary pending trigger set.
    final Tuple2<String, JobStatusMessage> listedJob = flinkService.listJobs().get(0);
    final JobStatusMessage previousMessage = listedJob.f1;
    listedJob.f0 = "last-SP";
    listedJob.f1 =
            new JobStatusMessage(
                    previousMessage.getJobId(),
                    previousMessage.getJobName(),
                    org.apache.flink.api.common.JobStatus.FAILED,
                    previousMessage.getStartTime());
    deployment.getStatus().getJobStatus().getSavepointInfo().setTriggerId("test");
    deployment.getStatus().getJobStatus().getSavepointInfo().setTriggerTimestamp(123L);
    observer.observe(deployment, readyContext);
    assertEquals(
            org.apache.flink.api.common.JobStatus.FAILED.name(),
            deployment.getStatus().getJobStatus().getState());
    assertEquals("last-SP", lastSavepointLocationOf(deployment));
    assertFalse(isSavepointPendingFor(deployment));
}

/** Triggers a MANUAL savepoint using the job id and savepoint info currently in the status. */
private void triggerManualSavepointForObserveTest(
        TestingFlinkService flinkService, FlinkDeployment deployment, Configuration conf)
        throws Exception {
    flinkService.triggerSavepoint(
            deployment.getStatus().getJobStatus().getJobId(),
            SavepointTriggerType.MANUAL,
            deployment.getStatus().getJobStatus().getSavepointInfo(),
            conf);
}

/** Reads the trigger id from the savepoint info currently held in the deployment status. */
private String recordedTriggerIdOf(FlinkDeployment deployment) {
    return deployment.getStatus().getJobStatus().getSavepointInfo().getTriggerId();
}

/** Reads the location of the last savepoint currently held in the deployment status. */
private String lastSavepointLocationOf(FlinkDeployment deployment) {
    return deployment
            .getStatus()
            .getJobStatus()
            .getSavepointInfo()
            .getLastSavepoint()
            .getLocation();
}

/** Asks SavepointUtils whether a savepoint is in progress for the current job status. */
private boolean isSavepointPendingFor(FlinkDeployment deployment) {
    return SavepointUtils.savepointInProgress(deployment.getStatus().getJobStatus());
}

/** Counts the events the Kubernetes client lists in the deployment's namespace. */
private int listedEventCountFor(FlinkDeployment deployment) {
    return kubernetesClient
            .v1()
            .events()
            .inNamespace(deployment.getMetadata().getNamespace())
            .list()
            .getItems()
            .size();
}
use of org.apache.flink.kubernetes.operator.crd.FlinkDeployment in project flink-kubernetes-operator by apache.
the class ApplicationReconcilerTest method testJobUpgradeIgnorePendingSavepoint.
/**
 * Checks that, once the ignore-pending-savepoint option is set to "true", a reconcile performed
 * while a savepoint trigger is still recorded moves the job state to FINISHED and leaves the
 * recorded trigger id untouched.
 */
@Test
public void testJobUpgradeIgnorePendingSavepoint() throws Exception {
    TestingFlinkService service = new TestingFlinkService();
    Context ctx = service.getContext();
    ApplicationReconciler appReconciler =
            new ApplicationReconciler(kubernetesClient, service, configManager);

    // Deploy the application and copy the listed jobs into the resource status.
    FlinkDeployment baseDeployment = TestUtils.buildApplicationCluster();
    appReconciler.reconcile(baseDeployment, ctx);
    verifyAndSetRunningJobsToStatus(baseDeployment, service.listJobs());

    // Work on a clone whose savepoint trigger nonce has been changed to a random value.
    FlinkDeployment withSavepoint = ReconciliationUtils.clone(baseDeployment);
    long randomNonce = ThreadLocalRandom.current().nextLong();
    withSavepoint.getSpec().getJob().setSavepointTriggerNonce(randomNonce);
    appReconciler.reconcile(withSavepoint, ctx);

    String triggerAfterNonceChange =
            withSavepoint.getStatus().getJobStatus().getSavepointInfo().getTriggerId();
    assertEquals("trigger_0", triggerAfterNonceChange);
    String stateAfterNonceChange = withSavepoint.getStatus().getJobStatus().getState();
    assertEquals(JobState.RUNNING.name(), stateAfterNonceChange);

    // Force upgrade when savepoint is in progress.
    withSavepoint
            .getSpec()
            .getFlinkConfiguration()
            .put(KubernetesOperatorConfigOptions.JOB_UPGRADE_IGNORE_PENDING_SAVEPOINT.key(), "true");
    appReconciler.reconcile(withSavepoint, ctx);

    String triggerAfterForcedUpgrade =
            withSavepoint.getStatus().getJobStatus().getSavepointInfo().getTriggerId();
    assertEquals("trigger_0", triggerAfterForcedUpgrade);
    String stateAfterForcedUpgrade = withSavepoint.getStatus().getJobStatus().getState();
    assertEquals(org.apache.flink.api.common.JobStatus.FINISHED.name(), stateAfterForcedUpgrade);
}
use of org.apache.flink.kubernetes.operator.crd.FlinkDeployment in project flink-kubernetes-operator by apache.
the class ApplicationReconcilerTest method testUpgradeModeChangedToLastStateShouldNotTriggerSavepointWhileHAEnabled.
/**
 * Switches an already reconciled deployment to {@code LAST_STATE} upgrade mode together with a
 * new image, reconciles twice, and checks that the new image is recorded as last reconciled while
 * the first listed job carries no savepoint location.
 */
@Test
public void testUpgradeModeChangedToLastStateShouldNotTriggerSavepointWhileHAEnabled() throws Exception {
    String upgradedImage = "new-image-1";
    TestingFlinkService service = new TestingFlinkService();
    Context ctx = service.getContext();
    ApplicationReconciler appReconciler =
            new ApplicationReconciler(kubernetesClient, service, configManager);
    FlinkDeployment flinkApp = TestUtils.buildApplicationCluster();

    // Initial deployment: the reconciled spec must not already be in LAST_STATE mode.
    appReconciler.reconcile(flinkApp, ctx);
    assertNotEquals(
            UpgradeMode.LAST_STATE,
            flinkApp.getStatus()
                    .getReconciliationStatus()
                    .deserializeLastReconciledSpec()
                    .getJob()
                    .getUpgradeMode());

    // Change both the upgrade mode and the image, then sync the listed jobs into the status.
    flinkApp.getSpec().getJob().setUpgradeMode(UpgradeMode.LAST_STATE);
    flinkApp.getSpec().setImage(upgradedImage);
    verifyAndSetRunningJobsToStatus(flinkApp, service.listJobs());

    // Two reconcile passes are run before the new image is expected in the reconciled spec.
    appReconciler.reconcile(flinkApp, ctx);
    appReconciler.reconcile(flinkApp, ctx);

    String reconciledImage =
            flinkApp.getStatus()
                    .getReconciliationStatus()
                    .deserializeLastReconciledSpec()
                    .getImage();
    assertEquals(upgradedImage, reconciledImage);

    // Upgrade mode changes from stateless to last-state while HA enabled previously should not
    // trigger a savepoint
    assertNull(service.listJobs().get(0).f0);
}
use of org.apache.flink.kubernetes.operator.crd.FlinkDeployment in project flink-kubernetes-operator by apache.
the class ApplicationReconcilerTest method triggerRestart.
/**
 * Changes the restart nonce on a running deployment and checks the two reconcile passes that
 * follow: the first leaves the reconciled job state SUSPENDED with no running job, the second
 * brings it back to RUNNING with one running job and records the new nonce.
 */
@Test
public void triggerRestart() throws Exception {
    TestingFlinkService service = new TestingFlinkService();
    Context ctx = service.getContext();
    ApplicationReconciler appReconciler =
            new ApplicationReconciler(kubernetesClient, service, configManager);

    // Deploy the application and copy the listed jobs into the resource status.
    FlinkDeployment baseDeployment = TestUtils.buildApplicationCluster();
    appReconciler.reconcile(baseDeployment, ctx);
    verifyAndSetRunningJobsToStatus(baseDeployment, service.listJobs());

    // Test restart job
    FlinkDeployment restarted = ReconciliationUtils.clone(baseDeployment);
    restarted.getSpec().setRestartNonce(1L);

    // First pass after the nonce change.
    appReconciler.reconcile(restarted, ctx);
    JobState stateAfterFirstPass =
            restarted
                    .getStatus()
                    .getReconciliationStatus()
                    .deserializeLastReconciledSpec()
                    .getJob()
                    .getState();
    assertEquals(JobState.SUSPENDED, stateAfterFirstPass);
    assertEquals(0, service.getRunningCount());

    // Second pass.
    appReconciler.reconcile(restarted, ctx);
    JobState stateAfterSecondPass =
            restarted
                    .getStatus()
                    .getReconciliationStatus()
                    .deserializeLastReconciledSpec()
                    .getJob()
                    .getState();
    assertEquals(JobState.RUNNING, stateAfterSecondPass);
    assertEquals(1, service.getRunningCount());
    assertEquals(
            1L,
            restarted
                    .getStatus()
                    .getReconciliationStatus()
                    .deserializeLastReconciledSpec()
                    .getRestartNonce());
}
Aggregations