Search in sources :

Example 11 with FlinkDeployment

use of org.apache.flink.kubernetes.operator.crd.FlinkDeployment in project flink-kubernetes-operator by apache.

the class FlinkValidator method validate.

/**
 * Validates an incoming resource by converting it to a {@link FlinkDeployment} and running
 * the deployment validator on it.
 *
 * @param resource the generic Kubernetes resource to validate
 * @param operation the admission operation; not consulted by this implementation
 * @throws NotAllowedException carrying the validator's message when validation reports an error
 */
@Override
public void validate(GenericKubernetesResource resource, Operation operation) throws NotAllowedException {
    LOG.debug("Validating resource {}", resource);
    final Optional<String> error =
            deploymentValidator.validate(objectMapper.convertValue(resource, FlinkDeployment.class));
    // Nothing to report: the resource is accepted.
    if (!error.isPresent()) {
        return;
    }
    throw new NotAllowedException(error.get());
}
Also used : FlinkDeployment(org.apache.flink.kubernetes.operator.crd.FlinkDeployment) NotAllowedException(org.apache.flink.kubernetes.operator.admission.admissioncontroller.NotAllowedException)

Example 12 with FlinkDeployment

use of org.apache.flink.kubernetes.operator.crd.FlinkDeployment in project flink-kubernetes-operator by apache.

the class ApplicationObserverTest method observeSavepoint.

/**
 * Walks an {@link ApplicationObserver} through a sequence of savepoint scenarios against a
 * {@link TestingFlinkService}: a pending savepoint, a savepoint whose trigger id was replaced
 * with "unknown", two successful savepoints, an observation while the service port is marked
 * not ready, and finally a job reported as FAILED.
 *
 * <p>The statements are order dependent: each assertion checks the status produced by the
 * preceding trigger/observe calls.
 */
@Test
public void observeSavepoint() throws Exception {
    // Set up the observer, submit the application and bring the deployment to a running state.
    TestingFlinkService flinkService = new TestingFlinkService(kubernetesClient);
    ApplicationObserver observer = new ApplicationObserver(null, flinkService, configManager, new TestingStatusHelper<>());
    FlinkDeployment deployment = TestUtils.buildApplicationCluster();
    Configuration conf = configManager.getDeployConfig(deployment.getMetadata(), deployment.getSpec());
    flinkService.submitApplicationCluster(deployment.getSpec().getJob(), conf, false);
    bringToReadyStatus(deployment);
    assertTrue(ReconciliationUtils.isJobRunning(deployment.getStatus().getJobStatus()));
    // Trigger a manual savepoint; the savepoint info of the status is passed in and is expected
    // to carry the trigger id afterwards.
    flinkService.triggerSavepoint(deployment.getStatus().getJobStatus().getJobId(), SavepointTriggerType.MANUAL, deployment.getStatus().getJobStatus().getSavepointInfo(), conf);
    // pending savepoint
    assertEquals("trigger_0", deployment.getStatus().getJobStatus().getSavepointInfo().getTriggerId());
    observer.observe(deployment, readyContext);
    assertTrue(SavepointUtils.savepointInProgress(deployment.getStatus().getJobStatus()));
    // Overwrite the trigger id with a value the test never triggered.
    // NOTE(review): presumably the testing service cannot resolve "unknown", which makes the
    // next observation fail the savepoint - confirm against TestingFlinkService.
    deployment.getStatus().getJobStatus().getSavepointInfo().setTriggerId("unknown");
    // savepoint error within grace period: the savepoint is no longer in progress after the
    // observation, and the namespace still has no events before or after it.
    assertEquals(0, kubernetesClient.v1().events().inNamespace(deployment.getMetadata().getNamespace()).list().getItems().size());
    observer.observe(deployment, readyContext);
    assertFalse(SavepointUtils.savepointInProgress(deployment.getStatus().getJobStatus()));
    assertEquals(0, kubernetesClient.v1().events().inNamespace(deployment.getMetadata().getNamespace()).list().getItems().size());
    // Same unknown trigger again, but with a trigger timestamp one hour in the past.
    // NOTE(review): presumably this places the trigger outside the grace period - confirm.
    deployment.getStatus().getJobStatus().getSavepointInfo().setTriggerId("unknown");
    deployment.getStatus().getJobStatus().getSavepointInfo().setTriggerType(SavepointTriggerType.MANUAL);
    deployment.getStatus().getJobStatus().getSavepointInfo().setTriggerTimestamp(Instant.now().minus(Duration.ofHours(1)).toEpochMilli());
    observer.observe(deployment, readyContext);
    assertFalse(SavepointUtils.savepointInProgress(deployment.getStatus().getJobStatus()));
    // This time exactly one event is expected, with a count of 1.
    assertEquals(1, kubernetesClient.v1().events().inNamespace(deployment.getMetadata().getNamespace()).list().getItems().size());
    assertEquals(1, kubernetesClient.v1().events().inNamespace(deployment.getMetadata().getNamespace()).list().getItems().get(0).getCount());
    // savepoint success: after two observations the last savepoint location is "savepoint_0"
    flinkService.triggerSavepoint(deployment.getStatus().getJobStatus().getJobId(), SavepointTriggerType.MANUAL, deployment.getStatus().getJobStatus().getSavepointInfo(), conf);
    assertEquals("trigger_1", deployment.getStatus().getJobStatus().getSavepointInfo().getTriggerId());
    observer.observe(deployment, readyContext);
    observer.observe(deployment, readyContext);
    assertEquals("savepoint_0", deployment.getStatus().getJobStatus().getSavepointInfo().getLastSavepoint().getLocation());
    assertFalse(SavepointUtils.savepointInProgress(deployment.getStatus().getJobStatus()));
    // second attempt success: the last savepoint location advances to "savepoint_1"
    flinkService.triggerSavepoint(deployment.getStatus().getJobStatus().getJobId(), SavepointTriggerType.MANUAL, deployment.getStatus().getJobStatus().getSavepointInfo(), conf);
    assertEquals("trigger_2", deployment.getStatus().getJobStatus().getSavepointInfo().getTriggerId());
    assertTrue(SavepointUtils.savepointInProgress(deployment.getStatus().getJobStatus()));
    observer.observe(deployment, readyContext);
    observer.observe(deployment, readyContext);
    assertEquals("savepoint_1", deployment.getStatus().getJobStatus().getSavepointInfo().getLastSavepoint().getLocation());
    assertFalse(SavepointUtils.savepointInProgress(deployment.getStatus().getJobStatus()));
    // application failure after savepoint trigger: the service port is marked not ready before
    // the pending savepoint is observed
    flinkService.triggerSavepoint(deployment.getStatus().getJobStatus().getJobId(), SavepointTriggerType.MANUAL, deployment.getStatus().getJobStatus().getSavepointInfo(), conf);
    assertEquals("trigger_3", deployment.getStatus().getJobStatus().getSavepointInfo().getTriggerId());
    assertTrue(SavepointUtils.savepointInProgress(deployment.getStatus().getJobStatus()));
    flinkService.setPortReady(false);
    observer.observe(deployment, readyContext);
    // The last savepoint stays at "savepoint_1", nothing is in progress any more, and the
    // namespace still holds a single event whose count is now 2.
    assertEquals("savepoint_1", deployment.getStatus().getJobStatus().getSavepointInfo().getLastSavepoint().getLocation());
    assertFalse(SavepointUtils.savepointInProgress(deployment.getStatus().getJobStatus()));
    assertEquals(1, kubernetesClient.v1().events().inNamespace(deployment.getMetadata().getNamespace()).list().getItems().size());
    assertEquals(2, kubernetesClient.v1().events().inNamespace(deployment.getMetadata().getNamespace()).list().getItems().get(0).getCount());
    flinkService.setPortReady(true);
    observer.observe(deployment, readyContext);
    // Simulate Failed job: mutate the first job tuple held by the testing service in place,
    // setting f0 to "last-SP" and replacing f1 with a FAILED status message for the same job.
    Tuple2<String, JobStatusMessage> jobTuple = flinkService.listJobs().get(0);
    jobTuple.f0 = "last-SP";
    jobTuple.f1 = new JobStatusMessage(jobTuple.f1.getJobId(), jobTuple.f1.getJobName(), org.apache.flink.api.common.JobStatus.FAILED, jobTuple.f1.getStartTime());
    // Leave a savepoint trigger recorded in the status before observing the failed job.
    deployment.getStatus().getJobStatus().getSavepointInfo().setTriggerId("test");
    deployment.getStatus().getJobStatus().getSavepointInfo().setTriggerTimestamp(123L);
    observer.observe(deployment, readyContext);
    // The status reports FAILED, the last savepoint location equals the tuple's f0 value, and
    // no savepoint is in progress.
    assertEquals(org.apache.flink.api.common.JobStatus.FAILED.name(), deployment.getStatus().getJobStatus().getState());
    assertEquals("last-SP", deployment.getStatus().getJobStatus().getSavepointInfo().getLastSavepoint().getLocation());
    assertFalse(SavepointUtils.savepointInProgress(deployment.getStatus().getJobStatus()));
}
Also used : FlinkDeployment(org.apache.flink.kubernetes.operator.crd.FlinkDeployment) Configuration(org.apache.flink.configuration.Configuration) TestingFlinkService(org.apache.flink.kubernetes.operator.TestingFlinkService) JobStatusMessage(org.apache.flink.runtime.client.JobStatusMessage) Test(org.junit.jupiter.api.Test)

Example 13 with FlinkDeployment

use of org.apache.flink.kubernetes.operator.crd.FlinkDeployment in project flink-kubernetes-operator by apache.

the class ApplicationReconcilerTest method testJobUpgradeIgnorePendingSavepoint.

/**
 * Reconciles an application cluster, requests a savepoint by changing the savepoint trigger
 * nonce, and then checks that with {@code JOB_UPGRADE_IGNORE_PENDING_SAVEPOINT} set to "true"
 * a further reconcile keeps trigger id "trigger_0" while the job state becomes FINISHED.
 */
@Test
public void testJobUpgradeIgnorePendingSavepoint() throws Exception {
    TestingFlinkService testingService = new TestingFlinkService();
    Context reconcileContext = testingService.getContext();
    ApplicationReconciler appReconciler = new ApplicationReconciler(kubernetesClient, testingService, configManager);

    // Initial deployment; copy the jobs reported by the testing service into the status.
    FlinkDeployment baseDeployment = TestUtils.buildApplicationCluster();
    appReconciler.reconcile(baseDeployment, reconcileContext);
    verifyAndSetRunningJobsToStatus(baseDeployment, testingService.listJobs());

    // Work on a clone whose savepoint trigger nonce is set to a random value.
    FlinkDeployment savepointDeployment = ReconciliationUtils.clone(baseDeployment);
    long triggerNonce = ThreadLocalRandom.current().nextLong();
    savepointDeployment.getSpec().getJob().setSavepointTriggerNonce(triggerNonce);
    appReconciler.reconcile(savepointDeployment, reconcileContext);
    assertEquals(
            "trigger_0",
            savepointDeployment.getStatus().getJobStatus().getSavepointInfo().getTriggerId());
    String runningState = JobState.RUNNING.name();
    assertEquals(runningState, savepointDeployment.getStatus().getJobStatus().getState());

    // Force upgrade when savepoint is in progress.
    String ignorePendingKey = KubernetesOperatorConfigOptions.JOB_UPGRADE_IGNORE_PENDING_SAVEPOINT.key();
    savepointDeployment.getSpec().getFlinkConfiguration().put(ignorePendingKey, "true");
    appReconciler.reconcile(savepointDeployment, reconcileContext);
    assertEquals(
            "trigger_0",
            savepointDeployment.getStatus().getJobStatus().getSavepointInfo().getTriggerId());
    String finishedState = org.apache.flink.api.common.JobStatus.FINISHED.name();
    assertEquals(finishedState, savepointDeployment.getStatus().getJobStatus().getState());
}
Also used : Context(io.javaoperatorsdk.operator.api.reconciler.Context) FlinkDeployment(org.apache.flink.kubernetes.operator.crd.FlinkDeployment) TestingFlinkService(org.apache.flink.kubernetes.operator.TestingFlinkService) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 14 with FlinkDeployment

use of org.apache.flink.kubernetes.operator.crd.FlinkDeployment in project flink-kubernetes-operator by apache.

the class ApplicationReconcilerTest method testUpgradeModeChangedToLastStateShouldNotTriggerSavepointWhileHAEnabled.

/**
 * Deploys an application whose last reconciled upgrade mode is not LAST_STATE, then switches
 * the spec to LAST_STATE together with a new image and reconciles twice. Afterwards the last
 * reconciled spec must carry the new image and the first listed job's {@code f0} must be null.
 */
@Test
public void testUpgradeModeChangedToLastStateShouldNotTriggerSavepointWhileHAEnabled() throws Exception {
    final String updatedImage = "new-image-1";
    TestingFlinkService testingService = new TestingFlinkService();
    Context reconcileContext = testingService.getContext();
    ApplicationReconciler appReconciler = new ApplicationReconciler(kubernetesClient, testingService, configManager);
    FlinkDeployment flinkApp = TestUtils.buildApplicationCluster();

    // First reconcile: the recorded upgrade mode must differ from LAST_STATE.
    appReconciler.reconcile(flinkApp, reconcileContext);
    assertNotEquals(
            UpgradeMode.LAST_STATE,
            flinkApp.getStatus().getReconciliationStatus().deserializeLastReconciledSpec().getJob().getUpgradeMode());

    // Change both the upgrade mode and the image, then mirror the listed jobs into the status.
    flinkApp.getSpec().getJob().setUpgradeMode(UpgradeMode.LAST_STATE);
    flinkApp.getSpec().setImage(updatedImage);
    List<Tuple2<String, JobStatusMessage>> listedJobs = testingService.listJobs();
    verifyAndSetRunningJobsToStatus(flinkApp, listedJobs);

    for (int pass = 0; pass < 2; pass++) {
        appReconciler.reconcile(flinkApp, reconcileContext);
    }
    assertEquals(
            updatedImage,
            flinkApp.getStatus().getReconciliationStatus().deserializeLastReconciledSpec().getImage());

    // Upgrade mode changes from stateless to last-state while HA enabled previously should not
    // trigger a savepoint
    // NOTE(review): f0 presumably holds the savepoint location of the job - confirm.
    assertNull(testingService.listJobs().get(0).f0);
}
Also used : Context(io.javaoperatorsdk.operator.api.reconciler.Context) FlinkDeployment(org.apache.flink.kubernetes.operator.crd.FlinkDeployment) TestingFlinkService(org.apache.flink.kubernetes.operator.TestingFlinkService) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 15 with FlinkDeployment

use of org.apache.flink.kubernetes.operator.crd.FlinkDeployment in project flink-kubernetes-operator by apache.

the class ApplicationReconcilerTest method triggerRestart.

/**
 * Sets a restart nonce on a clone of a reconciled deployment and checks the two follow-up
 * reconcile passes: after the first the last reconciled job state is SUSPENDED with no running
 * jobs, after the second it is RUNNING with one running job and the nonce recorded.
 */
@Test
public void triggerRestart() throws Exception {
    final long restartNonce = 1L;
    TestingFlinkService testingService = new TestingFlinkService();
    Context reconcileContext = testingService.getContext();
    ApplicationReconciler appReconciler = new ApplicationReconciler(kubernetesClient, testingService, configManager);

    // Initial deployment; copy the jobs reported by the testing service into the status.
    FlinkDeployment flinkApp = TestUtils.buildApplicationCluster();
    appReconciler.reconcile(flinkApp, reconcileContext);
    verifyAndSetRunningJobsToStatus(flinkApp, testingService.listJobs());

    // Test restart job
    FlinkDeployment restarted = ReconciliationUtils.clone(flinkApp);
    restarted.getSpec().setRestartNonce(restartNonce);

    // First pass after the nonce change.
    appReconciler.reconcile(restarted, reconcileContext);
    assertEquals(
            JobState.SUSPENDED,
            restarted.getStatus().getReconciliationStatus().deserializeLastReconciledSpec().getJob().getState());
    assertEquals(0, testingService.getRunningCount());

    // Second pass.
    appReconciler.reconcile(restarted, reconcileContext);
    assertEquals(
            JobState.RUNNING,
            restarted.getStatus().getReconciliationStatus().deserializeLastReconciledSpec().getJob().getState());
    assertEquals(1, testingService.getRunningCount());
    assertEquals(
            restartNonce,
            restarted.getStatus().getReconciliationStatus().deserializeLastReconciledSpec().getRestartNonce());
}
Also used : Context(io.javaoperatorsdk.operator.api.reconciler.Context) FlinkDeployment(org.apache.flink.kubernetes.operator.crd.FlinkDeployment) TestingFlinkService(org.apache.flink.kubernetes.operator.TestingFlinkService) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Aggregations

FlinkDeployment (org.apache.flink.kubernetes.operator.crd.FlinkDeployment): 66, Test (org.junit.jupiter.api.Test): 44, TestingFlinkService (org.apache.flink.kubernetes.operator.TestingFlinkService): 19, ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 19, Configuration (org.apache.flink.configuration.Configuration): 16, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 13, Context (io.javaoperatorsdk.operator.api.reconciler.Context): 11, JobStatus (org.apache.flink.kubernetes.operator.crd.status.JobStatus): 9, FlinkDeploymentStatus (org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus): 8, FlinkOperatorConfiguration (org.apache.flink.kubernetes.operator.config.FlinkOperatorConfiguration): 6, FlinkDeploymentSpec (org.apache.flink.kubernetes.operator.crd.spec.FlinkDeploymentSpec): 6, DeploymentFailedException (org.apache.flink.kubernetes.operator.exception.DeploymentFailedException): 5, JobID (org.apache.flink.api.common.JobID): 4, TestingClusterClient (org.apache.flink.kubernetes.operator.TestingClusterClient): 4, EnumSource (org.junit.jupiter.params.provider.EnumSource): 4, ObjectMetaBuilder (io.fabric8.kubernetes.api.model.ObjectMetaBuilder): 3, Pod (io.fabric8.kubernetes.api.model.Pod): 3, File (java.io.File): 3, ArrayList (java.util.ArrayList): 3, CompletableFuture (java.util.concurrent.CompletableFuture): 3