Search in sources:

Example 1 with DeploymentFailedException

use of org.apache.flink.kubernetes.operator.exception.DeploymentFailedException in project flink-kubernetes-operator by apache.

the class FlinkServiceTest method testGetLastCheckpoint.

@Test
public void testGetLastCheckpoint() throws Exception {
    ObjectMapper objectMapper = RestMapperUtils.getStrictObjectMapper();
    var testingClusterClient = new TestingClusterClient<>(configuration, TestUtils.TEST_DEPLOYMENT_NAME);
    String responseWithHistory = "{\"counts\":{\"restored\":1,\"total\":79,\"in_progress\":0,\"completed\":69,\"failed\":10},\"summary\":{\"checkpointed_size\":{\"min\":23928,\"max\":53670,\"avg\":28551,\"p50\":28239,\"p90\":28563,\"p95\":28635,\"p99\":53670,\"p999\":53670},\"state_size\":{\"min\":23928,\"max\":53670,\"avg\":28551,\"p50\":28239,\"p90\":28563,\"p95\":28635,\"p99\":53670,\"p999\":53670},\"end_to_end_duration\":{\"min\":14,\"max\":117,\"avg\":24,\"p50\":22,\"p90\":32,\"p95\":40.5,\"p99\":117,\"p999\":117},\"alignment_buffered\":{\"min\":0,\"max\":0,\"avg\":0,\"p50\":0,\"p90\":0,\"p95\":0,\"p99\":0,\"p999\":0},\"processed_data\":{\"min\":0,\"max\":1274,\"avg\":280,\"p50\":112,\"p90\":840,\"p95\":1071,\"p99\":1274,\"p999\":1274},\"persisted_data\":{\"min\":0,\"max\":0,\"avg\":0,\"p50\":0,\"p90\":0,\"p95\":0,\"p99\":0,\"p999\":0}},\"latest\":{\"completed\":{\"className\":\"completed\",\"id\":96,\"status\":\"COMPLETED\",\"is_savepoint\":false,\"trigger_timestamp\":1653212837604,\"latest_ack_timestamp\":1653212837621,\"checkpointed_size\":28437,\"state_size\":28437,\"end_to_end_duration\":17,\"alignment_buffered\":0,\"processed_data\":560,\"persisted_data\":0,\"num_subtasks\":4,\"num_acknowledged_subtasks\":4,\"checkpoint_type\":\"CHECKPOINT\",\"tasks\":{},\"external_path\":\"file:/flink-data/checkpoints/00000000000000000000000000000000/chk-96\",\"discarded\":false},\"savepoint\":{\"className\":\"completed\",\"id\":51,\"status\":\"COMPLETED\",\"is_savepoint\":true,\"trigger_timestamp\":1653212748176,\"latest_ack_timestamp\":1653212748233,\"checkpointed_size\":53670,\"state_size\":53670,\"end_to_end_duration\":57,\"alignment_buffered\":0,\"processed_data\":483,\"persisted_data\":0,\"num_subtasks\":4,\"num_acknowledged_subtasks\":4,\"checkpoint_type\":\"SAVEPOINT\",\"tasks\":{},\"external_path\":\"file:/flink-data/savepoints/savepoint-000000-e8ea2482ce4f\",\"discarded\":false},\"failed\":null,\"restored\":{\"id\":27,\"restore_timestamp\":1653212683022,\"is_sa
vepoint\":true,\"external_path\":\"file:/flink-data/savepoints/savepoint-000000-5930e5326ca7\"}},\"history\":[{\"className\":\"completed\",\"id\":96,\"status\":\"COMPLETED\",\"is_savepoint\":false,\"trigger_timestamp\":1653212837604,\"latest_ack_timestamp\":1653212837621,\"checkpointed_size\":28437,\"state_size\":28437,\"end_to_end_duration\":17,\"alignment_buffered\":0,\"processed_data\":560,\"persisted_data\":0,\"num_subtasks\":4,\"num_acknowledged_subtasks\":4,\"checkpoint_type\":\"CHECKPOINT\",\"tasks\":{},\"external_path\":\"file:/flink-data/checkpoints/00000000000000000000000000000000/chk-96\",\"discarded\":false},{\"className\":\"completed\",\"id\":95,\"status\":\"COMPLETED\",\"is_savepoint\":false,\"trigger_timestamp\":1653212835603,\"latest_ack_timestamp\":1653212835622,\"checkpointed_size\":28473,\"state_size\":28473,\"end_to_end_duration\":19,\"alignment_buffered\":0,\"processed_data\":42,\"persisted_data\":0,\"num_subtasks\":4,\"num_acknowledged_subtasks\":4,\"checkpoint_type\":\"CHECKPOINT\",\"tasks\":{},\"external_path\":\"file:/flink-data/checkpoints/00000000000000000000000000000000/chk-95\",\"discarded\":true},{\"className\":\"completed\",\"id\":94,\"status\":\"COMPLETED\",\"is_savepoint\":false,\"trigger_timestamp\":1653212833603,\"latest_ack_timestamp\":1653212833623,\"checkpointed_size\":27969,\"state_size\":27969,\"end_to_end_duration\":20,\"alignment_buffered\":0,\"processed_data\":28,\"persisted_data\":0,\"num_subtasks\":4,\"num_acknowledged_subtasks\":4,\"checkpoint_type\":\"CHECKPOINT\",\"tasks\":{},\"external_path\":\"file:/flink-data/checkpoints/00000000000000000000000000000000/chk-94\",\"discarded\":true},{\"className\":\"completed\",\"id\":93,\"status\":\"COMPLETED\",\"is_savepoint\":false,\"trigger_timestamp\":1653212831603,\"latest_ack_timestamp\":1653212831621,\"checkpointed_size\":28113,\"state_size\":28113,\"end_to_end_duration\":18,\"alignment_buffered\":0,\"processed_data\":138,\"persisted_data\":0,\"num_subtasks\":4,\"num_acknowle
dged_subtasks\":4,\"checkpoint_type\":\"CHECKPOINT\",\"tasks\":{},\"external_path\":\"file:/flink-data/checkpoints/00000000000000000000000000000000/chk-93\",\"discarded\":true},{\"className\":\"completed\",\"id\":92,\"status\":\"COMPLETED\",\"is_savepoint\":false,\"trigger_timestamp\":1653212829603,\"latest_ack_timestamp\":1653212829621,\"checkpointed_size\":28293,\"state_size\":28293,\"end_to_end_duration\":18,\"alignment_buffered\":0,\"processed_data\":196,\"persisted_data\":0,\"num_subtasks\":4,\"num_acknowledged_subtasks\":4,\"checkpoint_type\":\"CHECKPOINT\",\"tasks\":{},\"external_path\":\"file:/flink-data/checkpoints/00000000000000000000000000000000/chk-92\",\"discarded\":true},{\"className\":\"completed\",\"id\":91,\"status\":\"COMPLETED\",\"is_savepoint\":false,\"trigger_timestamp\":1653212827603,\"latest_ack_timestamp\":1653212827629,\"checkpointed_size\":27969,\"state_size\":27969,\"end_to_end_duration\":26,\"alignment_buffered\":0,\"processed_data\":0,\"persisted_data\":0,\"num_subtasks\":4,\"num_acknowledged_subtasks\":4,\"checkpoint_type\":\"CHECKPOINT\",\"tasks\":{},\"external_path\":\"file:/flink-data/checkpoints/00000000000000000000000000000000/chk-91\",\"discarded\":true},{\"className\":\"completed\",\"id\":90,\"status\":\"COMPLETED\",\"is_savepoint\":false,\"trigger_timestamp\":1653212825603,\"latest_ack_timestamp\":1653212825641,\"checkpointed_size\":27735,\"state_size\":27735,\"end_to_end_duration\":38,\"alignment_buffered\":0,\"processed_data\":0,\"persisted_data\":0,\"num_subtasks\":4,\"num_acknowledged_subtasks\":4,\"checkpoint_type\":\"CHECKPOINT\",\"tasks\":{},\"external_path\":\"file:/flink-data/checkpoints/00000000000000000000000000000000/chk-90\",\"discarded\":true},{\"className\":\"completed\",\"id\":89,\"status\":\"COMPLETED\",\"is_savepoint\":false,\"trigger_timestamp\":1653212823603,\"latest_ack_timestamp\":1653212823618,\"checkpointed_size\":28545,\"state_size\":28545,\"end_to_end_duration\":15,\"alignment_buffered\":0,\"processed_d
ata\":364,\"persisted_data\":0,\"num_subtasks\":4,\"num_acknowledged_subtasks\":4,\"checkpoint_type\":\"CHECKPOINT\",\"tasks\":{},\"external_path\":\"file:/flink-data/checkpoints/00000000000000000000000000000000/chk-89\",\"discarded\":true},{\"className\":\"completed\",\"id\":88,\"status\":\"COMPLETED\",\"is_savepoint\":false,\"trigger_timestamp\":1653212821603,\"latest_ack_timestamp\":1653212821619,\"checkpointed_size\":28275,\"state_size\":28275,\"end_to_end_duration\":16,\"alignment_buffered\":0,\"processed_data\":0,\"persisted_data\":0,\"num_subtasks\":4,\"num_acknowledged_subtasks\":4,\"checkpoint_type\":\"CHECKPOINT\",\"tasks\":{},\"external_path\":\"file:/flink-data/checkpoints/00000000000000000000000000000000/chk-88\",\"discarded\":true},{\"className\":\"completed\",\"id\":87,\"status\":\"COMPLETED\",\"is_savepoint\":false,\"trigger_timestamp\":1653212819604,\"latest_ack_timestamp\":1653212819622,\"checkpointed_size\":28518,\"state_size\":28518,\"end_to_end_duration\":18,\"alignment_buffered\":0,\"processed_data\":0,\"persisted_data\":0,\"num_subtasks\":4,\"num_acknowledged_subtasks\":4,\"checkpoint_type\":\"CHECKPOINT\",\"tasks\":{},\"external_path\":\"file:/flink-data/checkpoints/00000000000000000000000000000000/chk-87\",\"discarded\":true}]}";
    String responseWithoutHistory = "{\"counts\":{\"restored\":1,\"total\":79,\"in_progress\":0,\"completed\":69,\"failed\":10},\"summary\":{\"checkpointed_size\":{\"min\":23928,\"max\":53670,\"avg\":28551,\"p50\":28239,\"p90\":28563,\"p95\":28635,\"p99\":53670,\"p999\":53670},\"state_size\":{\"min\":23928,\"max\":53670,\"avg\":28551,\"p50\":28239,\"p90\":28563,\"p95\":28635,\"p99\":53670,\"p999\":53670},\"end_to_end_duration\":{\"min\":14,\"max\":117,\"avg\":24,\"p50\":22,\"p90\":32,\"p95\":40.5,\"p99\":117,\"p999\":117},\"alignment_buffered\":{\"min\":0,\"max\":0,\"avg\":0,\"p50\":0,\"p90\":0,\"p95\":0,\"p99\":0,\"p999\":0},\"processed_data\":{\"min\":0,\"max\":1274,\"avg\":280,\"p50\":112,\"p90\":840,\"p95\":1071,\"p99\":1274,\"p999\":1274},\"persisted_data\":{\"min\":0,\"max\":0,\"avg\":0,\"p50\":0,\"p90\":0,\"p95\":0,\"p99\":0,\"p999\":0}},\"latest\":{\"completed\":null,\"savepoint\":null,\"failed\":null,\"restored\":{\"id\":27,\"restore_timestamp\":1653212683022,\"is_savepoint\":true,\"external_path\":\"file:/flink-data/savepoints/savepoint-000000-5930e5326ca7\"}},\"history\":[]}";
    String responseWithoutHistoryInternal = "{\"counts\":{\"restored\":1,\"total\":79,\"in_progress\":0,\"completed\":69,\"failed\":10},\"summary\":{\"checkpointed_size\":{\"min\":23928,\"max\":53670,\"avg\":28551,\"p50\":28239,\"p90\":28563,\"p95\":28635,\"p99\":53670,\"p999\":53670},\"state_size\":{\"min\":23928,\"max\":53670,\"avg\":28551,\"p50\":28239,\"p90\":28563,\"p95\":28635,\"p99\":53670,\"p999\":53670},\"end_to_end_duration\":{\"min\":14,\"max\":117,\"avg\":24,\"p50\":22,\"p90\":32,\"p95\":40.5,\"p99\":117,\"p999\":117},\"alignment_buffered\":{\"min\":0,\"max\":0,\"avg\":0,\"p50\":0,\"p90\":0,\"p95\":0,\"p99\":0,\"p999\":0},\"processed_data\":{\"min\":0,\"max\":1274,\"avg\":280,\"p50\":112,\"p90\":840,\"p95\":1071,\"p99\":1274,\"p999\":1274},\"persisted_data\":{\"min\":0,\"max\":0,\"avg\":0,\"p50\":0,\"p90\":0,\"p95\":0,\"p99\":0,\"p999\":0}},\"latest\":{\"completed\":null,\"savepoint\":null,\"failed\":null,\"restored\":{\"id\":27,\"restore_timestamp\":1653212683022,\"is_savepoint\":true,\"external_path\":\"<checkpoint-not-externally-addressable>\"}},\"history\":[]}";
    var responseContainer = new ArrayList<CheckpointHistoryWrapper>();
    testingClusterClient.setRequestProcessor((headers, parameters, requestBody) -> {
        if (headers instanceof CustomCheckpointingStatisticsHeaders) {
            return CompletableFuture.completedFuture(responseContainer.get(0));
        }
        fail("unknown request");
        return null;
    });
    var flinkService = createFlinkService(testingClusterClient);
    responseContainer.add(objectMapper.readValue(responseWithHistory, CheckpointHistoryWrapper.class));
    var checkpointOpt = flinkService.getLastCheckpoint(new JobID(), new Configuration());
    assertEquals("file:/flink-data/checkpoints/00000000000000000000000000000000/chk-96", checkpointOpt.get().getLocation());
    responseContainer.set(0, objectMapper.readValue(responseWithoutHistory, CheckpointHistoryWrapper.class));
    checkpointOpt = flinkService.getLastCheckpoint(new JobID(), new Configuration());
    assertEquals("file:/flink-data/savepoints/savepoint-000000-5930e5326ca7", checkpointOpt.get().getLocation());
    responseContainer.set(0, objectMapper.readValue(responseWithoutHistoryInternal, CheckpointHistoryWrapper.class));
    try {
        flinkService.getLastCheckpoint(new JobID(), new Configuration());
        fail();
    } catch (DeploymentFailedException dpe) {
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TestingClusterClient(org.apache.flink.kubernetes.operator.TestingClusterClient) ArrayList(java.util.ArrayList) DeploymentFailedException(org.apache.flink.kubernetes.operator.exception.DeploymentFailedException) ObjectMapper(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper) JobID(org.apache.flink.api.common.JobID) Test(org.junit.jupiter.api.Test)

Example 2 with DeploymentFailedException

use of org.apache.flink.kubernetes.operator.exception.DeploymentFailedException in project flink-kubernetes-operator by apache.

the class AbstractDeploymentObserver method observeJmDeployment.

/**
 * Observes the JobManager Kubernetes {@link Deployment} of the given application and records
 * the outcome in the application's {@link FlinkDeploymentStatus}.
 *
 * <p>Outcomes, in the order they are checked:
 * <ul>
 *   <li>suspended job: nothing is observed;</li>
 *   <li>previous status {@code DEPLOYED_NOT_READY}: promoted to {@code READY};</li>
 *   <li>no deployment resource: status {@code MISSING}, job state {@code RECONCILING}, and
 *       {@code onMissingDeployment} is invoked unless the previous status was already
 *       {@code MISSING} or {@code ERROR};</li>
 *   <li>replicas available and JobManager port ready: {@code DEPLOYED_NOT_READY};</li>
 *   <li>otherwise the failure checks run and, if they pass, the status becomes
 *       {@code DEPLOYING}.</li>
 * </ul>
 *
 * @param flinkApp the application whose status is updated in place
 * @param context reconciler context used to look up the secondary {@link Deployment}
 * @param effectiveConfig configuration passed to the port-readiness and crash-loop checks
 * @throws DeploymentFailedException if a failure check fails while the recorded JobManager
 *     status is not already {@code ERROR}
 */
protected void observeJmDeployment(FlinkDeployment flinkApp, Context context, Configuration effectiveConfig) {
    FlinkDeploymentStatus appStatus = flinkApp.getStatus();
    JobManagerDeploymentStatus lastObserved = appStatus.getJobManagerDeploymentStatus();

    if (isSuspendedJob(flinkApp)) {
        logger.debug("Skipping observe step for suspended application deployments.");
        return;
    }

    flinkApp.getStatus().setClusterInfo(new HashMap<>());
    logger.info("Observing JobManager deployment. Previous status: {}", lastObserved.name());

    if (lastObserved == JobManagerDeploymentStatus.DEPLOYED_NOT_READY) {
        logger.info("JobManager deployment is ready");
        appStatus.setJobManagerDeploymentStatus(JobManagerDeploymentStatus.READY);
        return;
    }

    Optional<Deployment> jmDeployment = context.getSecondaryResource(Deployment.class);
    if (!jmDeployment.isPresent()) {
        appStatus.setJobManagerDeploymentStatus(JobManagerDeploymentStatus.MISSING);
        appStatus.getJobStatus().setState(JobStatus.RECONCILING.name());
        boolean alreadyHandled =
                lastObserved == JobManagerDeploymentStatus.MISSING
                        || lastObserved == JobManagerDeploymentStatus.ERROR;
        if (!alreadyHandled) {
            onMissingDeployment(flinkApp);
        }
        return;
    }

    Deployment resource = jmDeployment.get();
    DeploymentStatus status = resource.getStatus();
    DeploymentSpec spec = resource.getSpec();

    boolean replicasAvailable =
            status != null
                    && status.getAvailableReplicas() != null
                    && spec.getReplicas().intValue() == status.getReplicas()
                    && spec.getReplicas().intValue() == status.getAvailableReplicas();
    if (replicasAvailable && flinkService.isJobManagerPortReady(effectiveConfig)) {
        // typically it takes a few seconds for the REST server to be ready
        logger.info("JobManager deployment port is ready, waiting for the Flink REST API...");
        appStatus.setJobManagerDeploymentStatus(JobManagerDeploymentStatus.DEPLOYED_NOT_READY);
        return;
    }

    try {
        checkFailedCreate(status);
        // checking the pod is expensive; only do it when the deployment isn't ready
        checkCrashLoopBackoff(flinkApp, effectiveConfig);
    } catch (DeploymentFailedException failure) {
        appStatus.getJobStatus().setState(JobStatus.RECONCILING.name());
        // Already in error status: swallow the failure to allow for a spec update.
        if (appStatus.getJobManagerDeploymentStatus() == JobManagerDeploymentStatus.ERROR) {
            return;
        }
        throw failure;
    }

    logger.info("JobManager is being deployed");
    appStatus.setJobManagerDeploymentStatus(JobManagerDeploymentStatus.DEPLOYING);
}
Also used : FlinkDeploymentStatus(org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus) FlinkDeploymentSpec(org.apache.flink.kubernetes.operator.crd.spec.FlinkDeploymentSpec) DeploymentSpec(io.fabric8.kubernetes.api.model.apps.DeploymentSpec) FlinkDeployment(org.apache.flink.kubernetes.operator.crd.FlinkDeployment) Deployment(io.fabric8.kubernetes.api.model.apps.Deployment) DeploymentFailedException(org.apache.flink.kubernetes.operator.exception.DeploymentFailedException) JobManagerDeploymentStatus(org.apache.flink.kubernetes.operator.crd.status.JobManagerDeploymentStatus) FlinkDeploymentStatus(org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus) DeploymentStatus(io.fabric8.kubernetes.api.model.apps.DeploymentStatus) JobManagerDeploymentStatus(org.apache.flink.kubernetes.operator.crd.status.JobManagerDeploymentStatus)

Example 3 with DeploymentFailedException

use of org.apache.flink.kubernetes.operator.exception.DeploymentFailedException in project flink-kubernetes-operator by apache.

the class FlinkDeploymentController method reconcile.

/**
 * Runs one reconciliation pass for the given deployment: refresh the status from the cache,
 * observe, validate, and (only if validation passes) reconcile.
 *
 * <p>When validation fails, metrics and status are updated and the method returns early
 * with the final {@code toUpdateControl} flag set to {@code false}. A
 * {@link DeploymentFailedException} raised inside the guarded section is passed to
 * {@code handleDeploymentFailed}; any other exception is wrapped in a
 * {@link ReconciliationException}.
 *
 * @param flinkApp the deployment being reconciled
 * @param context reconciler context handed to the observer and reconciler
 * @return the update control computed by {@code ReconciliationUtils.toUpdateControl}
 * @throws Exception declared by the overridden method; failures inside the guarded section
 *     surface as {@link ReconciliationException}
 */
@Override
public UpdateControl<FlinkDeployment> reconcile(FlinkDeployment flinkApp, Context context) throws Exception {
    LOG.info("Starting reconciliation");
    statusHelper.updateStatusFromCache(flinkApp);
    // Snapshot taken before observe/reconcile mutate the resource.
    FlinkDeployment previousDeployment = ReconciliationUtils.clone(flinkApp);

    try {
        observerFactory.getOrCreate(flinkApp).observe(flinkApp, context);
        if (validateDeployment(flinkApp)) {
            reconcilerFactory.getOrCreate(flinkApp).reconcile(flinkApp, context);
        } else {
            // Validation failed: publish metrics/status and stop without reconciling.
            metricManager.onUpdate(flinkApp);
            statusHelper.patchAndCacheStatus(flinkApp);
            return ReconciliationUtils.toUpdateControl(
                    configManager.getOperatorConfiguration(), flinkApp, previousDeployment, false);
        }
    } catch (DeploymentFailedException deploymentFailure) {
        handleDeploymentFailed(flinkApp, deploymentFailure);
    } catch (Exception cause) {
        throw new ReconciliationException(cause);
    }

    LOG.info("End of reconciliation");
    metricManager.onUpdate(flinkApp);
    statusHelper.patchAndCacheStatus(flinkApp);
    return ReconciliationUtils.toUpdateControl(
            configManager.getOperatorConfiguration(), flinkApp, previousDeployment, true);
}
Also used : ReconciliationException(org.apache.flink.kubernetes.operator.exception.ReconciliationException) FlinkDeployment(org.apache.flink.kubernetes.operator.crd.FlinkDeployment) DeploymentFailedException(org.apache.flink.kubernetes.operator.exception.DeploymentFailedException) ReconciliationException(org.apache.flink.kubernetes.operator.exception.ReconciliationException) DeploymentFailedException(org.apache.flink.kubernetes.operator.exception.DeploymentFailedException)

Example 4 with DeploymentFailedException

use of org.apache.flink.kubernetes.operator.exception.DeploymentFailedException in project flink-kubernetes-operator by apache.

the class ApplicationReconcilerTest method testUpgrade.

/**
 * Walks an application deployment through a sequence of upgrades for every
 * {@link FlinkVersion} and checks the reconciler's behaviour after each step:
 * stateless upgrade, savepoint upgrade, last-state upgrade without HA data (expected to fail
 * with {@link DeploymentFailedException} for both {@code MISSING} and {@code ERROR}
 * JobManager status), and finally a last-state upgrade of a {@code FINISHED} job that has a
 * recorded savepoint.
 */
@ParameterizedTest
@EnumSource(FlinkVersion.class)
public void testUpgrade(FlinkVersion flinkVersion) throws Exception {
    TestingFlinkService flinkService = new TestingFlinkService();
    Context context = flinkService.getContext();
    ApplicationReconciler reconciler = new ApplicationReconciler(kubernetesClient, flinkService, configManager);
    FlinkDeployment deployment = TestUtils.buildApplicationCluster(flinkVersion);
    reconciler.reconcile(deployment, context);
    List<Tuple2<String, JobStatusMessage>> runningJobs = flinkService.listJobs();
    verifyAndSetRunningJobsToStatus(deployment, runningJobs);
    // Test stateless upgrade
    FlinkDeployment statelessUpgrade = ReconciliationUtils.clone(deployment);
    statelessUpgrade.getSpec().getJob().setUpgradeMode(UpgradeMode.STATELESS);
    statelessUpgrade.getSpec().getFlinkConfiguration().put("new", "conf");
    // First reconcile stops the job (running count 0), second one redeploys it (running count 1).
    reconciler.reconcile(statelessUpgrade, context);
    runningJobs = flinkService.listJobs();
    assertEquals(0, flinkService.getRunningCount());
    reconciler.reconcile(statelessUpgrade, context);
    runningJobs = flinkService.listJobs();
    assertEquals(1, flinkService.getRunningCount());
    assertNull(runningJobs.get(0).f0);
    deployment.getStatus().getJobStatus().setJobId(runningJobs.get(0).f1.getJobId().toHexString());
    // Test stateful upgrade
    FlinkDeployment statefulUpgrade = ReconciliationUtils.clone(deployment);
    statefulUpgrade.getSpec().getJob().setUpgradeMode(UpgradeMode.SAVEPOINT);
    statefulUpgrade.getSpec().getFlinkConfiguration().put("new", "conf2");
    reconciler.reconcile(statefulUpgrade, context);
    runningJobs = flinkService.listJobs();
    assertEquals(0, flinkService.getRunningCount());
    reconciler.reconcile(statefulUpgrade, context);
    runningJobs = flinkService.listJobs();
    assertEquals(1, flinkService.getRunningCount());
    assertEquals("savepoint_0", runningJobs.get(0).f0);
    assertEquals(SavepointTriggerType.UPGRADE, statefulUpgrade.getStatus().getJobStatus().getSavepointInfo().getLastSavepoint().getTriggerType());
    // Test last-state upgrade without HA data: must fail for both MISSING and ERROR JM status
    deployment.getSpec().getJob().setUpgradeMode(UpgradeMode.LAST_STATE);
    deployment.getSpec().setRestartNonce(100L);
    flinkService.setHaDataAvailable(false);
    deployment.getStatus().getJobStatus().setState("RECONCILING");
    try {
        deployment.getStatus().setJobManagerDeploymentStatus(JobManagerDeploymentStatus.MISSING);
        reconciler.reconcile(deployment, context);
        fail();
    } catch (DeploymentFailedException expected) {
        // Intentionally empty: the exception is the expected outcome.
    }
    try {
        deployment.getStatus().setJobManagerDeploymentStatus(JobManagerDeploymentStatus.ERROR);
        reconciler.reconcile(deployment, context);
        fail();
    } catch (DeploymentFailedException expected) {
        // Intentionally empty: the exception is the expected outcome.
    }
    // Test last-state upgrade of a FINISHED job that has a recorded savepoint
    flinkService.clear();
    deployment.getSpec().getJob().setUpgradeMode(UpgradeMode.LAST_STATE);
    deployment.getSpec().setRestartNonce(200L);
    flinkService.setHaDataAvailable(false);
    deployment.getStatus().getJobStatus().getSavepointInfo().setLastSavepoint(Savepoint.of("finished_sp", SavepointTriggerType.UPGRADE));
    deployment.getStatus().getJobStatus().setState("FINISHED");
    deployment.getStatus().setJobManagerDeploymentStatus(JobManagerDeploymentStatus.READY);
    reconciler.reconcile(deployment, context);
    reconciler.reconcile(deployment, context);
    // Re-read the job list so the assertion below inspects the state after clear() and the
    // two reconciles above, rather than relying on listJobs() having returned a live view.
    runningJobs = flinkService.listJobs();
    assertEquals(1, flinkService.getRunningCount());
    assertEquals("finished_sp", runningJobs.get(0).f0);
}
Also used : Context(io.javaoperatorsdk.operator.api.reconciler.Context) FlinkDeployment(org.apache.flink.kubernetes.operator.crd.FlinkDeployment) TestingFlinkService(org.apache.flink.kubernetes.operator.TestingFlinkService) Tuple2(org.apache.flink.api.java.tuple.Tuple2) DeploymentFailedException(org.apache.flink.kubernetes.operator.exception.DeploymentFailedException) EnumSource(org.junit.jupiter.params.provider.EnumSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 5 with DeploymentFailedException

use of org.apache.flink.kubernetes.operator.exception.DeploymentFailedException in project flink-kubernetes-operator by apache.

the class Observer method observeJmDeployment.

/**
 * Observes the JobManager Kubernetes {@link Deployment} and updates the JobManager deployment
 * status stored in the status of {@code flinkApp}.
 *
 * <p>Transitions performed here:
 * <ul>
 *   <li>{@code READY}: nothing to do;</li>
 *   <li>{@code DEPLOYED_NOT_READY}: promoted to {@code READY};</li>
 *   <li>deployment present, replicas available and JobManager port ready:
 *       {@code DEPLOYED_NOT_READY};</li>
 *   <li>deployment present but not ready: {@code DEPLOYING}, unless a
 *       {@code ReplicaFailure}/{@code FailedCreate} condition is reported;</li>
 *   <li>no deployment resource: {@code MISSING}.</li>
 * </ul>
 *
 * @param flinkApp the application whose status is updated in place
 * @param context reconciler context used to look up the secondary {@link Deployment}
 * @param effectiveConfig configuration passed to the JobManager port-readiness check
 * @throws DeploymentFailedException if the deployment reports a failed replica creation and
 *     the recorded JobManager status is not already {@code ERROR}
 */
private void observeJmDeployment(FlinkDeployment flinkApp, Context context, Configuration effectiveConfig) {
    FlinkDeploymentStatus deploymentStatus = flinkApp.getStatus();
    JobManagerDeploymentStatus previousJmStatus = deploymentStatus.getJobManagerDeploymentStatus();
    if (JobManagerDeploymentStatus.READY == previousJmStatus) {
        return;
    }
    if (JobManagerDeploymentStatus.DEPLOYED_NOT_READY == previousJmStatus) {
        deploymentStatus.setJobManagerDeploymentStatus(JobManagerDeploymentStatus.READY);
        return;
    }
    Optional<Deployment> deployment = context.getSecondaryResource(Deployment.class);
    if (deployment.isPresent()) {
        DeploymentStatus status = deployment.get().getStatus();
        DeploymentSpec spec = deployment.get().getSpec();
        if (status != null && status.getAvailableReplicas() != null && spec.getReplicas().intValue() == status.getReplicas() && spec.getReplicas().intValue() == status.getAvailableReplicas() && flinkService.isJobManagerPortReady(effectiveConfig)) {
            // typically it takes a few seconds for the REST server to be ready
            LOG.info("JobManager deployment {} in namespace {} port ready, waiting for the REST API...", flinkApp.getMetadata().getName(), flinkApp.getMetadata().getNamespace());
            deploymentStatus.setJobManagerDeploymentStatus(JobManagerDeploymentStatus.DEPLOYED_NOT_READY);
            return;
        }
        LOG.info("JobManager deployment {} in namespace {} exists but not ready yet, status {}", flinkApp.getMetadata().getName(), flinkApp.getMetadata().getNamespace(), status);
        // The readiness check above tolerates a null status, so this path must as well:
        // a deployment without a status (or without conditions) is simply still deploying.
        List<DeploymentCondition> conditions = status == null ? null : status.getConditions();
        if (conditions != null) {
            for (DeploymentCondition dc : conditions) {
                if ("FailedCreate".equals(dc.getReason()) && "ReplicaFailure".equals(dc.getType())) {
                    // throw only when not already in error status to allow for spec update
                    if (!JobManagerDeploymentStatus.ERROR.equals(deploymentStatus.getJobManagerDeploymentStatus())) {
                        throw new DeploymentFailedException(DeploymentFailedException.COMPONENT_JOBMANAGER, dc);
                    }
                    return;
                }
            }
        }
        deploymentStatus.setJobManagerDeploymentStatus(JobManagerDeploymentStatus.DEPLOYING);
        return;
    }
    deploymentStatus.setJobManagerDeploymentStatus(JobManagerDeploymentStatus.MISSING);
}
Also used : FlinkDeploymentStatus(org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus) DeploymentSpec(io.fabric8.kubernetes.api.model.apps.DeploymentSpec) FlinkDeployment(org.apache.flink.kubernetes.operator.crd.FlinkDeployment) Deployment(io.fabric8.kubernetes.api.model.apps.Deployment) DeploymentFailedException(org.apache.flink.kubernetes.operator.exception.DeploymentFailedException) FlinkDeploymentStatus(org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus) DeploymentStatus(io.fabric8.kubernetes.api.model.apps.DeploymentStatus) DeploymentCondition(io.fabric8.kubernetes.api.model.apps.DeploymentCondition)

Aggregations

DeploymentFailedException (org.apache.flink.kubernetes.operator.exception.DeploymentFailedException)6 FlinkDeployment (org.apache.flink.kubernetes.operator.crd.FlinkDeployment)4 Deployment (io.fabric8.kubernetes.api.model.apps.Deployment)2 DeploymentSpec (io.fabric8.kubernetes.api.model.apps.DeploymentSpec)2 DeploymentStatus (io.fabric8.kubernetes.api.model.apps.DeploymentStatus)2 Configuration (org.apache.flink.configuration.Configuration)2 FlinkDeploymentStatus (org.apache.flink.kubernetes.operator.crd.status.FlinkDeploymentStatus)2 DeploymentCondition (io.fabric8.kubernetes.api.model.apps.DeploymentCondition)1 Context (io.javaoperatorsdk.operator.api.reconciler.Context)1 ControllerConfiguration (io.javaoperatorsdk.operator.api.reconciler.ControllerConfiguration)1 ArrayList (java.util.ArrayList)1 JobID (org.apache.flink.api.common.JobID)1 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)1 TestingClusterClient (org.apache.flink.kubernetes.operator.TestingClusterClient)1 TestingFlinkService (org.apache.flink.kubernetes.operator.TestingFlinkService)1 FlinkOperatorConfiguration (org.apache.flink.kubernetes.operator.config.FlinkOperatorConfiguration)1 FlinkDeploymentSpec (org.apache.flink.kubernetes.operator.crd.spec.FlinkDeploymentSpec)1 JobManagerDeploymentStatus (org.apache.flink.kubernetes.operator.crd.status.JobManagerDeploymentStatus)1 ReconciliationException (org.apache.flink.kubernetes.operator.exception.ReconciliationException)1 ObjectMapper (org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper)1