Search in sources :

Example 1 with ComponentState

use of com.baidu.hugegraph.computer.k8s.crd.model.ComponentState in project hugegraph-computer by hugegraph.

The method derivedCRStatus of the class ComputerJobController.

/**
 * Builds the status to report for a computer job from the currently
 * observed cluster resources (config map, master job and worker job).
 *
 * <p>The returned object is a clone of the job's current status with the
 * component states and the overall job status refreshed. A failure or a
 * success event is recorded when the job is found to be FAILED or
 * SUCCEEDED respectively.
 *
 * @param observed the observed job together with its related resources
 * @return the derived status
 */
private ComputerJobStatus derivedCRStatus(ComputerJobComponent observed) {
    HugeGraphComputerJob job = observed.computerJob();
    ComputerJobSpec jobSpec = job.getSpec();

    // Counters filled in by deriveJobStatus() for the master and worker jobs
    MutableInt failedCount = new MutableInt(0);
    MutableInt succeededCount = new MutableInt(0);
    MutableInt runningCount = new MutableInt(0);

    // Work on a copy so the observed job's status is left untouched
    ComputerJobStatus derived = Serialization.clone(job.getStatus());

    // ConfigMap: READY while it exists, DELETED once it is gone
    ConfigMap observedConfigMap = observed.configMap();
    if (observedConfigMap == null) {
        ComponentState previousConfigMapState = derived.getComponentStates().getConfigMap();
        if (previousConfigMapState != null) {
            previousConfigMapState.setState(CommonComponentState.DELETED.value());
        }
    } else {
        String configMapName = observedConfigMap.getMetadata().getName();
        ComponentState readyState = new ComponentStateBuilder()
                .withName(configMapName)
                .withState(CommonComponentState.READY.value())
                .build();
        derived.getComponentStates().setConfigMap(readyState);
    }

    // MasterJob
    Job master = observed.masterJob();
    ComponentState masterState = this.deriveJobStatus(
            master,
            observed.masterPods(),
            derived.getComponentStates().getMasterJob(),
            Constants.MASTER_INSTANCES,
            failedCount, succeededCount, runningCount);
    derived.getComponentStates().setMasterJob(masterState);

    // WorkerJob
    Job worker = observed.workerJob();
    ComponentState workerState = this.deriveJobStatus(
            worker,
            observed.workerPods(),
            derived.getComponentStates().getWorkerJob(),
            jobSpec.getWorkerInstances(),
            failedCount, succeededCount, runningCount);
    derived.getComponentStates().setWorkerJob(workerState);

    // Too many failed components: the whole job has failed
    if (failedCount.intValue() > ALLOW_FAILED_COMPONENTS) {
        derived.setJobStatus(JobStatus.FAILED.name());
        this.recordFailedEvent(job, masterState, workerState);
        return derived;
    }

    // Every component succeeded: the whole job has succeeded
    if (succeededCount.intValue() == TOTAL_COMPONENTS) {
        derived.setJobStatus(JobStatus.SUCCEEDED.name());
        String crName = job.getMetadata().getName();
        long cost = this.calculateJobCost(job);
        String eventName = KubeUtil.succeedEventName(crName);
        String message = String.format("Job %s run successfully, took %ss", crName, cost);
        this.recordEvent(job, EventType.NORMAL, eventName, "ComputerJobSucceed", message);
        return derived;
    }

    // Otherwise the job is RUNNING only when every component is running or done
    boolean allActive = runningCount.intValue() + succeededCount.intValue() == TOTAL_COMPONENTS;
    JobStatus overall = allActive ? JobStatus.RUNNING : JobStatus.INITIALIZING;
    derived.setJobStatus(overall.name());
    return derived;
}
Also used : ComponentStateBuilder(com.baidu.hugegraph.computer.k8s.crd.model.ComponentStateBuilder) ComputerJobSpec(com.baidu.hugegraph.computer.k8s.crd.model.ComputerJobSpec) ConfigMap(io.fabric8.kubernetes.api.model.ConfigMap) MutableInt(org.apache.commons.lang3.mutable.MutableInt) HugeGraphComputerJob(com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob) ComputerJobStatus(com.baidu.hugegraph.computer.k8s.crd.model.ComputerJobStatus) Job(io.fabric8.kubernetes.api.model.batch.v1.Job) HugeGraphComputerJob(com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob) CommonComponentState(com.baidu.hugegraph.computer.k8s.crd.model.CommonComponentState) ComponentState(com.baidu.hugegraph.computer.k8s.crd.model.ComponentState) JobComponentState(com.baidu.hugegraph.computer.k8s.crd.model.JobComponentState)

Example 2 with ComponentState

use of com.baidu.hugegraph.computer.k8s.crd.model.ComponentState in project hugegraph-computer by hugegraph.

The method deriveJobStatus of the class ComputerJobController.

/**
 * Derives the state of a single job component (master or worker) from the
 * observed Kubernetes job and its pods, and bumps the matching counter.
 *
 * <p>Resolution order when the job and its status are available:
 * <ol>
 *   <li>SUCCEEDED when the succeeded count reaches {@code instances};</li>
 *   <li>FAILED when the failed count exceeds {@code ALLOW_FAILED_JOBS}; the
 *       first job condition message and the pods' error log are attached
 *       when present;</li>
 *   <li>FAILED when the unschedulable check or the image-pull-back-off
 *       check matches, with the check's message attached;</li>
 *   <li>RUNNING when running pods plus succeeded pods reach
 *       {@code instances}, otherwise PENDING.</li>
 * </ol>
 * When the job or its status is missing, the previous state (if any) is
 * marked CANCELLED in place and returned.
 *
 * @param job                 the observed job, may be {@code null}
 * @param pods                the pods belonging to the job
 * @param oldSate             the previously recorded state, may be {@code null}
 * @param instances           the number of instances the job is expected to run
 * @param failedComponents    incremented when the component is FAILED
 * @param succeededComponents incremented when the component is SUCCEEDED
 * @param runningComponents   incremented when the component is RUNNING
 * @return the newly derived state, or {@code oldSate} (possibly
 *         {@code null}) when the job cannot be observed
 */
private ComponentState deriveJobStatus(Job job, List<Pod> pods, ComponentState oldSate, int instances, MutableInt failedComponents, MutableInt succeededComponents, MutableInt runningComponents) {
    if (job == null || job.getStatus() == null) {
        // The job is not observable any more: flag the last known state
        if (oldSate != null) {
            oldSate.setState(JobComponentState.CANCELLED.value());
        }
        return oldSate;
    }

    ComponentState newState = new ComponentState();
    newState.setName(job.getMetadata().getName());

    int succeeded = KubeUtil.intVal(job.getStatus().getSucceeded());
    int failed = KubeUtil.intVal(job.getStatus().getFailed());
    MatchWithMsg unSchedulable = this.unSchedulable(pods);
    MatchWithMsg failedPullImage = this.imagePullBackOff(pods);

    if (succeeded >= instances) {
        newState.setState(JobComponentState.SUCCEEDED.name());
        succeededComponents.increment();
    } else if (failed > ALLOW_FAILED_JOBS) {
        newState.setState(JobComponentState.FAILED.name());
        List<JobCondition> conditions = job.getStatus().getConditions();
        if (CollectionUtils.isNotEmpty(conditions)) {
            newState.setMessage(conditions.get(0).getMessage());
        }
        String errorLog = this.getErrorLog(pods);
        if (StringUtils.isNotBlank(errorLog)) {
            newState.setErrorLog(errorLog);
        }
        failedComponents.increment();
    } else if (unSchedulable.isMatch()) {
        // Component states come from JobComponentState, not the job-level
        // JobStatus enum; the resulting string is the same
        newState.setState(JobComponentState.FAILED.name());
        newState.setMessage(unSchedulable.msg());
        failedComponents.increment();
    } else if (failedPullImage.isMatch()) {
        newState.setState(JobComponentState.FAILED.name());
        newState.setMessage(failedPullImage.msg());
        failedComponents.increment();
    } else {
        int running = (int) pods.stream().filter(PodStatusUtil::isRunning).count();
        int active = running + succeeded;
        if (active >= instances) {
            newState.setState(JobComponentState.RUNNING.value());
            runningComponents.increment();
        } else {
            newState.setState(JobComponentState.PENDING.value());
        }
    }
    return newState;
}
Also used : MatchWithMsg(com.baidu.hugegraph.computer.k8s.operator.common.MatchWithMsg) List(java.util.List) HugeGraphComputerJobList(com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJobList) CommonComponentState(com.baidu.hugegraph.computer.k8s.crd.model.CommonComponentState) ComponentState(com.baidu.hugegraph.computer.k8s.crd.model.ComponentState) JobComponentState(com.baidu.hugegraph.computer.k8s.crd.model.JobComponentState)

Aggregations

CommonComponentState (com.baidu.hugegraph.computer.k8s.crd.model.CommonComponentState)2 ComponentState (com.baidu.hugegraph.computer.k8s.crd.model.ComponentState)2 JobComponentState (com.baidu.hugegraph.computer.k8s.crd.model.JobComponentState)2 ComponentStateBuilder (com.baidu.hugegraph.computer.k8s.crd.model.ComponentStateBuilder)1 ComputerJobSpec (com.baidu.hugegraph.computer.k8s.crd.model.ComputerJobSpec)1 ComputerJobStatus (com.baidu.hugegraph.computer.k8s.crd.model.ComputerJobStatus)1 HugeGraphComputerJob (com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob)1 HugeGraphComputerJobList (com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJobList)1 MatchWithMsg (com.baidu.hugegraph.computer.k8s.operator.common.MatchWithMsg)1 ConfigMap (io.fabric8.kubernetes.api.model.ConfigMap)1 Job (io.fabric8.kubernetes.api.model.batch.v1.Job)1 List (java.util.List)1 MutableInt (org.apache.commons.lang3.mutable.MutableInt)1