Use of com.baidu.hugegraph.computer.k8s.crd.model.ComponentState in the project hugegraph-computer by hugegraph.
The method derivedCRStatus of the class ComputerJobController.
/**
 * Derives the status of the computer job custom resource from its observed
 * components.
 *
 * <p>The method works on a clone of the job's current status. It refreshes the
 * ConfigMap, master-job and worker-job component states on that clone and then
 * sets the overall job status from the failed / succeeded / running tallies
 * filled in by {@code deriveJobStatus}.
 *
 * @param observed the observed components of the job (custom resource,
 *                 ConfigMap, master/worker jobs and their pods)
 * @return the cloned status carrying the derived component states and job status
 */
private ComputerJobStatus derivedCRStatus(ComputerJobComponent observed) {
    HugeGraphComputerJob job = observed.computerJob();
    ComputerJobSpec jobSpec = job.getSpec();

    // Tallies shared by the master and worker derivations below
    MutableInt failedTally = new MutableInt(0);
    MutableInt succeededTally = new MutableInt(0);
    MutableInt runningTally = new MutableInt(0);

    // Never touch the status held by the observed resource; edit a copy
    ComputerJobStatus derived = Serialization.clone(job.getStatus());

    // ConfigMap
    ConfigMap observedConfigMap = observed.configMap();
    if (observedConfigMap == null) {
        // The ConfigMap is not observed: mark a previously recorded state as deleted
        if (derived.getComponentStates().getConfigMap() != null) {
            derived.getComponentStates()
                   .getConfigMap()
                   .setState(CommonComponentState.DELETED.value());
        }
    } else {
        ComponentState readyConfigMap = new ComponentStateBuilder()
                .withName(observedConfigMap.getMetadata().getName())
                .withState(CommonComponentState.READY.value())
                .build();
        derived.getComponentStates().setConfigMap(readyConfigMap);
    }

    // MasterJob
    ComponentState masterState = this.deriveJobStatus(
            observed.masterJob(),
            observed.masterPods(),
            derived.getComponentStates().getMasterJob(),
            Constants.MASTER_INSTANCES,
            failedTally, succeededTally, runningTally);
    derived.getComponentStates().setMasterJob(masterState);

    // WorkerJob
    ComponentState workerState = this.deriveJobStatus(
            observed.workerJob(),
            observed.workerPods(),
            derived.getComponentStates().getWorkerJob(),
            jobSpec.getWorkerInstances(),
            failedTally, succeededTally, runningTally);
    derived.getComponentStates().setWorkerJob(workerState);

    // Too many failed components: the whole job is failed
    if (failedTally.intValue() > ALLOW_FAILED_COMPONENTS) {
        derived.setJobStatus(JobStatus.FAILED.name());
        this.recordFailedEvent(job, masterState, workerState);
        return derived;
    }

    // Every component succeeded: the whole job succeeded
    if (succeededTally.intValue() == TOTAL_COMPONENTS) {
        derived.setJobStatus(JobStatus.SUCCEEDED.name());
        String crName = job.getMetadata().getName();
        long elapsed = this.calculateJobCost(job);
        String message = String.format("Job %s run successfully, took %ss", crName, elapsed);
        this.recordEvent(job, EventType.NORMAL, KubeUtil.succeedEventName(crName),
                         "ComputerJobSucceed", message);
        return derived;
    }

    // Otherwise the job is running only when every component is running or succeeded
    boolean allActive =
            runningTally.intValue() + succeededTally.intValue() == TOTAL_COMPONENTS;
    derived.setJobStatus(allActive ? JobStatus.RUNNING.name()
                                   : JobStatus.INITIALIZING.name());
    return derived;
}
Use of com.baidu.hugegraph.computer.k8s.crd.model.ComponentState in the project hugegraph-computer by hugegraph.
The method deriveJobStatus of the class ComputerJobController.
/**
 * Derives the component state of a single job and bumps the matching tally.
 *
 * <p>When the job (or its status) is missing, the previous state, if any, is
 * marked as cancelled and returned. Otherwise a fresh state named after the
 * job is built and classified, in this order of precedence:
 * <ol>
 *   <li>succeeded: the job's succeeded count reaches {@code instances};</li>
 *   <li>failed: the job's failed count exceeds {@code ALLOW_FAILED_JOBS}
 *       (the first job condition message and the pods' error log are attached
 *       when available);</li>
 *   <li>failed: the pods match the unschedulable check;</li>
 *   <li>failed: the pods match the image-pull-back-off check;</li>
 *   <li>running: running pods plus the succeeded count reach {@code instances};</li>
 *   <li>pending: anything else.</li>
 * </ol>
 *
 * <p>NOTE(review): the state string comes from {@code name()} in some branches
 * and {@code value()} in others, and the two pod-level failure branches use
 * {@code JobStatus.FAILED} rather than {@code JobComponentState.FAILED}. This
 * is kept exactly as found; confirm whether it is intentional.
 *
 * @param job                 the observed job, may be {@code null}
 * @param pods                the pods observed for the job
 * @param oldSate             the previously recorded state, may be {@code null}
 * @param instances           the expected number of instances of the job
 * @param failedComponents    tally incremented when the job is classified as failed
 * @param succeededComponents tally incremented when the job is classified as succeeded
 * @param runningComponents   tally incremented when the job is classified as running
 * @return the newly derived state, or {@code oldSate} (possibly {@code null})
 *         when the job or its status is missing
 */
private ComponentState deriveJobStatus(Job job, List<Pod> pods, ComponentState oldSate, int instances, MutableInt failedComponents, MutableInt succeededComponents, MutableInt runningComponents) {
    // No job to inspect: the only thing left to do is cancel the old state
    if (job == null || job.getStatus() == null) {
        if (oldSate != null) {
            oldSate.setState(JobComponentState.CANCELLED.value());
        }
        return oldSate;
    }

    ComponentState derived = new ComponentState();
    derived.setName(job.getMetadata().getName());

    int succeededCount = KubeUtil.intVal(job.getStatus().getSucceeded());
    int failedCount = KubeUtil.intVal(job.getStatus().getFailed());
    // Both pod checks are evaluated up front, before any classification
    MatchWithMsg unschedulableMatch = this.unSchedulable(pods);
    MatchWithMsg imagePullMatch = this.imagePullBackOff(pods);

    if (succeededCount >= instances) {
        derived.setState(JobComponentState.SUCCEEDED.name());
        succeededComponents.increment();
        return derived;
    }

    if (failedCount > ALLOW_FAILED_JOBS) {
        derived.setState(JobComponentState.FAILED.name());
        List<JobCondition> jobConditions = job.getStatus().getConditions();
        if (CollectionUtils.isNotEmpty(jobConditions)) {
            derived.setMessage(jobConditions.get(0).getMessage());
        }
        String podErrorLog = this.getErrorLog(pods);
        if (StringUtils.isNotBlank(podErrorLog)) {
            derived.setErrorLog(podErrorLog);
        }
        failedComponents.increment();
        return derived;
    }

    if (unschedulableMatch.isMatch()) {
        derived.setState(JobStatus.FAILED.name());
        derived.setMessage(unschedulableMatch.msg());
        failedComponents.increment();
        return derived;
    }

    if (imagePullMatch.isMatch()) {
        derived.setState(JobStatus.FAILED.name());
        derived.setMessage(imagePullMatch.msg());
        failedComponents.increment();
        return derived;
    }

    // Neither finished nor failed: decide between running and pending
    int runningPods = (int) pods.stream()
                                .filter(PodStatusUtil::isRunning)
                                .count();
    if (runningPods + succeededCount >= instances) {
        derived.setState(JobComponentState.RUNNING.value());
        runningComponents.increment();
    } else {
        derived.setState(JobComponentState.PENDING.value());
    }
    return derived;
}
Aggregations