Use of com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob in project hugegraph-computer by hugegraph.
Class KubernetesDriverTest, method testOnClose.
/**
 * Feeds the driver's internal watcher a series of events followed by a
 * close-with-exception, then asserts that the driver's "watchActive" flag
 * reads false.
 */
@Test
public void testOnClose() {
    // Register one pending waiter in the driver's private "waits" map.
    Map<String, Pair<CompletableFuture<Void>, JobObserver>> pendingWaits =
            Whitebox.getInternalState(this.driver, "waits");
    Pair<CompletableFuture<Void>, JobObserver> pending =
            Pair.of(new CompletableFuture<>(), Mockito.mock(JobObserver.class));
    pendingWaits.put("test-123", pending);

    // Dig the watcher callback out of the driver's watch manager.
    AbstractWatchManager<HugeGraphComputerJob> watchManager =
            Whitebox.getInternalState(this.driver, "watch");
    Watcher<HugeGraphComputerJob> callback =
            Whitebox.getInternalState(watchManager, "watcher");

    // A null resource and an ERROR action carrying an empty job.
    callback.eventReceived(Watcher.Action.ADDED, null);
    callback.eventReceived(Watcher.Action.ERROR, new HugeGraphComputerJob());

    // A MODIFIED event for a job that carries only an empty spec.
    HugeGraphComputerJob jobWithSpec = new HugeGraphComputerJob();
    jobWithSpec.setSpec(new ComputerJobSpec());
    callback.eventReceived(Watcher.Action.MODIFIED, jobWithSpec);

    // Close the watch with an exception.
    callback.onClose(new WatcherException("test close"));

    MutableBoolean active = Whitebox.getInternalState(this.driver, "watchActive");
    Assert.assertFalse(active.booleanValue());
}
Use of com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob in project hugegraph-computer by hugegraph.
Class MiniKubeTest, method testTwiceCreate.
/**
 * Submits a job, waits until the observer has been told it is RUNNING,
 * then calls createOrReplace on the same custom resource with a changed
 * master CPU value before cancelling the job.
 */
@Test
public void testTwiceCreate() {
    // Keep master and worker containers alive long enough for the test.
    String sleepCommand = "pwd && sleep 60";
    super.updateOptions(KubeSpecOptions.MASTER_ARGS.name(), Lists.newArrayList(sleepCommand));
    super.updateOptions(KubeSpecOptions.WORKER_ARGS.name(), Lists.newArrayList(sleepCommand));

    // Recompute the driver's default spec so the updated options are used.
    Object rebuiltSpec = Whitebox.invoke(KubernetesDriver.class, "defaultSpec", this.driver);
    Whitebox.setInternalState(this.driver, "defaultSpec", rebuiltSpec);

    Map<String, String> jobParams = new HashMap<>();
    jobParams.put(KubeSpecOptions.WORKER_INSTANCES.name(), "1");

    String jobId = this.driver.submitJob(ALGORITHM_NAME, jobParams);
    JobObserver observer = Mockito.mock(JobObserver.class);
    CompletableFuture<Void> waiting = this.driver.waitJobAsync(jobId, jobParams, observer);

    // Wait (up to 20s) for at least one RUNNING notification.
    DefaultJobState runningState = new DefaultJobState();
    runningState.jobStatus(JobStatus.RUNNING);
    Mockito.verify(observer, Mockito.timeout(20000L).atLeast(1))
           .onJobStateChanged(Mockito.eq(runningState));

    // Fetch the existing CR, tweak it and push it a second time.
    String crName = KubeUtil.crName(jobId);
    HugeGraphComputerJob existing = this.operation.withName(crName).get();
    existing.getSpec().setMasterCpu(Quantity.parse("2"));
    this.operation.createOrReplace(existing);
    UnitTestBase.sleep(1000L);

    // Tear down: cancel the job, then the pending wait.
    this.driver.cancelJob(jobId, jobParams);
    UnitTestBase.sleep(1000L);
    waiting.cancel(true);
}
Use of com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob in project hugegraph-computer by hugegraph.
Class ComputerJobController, method derivedCRStatus.
/**
 * Builds a new status for the job from the observed components.
 *
 * <p>Works on a clone of the job's current status. It refreshes the
 * ConfigMap, master-job and worker-job component states, then chooses the
 * overall job status: FAILED when the failed count exceeds
 * {@code ALLOW_FAILED_COMPONENTS}, SUCCEEDED when the succeeded count equals
 * {@code TOTAL_COMPONENTS}, RUNNING when running plus succeeded equals
 * {@code TOTAL_COMPONENTS}, and INITIALIZING otherwise. An event is recorded
 * for the FAILED and SUCCEEDED outcomes.
 *
 * @param observed the observed job together with its ConfigMap, jobs and pods
 * @return the derived status (a clone; the job's own status object is not modified here)
 */
private ComputerJobStatus derivedCRStatus(ComputerJobComponent observed) {
    HugeGraphComputerJob job = observed.computerJob();
    ComputerJobSpec jobSpec = job.getSpec();

    // Counters are passed to deriveJobStatus() for both master and worker.
    MutableInt failed = new MutableInt(0);
    MutableInt succeeded = new MutableInt(0);
    MutableInt running = new MutableInt(0);

    ComputerJobStatus status = Serialization.clone(job.getStatus());

    // ConfigMap: READY when present; DELETED when it is gone but was recorded before.
    ConfigMap observedConfigMap = observed.configMap();
    if (observedConfigMap != null) {
        String configMapName = observedConfigMap.getMetadata().getName();
        ComponentState readyState = new ComponentStateBuilder()
                .withName(configMapName)
                .withState(CommonComponentState.READY.value())
                .build();
        status.getComponentStates().setConfigMap(readyState);
    } else {
        if (status.getComponentStates().getConfigMap() != null) {
            status.getComponentStates().getConfigMap()
                  .setState(CommonComponentState.DELETED.value());
        }
    }

    // Master job (must be derived before the worker job: both update the counters).
    Job master = observed.masterJob();
    ComponentState masterState = this.deriveJobStatus(
            master, observed.masterPods(),
            status.getComponentStates().getMasterJob(),
            Constants.MASTER_INSTANCES,
            failed, succeeded, running);
    status.getComponentStates().setMasterJob(masterState);

    // Worker job.
    Job worker = observed.workerJob();
    ComponentState workerState = this.deriveJobStatus(
            worker, observed.workerPods(),
            status.getComponentStates().getWorkerJob(),
            jobSpec.getWorkerInstances(),
            failed, succeeded, running);
    status.getComponentStates().setWorkerJob(workerState);

    // Terminal outcome: too many failures.
    if (failed.intValue() > ALLOW_FAILED_COMPONENTS) {
        status.setJobStatus(JobStatus.FAILED.name());
        this.recordFailedEvent(job, masterState, workerState);
        return status;
    }

    // Terminal outcome: every component succeeded.
    if (succeeded.intValue() == TOTAL_COMPONENTS) {
        status.setJobStatus(JobStatus.SUCCEEDED.name());
        String crName = job.getMetadata().getName();
        long cost = this.calculateJobCost(job);
        String message = String.format("Job %s run successfully, took %ss", crName, cost);
        this.recordEvent(job, EventType.NORMAL, KubeUtil.succeedEventName(crName),
                         "ComputerJobSucceed", message);
        return status;
    }

    // Still in progress: RUNNING once all components are running or done.
    boolean allActive = running.intValue() + succeeded.intValue() == TOTAL_COMPONENTS;
    JobStatus phase = allActive ? JobStatus.RUNNING : JobStatus.INITIALIZING;
    status.setJobStatus(phase.name());
    return status;
}
Use of com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob in project hugegraph-computer by hugegraph.
Class ComputerJobController, method reconcile.
/**
 * Reconciles a single HugeGraphComputerJob request.
 *
 * <p>Steps, in order: fetch the CR (stop if it is gone), fill its status,
 * run finalizer handling (stop if it returns true), observe the job's
 * components and update the status, skip deployment when the job already
 * reports RUNNING, otherwise deploy the observed components. Every path
 * returns {@code OperatorResult.NO_REQUEUE}.
 *
 * @param request the reconcile request identifying the CR
 * @return always {@code OperatorResult.NO_REQUEUE}
 */
@Override
protected OperatorResult reconcile(OperatorRequest request) {
    HugeGraphComputerJob job = this.getCR(request);
    if (job == null) {
        LOG.info("Unable to fetch HugeGraphComputerJob {}, it may have been deleted",
                 request.name());
        return OperatorResult.NO_REQUEUE;
    }

    this.fillCRStatus(job);

    boolean handledByFinalizer = this.finalizer(job);
    if (handledByFinalizer) {
        return OperatorResult.NO_REQUEUE;
    }

    ComputerJobComponent observed = this.observeComponent(job);

    // updateStatus() is always invoked; retryTimes() is consulted only when it returned false.
    boolean updated = this.updateStatus(observed);
    if (!updated && request.retryTimes() == 0) {
        LOG.debug("Wait status to be stable before taking further actions");
        return OperatorResult.NO_REQUEUE;
    }

    // Nothing to deploy for a job that already reports RUNNING.
    boolean alreadyRunning = JobStatus.RUNNING.name().equals(job.getStatus().getJobStatus());
    if (alreadyRunning) {
        LOG.info("ComputerJob {} already running, no action", job.getMetadata().getName());
        return OperatorResult.NO_REQUEUE;
    }

    new ComputerJobDeployer(this.kubeClient, this.config).deploy(observed);
    return OperatorResult.NO_REQUEUE;
}
Use of com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob in project hugegraph-computer by hugegraph.
Class ComputerJobController, method handleFailOverLimit.
/**
 * Handles a request whose reconcile attempts kept failing: logs a warning,
 * records a WARNING event carrying the exception's stack trace, sets the
 * job status to FAILED and persists it via {@code updateStatus}.
 *
 * <p>If the CR can no longer be fetched, only an info message is logged.
 *
 * @param request the reconcile request that kept failing
 * @param e       the exception from the failed reconcile; its stack trace
 *                becomes the event message
 */
@Override
protected void handleFailOverLimit(OperatorRequest request, Exception e) {
    HugeGraphComputerJob computerJob = this.getCR(request);
    if (computerJob == null) {
        LOG.info("Unable to fetch HugeGraphComputerJob {}, " + "it may have been deleted", request.name());
        return;
    }

    String crName = computerJob.getMetadata().getName();
    LOG.warn("ComputerJob {} reconcile failed reach {} times", crName, request.retryTimes());
    this.recordEvent(computerJob, EventType.WARNING, KubeUtil.failedEventName(crName),
                     String.format("ComputerJob %s reconcile failed\n", crName),
                     Throwables.getStackTraceAsString(e));

    // reconcile() runs fillCRStatus() on every freshly fetched CR before it
    // touches getStatus(); do the same here so a CR whose status was never
    // set does not cause a NullPointerException while handling a failure.
    // NOTE(review): assumes fillCRStatus() initialises a missing status and
    // leaves an existing one usable -- confirm against its implementation.
    this.fillCRStatus(computerJob);
    computerJob.getStatus().setJobStatus(JobStatus.FAILED.name());
    this.updateStatus(computerJob);
}
Aggregations