Search in sources :

Example 6 with HugeGraphComputerJob

use of com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob in project hugegraph-computer by hugegraph.

In the class KubernetesDriverTest, method testOnClose.

/**
 * Feeds the driver's internal watcher an ADDED event with a null resource,
 * an ERROR event, a MODIFIED event carrying a job with a spec, and finally
 * a close with an exception, then asserts that the driver's
 * {@code watchActive} flag is false.
 */
@Test
public void testOnClose() {
    // Register one pending waiter under a fixed job id.
    Map<String, Pair<CompletableFuture<Void>, JobObserver>> pendingWaits =
            Whitebox.getInternalState(this.driver, "waits");
    CompletableFuture<Void> pendingFuture = new CompletableFuture<>();
    JobObserver observer = Mockito.mock(JobObserver.class);
    pendingWaits.put("test-123", Pair.of(pendingFuture, observer));

    // Reach the watcher the driver registered with its watch manager.
    AbstractWatchManager<HugeGraphComputerJob> watchManager =
            Whitebox.getInternalState(this.driver, "watch");
    Watcher<HugeGraphComputerJob> jobWatcher =
            Whitebox.getInternalState(watchManager, "watcher");

    jobWatcher.eventReceived(Watcher.Action.ADDED, null);
    jobWatcher.eventReceived(Watcher.Action.ERROR, new HugeGraphComputerJob());

    HugeGraphComputerJob jobWithSpec = new HugeGraphComputerJob();
    jobWithSpec.setSpec(new ComputerJobSpec());
    jobWatcher.eventReceived(Watcher.Action.MODIFIED, jobWithSpec);

    // Closing the watcher with an exception must leave the watch inactive.
    jobWatcher.onClose(new WatcherException("test close"));

    MutableBoolean active = Whitebox.getInternalState(this.driver, "watchActive");
    Assert.assertFalse(active.booleanValue());
}
Also used : CompletableFuture(java.util.concurrent.CompletableFuture) ComputerJobSpec(com.baidu.hugegraph.computer.k8s.crd.model.ComputerJobSpec) MutableBoolean(org.apache.commons.lang3.mutable.MutableBoolean) HugeGraphComputerJob(com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob) Pair(org.apache.commons.lang3.tuple.Pair) WatcherException(io.fabric8.kubernetes.client.WatcherException) Test(org.junit.Test)

Example 7 with HugeGraphComputerJob

use of com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob in project hugegraph-computer by hugegraph.

In the class MiniKubeTest, method testTwiceCreate.

/**
 * Submits a job, waits until the observer has been told it is RUNNING,
 * then writes the job's custom resource a second time (with a changed
 * master CPU) through {@code createOrReplace} before cancelling the job.
 */
@Test
public void testTwiceCreate() {
    // Both master and worker run the same long-lived command.
    String sleepCommand = "pwd && sleep 60";
    super.updateOptions(KubeSpecOptions.MASTER_ARGS.name(),
                        Lists.newArrayList(sleepCommand));
    super.updateOptions(KubeSpecOptions.WORKER_ARGS.name(),
                        Lists.newArrayList(sleepCommand));

    // Recompute the driver's default spec after the option changes above.
    Object refreshedSpec = Whitebox.invoke(KubernetesDriver.class,
                                           "defaultSpec", this.driver);
    Whitebox.setInternalState(this.driver, "defaultSpec", refreshedSpec);

    Map<String, String> jobParams = new HashMap<>();
    jobParams.put(KubeSpecOptions.WORKER_INSTANCES.name(), "1");
    String jobId = this.driver.submitJob(ALGORITHM_NAME, jobParams);

    JobObserver observer = Mockito.mock(JobObserver.class);
    CompletableFuture<Void> waitFuture =
            this.driver.waitJobAsync(jobId, jobParams, observer);

    // Block (up to 20s) until the observer has seen the RUNNING state.
    DefaultJobState runningState = new DefaultJobState();
    runningState.jobStatus(JobStatus.RUNNING);
    Mockito.verify(observer, Mockito.timeout(20000L).atLeast(1))
           .onJobStateChanged(Mockito.eq(runningState));

    // Second write of the same custom resource, with a modified spec.
    HugeGraphComputerJob fetchedJob =
            this.operation.withName(KubeUtil.crName(jobId)).get();
    fetchedJob.getSpec().setMasterCpu(Quantity.parse("2"));
    this.operation.createOrReplace(fetchedJob);
    UnitTestBase.sleep(1000L);

    this.driver.cancelJob(jobId, jobParams);
    UnitTestBase.sleep(1000L);

    waitFuture.cancel(true);
}
Also used : JobObserver(com.baidu.hugegraph.computer.driver.JobObserver) HashMap(java.util.HashMap) DefaultJobState(com.baidu.hugegraph.computer.driver.DefaultJobState) HugeGraphComputerJob(com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob) Test(org.junit.Test)

Example 8 with HugeGraphComputerJob

use of com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob in project hugegraph-computer by hugegraph.

In the class ComputerJobController, method derivedCRStatus.

/**
 * Builds the status to report for a computer job from its observed
 * components.
 *
 * <p>Starts from a {@code Serialization.clone} of the job's current status,
 * refreshes the ConfigMap, master-job and worker-job component states, and
 * then picks the overall job status:
 * <ul>
 *   <li>FAILED when the failed-component count exceeds
 *       {@code ALLOW_FAILED_COMPONENTS} (a failed event is recorded);</li>
 *   <li>SUCCEEDED when the succeeded-component count equals
 *       {@code TOTAL_COMPONENTS} (a normal event is recorded);</li>
 *   <li>RUNNING when running plus succeeded components equal
 *       {@code TOTAL_COMPONENTS};</li>
 *   <li>INITIALIZING otherwise.</li>
 * </ul>
 *
 * @param observed the observed job together with its ConfigMap, jobs and pods
 * @return the derived status
 */
private ComputerJobStatus derivedCRStatus(ComputerJobComponent observed) {
    HugeGraphComputerJob job = observed.computerJob();
    ComputerJobSpec jobSpec = job.getSpec();

    // Counters handed to deriveJobStatus for both the master and worker job.
    MutableInt failed = new MutableInt(0);
    MutableInt succeeded = new MutableInt(0);
    MutableInt running = new MutableInt(0);

    ComputerJobStatus derived = Serialization.clone(job.getStatus());

    // ConfigMap: READY while it is observed; DELETED if it was recorded
    // earlier but is no longer observed.
    ConfigMap observedConfigMap = observed.configMap();
    if (observedConfigMap == null) {
        if (derived.getComponentStates().getConfigMap() != null) {
            derived.getComponentStates().getConfigMap()
                   .setState(CommonComponentState.DELETED.value());
        }
    } else {
        ComponentState readyState = new ComponentStateBuilder()
                .withName(observedConfigMap.getMetadata().getName())
                .withState(CommonComponentState.READY.value())
                .build();
        derived.getComponentStates().setConfigMap(readyState);
    }

    // MasterJob
    Job master = observed.masterJob();
    ComponentState masterState = this.deriveJobStatus(
            master,
            observed.masterPods(),
            derived.getComponentStates().getMasterJob(),
            Constants.MASTER_INSTANCES,
            failed, succeeded, running);
    derived.getComponentStates().setMasterJob(masterState);

    // WorkerJob
    Job worker = observed.workerJob();
    ComponentState workerState = this.deriveJobStatus(
            worker,
            observed.workerPods(),
            derived.getComponentStates().getWorkerJob(),
            jobSpec.getWorkerInstances(),
            failed, succeeded, running);
    derived.getComponentStates().setWorkerJob(workerState);

    // Terminal outcome: failure takes precedence over success.
    if (failed.intValue() > ALLOW_FAILED_COMPONENTS) {
        derived.setJobStatus(JobStatus.FAILED.name());
        this.recordFailedEvent(job, masterState, workerState);
        return derived;
    }
    if (succeeded.intValue() == TOTAL_COMPONENTS) {
        derived.setJobStatus(JobStatus.SUCCEEDED.name());
        String crName = job.getMetadata().getName();
        long cost = this.calculateJobCost(job);
        String eventName = KubeUtil.succeedEventName(crName);
        String message = String.format("Job %s run successfully, took %ss",
                                       crName, cost);
        this.recordEvent(job, EventType.NORMAL, eventName,
                         "ComputerJobSucceed", message);
        return derived;
    }

    // Not terminal: RUNNING once every component is running or succeeded.
    boolean allActive =
            running.intValue() + succeeded.intValue() == TOTAL_COMPONENTS;
    derived.setJobStatus(allActive ? JobStatus.RUNNING.name()
                                   : JobStatus.INITIALIZING.name());
    return derived;
}
Also used : ComponentStateBuilder(com.baidu.hugegraph.computer.k8s.crd.model.ComponentStateBuilder) ComputerJobSpec(com.baidu.hugegraph.computer.k8s.crd.model.ComputerJobSpec) ConfigMap(io.fabric8.kubernetes.api.model.ConfigMap) MutableInt(org.apache.commons.lang3.mutable.MutableInt) HugeGraphComputerJob(com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob) ComputerJobStatus(com.baidu.hugegraph.computer.k8s.crd.model.ComputerJobStatus) Job(io.fabric8.kubernetes.api.model.batch.v1.Job) CommonComponentState(com.baidu.hugegraph.computer.k8s.crd.model.CommonComponentState) ComponentState(com.baidu.hugegraph.computer.k8s.crd.model.ComponentState) JobComponentState(com.baidu.hugegraph.computer.k8s.crd.model.JobComponentState)

Example 9 with HugeGraphComputerJob

use of com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob in project hugegraph-computer by hugegraph.

In the class ComputerJobController, method reconcile.

/**
 * Reconciles one HugeGraphComputerJob request.
 *
 * <p>Steps, in order: fetch the custom resource (stop if it is gone), fill
 * its status, run the finalizer (stop if it returns true), observe the
 * job's components and update the status. Deployment is skipped when
 * {@code updateStatus} returns false while {@code retryTimes()} is 0, or
 * when the job status is already RUNNING; otherwise the observed
 * components are handed to a {@code ComputerJobDeployer}.
 *
 * @param request the request identifying the custom resource
 * @return always {@code OperatorResult.NO_REQUEUE}
 */
@Override
protected OperatorResult reconcile(OperatorRequest request) {
    HugeGraphComputerJob job = this.getCR(request);
    if (job == null) {
        LOG.info("Unable to fetch HugeGraphComputerJob {}, " +
                 "it may have been deleted", request.name());
        return OperatorResult.NO_REQUEUE;
    }

    this.fillCRStatus(job);

    boolean finalized = this.finalizer(job);
    if (finalized) {
        return OperatorResult.NO_REQUEUE;
    }

    ComputerJobComponent observed = this.observeComponent(job);

    boolean statusUpdated = this.updateStatus(observed);
    if (!statusUpdated && request.retryTimes() == 0) {
        LOG.debug("Wait status to be stable before taking further actions");
        return OperatorResult.NO_REQUEUE;
    }

    String currentStatus = job.getStatus().getJobStatus();
    if (Objects.equals(currentStatus, JobStatus.RUNNING.name())) {
        String crName = job.getMetadata().getName();
        LOG.info("ComputerJob {} already running, no action", crName);
        return OperatorResult.NO_REQUEUE;
    }

    new ComputerJobDeployer(this.kubeClient, this.config).deploy(observed);
    return OperatorResult.NO_REQUEUE;
}
Also used : HugeGraphComputerJob(com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob)

Example 10 with HugeGraphComputerJob

use of com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob in project hugegraph-computer by hugegraph.

In the class ComputerJobController, method handleFailOverLimit.

/**
 * Handles a request whose reconcile failures reached the retry limit.
 *
 * <p>Fetches the custom resource (returning quietly if it is gone), logs a
 * warning, records a WARNING event carrying the stack trace of {@code e},
 * marks the job status as FAILED and pushes the status update.
 *
 * @param request the request that kept failing
 * @param e       the exception whose stack trace is recorded in the event
 */
@Override
protected void handleFailOverLimit(OperatorRequest request, Exception e) {
    HugeGraphComputerJob computerJob = this.getCR(request);
    if (computerJob == null) {
        LOG.info("Unable to fetch HugeGraphComputerJob {}, " + "it may have been deleted", request.name());
        return;
    }
    String crName = computerJob.getMetadata().getName();
    LOG.warn("ComputerJob {} reconcile failed reach {} times", crName, request.retryTimes());
    this.recordEvent(computerJob, EventType.WARNING, KubeUtil.failedEventName(crName), String.format("ComputerJob %s reconcile failed\n", crName), Throwables.getStackTraceAsString(e));
    // reconcile() calls fillCRStatus() on a freshly fetched CR before it
    // reads getStatus(); do the same here so the dereference below does not
    // hit a missing status (presumably possible when reconcile never got
    // far enough to persist one - confirm against fillCRStatus).
    this.fillCRStatus(computerJob);
    computerJob.getStatus().setJobStatus(JobStatus.FAILED.name());
    this.updateStatus(computerJob);
}
Also used : HugeGraphComputerJob(com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob)

Aggregations

HugeGraphComputerJob (com.baidu.hugegraph.computer.k8s.crd.model.HugeGraphComputerJob): 12; Test (org.junit.Test): 4; ComputerJobSpec (com.baidu.hugegraph.computer.k8s.crd.model.ComputerJobSpec): 3; HashMap (java.util.HashMap): 3; ComputerJobStatus (com.baidu.hugegraph.computer.k8s.crd.model.ComputerJobStatus): 2; ConfigMap (io.fabric8.kubernetes.api.model.ConfigMap): 2; Job (io.fabric8.kubernetes.api.model.batch.v1.Job): 2; DefaultJobState (com.baidu.hugegraph.computer.driver.DefaultJobState): 1; JobObserver (com.baidu.hugegraph.computer.driver.JobObserver): 1; CommonComponentState (com.baidu.hugegraph.computer.k8s.crd.model.CommonComponentState): 1; ComponentState (com.baidu.hugegraph.computer.k8s.crd.model.ComponentState): 1; ComponentStateBuilder (com.baidu.hugegraph.computer.k8s.crd.model.ComponentStateBuilder): 1; JobComponentState (com.baidu.hugegraph.computer.k8s.crd.model.JobComponentState): 1; ContainerPort (io.fabric8.kubernetes.api.model.ContainerPort): 1; ObjectMeta (io.fabric8.kubernetes.api.model.ObjectMeta): 1; ObjectMetaBuilder (io.fabric8.kubernetes.api.model.ObjectMetaBuilder): 1; Pod (io.fabric8.kubernetes.api.model.Pod): 1; WatcherException (io.fabric8.kubernetes.client.WatcherException): 1; CompletableFuture (java.util.concurrent.CompletableFuture): 1; MutableBoolean (org.apache.commons.lang3.mutable.MutableBoolean): 1