Search in sources :

Example 1 with Worker

use of org.bf2.srs.fleetmanager.execution.manager.Worker in project kas-fleetshard by bf2fc6cc711aee1a0c2a.

the class InstanceProfiler method setup.

/**
 * Prepares a profiling run.
 *
 * <p>Loads any previously stored results, makes sure the run has a name and a log directory
 * under {@code target}, connects to the Kafka and OMB clusters, installs OMB, records the node
 * types and worker sizing in {@code profilingResult}, and, when no step has been completed yet,
 * installs the Kafka provisioner and persists {@link Step#SETUP}.
 *
 * @throws Exception if any of the underlying setup calls fails
 */
private void setup() throws Exception {
    final String instanceTypeLabel = "node.kubernetes.io/instance-type";

    readResults();
    if (profilingResult.name == null) {
        // No name carried over from stored results: derive one from the current time.
        profilingResult.name = "profile-" + Environment.DATE_FORMAT.format(LocalDateTime.now());
    }
    logDir = new File("target", profilingResult.name);
    Files.createDirectories(logDir.toPath());

    // Kafka cluster: connect, record the instance type of the first worker node, set up the provisioner.
    kafkaCluster = KubeClusterResource.connectToKubeCluster(PerformanceEnvironment.KAFKA_KUBECONFIG);
    profilingResult.kafkaNodeType = kafkaCluster.getWorkerNodes()
            .get(0)
            .getMetadata()
            .getLabels()
            .get(instanceTypeLabel);
    kafkaProvisioner = ManagedKafkaProvisioner.create(kafkaCluster);
    kafkaProvisioner.setup();

    // OMB cluster: connect and install using the provisioner's TLS configuration.
    omb = new OMB(KubeClusterResource.connectToKubeCluster(PerformanceEnvironment.OMB_KUBECONFIG));
    omb.install(kafkaProvisioner.getTlsConfig());

    // TODO: if there is an existing result, make sure it's the same test setup
    profilingResult.ombNodeType = omb.getOmbCluster()
            .getWorkerNodes()
            .get(0)
            .getMetadata()
            .getLabels()
            .get(instanceTypeLabel);
    profilingResult.ombWorkerNodes = omb.getOmbCluster().getWorkerNodes().size();
    AvailableResources available = getMinAvailableResources(omb.getOmbCluster().getWorkerNodes().stream());

    // Use the available resources on the worker nodes with 2 workers per node.
    // A strict check that client instance types have between 8 and 16 GB is intentionally
    // not enforced; instead the values are capped so that they fit on 2xlarge or xlarge.
    available.cpuMillis = Math.min(available.cpuMillis, 6400);
    available.memoryBytes = Math.min(available.memoryBytes, 12 * ONE_GB);

    // Each of the two workers per node gets half of the (capped) node resources.
    final String cpuPerWorker = available.cpuMillis / 2 + "m";
    final String memoryPerWorker = String.valueOf(available.memoryBytes / 2);
    omb.setWorkerCpu(Quantity.parse(cpuPerWorker));
    omb.setWorkerContainerMemory(Quantity.parse(memoryPerWorker));
    profilingResult.ombWorkerCpu = omb.getWorkerCpu();
    profilingResult.ombWorkerMemory = omb.getWorkerContainerMemory();
    LOGGER.info("OMB Workers will use {} cpu and {} memory requests", omb.getWorkerCpu(), omb.getWorkerContainerMemory());

    if (profilingResult.completedStep != null) {
        // A step was already completed according to the stored results: nothing to install.
        return;
    }
    installedProvisioner = true;
    kafkaProvisioner.install();
    writeResults(Step.SETUP);
}
Also used : AvailableResources(org.bf2.performance.TestUtils.AvailableResources) File(java.io.File)

Example 2 with Worker

use of org.bf2.srs.fleetmanager.execution.manager.Worker in project srs-fleet-manager by bf2fc6cc711aee1a0c2a.

the class JobWrapper method execute.

/**
 * Quartz entry point: runs the task carried by the job context on every registered worker that
 * supports it and is not listed in {@code workerExclusions}.
 *
 * <p>For each selected worker the method executes the task and then decides, via the local
 * {@code next}, what happens afterwards:
 * <ul>
 *   <li>{@code next != null} - the task is re-triggered at that instant (normal schedule after a
 *       success, or a retry after a failure while the retry budget is not used up);</li>
 *   <li>{@code next == null} - {@code worker.finallyExecute} is invoked with the last exception
 *       (if any) and the task is removed from the task manager.</li>
 * </ul>
 *
 * @param quartzJobContext the Quartz context from which the task and worker context are loaded
 *                         and into which they are saved again
 */
@Override
@SneakyThrows
@ActivateRequestContext
public void execute(JobExecutionContext quartzJobContext) {
    Task task = loadTask(quartzJobContext);
    // Only workers that declare support for the task and whose class is not excluded take part.
    List<Worker> selectedWorkers = workers.stream().filter(w -> w.supports(task) && !workerExclusions.contains(w.getClass())).collect(toList());
    for (Worker worker : selectedWorkers) {
        WorkerContextImpl wCtx = loadWorkerContext(quartzJobContext, worker, task);
        // Time of the next execution; null means "do not reschedule, finish and remove the task".
        Instant next = null;
        // Exception handed to finallyExecute; cleared for retry/stop control exceptions.
        Exception lastException = null;
        try {
            log.debug("Task Manager (task = {}, worker = {}, workerContext = {}): Executing task.", task, worker, wCtx);
            worker.execute(task, wCtx);
            // Actions the worker queued on its context run only after execute returned normally.
            wCtx.getDelayedActions().forEach(Runnable::run);
            // Success path from here on.
            // Reset retry counter
            wCtx.setRetryAttempts(0);
            // Reset min retry counter
            wCtx.setMinRetries(task.getSchedule().getMinRetries());
            // Normal rescheduling
            next = nextExecution(task);
        } catch (Exception anEx) {
            // TODO Throwable?
            // Only Exception is caught: an Error skips this block, so the finally block below
            // sees next == null and lastException == null, runs finallyExecute and removes the task.
            lastException = anEx;
            if (anEx instanceof RetryExecutionControlException) {
                log.debug("Task Manager (task = {}, worker = {}, workerContext = {}): Task requested a retry.", task, worker, wCtx, anEx);
                RetryExecutionControlException ex = (RetryExecutionControlException) anEx;
                if (ex.isForce() && wCtx.getMinRetries() < Integer.MAX_VALUE) {
                    // Make space for forced retry, no more than Integer.MAX_VALUE
                    wCtx.setMinRetries(wCtx.getMinRetries() + 1);
                    // Forced retries are scheduled one second from now instead of using backoff.
                    next = Instant.now().plus(Duration.ofSeconds(1));
                }
                // The exception may raise the retry budget, but never lowers it.
                if (ex.getMinRetries() > wCtx.getMinRetries()) {
                    wCtx.setMinRetries(ex.getMinRetries());
                }
                // A retry request is a control signal, not a failure to report.
                lastException = null;
            }
            if (wCtx.getRetryAttempts() < wCtx.getMinRetries() && (next == null)) {
                // Reschedule if the minRetries is not reached
                next = Instant.now().plus(backoff(wCtx.getRetryAttempts()));
            }
            // Checked after the retry handling so that a stop request overrides any "next"
            // computed above.
            if (anEx instanceof StopExecutionControlException) {
                log.debug("Task Manager (task = {}, worker = {}, workerContext = {}): Task requested a stop.", task, worker, wCtx, anEx);
                // Unschedule
                next = null;
                lastException = null;
            }
            if (lastException != null) {
                // NOTE(review): the message has five placeholders and five arguments, the last one
                // being the exception; some SLF4J versions treat a trailing Throwable as the
                // stack-trace argument instead of a placeholder value - confirm the rendered output.
                log.warn("Task Manager (task = {}, worker = {}, workerContext = {}, nextExecution = {}): Task threw an exception during execution: {}", task, worker, wCtx, next, anEx);
            }
            // Every caught exception (including retry/stop control exceptions) counts as an attempt.
            wCtx.setRetryAttempts(wCtx.getRetryAttempts() + 1);
        } finally {
            // Unlikely used
            // Delayed actions are dropped before the context is saved, whether or not they ran.
            wCtx.setDelayedActions(new ArrayList<>(0));
            saveWorkerContext(quartzJobContext, wCtx, worker);
            saveTask(quartzJobContext, task);
            // Scheduling
            if (next != null) {
                // The retry counter was already incremented in the catch block, so equality
                // with minRetries means this is the final retry that will be granted.
                if (wCtx.getRetryAttempts() == wCtx.getMinRetries()) {
                    log.info("Task Manager (task = {}, worker = {}, workerContext = {}): Last rescheduling at {}.", task, worker, wCtx, next);
                } else {
                    log.debug("Task Manager (task = {}, worker = {}, workerContext = {}): Rescheduling task at {}.", task, worker, wCtx, next);
                }
                taskManager.rerigger(task, next);
            } else {
                // No further execution planned: give the worker its final callback, then
                // remove the task regardless of how that callback ends.
                try {
                    log.debug("Task Manager (task = {}, worker = {}, workerContext = {}): Executing finallyExecute. Last exception = {}", task, worker, wCtx, lastException);
                    worker.finallyExecute(task, wCtx, ofNullable(lastException));
                    wCtx.getDelayedActions().forEach(Runnable::run);
                } catch (Exception ex) {
                    // Failures in finallyExecute are logged and deliberately not propagated.
                    log.warn("Task Manager (task = {}, worker = {}, workerContext = {}): Ignoring an exception thrown in finallyExecute: {}", task, worker, wCtx, ex);
                } finally {
                    log.debug("Task Manager (task = {}, worker = {}, workerContext = {}): Removing task.", task, worker, wCtx);
                    taskManager.remove(task);
                }
            }
        }
    }
}
Also used : Task(org.bf2.srs.fleetmanager.execution.manager.Task) Instant.now(java.time.Instant.now) SneakyThrows(lombok.SneakyThrows) OperationContext(org.bf2.srs.fleetmanager.operation.OperationContext) LoggerFactory(org.slf4j.LoggerFactory) Job(org.quartz.Job) Duration.ofSeconds(java.time.Duration.ofSeconds) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) WorkerContext(org.bf2.srs.fleetmanager.execution.manager.WorkerContext) DisallowConcurrentExecution(org.quartz.DisallowConcurrentExecution) Duration(java.time.Duration) Objects.requireNonNull(java.util.Objects.requireNonNull) PersistJobDataAfterExecution(org.quartz.PersistJobDataAfterExecution) SerDesObjectMapperProducer(org.bf2.srs.fleetmanager.common.SerDesObjectMapperProducer) Instance(javax.enterprise.inject.Instance) JobExecutionContext(org.quartz.JobExecutionContext) Logger(org.slf4j.Logger) Optional.ofNullable(java.util.Optional.ofNullable) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) QuartzIDs.jobDetailKeyForTask(org.bf2.srs.fleetmanager.execution.manager.impl.QuartzIDs.jobDetailKeyForTask) Instant(java.time.Instant) QuartzIDs.jobDetailKeyForWorker(org.bf2.srs.fleetmanager.execution.manager.impl.QuartzIDs.jobDetailKeyForWorker) ActivateRequestContext(javax.enterprise.context.control.ActivateRequestContext) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) Worker(org.bf2.srs.fleetmanager.execution.manager.Worker) ApplicationScoped(javax.enterprise.context.ApplicationScoped) ConfigProperty(org.eclipse.microprofile.config.inject.ConfigProperty) Task(org.bf2.srs.fleetmanager.execution.manager.Task) QuartzIDs.jobDetailKeyForTask(org.bf2.srs.fleetmanager.execution.manager.impl.QuartzIDs.jobDetailKeyForTask) Instant(java.time.Instant) QuartzIDs.jobDetailKeyForWorker(org.bf2.srs.fleetmanager.execution.manager.impl.QuartzIDs.jobDetailKeyForWorker) 
Worker(org.bf2.srs.fleetmanager.execution.manager.Worker) ActivateRequestContext(javax.enterprise.context.control.ActivateRequestContext) SneakyThrows(lombok.SneakyThrows)

Example 3 with Worker

use of org.bf2.srs.fleetmanager.execution.manager.Worker in project kas-fleetshard by bf2fc6cc711aee1a0c2a.

the class OMB method createWorker.

/**
 * Creates (or replaces) the Kubernetes resources for a single benchmark worker in the OMB
 * namespace: a one-replica Deployment, a Service exposing port 80 towards container port 8080,
 * and an OpenShift Route pointing at that Service. All three resources are named {@code name}.
 *
 * @param jvmOpts value passed to the worker container as the {@code _JAVA_OPTIONS} environment variable
 * @param name    name used for the Deployment, Service and Route, and as the value of the
 *                {@code worker} label/selector
 * @param node    when not {@code null}, the worker pod is restricted via a node selector to the
 *                node whose {@code kubernetes.io/hostname} label matches this node's label
 * @throws IOException declared by the signature
 */
private void createWorker(String jvmOpts, String name, Node node) throws IOException {
    final String namespace = Constants.OMB_NAMESPACE;
    final String hostnameLabel = "kubernetes.io/hostname";
    KubeClient client = ombCluster.kubeClient();

    EnvVar javaOptions = new EnvVar("_JAVA_OPTIONS", jvmOpts, null);
    EnvVar[] additionalEnv = envVars.toArray(new EnvVar[0]);

    // Deployment: one replica, selected by the "worker" label, with the CA secret mounted at /cert.
    DeploymentBuilder workerDeployment = new DeploymentBuilder()
            .editOrNewMetadata()
                .withName(name)
                .withNamespace(namespace)
                .addToLabels("app", "worker")
            .endMetadata()
            .editOrNewSpec()
                .withReplicas(1)
                .editOrNewSelector()
                    .addToMatchLabels("worker", name)
                .endSelector()
                .editOrNewTemplate()
                    .editOrNewMetadata()
                        .addToLabels("worker", name)
                        .addToLabels("app", "worker")
                    .endMetadata()
                    .editOrNewSpec()
                        .addNewContainer()
                            .withName("worker")
                            .withImage(Constants.OMB_WORKER_IMAGE)
                            // Requests are set to the same values as the limits.
                            .withResources(new ResourceRequirementsBuilder()
                                    .withLimits(getResourceLimits())
                                    .withRequests(getResourceLimits())
                                    .build())
                            .addToCommand("sh", "-c")
                            .addToEnv(javaOptions)
                            .addToEnv(additionalEnv)
                            .addToArgs("cd /tmp/src; ./bin/benchmark-worker")
                            .addToPorts(
                                    new ContainerPortBuilder().withContainerPort(8080).build(),
                                    new ContainerPortBuilder().withContainerPort(8081).build())
                            .withLivenessProbe(new ProbeBuilder()
                                    .withInitialDelaySeconds(10)
                                    .withHttpGet(new HTTPGetActionBuilder()
                                            .withPort(new IntOrString(8080))
                                            .withPath("counters-stats")
                                            .build())
                                    .build())
                            .addNewVolumeMount()
                                .withName("ca")
                                .withMountPath("/cert")
                                .withReadOnly(true)
                            .endVolumeMount()
                        .endContainer()
                        .withTerminationGracePeriodSeconds(15L)
                        .addNewVolume()
                            .withName("ca")
                            .editOrNewSecret()
                                .withSecretName("ext-listener-crt")
                            .endSecret()
                        .endVolume()
                    .endSpec()
                .endTemplate()
            .endSpec();

    if (node != null) {
        // Pin the pod to the requested node through its hostname label.
        String hostname = node.getMetadata().getLabels().get(hostnameLabel);
        workerDeployment.editSpec()
                .editTemplate()
                    .editSpec()
                        .withNodeSelector(Collections.singletonMap(hostnameLabel, hostname))
                    .endSpec()
                .endTemplate()
            .endSpec();
    }

    client.client()
            .apps()
            .deployments()
            .inNamespace(namespace)
            .createOrReplace(workerDeployment.build());

    // Service: port 80 forwarded to the worker container's port 8080.
    client.client()
            .services()
            .inNamespace(namespace)
            .createOrReplace(new ServiceBuilder()
                    .editOrNewMetadata()
                        .withName(name)
                        .withNamespace(namespace)
                        .addToLabels("app", "worker")
                    .endMetadata()
                    .editOrNewSpec()
                        .addToSelector("worker", name)
                        .addNewPort()
                            .withPort(80)
                            .withTargetPort(new IntOrString(8080))
                            .withProtocol("TCP")
                        .endPort()
                    .endSpec()
                    .build());

    // Route: targets the Service above and carries a 360s haproxy timeout annotation.
    client.client()
            .adapt(OpenShiftClient.class)
            .routes()
            .inNamespace(namespace)
            .createOrReplace(new RouteBuilder()
                    .editOrNewMetadata()
                        .withName(name)
                        .withNamespace(namespace)
                        .withAnnotations(Map.of("haproxy.router.openshift.io/timeout", "360s"))
                        .addToLabels("app", "worker")
                        .addToLabels("app.kubernetes.io/name", name)
                    .endMetadata()
                    .editOrNewSpec()
                        .editOrNewTo()
                            .withKind("Service")
                            .withName(name)
                        .endTo()
                    .endSpec()
                    .build());
}
Also used : HTTPGetActionBuilder(io.fabric8.kubernetes.api.model.HTTPGetActionBuilder) ProbeBuilder(io.fabric8.kubernetes.api.model.ProbeBuilder) KubeClient(org.bf2.test.k8s.KubeClient) RouteBuilder(io.fabric8.openshift.api.model.RouteBuilder) IntOrString(io.fabric8.kubernetes.api.model.IntOrString) ResourceRequirementsBuilder(io.fabric8.kubernetes.api.model.ResourceRequirementsBuilder) ContainerPortBuilder(io.fabric8.kubernetes.api.model.ContainerPortBuilder) OpenShiftClient(io.fabric8.openshift.client.OpenShiftClient) EnvVar(io.fabric8.kubernetes.api.model.EnvVar) DeploymentBuilder(io.fabric8.kubernetes.api.model.apps.DeploymentBuilder) ServiceBuilder(io.fabric8.kubernetes.api.model.ServiceBuilder)

Aggregations

ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 ContainerPortBuilder (io.fabric8.kubernetes.api.model.ContainerPortBuilder)1 EnvVar (io.fabric8.kubernetes.api.model.EnvVar)1 HTTPGetActionBuilder (io.fabric8.kubernetes.api.model.HTTPGetActionBuilder)1 IntOrString (io.fabric8.kubernetes.api.model.IntOrString)1 ProbeBuilder (io.fabric8.kubernetes.api.model.ProbeBuilder)1 ResourceRequirementsBuilder (io.fabric8.kubernetes.api.model.ResourceRequirementsBuilder)1 ServiceBuilder (io.fabric8.kubernetes.api.model.ServiceBuilder)1 DeploymentBuilder (io.fabric8.kubernetes.api.model.apps.DeploymentBuilder)1 RouteBuilder (io.fabric8.openshift.api.model.RouteBuilder)1 OpenShiftClient (io.fabric8.openshift.client.OpenShiftClient)1 File (java.io.File)1 Duration (java.time.Duration)1 Duration.ofSeconds (java.time.Duration.ofSeconds)1 Instant (java.time.Instant)1 Instant.now (java.time.Instant.now)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 Objects.requireNonNull (java.util.Objects.requireNonNull)1 Optional.ofNullable (java.util.Optional.ofNullable)1