
Example 91 with Job

use of io.fabric8.kubernetes.api.model.batch.v1.Job in project strimzi-kafka-operator by strimzi.

the class JobUtils method logCurrentJobStatus.

/**
 * Log the current status of the Job together with the status of its Pods.
 * @param jobName - name of the job whose status should be scraped
 * @param namespace - namespace/project where the job is running
 */
public static void logCurrentJobStatus(String jobName, String namespace) {
    Job currentJob = kubeClient().getJob(namespace, jobName);
    if (currentJob != null && currentJob.getStatus() != null) {
        List<String> log = new ArrayList<>(asList(Constants.JOB, " status:\n"));
        List<JobCondition> conditions = currentJob.getStatus().getConditions();
        log.add("\tActive: " + currentJob.getStatus().getActive());
        log.add("\n\tFailed: " + currentJob.getStatus().getFailed());
        log.add("\n\tReady: " + currentJob.getStatus().getReady());
        log.add("\n\tSucceeded: " + currentJob.getStatus().getSucceeded());
        if (conditions != null) {
            List<String> conditionList = new ArrayList<>();
            for (JobCondition condition : conditions) {
                if (condition.getMessage() != null) {
                    conditionList.add("\t\tType: " + condition.getType() + "\n");
                    conditionList.add("\t\tMessage: " + condition.getMessage() + "\n");
                }
            }
            if (!conditionList.isEmpty()) {
                log.add("\n\tConditions:\n");
                log.addAll(conditionList);
            }
        }
        log.add("\n\nPods with conditions and messages:\n\n");
        for (Pod pod : kubeClient().namespace(currentJob.getMetadata().getNamespace()).listPodsByPrefixInName(jobName)) {
            log.add(pod.getMetadata().getName() + ":");
            List<String> podConditions = new ArrayList<>();
            for (PodCondition podCondition : pod.getStatus().getConditions()) {
                if (podCondition.getMessage() != null) {
                    podConditions.add("\n\tType: " + podCondition.getType() + "\n");
                    podConditions.add("\tMessage: " + podCondition.getMessage() + "\n");
                }
            }
            if (podConditions.isEmpty()) {
                log.add("\n\t<EMPTY>");
            } else {
                log.addAll(podConditions);
            }
            log.add("\n\n");
        }
        LOGGER.info("{}", String.join("", log).strip());
    }
}
Also used : JobCondition(io.fabric8.kubernetes.api.model.batch.v1.JobCondition) Pod(io.fabric8.kubernetes.api.model.Pod) ArrayList(java.util.ArrayList) Job(io.fabric8.kubernetes.api.model.batch.v1.Job) PodCondition(io.fabric8.kubernetes.api.model.PodCondition)
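
For comparison, a minimal self-contained sketch of reading the same batch/v1 Job status fields with a plain fabric8 KubernetesClient; the KubernetesClientBuilder construction, namespace, and job name are illustrative assumptions, not taken from the Strimzi helper above.

import io.fabric8.kubernetes.api.model.batch.v1.Job;
import io.fabric8.kubernetes.client.KubernetesClient;
import io.fabric8.kubernetes.client.KubernetesClientBuilder;

public class JobStatusProbe {
    public static void main(String[] args) {
        try (KubernetesClient client = new KubernetesClientBuilder().build()) {
            // Fetch the Job resource; get() returns null if it does not exist.
            Job job = client.batch().v1().jobs()
                    .inNamespace("my-namespace")   // illustrative namespace
                    .withName("my-job")            // illustrative job name
                    .get();
            if (job != null && job.getStatus() != null) {
                // The same counters logged by logCurrentJobStatus above.
                System.out.printf("Active=%s Failed=%s Succeeded=%s%n",
                        job.getStatus().getActive(),
                        job.getStatus().getFailed(),
                        job.getStatus().getSucceeded());
            }
        }
    }
}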

Example 92 with Job

use of io.fabric8.kubernetes.api.model.batch.v1.Job in project strimzi-kafka-operator by strimzi.

the class OauthAbstractST method tearDownEach.

@AfterEach
void tearDownEach(ExtensionContext extensionContext) throws Exception {
    List<Job> clusterJobList = kubeClient().getJobList().getItems().stream()
        .filter(job -> job.getMetadata().getName().contains(mapWithClusterNames.get(extensionContext.getDisplayName())))
        .collect(Collectors.toList());
    for (Job job : clusterJobList) {
        LOGGER.info("Deleting {} job", job.getMetadata().getName());
        JobUtils.deleteJobWithWait(job.getMetadata().getNamespace(), job.getMetadata().getName());
    }
}
Also used : AbstractST(io.strimzi.systemtest.AbstractST) CoreMatchers(org.hamcrest.CoreMatchers) GenericKafkaListener(io.strimzi.api.kafka.model.listener.arraylistener.GenericKafkaListener) HashMap(java.util.HashMap) ExtensionContext(org.junit.jupiter.api.extension.ExtensionContext) Function(java.util.function.Function) KeycloakInstance(io.strimzi.systemtest.keycloak.KeycloakInstance) SecretUtils(io.strimzi.systemtest.utils.kubeUtils.objects.SecretUtils) Map(java.util.Map) DefaultNetworkPolicy(io.strimzi.systemtest.enums.DefaultNetworkPolicy) Tag(org.junit.jupiter.api.Tag) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Job(io.fabric8.kubernetes.api.model.batch.v1.Job) JobUtils(io.strimzi.systemtest.utils.kubeUtils.controllers.JobUtils) CollectorElement(io.strimzi.test.logs.CollectorElement) Constants(io.strimzi.systemtest.Constants) GenericKafkaListenerBuilder(io.strimzi.api.kafka.model.listener.arraylistener.GenericKafkaListenerBuilder) OAUTH(io.strimzi.systemtest.Constants.OAUTH) Collectors(java.util.stream.Collectors) NetworkPolicyTemplates(io.strimzi.systemtest.templates.kubernetes.NetworkPolicyTemplates) KubeClusterResource.kubeClient(io.strimzi.test.k8s.KubeClusterResource.kubeClient) KeycloakUtils(io.strimzi.systemtest.utils.specific.KeycloakUtils) AfterEach(org.junit.jupiter.api.AfterEach) Base64(java.util.Base64) List(java.util.List) CertSecretSourceBuilder(io.strimzi.api.kafka.model.CertSecretSourceBuilder) Logger(org.apache.logging.log4j.Logger) KafkaListenerType(io.strimzi.api.kafka.model.listener.arraylistener.KafkaListenerType) LogManager(org.apache.logging.log4j.LogManager) REGRESSION(io.strimzi.systemtest.Constants.REGRESSION)
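
Since deleteJobWithWait is a Strimzi test helper, here is a hedged sketch of equivalent cleanup with the plain fabric8 client; the namespace and the name marker being matched are placeholders.

import io.fabric8.kubernetes.client.KubernetesClient;
import io.fabric8.kubernetes.client.KubernetesClientBuilder;

public class JobCleanup {
    public static void main(String[] args) {
        try (KubernetesClient client = new KubernetesClientBuilder().build()) {
            client.batch().v1().jobs().inNamespace("test-ns").list().getItems().stream()
                .filter(job -> job.getMetadata().getName().contains("my-cluster")) // placeholder marker
                .forEach(job -> client.batch().v1().jobs()
                    .inNamespace(job.getMetadata().getNamespace())
                    .withName(job.getMetadata().getName())
                    .delete()); // deletes the Job; dependent pods follow the cluster's propagation policy
        }
    }
}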

Example 93 with Job

use of com.google.cloud.dataproc.v1beta2.Job in project cdap by cdapio.

the class DataprocRuntimeJobManager method launch.

@Override
public void launch(RuntimeJobInfo runtimeJobInfo) throws Exception {
    String bucket = DataprocUtils.getBucketName(this.bucket);
    ProgramRunInfo runInfo = runtimeJobInfo.getProgramRunInfo();
    LOG.debug("Launching run {} with following configurations: cluster {}, project {}, region {}, bucket {}.", runInfo.getRun(), clusterName, projectId, region, bucket);
    // TODO: CDAP-16408 use fixed directory for caching twill, application, artifact jars
    File tempDir = Files.createTempDirectory("dataproc.launcher").toFile();
    // on the Dataproc bucket, the run root will be <bucket>/cdap-job/<runid>/; all the files for this run
    // will be copied under that base dir.
    String runRootPath = getPath(DataprocUtils.CDAP_GCS_ROOT, runInfo.getRun());
    try {
        // step 1: build twill.jar and launcher.jar and add them to files to be copied to gcs
        List<LocalFile> localFiles = getRuntimeLocalFiles(runtimeJobInfo.getLocalizeFiles(), tempDir);
        // step 2: upload all the necessary files to gcs so that those files are available to dataproc job
        List<Future<LocalFile>> uploadFutures = new ArrayList<>();
        for (LocalFile fileToUpload : localFiles) {
            String targetFilePath = getPath(runRootPath, fileToUpload.getName());
            uploadFutures.add(provisionerContext.execute(() -> uploadFile(bucket, targetFilePath, fileToUpload)).toCompletableFuture());
        }
        List<LocalFile> uploadedFiles = new ArrayList<>();
        for (Future<LocalFile> uploadFuture : uploadFutures) {
            uploadedFiles.add(uploadFuture.get());
        }
        // step 3: build the hadoop job request to be submitted to dataproc
        SubmitJobRequest request = getSubmitJobRequest(runtimeJobInfo, uploadedFiles);
        // step 4: submit hadoop job to dataproc
        try {
            Job job = getJobControllerClient().submitJob(request);
            LOG.debug("Successfully submitted hadoop job {} to cluster {}.", job.getReference().getJobId(), clusterName);
        } catch (AlreadyExistsException ex) {
            // the job id already exists, ignore the job.
            LOG.warn("The dataproc job {} already exists. Ignoring resubmission of the job.", request.getJob().getReference().getJobId());
        }
        DataprocUtils.emitMetric(provisionerContext, region, "provisioner.submitJob.response.count");
    } catch (Exception e) {
        // delete all uploaded gcs files in case of exception
        DataprocUtils.deleteGCSPath(getStorageClient(), bucket, runRootPath);
        DataprocUtils.emitMetric(provisionerContext, region, "provisioner.submitJob.response.count", e);
        throw new Exception(String.format("Error while launching job %s on cluster %s", getJobId(runInfo), clusterName), e);
    } finally {
        // delete local temp directory
        deleteDirectoryContents(tempDir);
    }
}
Also used : AlreadyExistsException(com.google.api.gax.rpc.AlreadyExistsException) ArrayList(java.util.ArrayList) SubmitJobRequest(com.google.cloud.dataproc.v1beta2.SubmitJobRequest) IOException(java.io.IOException) ApiException(com.google.api.gax.rpc.ApiException) StorageException(com.google.cloud.storage.StorageException) DefaultLocalFile(org.apache.twill.internal.DefaultLocalFile) LocalFile(org.apache.twill.api.LocalFile) Future(java.util.concurrent.Future) HadoopJob(com.google.cloud.dataproc.v1beta2.HadoopJob) Job(com.google.cloud.dataproc.v1beta2.Job) File(java.io.File) ProgramRunInfo(io.cdap.cdap.runtime.spi.ProgramRunInfo)
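
For orientation, a minimal self-contained sketch of building and submitting a Dataproc job with the same v1beta2 client classes; the project, region, cluster name, main class, and GCS paths are placeholders, not values from the CDAP code above.

import com.google.cloud.dataproc.v1beta2.HadoopJob;
import com.google.cloud.dataproc.v1beta2.Job;
import com.google.cloud.dataproc.v1beta2.JobControllerClient;
import com.google.cloud.dataproc.v1beta2.JobControllerSettings;
import com.google.cloud.dataproc.v1beta2.JobPlacement;
import com.google.cloud.dataproc.v1beta2.SubmitJobRequest;

public class DataprocSubmitSketch {
    public static void main(String[] args) throws Exception {
        String projectId = "my-project";      // placeholder
        String region = "us-central1";        // placeholder
        // The job controller must be addressed through a regional endpoint.
        JobControllerSettings settings = JobControllerSettings.newBuilder()
                .setEndpoint(region + "-dataproc.googleapis.com:443")
                .build();
        try (JobControllerClient client = JobControllerClient.create(settings)) {
            Job job = Job.newBuilder()
                    .setPlacement(JobPlacement.newBuilder().setClusterName("my-cluster"))
                    .setHadoopJob(HadoopJob.newBuilder()
                            .setMainClass("org.example.Main")            // placeholder main class
                            .addJarFileUris("gs://my-bucket/app.jar"))   // placeholder jar
                    .build();
            SubmitJobRequest request = SubmitJobRequest.newBuilder()
                    .setProjectId(projectId)
                    .setRegion(region)
                    .setJob(job)
                    .build();
            Job submitted = client.submitJob(request);
            System.out.println("Submitted job: " + submitted.getReference().getJobId());
        }
    }
}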

Example 94 with Job

use of io.pravega.test.system.framework.metronome.model.v1.Job in project pravega by pravega.

the class RemoteSequential method isTestRunning.

private boolean isTestRunning(final String jobId, final Metronome client) {
    Job jobStatus = client.getJob(jobId);
    // Treat the job as still running while it has no run history yet, or while no run has finished (neither succeeded nor failed).
    boolean isRunning = false;
    if (jobStatus.getHistory() == null) {
        isRunning = true;
    } else if ((jobStatus.getHistory().getSuccessCount() == 0) && (jobStatus.getHistory().getFailureCount() == 0)) {
        isRunning = true;
    }
    return isRunning;
}
Also used : Job(io.pravega.test.system.framework.metronome.model.v1.Job)
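
A hedged sketch of how such a check might drive a polling loop (for instance, the waitForJobCompletion used in the next example); only the getJob/getHistory calls shown above are assumed, everything else here is illustrative.

// Illustrative polling loop built on isTestRunning; the poll interval and timeout handling are arbitrary choices.
private void waitUntilJobDone(String jobId, Metronome client, Duration timeout) throws InterruptedException {
    long deadline = System.nanoTime() + timeout.toNanos();
    while (isTestRunning(jobId, client)) {
        if (System.nanoTime() > deadline) {
            throw new IllegalStateException("Timed out waiting for Metronome job " + jobId);
        }
        TimeUnit.SECONDS.sleep(10); // poll every 10 seconds
    }
}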

Example 95 with Job

use of io.pravega.test.system.framework.metronome.model.v1.Job in project pravega by pravega.

the class RemoteSequential method startTestExecution.

@Override
public CompletableFuture<Void> startTestExecution(Method testMethod) {
    // This sleep is a temporary workaround; it will be removed once https://github.com/pravega/pravega/issues/1665 is resolved.
    Exceptions.handleInterrupted(() -> TimeUnit.SECONDS.sleep(60));
    log.debug("Starting test execution for method: {}", testMethod);
    final Metronome client = AuthEnabledMetronomeClient.getClient();
    String className = testMethod.getDeclaringClass().getName();
    String methodName = testMethod.getName();
    // All job IDs must be lowercase for Metronome.
    String jobId = (methodName + ".testJob").toLowerCase();
    return CompletableFuture.runAsync(() -> {
        client.createJob(newJob(jobId, className, methodName));
        Response response = client.triggerJobRun(jobId);
        if (response.status() != CREATED.getStatusCode()) {
            throw new TestFrameworkException(TestFrameworkException.Type.ConnectionFailed, "Error while starting test " + testMethod);
        } else {
            log.info("Created job succeeded with: " + response.toString());
        }
    }).thenCompose(v2 -> waitForJobCompletion(jobId, client)).<Void>thenApply(v1 -> {
        if (client.getJob(jobId).getHistory().getFailureCount() != 0) {
            throw new AssertionError("Test failed, detailed logs can be found at " + "https://MasterIP/mesos, under metronome framework tasks. MethodName: " + methodName);
        }
        return null;
    }).whenComplete((v, ex) -> {
        // Delete the job once execution is complete.
        deleteJob(jobId, client);
        if (ex != null) {
            log.error("Error while executing the test. ClassName: {}, MethodName: {}", className, methodName);
        }
    });
}
Also used : Response(feign.Response) NotImplementedException(org.apache.commons.lang3.NotImplementedException) Job(io.pravega.test.system.framework.metronome.model.v1.Job) Exceptions(io.pravega.common.Exceptions) Restart(io.pravega.test.system.framework.metronome.model.v1.Restart) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Metronome(io.pravega.test.system.framework.metronome.Metronome) Run(io.pravega.test.system.framework.metronome.model.v1.Run) TimeUnit(java.util.concurrent.TimeUnit) Slf4j(lombok.extern.slf4j.Slf4j) MetronomeException(io.pravega.test.system.framework.metronome.MetronomeException) Duration(java.time.Duration) Map(java.util.Map) Artifact(io.pravega.test.system.framework.metronome.model.v1.Artifact) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ExecutorServiceHelpers(io.pravega.common.concurrent.ExecutorServiceHelpers) Method(java.lang.reflect.Method) Collections(java.util.Collections) Futures(io.pravega.common.concurrent.Futures) AuthEnabledMetronomeClient(io.pravega.test.system.framework.metronome.AuthEnabledMetronomeClient) CREATED(javax.ws.rs.core.Response.Status.CREATED)

Aggregations

Job (org.pentaho.platform.api.scheduler2.Job): 94
Test (org.junit.Test): 89
Job (io.fabric8.kubernetes.api.model.batch.v1.Job): 38
Serializable (java.io.Serializable): 25
ArrayList (java.util.ArrayList): 24
SimpleJobTrigger (org.pentaho.platform.api.scheduler2.SimpleJobTrigger): 21
Job (com.google.cloud.talent.v4beta1.Job): 20
HashMap (java.util.HashMap): 20
JobScheduleRequest (org.pentaho.platform.web.http.api.resources.JobScheduleRequest): 19
ComplexJobTrigger (org.pentaho.platform.api.scheduler2.ComplexJobTrigger): 18
SchedulerException (org.pentaho.platform.api.scheduler2.SchedulerException): 17
JobServiceClient (com.google.cloud.talent.v4beta1.JobServiceClient): 16
Date (java.util.Date): 14
IJobFilter (org.pentaho.platform.api.scheduler2.IJobFilter): 14
Job (com.google.cloud.video.transcoder.v1.Job): 13
TranscoderServiceClient (com.google.cloud.video.transcoder.v1.TranscoderServiceClient): 13
JobBuilder (io.fabric8.kubernetes.api.model.batch.v1.JobBuilder): 13
IJobTrigger (org.pentaho.platform.api.scheduler2.IJobTrigger): 12
Map (java.util.Map): 11
Test (org.junit.jupiter.api.Test): 10