Search in sources :

Example 1 with TaskRunAttempt

use of io.kestra.core.models.executions.TaskRunAttempt in project kestra by kestra-io.

the class RunContextTest method metrics.

/**
 * Runs the "return" flow of the "io.kestra.tests" namespace and verifies the metrics
 * stored on the first attempt of the second task run of the resulting execution.
 *
 * @throws TimeoutException declared by the test; presumably raised by {@code runnerUtils.runOne}
 *                          when the execution does not finish in time (confirm against RunnerUtils)
 */
@Test
void metrics() throws TimeoutException {
    Execution execution = runnerUtils.runOne("io.kestra.tests", "return");

    // Validate the shape of the execution before indexing into it, so that a wrong
    // task run count fails with a matcher message instead of an IndexOutOfBoundsException.
    assertThat(execution.getTaskRunList(), hasSize(3));

    TaskRunAttempt taskRunAttempt = execution.getTaskRunList().get(1).getAttempts().get(0);

    // Fail with an explicit message when a metric is missing rather than with a bare
    // NoSuchElementException coming from Optional.get().
    Counter length = (Counter) taskRunAttempt.findMetrics("length")
        .orElseThrow(() -> new AssertionError("metric 'length' not found on the task run attempt"));
    Timer duration = (Timer) taskRunAttempt.findMetrics("duration")
        .orElseThrow(() -> new AssertionError("metric 'duration' not found on the task run attempt"));

    assertThat(length.getValue(), is(7.0D));
    assertThat(duration.getValue().getNano(), is(greaterThan(0)));
    // The tag value is expected verbatim, i.e. the "{{task.id}}" expression is not rendered.
    assertThat(duration.getTags().get("format"), is("{{task.id}}"));
}
Also used : TaskRunAttempt(io.kestra.core.models.executions.TaskRunAttempt) Execution(io.kestra.core.models.executions.Execution) Counter(io.kestra.core.models.executions.metrics.Counter) Timer(io.kestra.core.models.executions.metrics.Timer) Test(org.junit.jupiter.api.Test)

Example 2 with TaskRunAttempt

use of io.kestra.core.models.executions.TaskRunAttempt in project kestra by kestra-io.

the class Worker method run.

/**
 * Processes one worker task: records the start metrics, emits the RUNNING state and, when the
 * task is a {@link RunnableTask}, executes it under the task's retry policy and emits the
 * resulting terminal state.
 * <p>
 * A task that is not a {@link RunnableTask} is only marked RUNNING by this method.
 *
 * @param workerTask the task to run, together with its task run and run context
 * @throws QueueException declared for the emits on the worker task result queue
 * @throws IllegalStateException if no attempt can be found on the task run once execution is over
 */
private void run(WorkerTask workerTask) throws QueueException {
    metricRegistry.counter(MetricRegistry.METRIC_WORKER_STARTED_COUNT, metricRegistry.tags(workerTask)).increment();
    // Only a task run that is still CREATED contributes to the queued-duration timer.
    if (workerTask.getTaskRun().getState().getCurrent() == State.Type.CREATED) {
        metricRegistry.timer(MetricRegistry.METRIC_WORKER_QUEUED_DURATION, metricRegistry.tags(workerTask)).record(Duration.between(workerTask.getTaskRun().getState().getStartDate(), now()));
    }
    workerTask.logger().info("[namespace: {}] [flow: {}] [task: {}] [execution: {}] [taskrun: {}] [value: {}] Type {} started", workerTask.getTaskRun().getNamespace(), workerTask.getTaskRun().getFlowId(), workerTask.getTaskRun().getTaskId(), workerTask.getTaskRun().getExecutionId(), workerTask.getTaskRun().getId(), workerTask.getTaskRun().getValue(), workerTask.getTask().getClass().getSimpleName());
    if (workerTask.logger().isDebugEnabled()) {
        workerTask.logger().debug("Variables\n{}", JacksonMapper.log(workerTask.getRunContext().getVariables()));
    }
    workerTask = workerTask.withTaskRun(workerTask.getTaskRun().withState(State.Type.RUNNING));
    this.workerTaskResultQueue.emit(new WorkerTaskResult(workerTask));
    if (workerTask.getTask() instanceof RunnableTask) {
        // killed case: the execution is already known as killed, so the task is never started
        if (killedExecution.contains(workerTask.getTaskRun().getExecutionId())) {
            workerTask = workerTask.withTaskRun(workerTask.getTaskRun().withState(State.Type.KILLED));
            this.workerTaskResultQueue.emit(new WorkerTaskResult(workerTask));
            this.logTerminated(workerTask);
            return;
        }
        // Holds the worker task handed to the next attempt; replaced by the cleaned-up
        // result of the previous attempt each time a retry is triggered.
        AtomicReference<WorkerTask> current = new AtomicReference<>(workerTask);
        // run: an attempt is retried when its last attempt ended in FAILED
        WorkerTask finalWorkerTask = Failsafe.with(AbstractRetry.<WorkerTask>retryPolicy(workerTask.getTask().getRetry()).handleResultIf(result -> result.getTaskRun().lastAttempt() != null && Objects.requireNonNull(result.getTaskRun().lastAttempt()).getState().getCurrent() == State.Type.FAILED).onRetry(e -> {
            WorkerTask lastResult = e.getLastResult();
            lastResult = lastResult.getRunContext().cleanup(lastResult);
            current.set(lastResult);
            metricRegistry.counter(MetricRegistry.METRIC_WORKER_RETRYED_COUNT, metricRegistry.tags(current.get(), MetricRegistry.TAG_ATTEMPT_COUNT, String.valueOf(e.getAttemptCount()))).increment();
            this.workerTaskResultQueue.emit(new WorkerTaskResult(lastResult));
        })).get(() -> this.runAttempt(current.get()));
        finalWorkerTask = finalWorkerTask.getRunContext().cleanup(finalWorkerTask);
        // get last state
        TaskRunAttempt lastAttempt = finalWorkerTask.getTaskRun().lastAttempt();
        if (lastAttempt == null) {
            throw new IllegalStateException("Can't find lastAttempt on taskRun '" + finalWorkerTask.getTaskRun().toString(true) + "'");
        }
        State.Type state = lastAttempt.getState().getCurrent();
        // A success is downgraded to WARNING only when a retry really happened, i.e. when more
        // than one attempt was recorded. The attempt list is never empty at this point (a last
        // attempt exists), so testing "> 0" would flag every first-try success as WARNING.
        // NOTE(review): assumes every call to runAttempt contributes exactly one attempt entry.
        if (workerTask.getTask().getRetry() != null && workerTask.getTask().getRetry().getWarningOnRetry() && finalWorkerTask.getTaskRun().getAttempts().size() > 1 && state == State.Type.SUCCESS) {
            state = State.Type.WARNING;
        }
        // emit
        finalWorkerTask = finalWorkerTask.withTaskRun(finalWorkerTask.getTaskRun().withState(state));
        // changing status must work in order to finish current task (except if we are near the upper bound size).
        try {
            this.workerTaskResultQueue.emit(new WorkerTaskResult(finalWorkerTask));
        } catch (QueueException e) {
            // Fall back to the worker task as it was before execution, marked FAILED.
            // NOTE(review): presumably chosen because it is smaller than the executed result — confirm.
            finalWorkerTask = workerTask.withTaskRun(workerTask.getTaskRun().withState(State.Type.FAILED));
            this.workerTaskResultQueue.emit(new WorkerTaskResult(finalWorkerTask));
        } finally {
            this.logTerminated(finalWorkerTask);
        }
    }
}
Also used : TaskRun(io.kestra.core.models.executions.TaskRun) java.util(java.util) Getter(lombok.Getter) QueueInterface(io.kestra.core.queues.QueueInterface) JacksonMapper(io.kestra.core.serializers.JacksonMapper) ZonedDateTime(java.time.ZonedDateTime) Hashing(com.google.common.hash.Hashing) AtomicReference(java.util.concurrent.atomic.AtomicReference) State(io.kestra.core.models.flows.State) Timeout(net.jodah.failsafe.Timeout) ApplicationContext(io.micronaut.context.ApplicationContext) ImmutableList(com.google.common.collect.ImmutableList) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TaskRunAttempt(io.kestra.core.models.executions.TaskRunAttempt) Output(io.kestra.core.models.tasks.Output) AbstractRetry(io.kestra.core.models.tasks.retrys.AbstractRetry) Duration(java.time.Duration) ExecutionKilled(io.kestra.core.models.executions.ExecutionKilled) ExecutorService(java.util.concurrent.ExecutorService) Charsets(com.google.common.base.Charsets) Synchronized(lombok.Synchronized) WorkerTaskQueueInterface(io.kestra.core.queues.WorkerTaskQueueInterface) Logger(org.slf4j.Logger) ImmutableMap(com.google.common.collect.ImmutableMap) QueueFactoryInterface(io.kestra.core.queues.QueueFactoryInterface) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Qualifiers(io.micronaut.inject.qualifiers.Qualifiers) IOException(java.io.IOException) Failsafe(net.jodah.failsafe.Failsafe) TimeUnit(java.util.concurrent.TimeUnit) Slf4j(lombok.extern.slf4j.Slf4j) ChronoUnit(java.time.temporal.ChronoUnit) Await(io.kestra.core.utils.Await) ExecutorsUtils(io.kestra.core.utils.ExecutorsUtils) QueueException(io.kestra.core.queues.QueueException) TimeoutExceededException(io.kestra.core.exceptions.TimeoutExceededException) Closeable(java.io.Closeable) MetricRegistry(io.kestra.core.metrics.MetricRegistry) RunnableTask(io.kestra.core.models.tasks.RunnableTask) TaskRunAttempt(io.kestra.core.models.executions.TaskRunAttempt) State(io.kestra.core.models.flows.State) 
QueueException(io.kestra.core.queues.QueueException) AtomicReference(java.util.concurrent.atomic.AtomicReference) RunnableTask(io.kestra.core.models.tasks.RunnableTask)

Example 3 with TaskRunAttempt

use of io.kestra.core.models.executions.TaskRunAttempt in project kestra by kestra-io.

the class ExecutionService method markAs.

/**
 * Builds a copy of a terminated execution in which the given task run, and the task runs
 * returned by {@code taskRunWithAncestors} for it, have their state rewritten, then flags the
 * execution as RESTARTED.
 * <p>
 * Every selected task run that is not flowable, as well as the targeted task run itself,
 * receives {@code newState}; when it has attempts, its last attempt receives the same state.
 * The remaining selected (flowable) task runs are set to RUNNING.
 *
 * @param execution the execution to update; it must be terminated
 * @param taskRunId the id of the task run to mark
 * @param newState  the state to apply
 * @return the updated execution, in state RESTARTED
 * @throws IllegalStateException    if the execution is not terminated
 * @throws IllegalArgumentException if no task run is selected for the given id
 * @throws Exception                declared by the signature for the underlying calls
 */
public Execution markAs(final Execution execution, String taskRunId, State.Type newState) throws Exception {
    if (!execution.getState().isTerninated()) {
        throw new IllegalStateException("Execution must be terminated to be restarted, current state is '" + execution.getState().getCurrent() + "' !");
    }

    final Flow flow = flowRepositoryInterface.findByExecution(execution);

    Set<String> selectedIds = this.taskRunWithAncestors(
        execution,
        execution.getTaskRunList()
            .stream()
            .filter(candidate -> candidate.getId().equals(taskRunId))
            .collect(Collectors.toList())
    );

    if (selectedIds.isEmpty()) {
        throw new IllegalArgumentException("No task found to restart execution from !");
    }

    Execution updated = execution;

    for (String selectedId : selectedIds) {
        TaskRun source = updated.findTaskRunByTaskRunId(selectedId);
        boolean flowable = flow.findTaskByTaskId(source.getTaskId()).isFlowable();

        // Flowable task runs other than the targeted one are simply put back to RUNNING.
        if (flowable && !selectedId.equals(taskRunId)) {
            updated = updated.withTaskRun(source.withState(State.Type.RUNNING));
            continue;
        }

        TaskRun marked = source.withState(newState);

        if (source.getAttempts() != null && !source.getAttempts().isEmpty()) {
            // Work on a copy of the attempts and rewrite the state of the last one only.
            ArrayList<TaskRunAttempt> copiedAttempts = new ArrayList<>(source.getAttempts());
            int lastIndex = copiedAttempts.size() - 1;
            copiedAttempts.set(lastIndex, copiedAttempts.get(lastIndex).withState(newState));
            marked = marked.withAttempts(copiedAttempts);
        }

        updated = updated.withTaskRun(marked);
    }

    return updated.withState(State.Type.RESTARTED);
}
Also used : TaskRunAttempt(io.kestra.core.models.executions.TaskRunAttempt) TaskRun(io.kestra.core.models.executions.TaskRun) Execution(io.kestra.core.models.executions.Execution) Flow(io.kestra.core.models.flows.Flow)

Example 4 with TaskRunAttempt

use of io.kestra.core.models.executions.TaskRunAttempt in project kestra by kestra-io.

the class Worker method runAttempt.

/**
 * Executes a single attempt of a runnable task on a dedicated {@code WorkerThread}, waits for
 * it to finish and returns the worker task enriched with the new attempt and the task outputs.
 * <p>
 * A RUNNING attempt is emitted on the result queue before waiting; the attempt stored on the
 * returned worker task carries the final state and the metrics of the run context.
 *
 * @param workerTask the worker task to execute; its task must be a {@link RunnableTask}
 * @return the worker task whose task run holds the additional attempt and, when they could be
 *         converted, the outputs of the task
 */
private WorkerTask runAttempt(WorkerTask workerTask) {
    RunnableTask<?> task = (RunnableTask<?>) workerTask.getTask();
    RunContext runContext = workerTask.getRunContext().forWorker(this.applicationContext, workerTask.getTaskRun());
    Logger logger = runContext.logger();
    TaskRunAttempt.TaskRunAttemptBuilder builder = TaskRunAttempt.builder().state(new State().withState(State.Type.RUNNING));
    AtomicInteger metricRunningCount = getMetricRunningCount(workerTask);
    metricRunningCount.incrementAndGet();
    WorkerThread workerThread = new WorkerThread(logger, workerTask, task, runContext, metricRegistry);
    // run it
    State.Type state;
    try {
        // Starting the thread and emitting the attempt are inside the try block so that the
        // running counter incremented above is always decremented in the finally block, even
        // when one of them throws.
        workerThread.start();
        // emit attempts
        this.workerTaskResultQueue.emit(new WorkerTaskResult(workerTask.withTaskRun(workerTask.getTaskRun().withAttempts(this.addAttempt(workerTask, builder.build())))));
        synchronized (this) {
            workerThreadReferences.add(workerThread);
        }
        workerThread.join();
        state = workerThread.getTaskState();
    } catch (InterruptedException e) {
        // NOTE(review): the interrupt flag is not restored here; confirm whether callers rely on that.
        logger.error("Failed to join WorkerThread {}", e.getMessage(), e);
        state = State.Type.FAILED;
    } finally {
        synchronized (this) {
            // Removing is harmless when the thread was never registered.
            workerThreadReferences.remove(workerThread);
        }
        metricRunningCount.decrementAndGet();
    }
    // attempt
    TaskRunAttempt taskRunAttempt = builder.metrics(runContext.metrics()).build().withState(state);
    // logs
    if (workerThread.getTaskOutput() != null) {
        log.debug("Outputs\n{}", JacksonMapper.log(workerThread.getTaskOutput()));
    }
    if (runContext.metrics().size() > 0) {
        log.trace("Metrics\n{}", JacksonMapper.log(runContext.metrics()));
    }
    // save outputs
    List<TaskRunAttempt> attempts = this.addAttempt(workerTask, taskRunAttempt);
    TaskRun taskRun = workerTask.getTaskRun().withAttempts(attempts);
    try {
        taskRun = taskRun.withOutputs(workerThread.getTaskOutput() != null ? workerThread.getTaskOutput().toMap() : ImmutableMap.of());
    } catch (Exception e) {
        // Best effort: the task run is returned without outputs when they cannot be stored.
        logger.warn("Unable to save output on taskRun '{}'", taskRun, e);
    }
    return workerTask.withTaskRun(taskRun);
}
Also used : Logger(org.slf4j.Logger) IOException(java.io.IOException) QueueException(io.kestra.core.queues.QueueException) TimeoutExceededException(io.kestra.core.exceptions.TimeoutExceededException) TaskRunAttempt(io.kestra.core.models.executions.TaskRunAttempt) TaskRun(io.kestra.core.models.executions.TaskRun) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) State(io.kestra.core.models.flows.State) RunnableTask(io.kestra.core.models.tasks.RunnableTask)

Aggregations

TaskRunAttempt (io.kestra.core.models.executions.TaskRunAttempt)4 TaskRun (io.kestra.core.models.executions.TaskRun)3 TimeoutExceededException (io.kestra.core.exceptions.TimeoutExceededException)2 Execution (io.kestra.core.models.executions.Execution)2 State (io.kestra.core.models.flows.State)2 RunnableTask (io.kestra.core.models.tasks.RunnableTask)2 QueueException (io.kestra.core.queues.QueueException)2 IOException (java.io.IOException)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 Charsets (com.google.common.base.Charsets)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 Hashing (com.google.common.hash.Hashing)1 MetricRegistry (io.kestra.core.metrics.MetricRegistry)1 ExecutionKilled (io.kestra.core.models.executions.ExecutionKilled)1 Counter (io.kestra.core.models.executions.metrics.Counter)1 Timer (io.kestra.core.models.executions.metrics.Timer)1 Flow (io.kestra.core.models.flows.Flow)1 Output (io.kestra.core.models.tasks.Output)1 AbstractRetry (io.kestra.core.models.tasks.retrys.AbstractRetry)1