Search in sources :

Example 21 with TaskLocation

use of org.apache.druid.indexer.TaskLocation in project druid by apache.

the class ForkingTaskRunner method run.

/**
 * Registers the given task (if it is not already registered under its ID) and returns the future of its result.
 *
 * <p>For a newly registered task, a {@link Callable} is submitted to {@code exec}. That callable assembles the
 * command line of a child JVM ({@code org.apache.druid.cli.Main internal peon ...}), starts it through
 * {@code runTaskProcess}, waits for it to exit and converts the exit code / status file into a {@link TaskStatus}.
 * When the callable ends, normally or not, it removes the task from {@code tasks}, shuts down the process holder,
 * hands the child ports back to {@code portFinder} and, unless the runner is stopping, deletes the task directory.
 *
 * <p>If a work item already exists for the task ID, no new callable is submitted and the existing work item's
 * result is returned.
 *
 * @param task the task to run
 * @return the result future of the work item registered under {@code task.getId()}
 */
@Override
public ListenableFuture<TaskStatus> run(final Task task) {
    synchronized (tasks) {
        tasks.computeIfAbsent(task.getId(), k -> new ForkingTaskRunnerWorkItem(task, exec.submit(new Callable<TaskStatus>() {

            @Override
            public TaskStatus call() {
                // Each attempt gets its own sub-directory (named by a random UUID) under the task directory.
                final String attemptUUID = UUID.randomUUID().toString();
                final File taskDir = taskConfig.getTaskDir(task.getId());
                final File attemptDir = new File(taskDir, attemptUUID);
                final ProcessHolder processHolder;
                final String childHost = node.getHost();
                // -1 is kept for a port whose protocol is not enabled on this node.
                int childPort = -1;
                int tlsChildPort = -1;
                if (node.isEnablePlaintextPort()) {
                    childPort = portFinder.findUnusedPort();
                }
                if (node.isEnableTlsPort()) {
                    tlsChildPort = portFinder.findUnusedPort();
                }
                final TaskLocation taskLocation = TaskLocation.create(childHost, childPort, tlsChildPort);
                try {
                    // Everything registered with the closer is closed in the inner finally block below.
                    final Closer closer = Closer.create();
                    try {
                        FileUtils.mkdirp(attemptDir);
                        // task.json and log live in the task directory; status.json and report.json are per attempt.
                        final File taskFile = new File(taskDir, "task.json");
                        final File statusFile = new File(attemptDir, "status.json");
                        final File logFile = new File(taskDir, "log");
                        final File reportsFile = new File(attemptDir, "report.json");
                        // time to adjust process holders
                        synchronized (tasks) {
                            final ForkingTaskRunnerWorkItem taskWorkItem = tasks.get(task.getId());
                            // Sanity checks: the work item must still be registered, not shut down, and not yet
                            // associated with a process.
                            if (taskWorkItem == null) {
                                LOGGER.makeAlert("TaskInfo disappeared!").addData("task", task.getId()).emit();
                                throw new ISE("TaskInfo disappeared for task[%s]!", task.getId());
                            }
                            if (taskWorkItem.shutdown) {
                                throw new IllegalStateException("Task has been shut down!");
                            }
                            if (taskWorkItem.processHolder != null) {
                                LOGGER.makeAlert("TaskInfo already has a processHolder").addData("task", task.getId()).emit();
                                throw new ISE("TaskInfo already has processHolder for task[%s]!", task.getId());
                            }
                            // Build the child JVM command line. Arguments are appended in order, so options added
                            // later follow (and are intended to override) the ones added earlier.
                            final List<String> command = new ArrayList<>();
                            final String taskClasspath;
                            // A non-empty task classpath prefix is placed in front of the configured classpath.
                            if (task.getClasspathPrefix() != null && !task.getClasspathPrefix().isEmpty()) {
                                taskClasspath = Joiner.on(File.pathSeparator).join(task.getClasspathPrefix(), config.getClasspath());
                            } else {
                                taskClasspath = config.getClasspath();
                            }
                            command.add(config.getJavaCommand());
                            command.add("-cp");
                            command.add(taskClasspath);
                            // Runner-level JVM options: first the single-string form, then the array form.
                            Iterables.addAll(command, new QuotableWhiteSpaceSplitter(config.getJavaOpts()));
                            Iterables.addAll(command, config.getJavaOptsArray());
                            // Override task specific javaOpts
                            Object taskJavaOpts = task.getContextValue(ForkingTaskRunnerConfig.JAVA_OPTS_PROPERTY);
                            if (taskJavaOpts != null) {
                                Iterables.addAll(command, new QuotableWhiteSpaceSplitter((String) taskJavaOpts));
                            }
                            // Forward properties whose name starts with an allowed prefix as -D options, except the
                            // javaOpts properties themselves.
                            for (String propName : props.stringPropertyNames()) {
                                for (String allowedPrefix : config.getAllowedPrefixes()) {
                                    // See https://github.com/apache/druid/issues/1841
                                    if (propName.startsWith(allowedPrefix) && !ForkingTaskRunnerConfig.JAVA_OPTS_PROPERTY.equals(propName) && !ForkingTaskRunnerConfig.JAVA_OPTS_ARRAY_PROPERTY.equals(propName)) {
                                        command.add(StringUtils.format("-D%s=%s", propName, props.getProperty(propName)));
                                    }
                                }
                            }
                            // Override child JVM specific properties
                            // (the CHILD_PROPERTY_PREFIX is stripped from the name before it is passed on)
                            for (String propName : props.stringPropertyNames()) {
                                if (propName.startsWith(CHILD_PROPERTY_PREFIX)) {
                                    command.add(StringUtils.format("-D%s=%s", propName.substring(CHILD_PROPERTY_PREFIX.length()), props.getProperty(propName)));
                                }
                            }
                            // Override task specific properties
                            // (same prefix stripping, but the values come from the task context)
                            final Map<String, Object> context = task.getContext();
                            if (context != null) {
                                for (String propName : context.keySet()) {
                                    if (propName.startsWith(CHILD_PROPERTY_PREFIX)) {
                                        command.add(StringUtils.format("-D%s=%s", propName.substring(CHILD_PROPERTY_PREFIX.length()), task.getContextValue(propName)));
                                    }
                                }
                            }
                            // Add dataSource, taskId and taskType for metrics or logging
                            command.add(StringUtils.format("-D%s%s=%s", MonitorsConfig.METRIC_DIMENSION_PREFIX, DruidMetrics.DATASOURCE, task.getDataSource()));
                            command.add(StringUtils.format("-D%s%s=%s", MonitorsConfig.METRIC_DIMENSION_PREFIX, DruidMetrics.TASK_ID, task.getId()));
                            command.add(StringUtils.format("-D%s%s=%s", MonitorsConfig.METRIC_DIMENSION_PREFIX, DruidMetrics.TASK_TYPE, task.getType()));
                            // Host and ports of the child process itself (same values as in taskLocation).
                            command.add(StringUtils.format("-Ddruid.host=%s", childHost));
                            command.add(StringUtils.format("-Ddruid.plaintextPort=%d", childPort));
                            command.add(StringUtils.format("-Ddruid.tlsPort=%d", tlsChildPort));
                            // Let tasks know where they are running on.
                            // This information is used in native parallel indexing with shuffle.
                            command.add(StringUtils.format("-Ddruid.task.executor.service=%s", node.getServiceName()));
                            command.add(StringUtils.format("-Ddruid.task.executor.host=%s", node.getHost()));
                            command.add(StringUtils.format("-Ddruid.task.executor.plaintextPort=%d", node.getPlaintextPort()));
                            command.add(StringUtils.format("-Ddruid.task.executor.enablePlaintextPort=%s", node.isEnablePlaintextPort()));
                            command.add(StringUtils.format("-Ddruid.task.executor.tlsPort=%d", node.getTlsPort()));
                            command.add(StringUtils.format("-Ddruid.task.executor.enableTlsPort=%s", node.isEnableTlsPort()));
                            // These are not enabled by default so that the user can choose whether to set them.
                            // Users are strongly encouraged to set them in druid.indexer.runner.javaOpts
                            // See org.apache.druid.concurrent.TaskThreadPriority#getThreadPriorityFromTaskPriority(int)
                            // for more information
                            // command.add("-XX:+UseThreadPriorities");
                            // command.add("-XX:ThreadPriorityPolicy=42");
                            // Main class and its arguments: "internal peon <taskFile> <statusFile> <reportsFile>".
                            command.add("org.apache.druid.cli.Main");
                            command.add("internal");
                            command.add("peon");
                            command.add(taskFile.toString());
                            command.add(statusFile.toString());
                            command.add(reportsFile.toString());
                            String nodeType = task.getNodeType();
                            if (nodeType != null) {
                                command.add("--nodeType");
                                command.add(nodeType);
                            }
                            // join queries
                            if (task.supportsQueries()) {
                                command.add("--loadBroadcastSegments");
                                command.add("true");
                            }
                            // Write the task definition only if a task file is not already present.
                            if (!taskFile.exists()) {
                                jsonMapper.writeValue(taskFile, task);
                            }
                            // The command is passed through getMaskedCommand before being logged.
                            LOGGER.info("Running command: %s", getMaskedCommand(startupLoggingConfig.getMaskProperties(), command));
                            taskWorkItem.processHolder = runTaskProcess(command, logFile, taskLocation);
                            processHolder = taskWorkItem.processHolder;
                            processHolder.registerWithCloser(closer);
                        }
                        // Listeners are notified outside the synchronized block.
                        TaskRunnerUtils.notifyLocationChanged(listeners, task.getId(), taskLocation);
                        TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), TaskStatus.running(task.getId()));
                        LOGGER.info("Logging task %s output to: %s", task.getId(), logFile);
                        final int exitCode = waitForTaskProcessToComplete(task, processHolder, logFile, reportsFile);
                        final TaskStatus status;
                        if (exitCode == 0) {
                            LOGGER.info("Process exited successfully for task: %s", task.getId());
                            // Process exited successfully
                            // The final status is read from the status file passed to the child on its command line.
                            status = jsonMapper.readValue(statusFile, TaskStatus.class);
                        } else {
                            LOGGER.error("Process exited with code[%d] for task: %s", exitCode, task.getId());
                            // Process exited unsuccessfully
                            status = TaskStatus.failure(task.getId(), StringUtils.format("Task execution process exited unsuccessfully with code[%s]. " + "See middleManager logs for more details.", exitCode));
                        }
                        TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), status);
                        return status;
                    } catch (Throwable t) {
                        throw closer.rethrow(t);
                    } finally {
                        closer.close();
                    }
                } catch (Throwable t) {
                    // NOTE(review): the failure is logged at info level before being rethrown — confirm this is intentional.
                    LOGGER.info(t, "Exception caught during execution");
                    throw new RuntimeException(t);
                } finally {
                    // Cleanup runs on every exit path. Exceptions raised here are logged and suppressed.
                    try {
                        synchronized (tasks) {
                            final ForkingTaskRunnerWorkItem taskWorkItem = tasks.remove(task.getId());
                            if (taskWorkItem != null && taskWorkItem.processHolder != null) {
                                taskWorkItem.processHolder.shutdown();
                            }
                            if (!stopping) {
                                saveRunningTasks();
                            }
                        }
                        // Hand back only the ports that were actually obtained from the port finder above.
                        if (node.isEnablePlaintextPort()) {
                            portFinder.markPortUnused(childPort);
                        }
                        if (node.isEnableTlsPort()) {
                            portFinder.markPortUnused(tlsChildPort);
                        }
                        try {
                            // The task directory is kept while the runner is stopping.
                            if (!stopping && taskDir.exists()) {
                                FileUtils.deleteDirectory(taskDir);
                                // NOTE(review): this message says "Removing" but is logged after the delete has returned.
                                LOGGER.info("Removing task directory: %s", taskDir);
                            }
                        } catch (Exception e) {
                            LOGGER.makeAlert(e, "Failed to delete task directory").addData("taskDir", taskDir.toString()).addData("task", task.getId()).emit();
                        }
                    } catch (Exception e) {
                        LOGGER.error(e, "Suppressing exception caught while cleaning up task");
                    }
                }
            }
        })));
        // Persist the set of running tasks after registration, then return the (new or existing) result future.
        saveRunningTasks();
        return tasks.get(task.getId()).getResult();
    }
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) TaskStatus(org.apache.druid.indexer.TaskStatus) TaskLocation(org.apache.druid.indexer.TaskLocation) IOException(java.io.IOException) ISE(org.apache.druid.java.util.common.ISE) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) File(java.io.File) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 22 with TaskLocation

use of org.apache.druid.indexer.TaskLocation in project druid by apache.

the class RemoteTaskRunner method streamTaskReports.

/**
 * Returns a source for the live reports of a task that is currently running on a worker.
 *
 * <p>No HTTP request is made by this method itself; the request to the task's
 * {@code /druid/worker/v1/chat/<taskId>/liveReports} endpoint is issued each time
 * {@link ByteSource#openStream()} is called on the returned source.
 *
 * @param taskId ID of the task whose live reports are requested
 * @return a {@link ByteSource} over the live reports, or {@link Optional#absent()} if no worker is found running
 *         the task
 */
@Override
public Optional<ByteSource> streamTaskReports(final String taskId) {
    final ZkWorker zkWorker = findWorkerRunningTask(taskId);
    if (zkWorker == null) {
        // Worker is not running this task, it might be available in deep storage
        return Optional.absent();
    } else {
        // NOTE(review): runningTasks.get(taskId) is dereferenced without a null check; if the task can leave
        // runningTasks between the worker lookup above and this line, this throws a NullPointerException — confirm.
        TaskLocation taskLocation = runningTasks.get(taskId).getLocation();
        final URL url = TaskRunnerUtils.makeTaskLocationURL(taskLocation, "/druid/worker/v1/chat/%s/liveReports", taskId);
        return Optional.of(new ByteSource() {

            @Override
            public InputStream openStream() throws IOException {
                try {
                    return httpClient.go(new Request(HttpMethod.GET, url), new InputStreamResponseHandler()).get();
                } catch (InterruptedException e) {
                    // Restore the interrupt flag so callers further up the stack can still observe the
                    // interruption after it has been wrapped in an unchecked exception.
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(e);
                } catch (ExecutionException e) {
                    // Unwrap if possible
                    Throwables.propagateIfPossible(e.getCause(), IOException.class);
                    throw new RuntimeException(e);
                }
            }
        });
    }
}
Also used : InputStreamResponseHandler(org.apache.druid.java.util.http.client.response.InputStreamResponseHandler) InputStream(java.io.InputStream) Request(org.apache.druid.java.util.http.client.Request) ByteSource(com.google.common.io.ByteSource) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) TaskLocation(org.apache.druid.indexer.TaskLocation) URL(java.net.URL)

Example 23 with TaskLocation

use of org.apache.druid.indexer.TaskLocation in project druid by apache.

the class TaskQueue method notifyStatus.

/**
 * Notify this queue that some task has an updated status.
 *
 * <p>The task runner is asked to shut the task down (failures of that request are logged and otherwise ignored),
 * the task is removed from the queue and from the futures map, and, if the task was present in the queue and its
 * previously stored status is still runnable, the new status is persisted in task storage together with the
 * task's last known location. Failures while persisting are reported through an alert and not rethrown.
 *
 * @param task         task to update
 * @param taskStatus   new task status
 * @param reasonFormat format string describing why the task is being shut down; passed to the task runner
 * @param args         arguments for {@code reasonFormat}; passed to the task runner
 *
 * @throws NullPointerException     if task or status is null
 * @throws IllegalArgumentException if the task ID does not match the status ID
 * @throws IllegalStateException    if this queue is currently shut down
 */
private void notifyStatus(final Task task, final TaskStatus taskStatus, String reasonFormat, Object... args) {
    // Initialised before taking the lock so that nothing can fail between lock() and the try/finally that unlocks.
    TaskLocation taskLocation = TaskLocation.unknown();
    giant.lock();
    try {
        Preconditions.checkNotNull(task, "task");
        Preconditions.checkNotNull(taskStatus, "status");
        Preconditions.checkState(active, "Queue is not active!");
        Preconditions.checkArgument(task.getId().equals(taskStatus.getId()), "Mismatching task ids[%s/%s]", task.getId(), taskStatus.getId());
        // Inform taskRunner that this task can be shut down
        try {
            // Capture the location first: it is stored with the final status below.
            taskLocation = taskRunner.getTaskLocation(task.getId());
            taskRunner.shutdown(task.getId(), reasonFormat, args);
        } catch (Exception e) {
            log.warn(e, "TaskRunner failed to cleanup task after completion: %s", task.getId());
        }
        // Remove from running tasks. The scan stops at the first match, so at most one entry is removed; the
        // former "removed multiple copies" alert could never fire and has been dropped.
        boolean removed = false;
        for (int i = tasks.size() - 1; i >= 0; i--) {
            if (tasks.get(i).getId().equals(task.getId())) {
                removed = true;
                removeTaskInternal(tasks.get(i));
                break;
            }
        }
        if (!removed) {
            log.warn("Unknown task completed: %s", task.getId());
        }
        // Remove from futures list
        taskFutures.remove(task.getId());
        if (removed) {
            // If we thought this task should be running, save status to DB
            try {
                final Optional<TaskStatus> previousStatus = taskStorage.getStatus(task.getId());
                if (!previousStatus.isPresent() || !previousStatus.get().isRunnable()) {
                    log.makeAlert("Ignoring notification for already-complete task").addData("task", task.getId()).emit();
                } else {
                    taskStorage.setStatus(taskStatus.withLocation(taskLocation));
                    log.info("Task done: %s", task);
                    managementMayBeNecessary.signalAll();
                }
            } catch (Exception e) {
                log.makeAlert(e, "Failed to persist status for task").addData("task", task.getId()).addData("statusCode", taskStatus.getStatusCode()).emit();
            }
        }
    } finally {
        giant.unlock();
    }
}
Also used : TaskStatus(org.apache.druid.indexer.TaskStatus) TaskLocation(org.apache.druid.indexer.TaskLocation) EntryExistsException(org.apache.druid.metadata.EntryExistsException) MaxAllowedLocksExceededException(org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException)

Example 24 with TaskLocation

use of org.apache.druid.indexer.TaskLocation in project druid by apache.

the class ThreadingTaskRunner method run.

/**
 * Registers the given task (if it is not already registered under its ID) and returns the future of its result.
 *
 * <p>For a newly registered task, a {@link Callable} is submitted to {@code taskExecutor}. That callable runs the
 * task directly on the executor thread via {@code task.run(toolbox)}; an exception thrown by the task is turned
 * into a failure status. When the callable ends, it removes the task's report writer entry, its appenderators and
 * its entry in {@code tasks}, and, unless the runner is stopping, deletes the task directory.
 *
 * <p>If a work item already exists for the task ID, no new callable is submitted and the existing work item's
 * result is returned.
 *
 * @param task the task to run
 * @return the result future of the work item registered under {@code task.getId()}
 */
@Override
public ListenableFuture<TaskStatus> run(Task task) {
    synchronized (tasks) {
        tasks.computeIfAbsent(task.getId(), k -> new ThreadingTaskRunnerWorkItem(task, taskExecutor.submit(new Callable<TaskStatus>() {

            @Override
            public TaskStatus call() {
                // Each attempt gets its own sub-directory (named by a random UUID) under the task directory.
                final String attemptUUID = UUID.randomUUID().toString();
                final File taskDir = taskConfig.getTaskDir(task.getId());
                final File attemptDir = new File(taskDir, attemptUUID);
                // The reported location is built from this node's own host and ports.
                final TaskLocation taskLocation = TaskLocation.create(node.getHost(), node.getPlaintextPort(), node.getTlsPort());
                final ThreadingTaskRunnerWorkItem taskWorkItem;
                try {
                    FileUtils.mkdirp(attemptDir);
                    final File taskFile = new File(taskDir, "task.json");
                    final File reportsFile = new File(attemptDir, "report.json");
                    taskReportFileWriter.add(task.getId(), reportsFile);
                    // Look up the work item under the lock and verify it is still registered and not shut down.
                    synchronized (tasks) {
                        taskWorkItem = tasks.get(task.getId());
                        if (taskWorkItem == null) {
                            LOGGER.makeAlert("TaskInfo disappeared").addData("task", task.getId()).emit();
                            throw new ISE("TaskInfo disappeared for task[%s]!", task.getId());
                        }
                        if (taskWorkItem.shutdown) {
                            throw new IllegalStateException("Task has been shut down!");
                        }
                    }
                    // Write the task definition only if a task file is not already present.
                    if (!taskFile.exists()) {
                        jsonMapper.writeValue(taskFile, task);
                    }
                    // This will block for a while. So we append the thread information with more details
                    // (the thread is renamed to "[<taskId>]-<previous name>" and restored in the finally below)
                    final String priorThreadName = Thread.currentThread().getName();
                    Thread.currentThread().setName(StringUtils.format("[%s]-%s", task.getId(), priorThreadName));
                    TaskStatus taskStatus;
                    final TaskToolbox toolbox = toolboxFactory.build(task);
                    TaskRunnerUtils.notifyLocationChanged(listeners, task.getId(), taskLocation);
                    TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), TaskStatus.running(task.getId()));
                    taskWorkItem.setState(RunnerTaskState.RUNNING);
                    try {
                        taskStatus = task.run(toolbox);
                    } catch (Throwable t) {
                        // Any throwable from the task becomes a failure status instead of propagating.
                        LOGGER.error(t, "Exception caught while running the task.");
                        taskStatus = TaskStatus.failure(task.getId(), "Failed with an exception. See indexer logs for more details.");
                    } finally {
                        taskWorkItem.setState(RunnerTaskState.NONE);
                        Thread.currentThread().setName(priorThreadName);
                        // Reports are pushed only if the task actually produced a report file.
                        if (reportsFile.exists()) {
                            taskLogPusher.pushTaskReports(task.getId(), reportsFile);
                        }
                    }
                    TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), taskStatus);
                    return taskStatus;
                } catch (Throwable t) {
                    // Failures outside task.run (setup, notification, report push) are logged and rethrown unchecked.
                    LOGGER.error(t, "Exception caught during execution");
                    throw new RuntimeException(t);
                } finally {
                    // Cleanup runs on every exit path. Exceptions raised here are logged and suppressed.
                    try {
                        taskReportFileWriter.delete(task.getId());
                        appenderatorsManager.removeAppenderatorsForTask(task.getId(), task.getDataSource());
                        synchronized (tasks) {
                            tasks.remove(task.getId());
                            if (!stopping) {
                                saveRunningTasks();
                            }
                        }
                        try {
                            // The task directory is kept while the runner is stopping.
                            if (!stopping && taskDir.exists()) {
                                FileUtils.deleteDirectory(taskDir);
                                LOGGER.info("Removed task directory: %s", taskDir);
                            }
                        } catch (Exception e) {
                            LOGGER.makeAlert(e, "Failed to delete task directory").addData("taskDir", taskDir.toString()).addData("task", task.getId()).emit();
                        }
                    } catch (Exception e) {
                        LOGGER.error(e, "Suppressing exception caught while cleaning up task");
                    }
                }
            }
        })));
        // Persist the set of running tasks after registration, then return the (new or existing) result future.
        saveRunningTasks();
        return tasks.get(task.getId()).getResult();
    }
}
Also used : TaskStatus(org.apache.druid.indexer.TaskStatus) TaskLocation(org.apache.druid.indexer.TaskLocation) TimeoutException(java.util.concurrent.TimeoutException) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) ISE(org.apache.druid.java.util.common.ISE) File(java.io.File)

Example 25 with TaskLocation

use of org.apache.druid.indexer.TaskLocation in project druid by apache.

the class HttpRemoteTaskRunner method streamTaskReports.

/**
 * Returns a source for the live reports of a task that is currently running on a known worker.
 *
 * <p>No HTTP request is made by this method itself; the request to the task's
 * {@code /druid/worker/v1/chat/<taskId>/liveReports} endpoint is issued each time
 * {@link ByteSource#openStream()} is called on the returned source.
 *
 * @param taskId ID of the task whose live reports are requested
 * @return a {@link ByteSource} over the live reports, or {@link Optional#absent()} if the task is unknown, is in
 *         the {@code COMPLETE} state, has no worker, or its worker's host is not among the known workers
 */
@Override
public Optional<ByteSource> streamTaskReports(String taskId) {
    // Read on tasks is safe
    @SuppressWarnings("GuardedBy") HttpRemoteTaskRunnerWorkItem taskRunnerWorkItem = tasks.get(taskId);
    Worker worker = null;
    if (taskRunnerWorkItem != null && taskRunnerWorkItem.getState() != HttpRemoteTaskRunnerWorkItem.State.COMPLETE) {
        worker = taskRunnerWorkItem.getWorker();
    }
    if (worker == null || !workers.containsKey(worker.getHost())) {
        // Worker is not running this task, it might be available in deep storage
        return Optional.absent();
    } else {
        // Worker is still running this task
        // (taskRunnerWorkItem is non-null here: worker is only assigned when the work item exists)
        TaskLocation taskLocation = taskRunnerWorkItem.getLocation();
        final URL url = TaskRunnerUtils.makeTaskLocationURL(taskLocation, "/druid/worker/v1/chat/%s/liveReports", taskId);
        return Optional.of(new ByteSource() {

            @Override
            public InputStream openStream() throws IOException {
                try {
                    return httpClient.go(new Request(HttpMethod.GET, url), new InputStreamResponseHandler()).get();
                } catch (InterruptedException e) {
                    // Restore the interrupt flag so callers further up the stack can still observe the
                    // interruption after it has been wrapped in an unchecked exception.
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(e);
                } catch (ExecutionException e) {
                    // Unwrap if possible
                    Throwables.propagateIfPossible(e.getCause(), IOException.class);
                    throw new RuntimeException(e);
                }
            }
        });
    }
}
Also used : InputStream(java.io.InputStream) Request(org.apache.druid.java.util.http.client.Request) IOException(java.io.IOException) TaskLocation(org.apache.druid.indexer.TaskLocation) URL(java.net.URL) InputStreamResponseHandler(org.apache.druid.java.util.http.client.response.InputStreamResponseHandler) Worker(org.apache.druid.indexing.worker.Worker) ByteSource(com.google.common.io.ByteSource) ExecutionException(java.util.concurrent.ExecutionException)

Aggregations

TaskLocation (org.apache.druid.indexer.TaskLocation)66 Test (org.junit.Test)50 Task (org.apache.druid.indexing.common.task.Task)46 TaskRunnerListener (org.apache.druid.indexing.overlord.TaskRunnerListener)44 ArrayList (java.util.ArrayList)42 Collection (java.util.Collection)40 Executor (java.util.concurrent.Executor)40 RealtimeIndexTask (org.apache.druid.indexing.common.task.RealtimeIndexTask)40 ImmutableMap (com.google.common.collect.ImmutableMap)38 Map (java.util.Map)38 HashMap (java.util.HashMap)36 TreeMap (java.util.TreeMap)36 TaskStatus (org.apache.druid.indexer.TaskStatus)20 KafkaDataSourceMetadata (org.apache.druid.indexing.kafka.KafkaDataSourceMetadata)20 KafkaIndexTask (org.apache.druid.indexing.kafka.KafkaIndexTask)20 KinesisDataSourceMetadata (org.apache.druid.indexing.kinesis.KinesisDataSourceMetadata)20 KinesisIndexTask (org.apache.druid.indexing.kinesis.KinesisIndexTask)20 DateTime (org.joda.time.DateTime)20 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)18 TaskReportData (org.apache.druid.indexing.seekablestream.supervisor.TaskReportData)12