Search in sources :

Example 1 with TaskLocation

use of org.apache.druid.indexer.TaskLocation in project druid by druid-io.

the class IndexTaskClient method submitRequest.

/**
 * Submits an HTTP request to the task identified by {@code taskId} and hands back the handler's final value once
 * the task answers successfully.
 *
 * <p>Each attempt re-reads the task status and location, so a task that stops being runnable or whose location is
 * unknown aborts the call instead of being retried. Failures surfacing as {@link IOException} or
 * {@link ChannelException} are retried (when {@code retry} is true) until the retry policy stops handing out delays.
 *
 * @param taskId              id of the task to contact
 * @param mediaType           media type of {@code content}; the original inline note reads "nullable if content is
 *                            empty"
 * @param method              HTTP method to use
 * @param encodedPathSuffix   already-encoded path appended after the task id
 * @param encodedQueryString  already-encoded query string, may be null
 * @param content             request body
 * @param responseHandler     handler that turns the raw response into the final value
 * @param retry               whether failed attempts should be retried
 * @return the value produced by {@code responseHandler} for a successful response
 * @throws IOException             when the request fails and is not (or no longer) retried
 * @throws ChannelException        when the request fails and is not (or no longer) retried
 * @throws NoTaskLocationException when no location is known for the task
 */
protected <IntermediateType, FinalType> FinalType submitRequest(
    String taskId,
    @Nullable String mediaType,
    HttpMethod method,
    String encodedPathSuffix,
    @Nullable String encodedQueryString,
    byte[] content,
    HttpResponseHandler<IntermediateType, FinalType> responseHandler,
    boolean retry
) throws IOException, ChannelException, NoTaskLocationException {
    final RetryPolicy retryPolicy = retryPolicyFactory.makeRetryPolicy();
    for (;;) {
        final String requestPath = StringUtils.format("%s/%s/%s", BASE_PATH, StringUtils.urlEncode(taskId), encodedPathSuffix);

        // Give up immediately when the task is unknown or no longer runnable.
        final Optional<TaskStatus> taskStatus = taskInfoProvider.getTaskStatus(taskId);
        final boolean runnable = taskStatus.isPresent() && taskStatus.get().isRunnable();
        if (!runnable) {
            throw new TaskNotRunnableException(StringUtils.format("Aborting request because task [%s] is not runnable", taskId));
        }

        final TaskLocation taskLocation = taskInfoProvider.getTaskLocation(taskId);
        if (taskLocation.equals(TaskLocation.unknown())) {
            throw new NoTaskLocationException(StringUtils.format("No TaskLocation available for task [%s]", taskId));
        }

        final Request request = createRequest(taskId, taskLocation, requestPath, encodedQueryString, method, mediaType, content);
        Either<StringFullResponseHolder, FinalType> result = null;
        try {
            // Netty throws some annoying exceptions if a connection can't be opened, which happens relatively frequently
            // for tasks that happen to still be starting up, so test the connection first to keep the logs clean.
            checkConnection(request.getUrl().getHost(), request.getUrl().getPort());
            result = submitRequest(request, responseHandler);
            if (result.isValue()) {
                return result.valueOrThrow();
            }

            // Error response: build a message from the status and (a prefix of) the body.
            final HttpResponseStatus errorStatus = result.error().getStatus();
            final String errorBody = result.error().getContent();
            final StringBuilder message = new StringBuilder();
            message.append("Received server error with status [").append(errorStatus).append("]");
            if (!Strings.isNullOrEmpty(errorBody)) {
                final String bodyPrefix = StringUtils.chop(StringUtils.nullToEmptyNonDruidDataString(errorBody), 1000);
                message.append("; first 1KB of body: ").append(bodyPrefix);
            }
            if (errorStatus.getCode() == 400) {
                // don't bother retrying if it's a bad request
                throw new IAE(message.toString());
            }
            throw new IOE(message.toString());
        } catch (IOException | ChannelException e) {
            // Since workers are free to move tasks around to different ports, there is a chance that a task may have been
            // moved but our view of its location has not been updated yet from ZK. To detect this case, we send a header
            // identifying our expected recipient in the request; if this doesn't correspond to the worker we messaged, the
            // worker will return an HTTP 404 with its ID in the response header. If we get a mismatching task ID, then
            // we will wait for a short period then retry the request indefinitely, expecting the task's location to
            // eventually be updated.
            String mismatchedTaskId = null;
            if (result != null && !result.isValue() && result.error().getStatus().equals(HttpResponseStatus.NOT_FOUND)) {
                final String reportedTaskId = StringUtils.urlDecode(result.error().getResponse().headers().get(ChatHandlerResource.TASK_ID_HEADER));
                if (reportedTaskId != null && !reportedTaskId.equals(taskId)) {
                    mismatchedTaskId = reportedTaskId;
                }
            }

            final Duration delay;
            if (mismatchedTaskId != null) {
                log.warn("Expected worker to have taskId [%s] but has taskId [%s], will retry in [%d]s", taskId, mismatchedTaskId, TASK_MISMATCH_RETRY_DELAY_SECONDS);
                delay = Duration.standardSeconds(TASK_MISMATCH_RETRY_DELAY_SECONDS);
            } else {
                delay = retryPolicy.getAndIncrementRetryDelay();
            }

            final String urlForLog = request.getUrl().toString();
            if (!retry) {
                // if retry=false, we probably aren't too concerned if the operation doesn't succeed (i.e. the request was
                // for informational purposes only); log at INFO instead of WARN.
                log.noStackTrace().info(e, "submitRequest failed for [%s]", urlForLog);
                throw e;
            }
            if (delay == null) {
                // When retrying, log the final failure at WARN level, since it is likely to be bad news.
                log.warn(e, "submitRequest failed for [%s]", urlForLog);
                throw e;
            }
            try {
                final long sleepTime = delay.getMillis();
                // When retrying, log non-final failures at INFO level.
                log.noStackTrace().info(e, "submitRequest failed for [%s]; will try again in [%s]", urlForLog, new Duration(sleepTime).toString());
                Thread.sleep(sleepTime);
            } catch (InterruptedException interrupted) {
                // Restore the interrupt flag and surface the original failure with the interruption attached.
                Thread.currentThread().interrupt();
                e.addSuppressed(interrupted);
                throw new RuntimeException(e);
            }
        } catch (NoTaskLocationException e) {
            log.info("No TaskLocation available for task [%s], this task may not have been assigned to a worker yet " + "or may have already completed", taskId);
            throw e;
        } catch (Exception e) {
            log.warn(e, "Exception while sending request");
            throw e;
        }
    }
}
Also used : HttpResponseStatus(org.jboss.netty.handler.codec.http.HttpResponseStatus) Request(org.apache.druid.java.util.http.client.Request) Duration(org.joda.time.Duration) IOException(java.io.IOException) TaskStatus(org.apache.druid.indexer.TaskStatus) IAE(org.apache.druid.java.util.common.IAE) TaskLocation(org.apache.druid.indexer.TaskLocation) MalformedURLException(java.net.MalformedURLException) ChannelException(org.jboss.netty.channel.ChannelException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) StringFullResponseHolder(org.apache.druid.java.util.http.client.response.StringFullResponseHolder) IOE(org.apache.druid.java.util.common.IOE) ChannelException(org.jboss.netty.channel.ChannelException)

Example 2 with TaskLocation

use of org.apache.druid.indexer.TaskLocation in project druid by druid-io.

the class ForkingTaskRunner method run.

/**
 * Registers {@code task} (if it is not already registered) and schedules a callable on {@code exec} that launches
 * the task in a separate process, waits for that process to exit, and reports the resulting {@link TaskStatus}.
 *
 * <p>The callable builds the child command line, starts the process via {@code runTaskProcess}, notifies listeners
 * about location and status changes, and cleans up (work item, ports, task directory) in its {@code finally} block.
 *
 * @param task the task to run
 * @return the result future of the work item registered under the task's id
 */
@Override
public ListenableFuture<TaskStatus> run(final Task task) {
    synchronized (tasks) {
        // Only creates and submits a new work item when none is registered for this task id yet.
        tasks.computeIfAbsent(task.getId(), k -> new ForkingTaskRunnerWorkItem(task, exec.submit(new Callable<TaskStatus>() {

            @Override
            public TaskStatus call() {
                // Each attempt gets its own sub-directory (named by a random UUID) inside the task directory.
                final String attemptUUID = UUID.randomUUID().toString();
                final File taskDir = taskConfig.getTaskDir(task.getId());
                final File attemptDir = new File(taskDir, attemptUUID);
                final ProcessHolder processHolder;
                final String childHost = node.getHost();
                // A port stays at -1 when the corresponding port type is not enabled on this node.
                int childPort = -1;
                int tlsChildPort = -1;
                if (node.isEnablePlaintextPort()) {
                    childPort = portFinder.findUnusedPort();
                }
                if (node.isEnableTlsPort()) {
                    tlsChildPort = portFinder.findUnusedPort();
                }
                final TaskLocation taskLocation = TaskLocation.create(childHost, childPort, tlsChildPort);
                try {
                    final Closer closer = Closer.create();
                    try {
                        FileUtils.mkdirp(attemptDir);
                        // task.json and log live in the task directory; status.json and report.json are per attempt.
                        final File taskFile = new File(taskDir, "task.json");
                        final File statusFile = new File(attemptDir, "status.json");
                        final File logFile = new File(taskDir, "log");
                        final File reportsFile = new File(attemptDir, "report.json");
                        // time to adjust process holders
                        synchronized (tasks) {
                            // Validate the registered work item before attaching a process to it.
                            final ForkingTaskRunnerWorkItem taskWorkItem = tasks.get(task.getId());
                            if (taskWorkItem == null) {
                                LOGGER.makeAlert("TaskInfo disappeared!").addData("task", task.getId()).emit();
                                throw new ISE("TaskInfo disappeared for task[%s]!", task.getId());
                            }
                            if (taskWorkItem.shutdown) {
                                throw new IllegalStateException("Task has been shut down!");
                            }
                            if (taskWorkItem.processHolder != null) {
                                LOGGER.makeAlert("TaskInfo already has a processHolder").addData("task", task.getId()).emit();
                                throw new ISE("TaskInfo already has processHolder for task[%s]!", task.getId());
                            }
                            // Build the child command line. Arguments are appended in order: java command,
                            // classpath, JVM options, -D system properties, main class and its arguments.
                            final List<String> command = new ArrayList<>();
                            final String taskClasspath;
                            // A non-empty task classpath prefix is placed in front of the configured classpath.
                            if (task.getClasspathPrefix() != null && !task.getClasspathPrefix().isEmpty()) {
                                taskClasspath = Joiner.on(File.pathSeparator).join(task.getClasspathPrefix(), config.getClasspath());
                            } else {
                                taskClasspath = config.getClasspath();
                            }
                            command.add(config.getJavaCommand());
                            command.add("-cp");
                            command.add(taskClasspath);
                            Iterables.addAll(command, new QuotableWhiteSpaceSplitter(config.getJavaOpts()));
                            Iterables.addAll(command, config.getJavaOptsArray());
                            // Override task specific javaOpts
                            Object taskJavaOpts = task.getContextValue(ForkingTaskRunnerConfig.JAVA_OPTS_PROPERTY);
                            if (taskJavaOpts != null) {
                                // NOTE(review): the context value is cast to String without a type check.
                                Iterables.addAll(command, new QuotableWhiteSpaceSplitter((String) taskJavaOpts));
                            }
                            // Forward properties whose name starts with an allowed prefix, except the javaOpts
                            // properties themselves.
                            for (String propName : props.stringPropertyNames()) {
                                for (String allowedPrefix : config.getAllowedPrefixes()) {
                                    // See https://github.com/apache/druid/issues/1841
                                    if (propName.startsWith(allowedPrefix) && !ForkingTaskRunnerConfig.JAVA_OPTS_PROPERTY.equals(propName) && !ForkingTaskRunnerConfig.JAVA_OPTS_ARRAY_PROPERTY.equals(propName)) {
                                        command.add(StringUtils.format("-D%s=%s", propName, props.getProperty(propName)));
                                    }
                                }
                            }
                            // Override child JVM specific properties
                            // (the CHILD_PROPERTY_PREFIX is stripped from the property name before it is passed on)
                            for (String propName : props.stringPropertyNames()) {
                                if (propName.startsWith(CHILD_PROPERTY_PREFIX)) {
                                    command.add(StringUtils.format("-D%s=%s", propName.substring(CHILD_PROPERTY_PREFIX.length()), props.getProperty(propName)));
                                }
                            }
                            // Override task specific properties
                            // (same prefix stripping, but the values come from the task context)
                            final Map<String, Object> context = task.getContext();
                            if (context != null) {
                                for (String propName : context.keySet()) {
                                    if (propName.startsWith(CHILD_PROPERTY_PREFIX)) {
                                        command.add(StringUtils.format("-D%s=%s", propName.substring(CHILD_PROPERTY_PREFIX.length()), task.getContextValue(propName)));
                                    }
                                }
                            }
                            // Add dataSource, taskId and taskType for metrics or logging
                            command.add(StringUtils.format("-D%s%s=%s", MonitorsConfig.METRIC_DIMENSION_PREFIX, DruidMetrics.DATASOURCE, task.getDataSource()));
                            command.add(StringUtils.format("-D%s%s=%s", MonitorsConfig.METRIC_DIMENSION_PREFIX, DruidMetrics.TASK_ID, task.getId()));
                            command.add(StringUtils.format("-D%s%s=%s", MonitorsConfig.METRIC_DIMENSION_PREFIX, DruidMetrics.TASK_TYPE, task.getType()));
                            // Host and ports the child process is told to use (they match taskLocation above).
                            command.add(StringUtils.format("-Ddruid.host=%s", childHost));
                            command.add(StringUtils.format("-Ddruid.plaintextPort=%d", childPort));
                            command.add(StringUtils.format("-Ddruid.tlsPort=%d", tlsChildPort));
                            // Let tasks know where they are running on.
                            // This information is used in native parallel indexing with shuffle.
                            command.add(StringUtils.format("-Ddruid.task.executor.service=%s", node.getServiceName()));
                            command.add(StringUtils.format("-Ddruid.task.executor.host=%s", node.getHost()));
                            command.add(StringUtils.format("-Ddruid.task.executor.plaintextPort=%d", node.getPlaintextPort()));
                            command.add(StringUtils.format("-Ddruid.task.executor.enablePlaintextPort=%s", node.isEnablePlaintextPort()));
                            command.add(StringUtils.format("-Ddruid.task.executor.tlsPort=%d", node.getTlsPort()));
                            command.add(StringUtils.format("-Ddruid.task.executor.enableTlsPort=%s", node.isEnableTlsPort()));
                            // These are not enabled per default to allow the user to either set or not set them
                            // Users are highly suggested to be set in druid.indexer.runner.javaOpts
                            // See org.apache.druid.concurrent.TaskThreadPriority#getThreadPriorityFromTaskPriority(int)
                            // for more information
                            // command.add("-XX:+UseThreadPriorities");
                            // command.add("-XX:ThreadPriorityPolicy=42");
                            // Main class followed by its arguments: the task, status and reports file paths.
                            command.add("org.apache.druid.cli.Main");
                            command.add("internal");
                            command.add("peon");
                            command.add(taskFile.toString());
                            command.add(statusFile.toString());
                            command.add(reportsFile.toString());
                            String nodeType = task.getNodeType();
                            if (nodeType != null) {
                                command.add("--nodeType");
                                command.add(nodeType);
                            }
                            // join queries
                            // (tasks that support queries are started with --loadBroadcastSegments true)
                            if (task.supportsQueries()) {
                                command.add("--loadBroadcastSegments");
                                command.add("true");
                            }
                            // The task spec is serialized only if the file does not exist yet.
                            if (!taskFile.exists()) {
                                jsonMapper.writeValue(taskFile, task);
                            }
                            LOGGER.info("Running command: %s", getMaskedCommand(startupLoggingConfig.getMaskProperties(), command));
                            taskWorkItem.processHolder = runTaskProcess(command, logFile, taskLocation);
                            processHolder = taskWorkItem.processHolder;
                            // Registered so that closer.close() below also handles the process holder.
                            processHolder.registerWithCloser(closer);
                        }
                        // Listener notifications happen outside the lock on tasks.
                        TaskRunnerUtils.notifyLocationChanged(listeners, task.getId(), taskLocation);
                        TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), TaskStatus.running(task.getId()));
                        LOGGER.info("Logging task %s output to: %s", task.getId(), logFile);
                        final int exitCode = waitForTaskProcessToComplete(task, processHolder, logFile, reportsFile);
                        final TaskStatus status;
                        if (exitCode == 0) {
                            LOGGER.info("Process exited successfully for task: %s", task.getId());
                            // Process exited successfully
                            // (the final status is read back from the attempt's status file)
                            status = jsonMapper.readValue(statusFile, TaskStatus.class);
                        } else {
                            LOGGER.error("Process exited with code[%d] for task: %s", exitCode, task.getId());
                            // Process exited unsuccessfully
                            status = TaskStatus.failure(task.getId(), StringUtils.format("Task execution process exited unsuccessfully with code[%s]. " + "See middleManager logs for more details.", exitCode));
                        }
                        TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), status);
                        return status;
                    } catch (Throwable t) {
                        throw closer.rethrow(t);
                    } finally {
                        closer.close();
                    }
                } catch (Throwable t) {
                    // Any failure is logged and rethrown wrapped in a RuntimeException.
                    LOGGER.info(t, "Exception caught during execution");
                    throw new RuntimeException(t);
                } finally {
                    // Cleanup runs on both success and failure; exceptions raised here are logged, not propagated.
                    try {
                        synchronized (tasks) {
                            final ForkingTaskRunnerWorkItem taskWorkItem = tasks.remove(task.getId());
                            if (taskWorkItem != null && taskWorkItem.processHolder != null) {
                                taskWorkItem.processHolder.shutdown();
                            }
                            if (!stopping) {
                                saveRunningTasks();
                            }
                        }
                        // Release only the ports that were actually allocated above.
                        if (node.isEnablePlaintextPort()) {
                            portFinder.markPortUnused(childPort);
                        }
                        if (node.isEnableTlsPort()) {
                            portFinder.markPortUnused(tlsChildPort);
                        }
                        try {
                            // The task directory is kept while the runner is stopping.
                            if (!stopping && taskDir.exists()) {
                                FileUtils.deleteDirectory(taskDir);
                                LOGGER.info("Removing task directory: %s", taskDir);
                            }
                        } catch (Exception e) {
                            LOGGER.makeAlert(e, "Failed to delete task directory").addData("taskDir", taskDir.toString()).addData("task", task.getId()).emit();
                        }
                    } catch (Exception e) {
                        LOGGER.error(e, "Suppressing exception caught while cleaning up task");
                    }
                }
            }
        })));
        // Still holding the lock on tasks: persist the running tasks and return this task's result future.
        saveRunningTasks();
        return tasks.get(task.getId()).getResult();
    }
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) TaskStatus(org.apache.druid.indexer.TaskStatus) TaskLocation(org.apache.druid.indexer.TaskLocation) IOException(java.io.IOException) ISE(org.apache.druid.java.util.common.ISE) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) File(java.io.File) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 3 with TaskLocation

use of org.apache.druid.indexer.TaskLocation in project druid by druid-io.

the class HttpRemoteTaskRunner method streamTaskReports.

/**
 * Returns a source for the live reports of {@code taskId} when a known worker is still running the task.
 *
 * <p>The HTTP request to the task's {@code liveReports} endpoint is only issued when the returned
 * {@link ByteSource} is opened.
 *
 * @param taskId id of the task whose reports are requested
 * @return a byte source that fetches the live reports from the task's location, or an absent value when the task
 *         is unknown, complete, or its worker is not among the known workers
 */
@Override
public Optional<ByteSource> streamTaskReports(String taskId) {
    // Read on tasks is safe
    @SuppressWarnings("GuardedBy") HttpRemoteTaskRunnerWorkItem taskRunnerWorkItem = tasks.get(taskId);
    Worker worker = null;
    if (taskRunnerWorkItem != null && taskRunnerWorkItem.getState() != HttpRemoteTaskRunnerWorkItem.State.COMPLETE) {
        worker = taskRunnerWorkItem.getWorker();
    }
    if (worker == null || !workers.containsKey(worker.getHost())) {
        // Worker is not running this task, it might be available in deep storage
        return Optional.absent();
    } else {
        // Worker is still running this task
        // (worker != null implies taskRunnerWorkItem != null, so the dereference below is safe)
        TaskLocation taskLocation = taskRunnerWorkItem.getLocation();
        final URL url = TaskRunnerUtils.makeTaskLocationURL(taskLocation, "/druid/worker/v1/chat/%s/liveReports", taskId);
        return Optional.of(new ByteSource() {

            @Override
            public InputStream openStream() throws IOException {
                try {
                    return httpClient.go(new Request(HttpMethod.GET, url), new InputStreamResponseHandler()).get();
                } catch (InterruptedException e) {
                    // Restore the interrupt flag so callers further up the stack can still observe the interruption.
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(e);
                } catch (ExecutionException e) {
                    // Unwrap if possible
                    Throwables.propagateIfPossible(e.getCause(), IOException.class);
                    throw new RuntimeException(e);
                }
            }
        });
    }
}
Also used : InputStream(java.io.InputStream) Request(org.apache.druid.java.util.http.client.Request) IOException(java.io.IOException) TaskLocation(org.apache.druid.indexer.TaskLocation) URL(java.net.URL) InputStreamResponseHandler(org.apache.druid.java.util.http.client.response.InputStreamResponseHandler) Worker(org.apache.druid.indexing.worker.Worker) ByteSource(com.google.common.io.ByteSource) ExecutionException(java.util.concurrent.ExecutionException)

Example 4 with TaskLocation

use of org.apache.druid.indexer.TaskLocation in project druid by druid-io.

the class SingleTaskBackgroundRunnerTest method testStopNonRestorableTask.

/**
 * Runs a {@code NoopTask}, stops the runner once the task has been reported as RUNNING, and checks that the last
 * non-RUNNING status seen by the listener is FAILED with the expected cancellation message.
 */
@Test
public void testStopNonRestorableTask() throws InterruptedException {
    // The latch makes the test wait for SingleTaskBackgroundRunnerCallable to be executed before stopping the task.
    // It is needed because TaskRunnerListener is currently racy.
    // See https://github.com/apache/druid/issues/11445 for more details.
    final CountDownLatch runningSeen = new CountDownLatch(1);
    // statusChanged callback can be called by multiple threads, hence the atomic holder.
    final AtomicReference<TaskStatus> lastNonRunningStatus = new AtomicReference<>();

    final TaskRunnerListener listener = new TaskRunnerListener() {

        @Override
        public String getListenerId() {
            return "testStopNonRestorableTask";
        }

        @Override
        public void locationChanged(String taskId, TaskLocation newLocation) {
            // location updates are irrelevant for this test
        }

        @Override
        public void statusChanged(String taskId, TaskStatus status) {
            if (status.getStatusCode() != TaskState.RUNNING) {
                lastNonRunningStatus.set(status);
            } else {
                runningSeen.countDown();
            }
        }
    };
    runner.registerListener(listener, Execs.directExecutor());

    // The fourth argument (10000) is annotated as "10 sec" in the original test.
    final NoopTask task = new NoopTask(null, null, "datasource", 10000, 0, null, null, null);
    runner.run(task);

    final boolean started = runningSeen.await(1, TimeUnit.SECONDS);
    Assert.assertTrue(started);

    runner.stop();
    Assert.assertEquals(TaskState.FAILED, lastNonRunningStatus.get().getStatusCode());
    Assert.assertEquals("Canceled as task execution process stopped", lastNonRunningStatus.get().getErrorMsg());
}
Also used : AtomicReference(java.util.concurrent.atomic.AtomicReference) CountDownLatch(java.util.concurrent.CountDownLatch) TaskStatus(org.apache.druid.indexer.TaskStatus) NoopTask(org.apache.druid.indexing.common.task.NoopTask) TaskLocation(org.apache.druid.indexer.TaskLocation) Test(org.junit.Test)

Example 5 with TaskLocation

use of org.apache.druid.indexer.TaskLocation in project druid by druid-io.

the class TaskRunnerUtilsTest method testMakeTaskLocationURL.

/**
 * Checks that {@code TaskRunnerUtils.makeTaskLocationURL} renders the expected https URL for a location with both
 * ports set, with the space and ampersand in the format argument percent-encoded.
 */
@Test
public void testMakeTaskLocationURL() {
    final TaskLocation location = new TaskLocation("1.2.3.4", 8090, 8290);
    final String rendered = TaskRunnerUtils.makeTaskLocationURL(location, "/druid/worker/v1/task/%s/log", "foo bar&").toString();
    Assert.assertEquals("https://1.2.3.4:8290/druid/worker/v1/task/foo%20bar%26/log", rendered);
}
Also used : URL(java.net.URL) TaskLocation(org.apache.druid.indexer.TaskLocation) Test(org.junit.Test)

Aggregations

TaskLocation (org.apache.druid.indexer.TaskLocation)33 Test (org.junit.Test)25 Task (org.apache.druid.indexing.common.task.Task)23 TaskRunnerListener (org.apache.druid.indexing.overlord.TaskRunnerListener)22 ArrayList (java.util.ArrayList)21 Collection (java.util.Collection)20 Executor (java.util.concurrent.Executor)20 RealtimeIndexTask (org.apache.druid.indexing.common.task.RealtimeIndexTask)20 ImmutableMap (com.google.common.collect.ImmutableMap)19 Map (java.util.Map)19 HashMap (java.util.HashMap)18 TreeMap (java.util.TreeMap)18 TaskStatus (org.apache.druid.indexer.TaskStatus)10 KafkaDataSourceMetadata (org.apache.druid.indexing.kafka.KafkaDataSourceMetadata)10 KafkaIndexTask (org.apache.druid.indexing.kafka.KafkaIndexTask)10 KinesisDataSourceMetadata (org.apache.druid.indexing.kinesis.KinesisDataSourceMetadata)10 KinesisIndexTask (org.apache.druid.indexing.kinesis.KinesisIndexTask)10 DateTime (org.joda.time.DateTime)10 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)9 TaskReportData (org.apache.druid.indexing.seekablestream.supervisor.TaskReportData)6